Diffstat (limited to 'kernel/drivers/dma')
77 files changed, 13956 insertions, 6156 deletions
diff --git a/kernel/drivers/dma/Kconfig b/kernel/drivers/dma/Kconfig index bda2cb06d..e6cd1a320 100644 --- a/kernel/drivers/dma/Kconfig +++ b/kernel/drivers/dma/Kconfig @@ -33,27 +33,29 @@ if DMADEVICES comment "DMA Devices" -config INTEL_MIC_X100_DMA - tristate "Intel MIC X100 DMA Driver" - depends on 64BIT && X86 && INTEL_MIC_BUS - select DMA_ENGINE - help - This enables DMA support for the Intel Many Integrated Core - (MIC) family of PCIe form factor coprocessor X100 devices that - run a 64 bit Linux OS. This driver will be used by both MIC - host and card drivers. - - If you are building host kernel with a MIC device or a card - kernel for a MIC device, then say M (recommended) or Y, else - say N. If unsure say N. +#core +config ASYNC_TX_ENABLE_CHANNEL_SWITCH + bool - More information about the Intel MIC family as well as the Linux - OS and tools for MIC to use with this driver are available from - <http://software.intel.com/en-us/mic-developer>. +config ARCH_HAS_ASYNC_TX_FIND_CHANNEL + bool -config ASYNC_TX_ENABLE_CHANNEL_SWITCH +config DMA_ENGINE bool +config DMA_VIRTUAL_CHANNELS + tristate + +config DMA_ACPI + def_bool y + depends on ACPI + +config DMA_OF + def_bool y + depends on OF + select DMA_ENGINE + +#devices config AMBA_PL08X bool "ARM PrimeCell PL080 or PL081 support" depends on ARM_AMBA @@ -63,29 +65,15 @@ config AMBA_PL08X Platform has a PL08x DMAC device which can provide DMA engine support -config INTEL_IOATDMA - tristate "Intel I/OAT DMA support" - depends on PCI && X86 +config AMCC_PPC440SPE_ADMA + tristate "AMCC PPC440SPe ADMA support" + depends on 440SPe || 440SP select DMA_ENGINE select DMA_ENGINE_RAID - select DCA - help - Enable support for the Intel(R) I/OAT DMA engine present - in recent Intel Xeon chipsets. - - Say Y here if you have such a chipset. - - If unsure, say N. - -config INTEL_IOP_ADMA - tristate "Intel IOP ADMA support" - depends on ARCH_IOP32X || ARCH_IOP33X || ARCH_IOP13XX - select DMA_ENGINE + select ARCH_HAS_ASYNC_TX_FIND_CHANNEL select ASYNC_TX_ENABLE_CHANNEL_SWITCH help - Enable support for the Intel(R) IOP Series RAID engines. - -source "drivers/dma/dw/Kconfig" + Enable support for the AMCC PPC440SPe RAID engines. config AT_HDMAC tristate "Atmel AHB DMA support" @@ -101,6 +89,89 @@ config AT_XDMAC help Support the Atmel XDMA controller. +config AXI_DMAC + tristate "Analog Devices AXI-DMAC DMA support" + depends on MICROBLAZE || NIOS2 || ARCH_ZYNQ || ARCH_SOCFPGA || COMPILE_TEST + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Enable support for the Analog Devices AXI-DMAC peripheral. This DMA + controller is often used in Analog Device's reference designs for FPGA + platforms. + +config COH901318 + bool "ST-Ericsson COH901318 DMA support" + select DMA_ENGINE + depends on ARCH_U300 + help + Enable support for ST-Ericsson COH 901 318 DMA. + +config DMA_BCM2835 + tristate "BCM2835 DMA engine support" + depends on ARCH_BCM2835 + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + +config DMA_JZ4740 + tristate "JZ4740 DMA support" + depends on MACH_JZ4740 + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + +config DMA_JZ4780 + tristate "JZ4780 DMA support" + depends on MACH_JZ4780 + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + This selects support for the DMA controller in Ingenic JZ4780 SoCs. + If you have a board based on such a SoC and wish to use DMA for + devices which can use the DMA controller, say Y or M here. 
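Several of the entries above and below select the new DMA_VIRTUAL_CHANNELS symbol, i.e. the shared virt-dma helper built as virt-dma.o in the Makefile further down. As a rough, hypothetical sketch of what that pattern looks like in a driver (the foo_* names are placeholders and not part of this patch; struct virt_dma_chan, struct virt_dma_desc and vchan_init() come from drivers/dma/virt-dma.h), compare the real use later in this diff where amba-pl08x.c sets chan->vc.desc_free and calls vchan_init():

	#include <linux/slab.h>
	#include <linux/dmaengine.h>
	#include "virt-dma.h"

	struct foo_desc {
		struct virt_dma_desc vd;	/* embed the virt-dma descriptor */
		/* hardware LLI, lengths, ... */
	};

	struct foo_chan {
		struct virt_dma_chan vc;	/* embeds struct dma_chan */
	};

	static void foo_desc_free(struct virt_dma_desc *vd)
	{
		/* invoked by the virt-dma core to release a finished descriptor */
		kfree(container_of(vd, struct foo_desc, vd));
	}

	static void foo_chan_init(struct foo_chan *fc, struct dma_device *ddev)
	{
		fc->vc.desc_free = foo_desc_free;
		vchan_init(&fc->vc, ddev);	/* adds fc->vc.chan to ddev->channels */
	}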
+ +config DMA_OMAP + tristate "OMAP DMA support" + depends on ARCH_OMAP + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + select TI_DMA_CROSSBAR if SOC_DRA7XX + +config DMA_SA11X0 + tristate "SA-11x0 DMA support" + depends on ARCH_SA1100 + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Support the DMA engine found on Intel StrongARM SA-1100 and + SA-1110 SoCs. This DMA engine can only be used with on-chip + devices. + +config DMA_SUN4I + tristate "Allwinner A10 DMA SoCs support" + depends on MACH_SUN4I || MACH_SUN5I || MACH_SUN7I + default (MACH_SUN4I || MACH_SUN5I || MACH_SUN7I) + select DMA_ENGINE + select DMA_OF + select DMA_VIRTUAL_CHANNELS + help + Enable support for the DMA controller present in the sun4i, + sun5i and sun7i Allwinner ARM SoCs. + +config DMA_SUN6I + tristate "Allwinner A31 SoCs DMA support" + depends on MACH_SUN6I || MACH_SUN8I || COMPILE_TEST + depends on RESET_CONTROLLER + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Support for the DMA engine first found in Allwinner A31 SoCs. + +config EP93XX_DMA + bool "Cirrus Logic EP93xx DMA support" + depends on ARCH_EP93XX + select DMA_ENGINE + help + Enable support for the Cirrus Logic EP93xx M2P/M2M DMA controller. + config FSL_DMA tristate "Freescale Elo series DMA support" depends on FSL_SOC @@ -112,6 +183,16 @@ config FSL_DMA EloPlus is on mpc85xx and mpc86xx and Pxxx parts, and the Elo3 is on some Txxx and Bxxx parts. +config FSL_EDMA + tristate "Freescale eDMA engine support" + depends on OF + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Support the Freescale eDMA engine with programmable channel + multiplexing capability for DMA request sources(slot). + This module can be found on Freescale Vybrid and LS-1 SoCs. + config FSL_RAID tristate "Freescale RAID engine Support" depends on FSL_SOC && !ASYNC_TX_ENABLE_CHANNEL_SWITCH @@ -123,139 +204,175 @@ config FSL_RAID the capability to offload memcpy, xor and pq computation for raid5/6. -source "drivers/dma/hsu/Kconfig" - -config MPC512X_DMA - tristate "Freescale MPC512x built-in DMA engine support" - depends on PPC_MPC512x || PPC_MPC831x +config IMG_MDC_DMA + tristate "IMG MDC support" + depends on MIPS || COMPILE_TEST + depends on MFD_SYSCON select DMA_ENGINE - ---help--- - Enable support for the Freescale MPC512x built-in DMA engine. - -source "drivers/dma/bestcomm/Kconfig" + select DMA_VIRTUAL_CHANNELS + help + Enable support for the IMG multi-threaded DMA controller (MDC). -config MV_XOR - bool "Marvell XOR engine support" - depends on PLAT_ORION +config IMX_DMA + tristate "i.MX DMA support" + depends on ARCH_MXC select DMA_ENGINE - select DMA_ENGINE_RAID - select ASYNC_TX_ENABLE_CHANNEL_SWITCH - ---help--- - Enable support for the Marvell XOR engine. + help + Support the i.MX DMA engine. This engine is integrated into + Freescale i.MX1/21/27 chips. -config MX3_IPU - bool "MX3x Image Processing Unit support" +config IMX_SDMA + tristate "i.MX SDMA support" depends on ARCH_MXC select DMA_ENGINE - default y help - If you plan to use the Image Processing unit in the i.MX3x, say - Y here. If unsure, select Y. + Support the i.MX SDMA engine. This engine is integrated into + Freescale i.MX25/31/35/51/53/6 chips. -config MX3_IPU_IRQS - int "Number of dynamically mapped interrupts for IPU" - depends on MX3_IPU - range 2 137 - default 4 +config INTEL_IDMA64 + tristate "Intel integrated DMA 64-bit support" + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS help - Out of 137 interrupt sources on i.MX31 IPU only very few are used. 
- To avoid bloating the irq_desc[] array we allocate a sufficient - number of IRQ slots and map them dynamically to specific sources. + Enable DMA support for Intel Low Power Subsystem such as found on + Intel Skylake PCH. -config TXX9_DMAC - tristate "Toshiba TXx9 SoC DMA support" - depends on MACH_TX49XX || MACH_TX39XX +config INTEL_IOATDMA + tristate "Intel I/OAT DMA support" + depends on PCI && X86_64 select DMA_ENGINE + select DMA_ENGINE_RAID + select DCA help - Support the TXx9 SoC internal DMA controller. This can be - integrated in chips such as the Toshiba TX4927/38/39. + Enable support for the Intel(R) I/OAT DMA engine present + in recent Intel Xeon chipsets. -config TEGRA20_APB_DMA - bool "NVIDIA Tegra20 APB DMA support" - depends on ARCH_TEGRA + Say Y here if you have such a chipset. + + If unsure, say N. + +config INTEL_IOP_ADMA + tristate "Intel IOP ADMA support" + depends on ARCH_IOP32X || ARCH_IOP33X || ARCH_IOP13XX select DMA_ENGINE + select ASYNC_TX_ENABLE_CHANNEL_SWITCH help - Support for the NVIDIA Tegra20 APB DMA controller driver. The - DMA controller is having multiple DMA channel which can be - configured for different peripherals like audio, UART, SPI, - I2C etc which is in APB bus. - This DMA controller transfers data from memory to peripheral fifo - or vice versa. It does not support memory to memory data transfer. + Enable support for the Intel(R) IOP Series RAID engines. -config S3C24XX_DMAC - tristate "Samsung S3C24XX DMA support" - depends on ARCH_S3C24XX +config INTEL_MIC_X100_DMA + tristate "Intel MIC X100 DMA Driver" + depends on 64BIT && X86 && INTEL_MIC_BUS select DMA_ENGINE - select DMA_VIRTUAL_CHANNELS help - Support for the Samsung S3C24XX DMA controller driver. The - DMA controller is having multiple DMA channels which can be - configured for different peripherals like audio, UART, SPI. - The DMA controller can transfer data from memory to peripheral, - periphal to memory, periphal to periphal and memory to memory. + This enables DMA support for the Intel Many Integrated Core + (MIC) family of PCIe form factor coprocessor X100 devices that + run a 64 bit Linux OS. This driver will be used by both MIC + host and card drivers. -source "drivers/dma/sh/Kconfig" + If you are building host kernel with a MIC device or a card + kernel for a MIC device, then say M (recommended) or Y, else + say N. If unsure say N. -config COH901318 - bool "ST-Ericsson COH901318 DMA support" + More information about the Intel MIC family as well as the Linux + OS and tools for MIC to use with this driver are available from + <http://software.intel.com/en-us/mic-developer>. + +config K3_DMA + tristate "Hisilicon K3 DMA support" + depends on ARCH_HI3xxx select DMA_ENGINE - depends on ARCH_U300 + select DMA_VIRTUAL_CHANNELS help - Enable support for ST-Ericsson COH 901 318 DMA. + Support the DMA engine for Hisilicon K3 platform + devices. -config STE_DMA40 - bool "ST-Ericsson DMA40 support" - depends on ARCH_U8500 +config LPC18XX_DMAMUX + bool "NXP LPC18xx/43xx DMA MUX for PL080" + depends on ARCH_LPC18XX || COMPILE_TEST + depends on OF && AMBA_PL08X + select MFD_SYSCON + help + Enable support for DMA on NXP LPC18xx/43xx platforms + with PL080 and multiplexed DMA request lines. + +config MMP_PDMA + bool "MMP PDMA support" + depends on (ARCH_MMP || ARCH_PXA) select DMA_ENGINE help - Support for ST-Ericsson DMA40 controller + Support the MMP PDMA engine for PXA and MMP platform. 
-config AMCC_PPC440SPE_ADMA - tristate "AMCC PPC440SPe ADMA support" - depends on 440SPe || 440SP +config MMP_TDMA + bool "MMP Two-Channel DMA support" + depends on ARCH_MMP select DMA_ENGINE - select DMA_ENGINE_RAID - select ARCH_HAS_ASYNC_TX_FIND_CHANNEL - select ASYNC_TX_ENABLE_CHANNEL_SWITCH + select MMP_SRAM help - Enable support for the AMCC PPC440SPe RAID engines. + Support the MMP Two-Channel DMA engine. + This engine used for MMP Audio DMA and pxa910 SQU. + It needs sram driver under mach-mmp. -config TIMB_DMA - tristate "Timberdale FPGA DMA support" - depends on MFD_TIMBERDALE +config MOXART_DMA + tristate "MOXART DMA support" + depends on ARCH_MOXART select DMA_ENGINE + select DMA_OF + select DMA_VIRTUAL_CHANNELS help - Enable support for the Timberdale FPGA DMA engine. + Enable support for the MOXA ART SoC DMA controller. + + Say Y here if you enabled MMP ADMA, otherwise say N. -config SIRF_DMA - tristate "CSR SiRFprimaII/SiRFmarco DMA support" - depends on ARCH_SIRF +config MPC512X_DMA + tristate "Freescale MPC512x built-in DMA engine support" + depends on PPC_MPC512x || PPC_MPC831x + select DMA_ENGINE + ---help--- + Enable support for the Freescale MPC512x built-in DMA engine. + +config MV_XOR + bool "Marvell XOR engine support" + depends on PLAT_ORION + select DMA_ENGINE + select DMA_ENGINE_RAID + select ASYNC_TX_ENABLE_CHANNEL_SWITCH + ---help--- + Enable support for the Marvell XOR engine. + +config MXS_DMA + bool "MXS DMA support" + depends on SOC_IMX23 || SOC_IMX28 || SOC_IMX6Q + select STMP_DEVICE select DMA_ENGINE help - Enable support for the CSR SiRFprimaII DMA engine. + Support the MXS DMA engine. This engine including APBH-DMA + and APBX-DMA is integrated into Freescale i.MX23/28/MX6Q/MX6DL chips. -config TI_EDMA - bool "TI EDMA support" - depends on ARCH_DAVINCI || ARCH_OMAP || ARCH_KEYSTONE +config MX3_IPU + bool "MX3x Image Processing Unit support" + depends on ARCH_MXC select DMA_ENGINE - select DMA_VIRTUAL_CHANNELS - select TI_PRIV_EDMA - default n + default y help - Enable support for the TI EDMA controller. This DMA - engine is found on TI DaVinci and AM33xx parts. + If you plan to use the Image Processing unit in the i.MX3x, say + Y here. If unsure, select Y. -config ARCH_HAS_ASYNC_TX_FIND_CHANNEL - bool +config MX3_IPU_IRQS + int "Number of dynamically mapped interrupts for IPU" + depends on MX3_IPU + range 2 137 + default 4 + help + Out of 137 interrupt sources on i.MX31 IPU only very few are used. + To avoid bloating the irq_desc[] array we allocate a sufficient + number of IRQ slots and map them dynamically to specific sources. -config PL330_DMA - tristate "DMA API Driver for PL330" +config NBPFAXI_DMA + tristate "Renesas Type-AXI NBPF DMA support" select DMA_ENGINE - depends on ARM_AMBA + depends on ARM || COMPILE_TEST help - Select if your platform has one or more PL330 DMACs. - You need to provide platform specific settings via - platform_data for a dma-pl330 device. + Support for "Type-AXI" NBPF DMA IPs from Renesas config PCH_DMA tristate "Intel EG20T PCH / LAPIS Semicon IOH(ML7213/ML7223/ML7831) DMA" @@ -271,71 +388,87 @@ config PCH_DMA ML7213/ML7223/ML7831 is companion chip for Intel Atom E6xx series. ML7213/ML7223/ML7831 is completely compatible for Intel EG20T PCH. -config IMX_SDMA - tristate "i.MX SDMA support" - depends on ARCH_MXC +config PL330_DMA + tristate "DMA API Driver for PL330" select DMA_ENGINE + depends on ARM_AMBA help - Support the i.MX SDMA engine. This engine is integrated into - Freescale i.MX25/31/35/51/53/6 chips. 
+ Select if your platform has one or more PL330 DMACs. + You need to provide platform specific settings via + platform_data for a dma-pl330 device. -config IMX_DMA - tristate "i.MX DMA support" - depends on ARCH_MXC +config PXA_DMA + bool "PXA DMA support" + depends on (ARCH_MMP || ARCH_PXA) select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS help - Support the i.MX DMA engine. This engine is integrated into - Freescale i.MX1/21/27 chips. + Support the DMA engine for PXA. It is also compatible with MMP PDMA + platform. The internal DMA IP of all PXA variants is supported, with + 16 to 32 channels for peripheral to memory or memory to memory + transfers. -config MXS_DMA - bool "MXS DMA support" - depends on SOC_IMX23 || SOC_IMX28 || SOC_IMX6Q - select STMP_DEVICE +config QCOM_BAM_DMA + tristate "QCOM BAM DMA support" + depends on ARCH_QCOM || (COMPILE_TEST && OF && ARM) + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + ---help--- + Enable support for the QCOM BAM DMA controller. This controller + provides DMA capabilities for a variety of on-chip devices. + +config SIRF_DMA + tristate "CSR SiRFprimaII/SiRFmarco DMA support" + depends on ARCH_SIRF select DMA_ENGINE help - Support the MXS DMA engine. This engine including APBH-DMA - and APBX-DMA is integrated into Freescale i.MX23/28/MX6Q/MX6DL chips. + Enable support for the CSR SiRFprimaII DMA engine. -config EP93XX_DMA - bool "Cirrus Logic EP93xx DMA support" - depends on ARCH_EP93XX +config STE_DMA40 + bool "ST-Ericsson DMA40 support" + depends on ARCH_U8500 select DMA_ENGINE help - Enable support for the Cirrus Logic EP93xx M2P/M2M DMA controller. + Support for ST-Ericsson DMA40 controller -config DMA_SA11X0 - tristate "SA-11x0 DMA support" - depends on ARCH_SA1100 +config S3C24XX_DMAC + tristate "Samsung S3C24XX DMA support" + depends on ARCH_S3C24XX select DMA_ENGINE select DMA_VIRTUAL_CHANNELS help - Support the DMA engine found on Intel StrongARM SA-1100 and - SA-1110 SoCs. This DMA engine can only be used with on-chip - devices. + Support for the Samsung S3C24XX DMA controller driver. The + DMA controller is having multiple DMA channels which can be + configured for different peripherals like audio, UART, SPI. + The DMA controller can transfer data from memory to peripheral, + periphal to memory, periphal to periphal and memory to memory. -config MMP_TDMA - bool "MMP Two-Channel DMA support" - depends on ARCH_MMP +config TXX9_DMAC + tristate "Toshiba TXx9 SoC DMA support" + depends on MACH_TX49XX || MACH_TX39XX select DMA_ENGINE - select MMP_SRAM help - Support the MMP Two-Channel DMA engine. - This engine used for MMP Audio DMA and pxa910 SQU. - It needs sram driver under mach-mmp. - - Say Y here if you enabled MMP ADMA, otherwise say N. + Support the TXx9 SoC internal DMA controller. This can be + integrated in chips such as the Toshiba TX4927/38/39. -config DMA_OMAP - tristate "OMAP DMA support" - depends on ARCH_OMAP +config TEGRA20_APB_DMA + bool "NVIDIA Tegra20 APB DMA support" + depends on ARCH_TEGRA select DMA_ENGINE - select DMA_VIRTUAL_CHANNELS + help + Support for the NVIDIA Tegra20 APB DMA controller driver. The + DMA controller is having multiple DMA channel which can be + configured for different peripherals like audio, UART, SPI, + I2C etc which is in APB bus. + This DMA controller transfers data from memory to peripheral fifo + or vice versa. It does not support memory to memory data transfer. 
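The help texts above (S3C24XX, Tegra20 APB, PXA, ...) all describe slave DMA for on-chip peripherals such as UART, SPI or audio. For orientation, this is a minimal sketch of how a peripheral driver typically consumes such a controller through the generic dmaengine slave API; the uart_* name, the "tx" channel name and the FIFO address are made up for illustration, and a real driver would request the channel once at probe time:

	#include <linux/dmaengine.h>
	#include <linux/dma-mapping.h>

	static int uart_start_tx_dma(struct device *dev, dma_addr_t buf, size_t len)
	{
		struct dma_slave_config cfg = {
			.direction	= DMA_MEM_TO_DEV,
			.dst_addr	= 0x70006040,		/* made-up FIFO address */
			.dst_addr_width	= DMA_SLAVE_BUSWIDTH_4_BYTES,
			.dst_maxburst	= 4,
		};
		struct dma_async_tx_descriptor *desc;
		struct dma_chan *chan;

		chan = dma_request_slave_channel(dev, "tx");	/* resolved via DT/ACPI */
		if (!chan)
			return -ENODEV;

		dmaengine_slave_config(chan, &cfg);
		desc = dmaengine_prep_slave_single(chan, buf, len, DMA_MEM_TO_DEV,
						   DMA_PREP_INTERRUPT);
		if (!desc) {
			dma_release_channel(chan);
			return -ENOMEM;
		}
		dmaengine_submit(desc);
		dma_async_issue_pending(chan);	/* actually kicks off the transfer */
		return 0;
	}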
-config DMA_BCM2835 - tristate "BCM2835 DMA engine support" - depends on ARCH_BCM2835 +config TIMB_DMA + tristate "Timberdale FPGA DMA support" + depends on MFD_TIMBERDALE select DMA_ENGINE - select DMA_VIRTUAL_CHANNELS + help + Enable support for the Timberdale FPGA DMA engine. config TI_CPPI41 tristate "AM33xx CPPI41 DMA support" @@ -345,56 +478,28 @@ config TI_CPPI41 The Communications Port Programming Interface (CPPI) 4.1 DMA engine is currently used by the USB driver on AM335x platforms. -config MMP_PDMA - bool "MMP PDMA support" - depends on (ARCH_MMP || ARCH_PXA) - select DMA_ENGINE - help - Support the MMP PDMA engine for PXA and MMP platform. - -config DMA_JZ4740 - tristate "JZ4740 DMA support" - depends on MACH_JZ4740 - select DMA_ENGINE - select DMA_VIRTUAL_CHANNELS - -config DMA_JZ4780 - tristate "JZ4780 DMA support" - depends on MACH_JZ4780 - select DMA_ENGINE - select DMA_VIRTUAL_CHANNELS - help - This selects support for the DMA controller in Ingenic JZ4780 SoCs. - If you have a board based on such a SoC and wish to use DMA for - devices which can use the DMA controller, say Y or M here. +config TI_DMA_CROSSBAR + bool -config K3_DMA - tristate "Hisilicon K3 DMA support" - depends on ARCH_HI3xxx +config TI_EDMA + bool "TI EDMA support" + depends on ARCH_DAVINCI || ARCH_OMAP || ARCH_KEYSTONE select DMA_ENGINE select DMA_VIRTUAL_CHANNELS + select TI_DMA_CROSSBAR if ARCH_OMAP + default n help - Support the DMA engine for Hisilicon K3 platform - devices. + Enable support for the TI EDMA controller. This DMA + engine is found on TI DaVinci and AM33xx parts. -config MOXART_DMA - tristate "MOXART DMA support" - depends on ARCH_MOXART - select DMA_ENGINE - select DMA_OF - select DMA_VIRTUAL_CHANNELS - help - Enable support for the MOXA ART SoC DMA controller. - -config FSL_EDMA - tristate "Freescale eDMA engine support" - depends on OF +config XGENE_DMA + tristate "APM X-Gene DMA support" + depends on ARCH_XGENE || COMPILE_TEST select DMA_ENGINE - select DMA_VIRTUAL_CHANNELS + select DMA_ENGINE_RAID + select ASYNC_TX_ENABLE_CHANNEL_SWITCH help - Support the Freescale eDMA engine with programmable channel - multiplexing capability for DMA request sources(slot). - This module can be found on Freescale Vybrid and LS-1 SoCs. + Enable support for the APM X-Gene SoC DMA engine. config XILINX_VDMA tristate "Xilinx AXI VDMA Engine" @@ -410,55 +515,25 @@ config XILINX_VDMA channels, Memory Mapped to Stream (MM2S) and Stream to Memory Mapped (S2MM) for the data transfers. -config DMA_SUN6I - tristate "Allwinner A31 SoCs DMA support" - depends on MACH_SUN6I || MACH_SUN8I || COMPILE_TEST - depends on RESET_CONTROLLER - select DMA_ENGINE - select DMA_VIRTUAL_CHANNELS - help - Support for the DMA engine first found in Allwinner A31 SoCs. - -config NBPFAXI_DMA - tristate "Renesas Type-AXI NBPF DMA support" - select DMA_ENGINE - depends on ARM || COMPILE_TEST - help - Support for "Type-AXI" NBPF DMA IPs from Renesas - -config IMG_MDC_DMA - tristate "IMG MDC support" - depends on MIPS || COMPILE_TEST - depends on MFD_SYSCON +config ZX_DMA + tristate "ZTE ZX296702 DMA support" + depends on ARCH_ZX select DMA_ENGINE select DMA_VIRTUAL_CHANNELS help - Enable support for the IMG multi-threaded DMA controller (MDC). + Support the DMA engine for ZTE ZX296702 platform devices. 
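DMA_OF, introduced above as def_bool y, builds of-dma.o on every OF-enabled kernel; providers plug into device-tree channel lookup by registering a translation callback, exactly as amba-pl08x.c does further down in this diff with of_dma_controller_register()/pl08x_of_xlate(). A minimal sketch under assumed names (foo_*, a one-cell dma specifier carrying the request line; not part of this patch):

	#include <linux/dmaengine.h>
	#include <linux/of_dma.h>
	#include <linux/platform_device.h>

	struct foo_dmadev {			/* hypothetical driver state */
		struct dma_device ddev;
	};

	static struct dma_chan *foo_of_xlate(struct of_phandle_args *spec,
					     struct of_dma *ofdma)
	{
		struct foo_dmadev *fd = ofdma->of_dma_data;

		if (spec->args_count != 1)	/* expect one cell: the request line */
			return NULL;
		/* a real driver would record spec->args[0] in the channel here */
		return dma_get_any_slave_channel(&fd->ddev);
	}

	static int foo_register_of(struct platform_device *pdev, struct foo_dmadev *fd)
	{
		/* called after dma_async_device_register(&fd->ddev) */
		return of_dma_controller_register(pdev->dev.of_node,
						  foo_of_xlate, fd);
	}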
-config XGENE_DMA - tristate "APM X-Gene DMA support" - depends on ARCH_XGENE || COMPILE_TEST - select DMA_ENGINE - select DMA_ENGINE_RAID - select ASYNC_TX_ENABLE_CHANNEL_SWITCH - help - Enable support for the APM X-Gene SoC DMA engine. -config DMA_ENGINE - bool +# driver files +source "drivers/dma/bestcomm/Kconfig" -config DMA_VIRTUAL_CHANNELS - tristate +source "drivers/dma/dw/Kconfig" -config DMA_ACPI - def_bool y - depends on ACPI +source "drivers/dma/hsu/Kconfig" -config DMA_OF - def_bool y - depends on OF - select DMA_ENGINE +source "drivers/dma/sh/Kconfig" +# clients comment "DMA Clients" depends on DMA_ENGINE @@ -483,13 +558,4 @@ config DMATEST config DMA_ENGINE_RAID bool -config QCOM_BAM_DMA - tristate "QCOM BAM DMA support" - depends on ARCH_QCOM || (COMPILE_TEST && OF && ARM) - select DMA_ENGINE - select DMA_VIRTUAL_CHANNELS - ---help--- - Enable support for the QCOM BAM DMA controller. This controller - provides DMA capabilities for a variety of on-chip devices. - endif diff --git a/kernel/drivers/dma/Makefile b/kernel/drivers/dma/Makefile index 69f77d5ba..ef9c099bd 100644 --- a/kernel/drivers/dma/Makefile +++ b/kernel/drivers/dma/Makefile @@ -1,56 +1,69 @@ +#dmaengine debug flags subdir-ccflags-$(CONFIG_DMADEVICES_DEBUG) := -DDEBUG subdir-ccflags-$(CONFIG_DMADEVICES_VDEBUG) += -DVERBOSE_DEBUG +#core obj-$(CONFIG_DMA_ENGINE) += dmaengine.o obj-$(CONFIG_DMA_VIRTUAL_CHANNELS) += virt-dma.o obj-$(CONFIG_DMA_ACPI) += acpi-dma.o obj-$(CONFIG_DMA_OF) += of-dma.o +#dmatest obj-$(CONFIG_DMATEST) += dmatest.o -obj-$(CONFIG_INTEL_IOATDMA) += ioat/ -obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o + +#devices +obj-$(CONFIG_AMBA_PL08X) += amba-pl08x.o +obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += ppc4xx/ +obj-$(CONFIG_AT_HDMAC) += at_hdmac.o +obj-$(CONFIG_AT_XDMAC) += at_xdmac.o +obj-$(CONFIG_AXI_DMAC) += dma-axi-dmac.o +obj-$(CONFIG_COH901318) += coh901318.o coh901318_lli.o +obj-$(CONFIG_DMA_BCM2835) += bcm2835-dma.o +obj-$(CONFIG_DMA_JZ4740) += dma-jz4740.o +obj-$(CONFIG_DMA_JZ4780) += dma-jz4780.o +obj-$(CONFIG_DMA_OMAP) += omap-dma.o +obj-$(CONFIG_DMA_SA11X0) += sa11x0-dma.o +obj-$(CONFIG_DMA_SUN4I) += sun4i-dma.o +obj-$(CONFIG_DMA_SUN6I) += sun6i-dma.o +obj-$(CONFIG_DW_DMAC_CORE) += dw/ +obj-$(CONFIG_EP93XX_DMA) += ep93xx_dma.o obj-$(CONFIG_FSL_DMA) += fsldma.o +obj-$(CONFIG_FSL_EDMA) += fsl-edma.o +obj-$(CONFIG_FSL_RAID) += fsl_raid.o obj-$(CONFIG_HSU_DMA) += hsu/ +obj-$(CONFIG_IMG_MDC_DMA) += img-mdc-dma.o +obj-$(CONFIG_IMX_DMA) += imx-dma.o +obj-$(CONFIG_IMX_SDMA) += imx-sdma.o +obj-$(CONFIG_INTEL_IDMA64) += idma64.o +obj-$(CONFIG_INTEL_IOATDMA) += ioat/ +obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o +obj-$(CONFIG_INTEL_MIC_X100_DMA) += mic_x100_dma.o +obj-$(CONFIG_K3_DMA) += k3dma.o +obj-$(CONFIG_LPC18XX_DMAMUX) += lpc18xx-dmamux.o +obj-$(CONFIG_MMP_PDMA) += mmp_pdma.o +obj-$(CONFIG_MMP_TDMA) += mmp_tdma.o +obj-$(CONFIG_MOXART_DMA) += moxart-dma.o obj-$(CONFIG_MPC512X_DMA) += mpc512x_dma.o -obj-$(CONFIG_PPC_BESTCOMM) += bestcomm/ obj-$(CONFIG_MV_XOR) += mv_xor.o -obj-$(CONFIG_DW_DMAC_CORE) += dw/ -obj-$(CONFIG_AT_HDMAC) += at_hdmac.o -obj-$(CONFIG_AT_XDMAC) += at_xdmac.o +obj-$(CONFIG_MXS_DMA) += mxs-dma.o obj-$(CONFIG_MX3_IPU) += ipu/ -obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o +obj-$(CONFIG_NBPFAXI_DMA) += nbpfaxi.o +obj-$(CONFIG_PCH_DMA) += pch_dma.o +obj-$(CONFIG_PL330_DMA) += pl330.o +obj-$(CONFIG_PPC_BESTCOMM) += bestcomm/ +obj-$(CONFIG_PXA_DMA) += pxa_dma.o +obj-$(CONFIG_QCOM_BAM_DMA) += qcom_bam_dma.o obj-$(CONFIG_RENESAS_DMA) += sh/ -obj-$(CONFIG_COH901318) += coh901318.o coh901318_lli.o 
-obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += ppc4xx/ -obj-$(CONFIG_IMX_SDMA) += imx-sdma.o -obj-$(CONFIG_IMX_DMA) += imx-dma.o -obj-$(CONFIG_MXS_DMA) += mxs-dma.o -obj-$(CONFIG_TIMB_DMA) += timb_dma.o obj-$(CONFIG_SIRF_DMA) += sirf-dma.o -obj-$(CONFIG_TI_EDMA) += edma.o obj-$(CONFIG_STE_DMA40) += ste_dma40.o ste_dma40_ll.o -obj-$(CONFIG_TEGRA20_APB_DMA) += tegra20-apb-dma.o obj-$(CONFIG_S3C24XX_DMAC) += s3c24xx-dma.o -obj-$(CONFIG_PL330_DMA) += pl330.o -obj-$(CONFIG_PCH_DMA) += pch_dma.o -obj-$(CONFIG_AMBA_PL08X) += amba-pl08x.o -obj-$(CONFIG_EP93XX_DMA) += ep93xx_dma.o -obj-$(CONFIG_DMA_SA11X0) += sa11x0-dma.o -obj-$(CONFIG_MMP_TDMA) += mmp_tdma.o -obj-$(CONFIG_DMA_OMAP) += omap-dma.o -obj-$(CONFIG_DMA_BCM2835) += bcm2835-dma.o -obj-$(CONFIG_MMP_PDMA) += mmp_pdma.o -obj-$(CONFIG_DMA_JZ4740) += dma-jz4740.o -obj-$(CONFIG_DMA_JZ4780) += dma-jz4780.o +obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o +obj-$(CONFIG_TEGRA20_APB_DMA) += tegra20-apb-dma.o +obj-$(CONFIG_TIMB_DMA) += timb_dma.o obj-$(CONFIG_TI_CPPI41) += cppi41.o -obj-$(CONFIG_K3_DMA) += k3dma.o -obj-$(CONFIG_MOXART_DMA) += moxart-dma.o -obj-$(CONFIG_FSL_RAID) += fsl_raid.o -obj-$(CONFIG_FSL_EDMA) += fsl-edma.o -obj-$(CONFIG_QCOM_BAM_DMA) += qcom_bam_dma.o -obj-y += xilinx/ -obj-$(CONFIG_INTEL_MIC_X100_DMA) += mic_x100_dma.o -obj-$(CONFIG_NBPFAXI_DMA) += nbpfaxi.o -obj-$(CONFIG_DMA_SUN6I) += sun6i-dma.o -obj-$(CONFIG_IMG_MDC_DMA) += img-mdc-dma.o +obj-$(CONFIG_TI_DMA_CROSSBAR) += ti-dma-crossbar.o +obj-$(CONFIG_TI_EDMA) += edma.o obj-$(CONFIG_XGENE_DMA) += xgene-dma.o +obj-$(CONFIG_ZX_DMA) += zx296702_dma.o + +obj-y += xilinx/ diff --git a/kernel/drivers/dma/acpi-dma.c b/kernel/drivers/dma/acpi-dma.c index 5a635646e..16d0daa05 100644 --- a/kernel/drivers/dma/acpi-dma.c +++ b/kernel/drivers/dma/acpi-dma.c @@ -21,6 +21,7 @@ #include <linux/ioport.h> #include <linux/acpi.h> #include <linux/acpi_dma.h> +#include <linux/property.h> static LIST_HEAD(acpi_dma_list); static DEFINE_MUTEX(acpi_dma_lock); @@ -160,10 +161,8 @@ int acpi_dma_controller_register(struct device *dev, return -EINVAL; /* Check if the device was enumerated by ACPI */ - if (!ACPI_HANDLE(dev)) - return -EINVAL; - - if (acpi_bus_get_device(ACPI_HANDLE(dev), &adev)) + adev = ACPI_COMPANION(dev); + if (!adev) return -EINVAL; adma = kzalloc(sizeof(*adma), GFP_KERNEL); @@ -358,10 +357,11 @@ struct dma_chan *acpi_dma_request_slave_chan_by_index(struct device *dev, int found; /* Check if the device was enumerated by ACPI */ - if (!dev || !ACPI_HANDLE(dev)) + if (!dev) return ERR_PTR(-ENODEV); - if (acpi_bus_get_device(ACPI_HANDLE(dev), &adev)) + adev = ACPI_COMPANION(dev); + if (!adev) return ERR_PTR(-ENODEV); memset(&pdata, 0, sizeof(pdata)); @@ -413,21 +413,29 @@ EXPORT_SYMBOL_GPL(acpi_dma_request_slave_chan_by_index); * translate the names "tx" and "rx" here based on the most common case where * the first FixedDMA descriptor is TX and second is RX. * + * If the device has "dma-names" property the FixedDMA descriptor indices + * are retrieved based on those. Otherwise the function falls back using + * hardcoded indices. + * * Return: * Pointer to appropriate dma channel on success or an error pointer. 
*/ struct dma_chan *acpi_dma_request_slave_chan_by_name(struct device *dev, const char *name) { - size_t index; - - if (!strcmp(name, "tx")) - index = 0; - else if (!strcmp(name, "rx")) - index = 1; - else - return ERR_PTR(-ENODEV); + int index; + + index = device_property_match_string(dev, "dma-names", name); + if (index < 0) { + if (!strcmp(name, "tx")) + index = 0; + else if (!strcmp(name, "rx")) + index = 1; + else + return ERR_PTR(-ENODEV); + } + dev_dbg(dev, "found DMA channel \"%s\" at index %d\n", name, index); return acpi_dma_request_slave_chan_by_index(dev, index); } EXPORT_SYMBOL_GPL(acpi_dma_request_slave_chan_by_name); diff --git a/kernel/drivers/dma/amba-pl08x.c b/kernel/drivers/dma/amba-pl08x.c index 49d396ec0..9b42c0588 100644 --- a/kernel/drivers/dma/amba-pl08x.c +++ b/kernel/drivers/dma/amba-pl08x.c @@ -83,6 +83,8 @@ #include <linux/init.h> #include <linux/interrupt.h> #include <linux/module.h> +#include <linux/of.h> +#include <linux/of_dma.h> #include <linux/pm_runtime.h> #include <linux/seq_file.h> #include <linux/slab.h> @@ -474,7 +476,7 @@ static void pl08x_terminate_phy_chan(struct pl08x_driver_data *pl08x, u32 val = readl(ch->reg_config); val &= ~(PL080_CONFIG_ENABLE | PL080_CONFIG_ERR_IRQ_MASK | - PL080_CONFIG_TC_IRQ_MASK); + PL080_CONFIG_TC_IRQ_MASK); writel(val, ch->reg_config); @@ -2030,10 +2032,188 @@ static inline void init_pl08x_debugfs(struct pl08x_driver_data *pl08x) } #endif +#ifdef CONFIG_OF +static struct dma_chan *pl08x_find_chan_id(struct pl08x_driver_data *pl08x, + u32 id) +{ + struct pl08x_dma_chan *chan; + + list_for_each_entry(chan, &pl08x->slave.channels, vc.chan.device_node) { + if (chan->signal == id) + return &chan->vc.chan; + } + + return NULL; +} + +static struct dma_chan *pl08x_of_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct pl08x_driver_data *pl08x = ofdma->of_dma_data; + struct pl08x_channel_data *data; + struct pl08x_dma_chan *chan; + struct dma_chan *dma_chan; + + if (!pl08x) + return NULL; + + if (dma_spec->args_count != 2) + return NULL; + + dma_chan = pl08x_find_chan_id(pl08x, dma_spec->args[0]); + if (dma_chan) + return dma_get_slave_channel(dma_chan); + + chan = devm_kzalloc(pl08x->slave.dev, sizeof(*chan) + sizeof(*data), + GFP_KERNEL); + if (!chan) + return NULL; + + data = (void *)&chan[1]; + data->bus_id = "(none)"; + data->periph_buses = dma_spec->args[1]; + + chan->cd = data; + chan->host = pl08x; + chan->slave = true; + chan->name = data->bus_id; + chan->state = PL08X_CHAN_IDLE; + chan->signal = dma_spec->args[0]; + chan->vc.desc_free = pl08x_desc_free; + + vchan_init(&chan->vc, &pl08x->slave); + + return dma_get_slave_channel(&chan->vc.chan); +} + +static int pl08x_of_probe(struct amba_device *adev, + struct pl08x_driver_data *pl08x, + struct device_node *np) +{ + struct pl08x_platform_data *pd; + u32 cctl_memcpy = 0; + u32 val; + int ret; + + pd = devm_kzalloc(&adev->dev, sizeof(*pd), GFP_KERNEL); + if (!pd) + return -ENOMEM; + + /* Eligible bus masters for fetching LLIs */ + if (of_property_read_bool(np, "lli-bus-interface-ahb1")) + pd->lli_buses |= PL08X_AHB1; + if (of_property_read_bool(np, "lli-bus-interface-ahb2")) + pd->lli_buses |= PL08X_AHB2; + if (!pd->lli_buses) { + dev_info(&adev->dev, "no bus masters for LLIs stated, assume all\n"); + pd->lli_buses |= PL08X_AHB1 | PL08X_AHB2; + } + + /* Eligible bus masters for memory access */ + if (of_property_read_bool(np, "mem-bus-interface-ahb1")) + pd->mem_buses |= PL08X_AHB1; + if (of_property_read_bool(np, "mem-bus-interface-ahb2")) + 
pd->mem_buses |= PL08X_AHB2; + if (!pd->mem_buses) { + dev_info(&adev->dev, "no bus masters for memory stated, assume all\n"); + pd->mem_buses |= PL08X_AHB1 | PL08X_AHB2; + } + + /* Parse the memcpy channel properties */ + ret = of_property_read_u32(np, "memcpy-burst-size", &val); + if (ret) { + dev_info(&adev->dev, "no memcpy burst size specified, using 1 byte\n"); + val = 1; + } + switch (val) { + default: + dev_err(&adev->dev, "illegal burst size for memcpy, set to 1\n"); + /* Fall through */ + case 1: + cctl_memcpy |= PL080_BSIZE_1 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_1 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case 4: + cctl_memcpy |= PL080_BSIZE_4 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_4 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case 8: + cctl_memcpy |= PL080_BSIZE_8 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_8 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case 16: + cctl_memcpy |= PL080_BSIZE_16 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_16 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case 32: + cctl_memcpy |= PL080_BSIZE_32 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_32 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case 64: + cctl_memcpy |= PL080_BSIZE_64 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_64 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case 128: + cctl_memcpy |= PL080_BSIZE_128 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_128 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case 256: + cctl_memcpy |= PL080_BSIZE_256 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_256 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + } + + ret = of_property_read_u32(np, "memcpy-bus-width", &val); + if (ret) { + dev_info(&adev->dev, "no memcpy bus width specified, using 8 bits\n"); + val = 8; + } + switch (val) { + default: + dev_err(&adev->dev, "illegal bus width for memcpy, set to 8 bits\n"); + /* Fall through */ + case 8: + cctl_memcpy |= PL080_WIDTH_8BIT << PL080_CONTROL_SWIDTH_SHIFT | + PL080_WIDTH_8BIT << PL080_CONTROL_DWIDTH_SHIFT; + break; + case 16: + cctl_memcpy |= PL080_WIDTH_16BIT << PL080_CONTROL_SWIDTH_SHIFT | + PL080_WIDTH_16BIT << PL080_CONTROL_DWIDTH_SHIFT; + break; + case 32: + cctl_memcpy |= PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT | + PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT; + break; + } + + /* This is currently the only thing making sense */ + cctl_memcpy |= PL080_CONTROL_PROT_SYS; + + /* Set up memcpy channel */ + pd->memcpy_channel.bus_id = "memcpy"; + pd->memcpy_channel.cctl_memcpy = cctl_memcpy; + /* Use the buses that can access memory, obviously */ + pd->memcpy_channel.periph_buses = pd->mem_buses; + + pl08x->pd = pd; + + return of_dma_controller_register(adev->dev.of_node, pl08x_of_xlate, + pl08x); +} +#else +static inline int pl08x_of_probe(struct amba_device *adev, + struct pl08x_driver_data *pl08x, + struct device_node *np) +{ + return -EINVAL; +} +#endif + static int pl08x_probe(struct amba_device *adev, const struct amba_id *id) { struct pl08x_driver_data *pl08x; const struct vendor_data *vd = id->data; + struct device_node *np = adev->dev.of_node; u32 tsfr_size; int ret = 0; int i; @@ -2093,9 +2273,15 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id) /* Get the platform data */ pl08x->pd = dev_get_platdata(&adev->dev); if (!pl08x->pd) { - dev_err(&adev->dev, "no platform data supplied\n"); - ret = -EINVAL; - goto out_no_platdata; + if (np) { + ret = pl08x_of_probe(adev, pl08x, np); + if (ret) + goto out_no_platdata; + } else { + dev_err(&adev->dev, "no platform data supplied\n"); + ret = -EINVAL; + goto 
out_no_platdata; + } } /* Assign useful pointers to the driver state */ diff --git a/kernel/drivers/dma/at_hdmac.c b/kernel/drivers/dma/at_hdmac.c index 57b2141dd..53d22eb73 100644 --- a/kernel/drivers/dma/at_hdmac.c +++ b/kernel/drivers/dma/at_hdmac.c @@ -48,6 +48,8 @@ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |\ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES)) +#define ATC_MAX_DSCR_TRIALS 10 + /* * Initial number of descriptors to allocate for each channel. This could * be increased during dma usage. @@ -247,6 +249,10 @@ static void atc_dostart(struct at_dma_chan *atchan, struct at_desc *first) channel_writel(atchan, CTRLA, 0); channel_writel(atchan, CTRLB, 0); channel_writel(atchan, DSCR, first->txd.phys); + channel_writel(atchan, SPIP, ATC_SPIP_HOLE(first->src_hole) | + ATC_SPIP_BOUNDARY(first->boundary)); + channel_writel(atchan, DPIP, ATC_DPIP_HOLE(first->dst_hole) | + ATC_DPIP_BOUNDARY(first->boundary)); dma_writel(atdma, CHER, atchan->mask); vdbg_dump_regs(atchan); @@ -281,28 +287,19 @@ static struct at_desc *atc_get_desc_by_cookie(struct at_dma_chan *atchan, * * @current_len: the number of bytes left before reading CTRLA * @ctrla: the value of CTRLA - * @desc: the descriptor containing the transfer width - */ -static inline int atc_calc_bytes_left(int current_len, u32 ctrla, - struct at_desc *desc) -{ - return current_len - ((ctrla & ATC_BTSIZE_MAX) << desc->tx_width); -} - -/** - * atc_calc_bytes_left_from_reg - calculates the number of bytes left according - * to the current value of CTRLA. - * - * @current_len: the number of bytes left before reading CTRLA - * @atchan: the channel to read CTRLA for - * @desc: the descriptor containing the transfer width */ -static inline int atc_calc_bytes_left_from_reg(int current_len, - struct at_dma_chan *atchan, struct at_desc *desc) +static inline int atc_calc_bytes_left(int current_len, u32 ctrla) { - u32 ctrla = channel_readl(atchan, CTRLA); + u32 btsize = (ctrla & ATC_BTSIZE_MAX); + u32 src_width = ATC_REG_TO_SRC_WIDTH(ctrla); - return atc_calc_bytes_left(current_len, ctrla, desc); + /* + * According to the datasheet, when reading the Control A Register + * (ctrla), the Buffer Transfer Size (btsize) bitfield refers to the + * number of transfers completed on the Source Interface. + * So btsize is always a number of source width transfers. + */ + return current_len - (btsize << src_width); } /** @@ -316,7 +313,7 @@ static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie) struct at_desc *desc_first = atc_first_active(atchan); struct at_desc *desc; int ret; - u32 ctrla, dscr; + u32 ctrla, dscr, trials; /* * If the cookie doesn't match to the currently running transfer then @@ -342,15 +339,82 @@ static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie) * the channel's DSCR register and compare it against the value * of the hardware linked list structure of each child * descriptor. + * + * The CTRLA register provides us with the amount of data + * already read from the source for the current child + * descriptor. So we can compute a more accurate residue by also + * removing the number of bytes corresponding to this amount of + * data. + * + * However, the DSCR and CTRLA registers cannot be read both + * atomically. Hence a race condition may occur: the first read + * register may refer to one child descriptor whereas the second + * read may refer to a later child descriptor in the list + * because of the DMA transfer progression inbetween the two + * reads. 
+ * + * One solution could have been to pause the DMA transfer, read + * the DSCR and CTRLA then resume the DMA transfer. Nonetheless, + * this approach presents some drawbacks: + * - If the DMA transfer is paused, RX overruns or TX underruns + * are more likey to occur depending on the system latency. + * Taking the USART driver as an example, it uses a cyclic DMA + * transfer to read data from the Receive Holding Register + * (RHR) to avoid RX overruns since the RHR is not protected + * by any FIFO on most Atmel SoCs. So pausing the DMA transfer + * to compute the residue would break the USART driver design. + * - The atc_pause() function masks interrupts but we'd rather + * avoid to do so for system latency purpose. + * + * Then we'd rather use another solution: the DSCR is read a + * first time, the CTRLA is read in turn, next the DSCR is read + * a second time. If the two consecutive read values of the DSCR + * are the same then we assume both refers to the very same + * child descriptor as well as the CTRLA value read inbetween + * does. For cyclic tranfers, the assumption is that a full loop + * is "not so fast". + * If the two DSCR values are different, we read again the CTRLA + * then the DSCR till two consecutive read values from DSCR are + * equal or till the maxium trials is reach. + * This algorithm is very unlikely not to find a stable value for + * DSCR. */ - ctrla = channel_readl(atchan, CTRLA); - rmb(); /* ensure CTRLA is read before DSCR */ dscr = channel_readl(atchan, DSCR); + rmb(); /* ensure DSCR is read before CTRLA */ + ctrla = channel_readl(atchan, CTRLA); + for (trials = 0; trials < ATC_MAX_DSCR_TRIALS; ++trials) { + u32 new_dscr; + + rmb(); /* ensure DSCR is read after CTRLA */ + new_dscr = channel_readl(atchan, DSCR); + + /* + * If the DSCR register value has not changed inside the + * DMA controller since the previous read, we assume + * that both the dscr and ctrla values refers to the + * very same descriptor. + */ + if (likely(new_dscr == dscr)) + break; + + /* + * DSCR has changed inside the DMA controller, so the + * previouly read value of CTRLA may refer to an already + * processed descriptor hence could be outdated. + * We need to update ctrla to match the current + * descriptor. + */ + dscr = new_dscr; + rmb(); /* ensure DSCR is read before CTRLA */ + ctrla = channel_readl(atchan, CTRLA); + } + if (unlikely(trials >= ATC_MAX_DSCR_TRIALS)) + return -ETIMEDOUT; /* for the first descriptor we can be more accurate */ if (desc_first->lli.dscr == dscr) - return atc_calc_bytes_left(ret, ctrla, desc_first); + return atc_calc_bytes_left(ret, ctrla); ret -= desc_first->len; list_for_each_entry(desc, &desc_first->tx_list, desc_node) { @@ -361,16 +425,14 @@ static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie) } /* - * For the last descriptor in the chain we can calculate + * For the current descriptor in the chain we can calculate * the remaining bytes using the channel's register. - * Note that the transfer width of the first and last - * descriptor may differ. 
*/ - if (!desc->lli.dscr) - ret = atc_calc_bytes_left_from_reg(ret, atchan, desc); + ret = atc_calc_bytes_left(ret, ctrla); } else { /* single transfer */ - ret = atc_calc_bytes_left_from_reg(ret, atchan, desc_first); + ctrla = channel_readl(atchan, CTRLA); + ret = atc_calc_bytes_left(ret, ctrla); } return ret; @@ -386,6 +448,7 @@ static void atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) { struct dma_async_tx_descriptor *txd = &desc->txd; + struct at_dma *atdma = to_at_dma(atchan->chan_common.device); dev_vdbg(chan2dev(&atchan->chan_common), "descriptor %u complete\n", txd->cookie); @@ -394,6 +457,13 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) if (!atc_chan_is_cyclic(atchan)) dma_cookie_complete(txd); + /* If the transfer was a memset, free our temporary buffer */ + if (desc->memset_buffer) { + dma_pool_free(atdma->memset_pool, desc->memset_vaddr, + desc->memset_paddr); + desc->memset_buffer = false; + } + /* move children to free_list */ list_splice_init(&desc->tx_list, &atchan->free_list); /* move myself to free_list */ @@ -635,6 +705,103 @@ static dma_cookie_t atc_tx_submit(struct dma_async_tx_descriptor *tx) } /** + * atc_prep_dma_interleaved - prepare memory to memory interleaved operation + * @chan: the channel to prepare operation on + * @xt: Interleaved transfer template + * @flags: tx descriptor status flags + */ +static struct dma_async_tx_descriptor * +atc_prep_dma_interleaved(struct dma_chan *chan, + struct dma_interleaved_template *xt, + unsigned long flags) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct data_chunk *first = xt->sgl; + struct at_desc *desc = NULL; + size_t xfer_count; + unsigned int dwidth; + u32 ctrla; + u32 ctrlb; + size_t len = 0; + int i; + + if (unlikely(!xt || xt->numf != 1 || !xt->frame_size)) + return NULL; + + dev_info(chan2dev(chan), + "%s: src=%pad, dest=%pad, numf=%d, frame_size=%d, flags=0x%lx\n", + __func__, &xt->src_start, &xt->dst_start, xt->numf, + xt->frame_size, flags); + + /* + * The controller can only "skip" X bytes every Y bytes, so we + * need to make sure we are given a template that fit that + * description, ie a template with chunks that always have the + * same size, with the same ICGs. 
+ */ + for (i = 0; i < xt->frame_size; i++) { + struct data_chunk *chunk = xt->sgl + i; + + if ((chunk->size != xt->sgl->size) || + (dmaengine_get_dst_icg(xt, chunk) != dmaengine_get_dst_icg(xt, first)) || + (dmaengine_get_src_icg(xt, chunk) != dmaengine_get_src_icg(xt, first))) { + dev_err(chan2dev(chan), + "%s: the controller can transfer only identical chunks\n", + __func__); + return NULL; + } + + len += chunk->size; + } + + dwidth = atc_get_xfer_width(xt->src_start, + xt->dst_start, len); + + xfer_count = len >> dwidth; + if (xfer_count > ATC_BTSIZE_MAX) { + dev_err(chan2dev(chan), "%s: buffer is too big\n", __func__); + return NULL; + } + + ctrla = ATC_SRC_WIDTH(dwidth) | + ATC_DST_WIDTH(dwidth); + + ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN + | ATC_SRC_ADDR_MODE_INCR + | ATC_DST_ADDR_MODE_INCR + | ATC_SRC_PIP + | ATC_DST_PIP + | ATC_FC_MEM2MEM; + + /* create the transfer */ + desc = atc_desc_get(atchan); + if (!desc) { + dev_err(chan2dev(chan), + "%s: couldn't allocate our descriptor\n", __func__); + return NULL; + } + + desc->lli.saddr = xt->src_start; + desc->lli.daddr = xt->dst_start; + desc->lli.ctrla = ctrla | xfer_count; + desc->lli.ctrlb = ctrlb; + + desc->boundary = first->size >> dwidth; + desc->dst_hole = (dmaengine_get_dst_icg(xt, first) >> dwidth) + 1; + desc->src_hole = (dmaengine_get_src_icg(xt, first) >> dwidth) + 1; + + desc->txd.cookie = -EBUSY; + desc->total_len = desc->len = len; + + /* set end-of-link to the last link descriptor of list*/ + set_desc_eol(desc); + + desc->txd.flags = flags; /* client is in control of this ack */ + + return &desc->txd; +} + +/** * atc_prep_dma_memcpy - prepare a memcpy operation * @chan: the channel to prepare operation on * @dest: operation virtual destination address @@ -657,8 +824,8 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, u32 ctrla; u32 ctrlb; - dev_vdbg(chan2dev(chan), "prep_dma_memcpy: d0x%x s0x%x l0x%zx f0x%lx\n", - dest, src, len, flags); + dev_vdbg(chan2dev(chan), "prep_dma_memcpy: d%pad s%pad l0x%zx f0x%lx\n", + &dest, &src, len, flags); if (unlikely(!len)) { dev_dbg(chan2dev(chan), "prep_dma_memcpy: length is zero!\n"); @@ -702,10 +869,6 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, first->txd.cookie = -EBUSY; first->total_len = len; - /* set transfer width for the calculation of the residue */ - first->tx_width = src_width; - prev->tx_width = src_width; - /* set end-of-link to the last link descriptor of list*/ set_desc_eol(desc); @@ -718,6 +881,187 @@ err_desc_get: return NULL; } +static struct at_desc *atc_create_memset_desc(struct dma_chan *chan, + dma_addr_t psrc, + dma_addr_t pdst, + size_t len) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct at_desc *desc; + size_t xfer_count; + + u32 ctrla = ATC_SRC_WIDTH(2) | ATC_DST_WIDTH(2); + u32 ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN | + ATC_SRC_ADDR_MODE_FIXED | + ATC_DST_ADDR_MODE_INCR | + ATC_FC_MEM2MEM; + + xfer_count = len >> 2; + if (xfer_count > ATC_BTSIZE_MAX) { + dev_err(chan2dev(chan), "%s: buffer is too big\n", + __func__); + return NULL; + } + + desc = atc_desc_get(atchan); + if (!desc) { + dev_err(chan2dev(chan), "%s: can't get a descriptor\n", + __func__); + return NULL; + } + + desc->lli.saddr = psrc; + desc->lli.daddr = pdst; + desc->lli.ctrla = ctrla | xfer_count; + desc->lli.ctrlb = ctrlb; + + desc->txd.cookie = 0; + desc->len = len; + + return desc; +} + +/** + * atc_prep_dma_memset - prepare a memcpy operation + * @chan: the channel to prepare operation on + * @dest: operation 
virtual destination address + * @value: value to set memory buffer to + * @len: operation length + * @flags: tx descriptor status flags + */ +static struct dma_async_tx_descriptor * +atc_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value, + size_t len, unsigned long flags) +{ + struct at_dma *atdma = to_at_dma(chan->device); + struct at_desc *desc; + void __iomem *vaddr; + dma_addr_t paddr; + + dev_vdbg(chan2dev(chan), "%s: d%pad v0x%x l0x%zx f0x%lx\n", __func__, + &dest, value, len, flags); + + if (unlikely(!len)) { + dev_dbg(chan2dev(chan), "%s: length is zero!\n", __func__); + return NULL; + } + + if (!is_dma_fill_aligned(chan->device, dest, 0, len)) { + dev_dbg(chan2dev(chan), "%s: buffer is not aligned\n", + __func__); + return NULL; + } + + vaddr = dma_pool_alloc(atdma->memset_pool, GFP_ATOMIC, &paddr); + if (!vaddr) { + dev_err(chan2dev(chan), "%s: couldn't allocate buffer\n", + __func__); + return NULL; + } + *(u32*)vaddr = value; + + desc = atc_create_memset_desc(chan, paddr, dest, len); + if (!desc) { + dev_err(chan2dev(chan), "%s: couldn't get a descriptor\n", + __func__); + goto err_free_buffer; + } + + desc->memset_paddr = paddr; + desc->memset_vaddr = vaddr; + desc->memset_buffer = true; + + desc->txd.cookie = -EBUSY; + desc->total_len = len; + + /* set end-of-link on the descriptor */ + set_desc_eol(desc); + + desc->txd.flags = flags; + + return &desc->txd; + +err_free_buffer: + dma_pool_free(atdma->memset_pool, vaddr, paddr); + return NULL; +} + +static struct dma_async_tx_descriptor * +atc_prep_dma_memset_sg(struct dma_chan *chan, + struct scatterlist *sgl, + unsigned int sg_len, int value, + unsigned long flags) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct at_dma *atdma = to_at_dma(chan->device); + struct at_desc *desc = NULL, *first = NULL, *prev = NULL; + struct scatterlist *sg; + void __iomem *vaddr; + dma_addr_t paddr; + size_t total_len = 0; + int i; + + dev_vdbg(chan2dev(chan), "%s: v0x%x l0x%zx f0x%lx\n", __func__, + value, sg_len, flags); + + if (unlikely(!sgl || !sg_len)) { + dev_dbg(chan2dev(chan), "%s: scatterlist is empty!\n", + __func__); + return NULL; + } + + vaddr = dma_pool_alloc(atdma->memset_pool, GFP_ATOMIC, &paddr); + if (!vaddr) { + dev_err(chan2dev(chan), "%s: couldn't allocate buffer\n", + __func__); + return NULL; + } + *(u32*)vaddr = value; + + for_each_sg(sgl, sg, sg_len, i) { + dma_addr_t dest = sg_dma_address(sg); + size_t len = sg_dma_len(sg); + + dev_vdbg(chan2dev(chan), "%s: d%pad, l0x%zx\n", + __func__, &dest, len); + + if (!is_dma_fill_aligned(chan->device, dest, 0, len)) { + dev_err(chan2dev(chan), "%s: buffer is not aligned\n", + __func__); + goto err_put_desc; + } + + desc = atc_create_memset_desc(chan, paddr, dest, len); + if (!desc) + goto err_put_desc; + + atc_desc_chain(&first, &prev, desc); + + total_len += len; + } + + /* + * Only set the buffer pointers on the last descriptor to + * avoid free'ing while we have our transfer still going + */ + desc->memset_paddr = paddr; + desc->memset_vaddr = vaddr; + desc->memset_buffer = true; + + first->txd.cookie = -EBUSY; + first->total_len = total_len; + + /* set end-of-link on the descriptor */ + set_desc_eol(desc); + + first->txd.flags = flags; + + return &first->txd; + +err_put_desc: + atc_desc_put(atchan, first); + return NULL; +} /** * atc_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction @@ -854,10 +1198,6 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, first->txd.cookie = -EBUSY; first->total_len = total_len; - /* 
set transfer width for the calculation of the residue */ - first->tx_width = reg_width; - prev->tx_width = reg_width; - /* first link descriptor of list is responsible of flags */ first->txd.flags = flags; /* client is in control of this ack */ @@ -975,12 +1315,6 @@ atc_prep_dma_sg(struct dma_chan *chan, desc->txd.cookie = 0; desc->len = len; - /* - * Although we only need the transfer width for the first and - * the last descriptor, its easier to set it to all descriptors. - */ - desc->tx_width = src_width; - atc_desc_chain(&first, &prev, desc); /* update the lengths and addresses for the next loop cycle */ @@ -1105,9 +1439,9 @@ atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, unsigned int periods = buf_len / period_len; unsigned int i; - dev_vdbg(chan2dev(chan), "prep_dma_cyclic: %s buf@0x%08x - %d (%d/%d)\n", + dev_vdbg(chan2dev(chan), "prep_dma_cyclic: %s buf@%pad - %d (%d/%d)\n", direction == DMA_MEM_TO_DEV ? "TO DEVICE" : "FROM DEVICE", - buf_addr, + &buf_addr, periods, buf_len, period_len); if (unlikely(!atslave || !buf_len || !period_len)) { @@ -1154,7 +1488,6 @@ atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, /* First descriptor of the chain embedds additional information */ first->txd.cookie = -EBUSY; first->total_len = buf_len; - first->tx_width = reg_width; return &first->txd; @@ -1609,7 +1942,11 @@ static int __init at_dma_probe(struct platform_device *pdev) /* setup platform data for each SoC */ dma_cap_set(DMA_MEMCPY, at91sam9rl_config.cap_mask); dma_cap_set(DMA_SG, at91sam9rl_config.cap_mask); + dma_cap_set(DMA_INTERLEAVE, at91sam9g45_config.cap_mask); dma_cap_set(DMA_MEMCPY, at91sam9g45_config.cap_mask); + dma_cap_set(DMA_MEMSET, at91sam9g45_config.cap_mask); + dma_cap_set(DMA_MEMSET_SG, at91sam9g45_config.cap_mask); + dma_cap_set(DMA_PRIVATE, at91sam9g45_config.cap_mask); dma_cap_set(DMA_SLAVE, at91sam9g45_config.cap_mask); dma_cap_set(DMA_SG, at91sam9g45_config.cap_mask); @@ -1673,7 +2010,16 @@ static int __init at_dma_probe(struct platform_device *pdev) if (!atdma->dma_desc_pool) { dev_err(&pdev->dev, "No memory for descriptors dma pool\n"); err = -ENOMEM; - goto err_pool_create; + goto err_desc_pool_create; + } + + /* create a pool of consistent memory blocks for memset blocks */ + atdma->memset_pool = dma_pool_create("at_hdmac_memset_pool", + &pdev->dev, sizeof(int), 4, 0); + if (!atdma->memset_pool) { + dev_err(&pdev->dev, "No memory for memset dma pool\n"); + err = -ENOMEM; + goto err_memset_pool_create; } /* clear any pending interrupt */ @@ -1713,9 +2059,18 @@ static int __init at_dma_probe(struct platform_device *pdev) atdma->dma_common.dev = &pdev->dev; /* set prep routines based on capability */ + if (dma_has_cap(DMA_INTERLEAVE, atdma->dma_common.cap_mask)) + atdma->dma_common.device_prep_interleaved_dma = atc_prep_dma_interleaved; + if (dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask)) atdma->dma_common.device_prep_dma_memcpy = atc_prep_dma_memcpy; + if (dma_has_cap(DMA_MEMSET, atdma->dma_common.cap_mask)) { + atdma->dma_common.device_prep_dma_memset = atc_prep_dma_memset; + atdma->dma_common.device_prep_dma_memset_sg = atc_prep_dma_memset_sg; + atdma->dma_common.fill_align = DMAENGINE_ALIGN_4_BYTES; + } + if (dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask)) { atdma->dma_common.device_prep_slave_sg = atc_prep_slave_sg; /* controller can do slave DMA: can trigger cyclic transfers */ @@ -1736,8 +2091,9 @@ static int __init at_dma_probe(struct platform_device *pdev) dma_writel(atdma, EN, 
AT_DMA_ENABLE); - dev_info(&pdev->dev, "Atmel AHB DMA Controller ( %s%s%s), %d channels\n", + dev_info(&pdev->dev, "Atmel AHB DMA Controller ( %s%s%s%s), %d channels\n", dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask) ? "cpy " : "", + dma_has_cap(DMA_MEMSET, atdma->dma_common.cap_mask) ? "set " : "", dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask) ? "slave " : "", dma_has_cap(DMA_SG, atdma->dma_common.cap_mask) ? "sg-cpy " : "", plat_dat->nr_channels); @@ -1762,8 +2118,10 @@ static int __init at_dma_probe(struct platform_device *pdev) err_of_dma_controller_register: dma_async_device_unregister(&atdma->dma_common); + dma_pool_destroy(atdma->memset_pool); +err_memset_pool_create: dma_pool_destroy(atdma->dma_desc_pool); -err_pool_create: +err_desc_pool_create: free_irq(platform_get_irq(pdev, 0), atdma); err_irq: clk_disable_unprepare(atdma->clk); @@ -1788,6 +2146,7 @@ static int at_dma_remove(struct platform_device *pdev) at_dma_off(atdma); dma_async_device_unregister(&atdma->dma_common); + dma_pool_destroy(atdma->memset_pool); dma_pool_destroy(atdma->dma_desc_pool); free_irq(platform_get_irq(pdev, 0), atdma); diff --git a/kernel/drivers/dma/at_hdmac_regs.h b/kernel/drivers/dma/at_hdmac_regs.h index 2727ca560..7f58f0615 100644 --- a/kernel/drivers/dma/at_hdmac_regs.h +++ b/kernel/drivers/dma/at_hdmac_regs.h @@ -112,6 +112,7 @@ #define ATC_SRC_WIDTH_BYTE (0x0 << 24) #define ATC_SRC_WIDTH_HALFWORD (0x1 << 24) #define ATC_SRC_WIDTH_WORD (0x2 << 24) +#define ATC_REG_TO_SRC_WIDTH(r) (((r) >> 24) & 0x3) #define ATC_DST_WIDTH_MASK (0x3 << 28) /* Destination Single Transfer Size */ #define ATC_DST_WIDTH(x) ((x) << 28) #define ATC_DST_WIDTH_BYTE (0x0 << 28) @@ -182,7 +183,6 @@ struct at_lli { * @txd: support for the async_tx api * @desc_node: node on the channed descriptors list * @len: descriptor byte count - * @tx_width: transfer width * @total_len: total transaction byte count */ struct at_desc { @@ -194,8 +194,17 @@ struct at_desc { struct dma_async_tx_descriptor txd; struct list_head desc_node; size_t len; - u32 tx_width; size_t total_len; + + /* Interleaved data */ + size_t boundary; + size_t dst_hole; + size_t src_hole; + + /* Memset temporary buffer */ + bool memset_buffer; + dma_addr_t memset_paddr; + int *memset_vaddr; }; static inline struct at_desc * @@ -326,6 +335,7 @@ struct at_dma { u8 all_chan_mask; struct dma_pool *dma_desc_pool; + struct dma_pool *memset_pool; /* AT THE END channels table */ struct at_dma_chan chan[0]; }; @@ -375,9 +385,9 @@ static void vdbg_dump_regs(struct at_dma_chan *atchan) {} static void atc_dump_lli(struct at_dma_chan *atchan, struct at_lli *lli) { dev_crit(chan2dev(&atchan->chan_common), - " desc: s0x%x d0x%x ctrl0x%x:0x%x l0x%x\n", - lli->saddr, lli->daddr, - lli->ctrla, lli->ctrlb, lli->dscr); + " desc: s%pad d%pad ctrl0x%x:0x%x l0x%pad\n", + &lli->saddr, &lli->daddr, + lli->ctrla, lli->ctrlb, &lli->dscr); } diff --git a/kernel/drivers/dma/at_xdmac.c b/kernel/drivers/dma/at_xdmac.c index c89a7abb5..02f9aa4eb 100644 --- a/kernel/drivers/dma/at_xdmac.c +++ b/kernel/drivers/dma/at_xdmac.c @@ -156,7 +156,7 @@ #define AT_XDMAC_CC_WRIP (0x1 << 23) /* Write in Progress (read only) */ #define AT_XDMAC_CC_WRIP_DONE (0x0 << 23) #define AT_XDMAC_CC_WRIP_IN_PROGRESS (0x1 << 23) -#define AT_XDMAC_CC_PERID(i) (0x7f & (h) << 24) /* Channel Peripheral Identifier */ +#define AT_XDMAC_CC_PERID(i) (0x7f & (i) << 24) /* Channel Peripheral Identifier */ #define AT_XDMAC_CDS_MSP 0x2C /* Channel Data Stride Memory Set Pattern */ #define AT_XDMAC_CSUS 0x30 /* Channel Source 
Microblock Stride */ #define AT_XDMAC_CDUS 0x34 /* Channel Destination Microblock Stride */ @@ -176,6 +176,7 @@ #define AT_XDMAC_MAX_CHAN 0x20 #define AT_XDMAC_MAX_CSIZE 16 /* 16 data */ #define AT_XDMAC_MAX_DWIDTH 8 /* 64 bits */ +#define AT_XDMAC_RESIDUE_MAX_RETRIES 5 #define AT_XDMAC_DMA_BUSWIDTHS\ (BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) |\ @@ -235,6 +236,10 @@ struct at_xdmac_lld { dma_addr_t mbr_sa; /* Source Address Member */ dma_addr_t mbr_da; /* Destination Address Member */ u32 mbr_cfg; /* Configuration Register */ + u32 mbr_bc; /* Block Control Register */ + u32 mbr_ds; /* Data Stride Register */ + u32 mbr_sus; /* Source Microblock Stride Register */ + u32 mbr_dus; /* Destination Microblock Stride Register */ }; @@ -355,16 +360,19 @@ static void at_xdmac_start_xfer(struct at_xdmac_chan *atchan, * descriptor view 2 since some fields of the configuration register * depend on transfer size and src/dest addresses. */ - if (at_xdmac_chan_is_cyclic(atchan)) { + if (at_xdmac_chan_is_cyclic(atchan)) reg = AT_XDMAC_CNDC_NDVIEW_NDV1; - at_xdmac_chan_write(atchan, AT_XDMAC_CC, first->lld.mbr_cfg); - } else { - /* - * No need to write AT_XDMAC_CC reg, it will be done when the - * descriptor is fecthed. - */ + else if (first->lld.mbr_ubc & AT_XDMAC_MBR_UBC_NDV3) + reg = AT_XDMAC_CNDC_NDVIEW_NDV3; + else reg = AT_XDMAC_CNDC_NDVIEW_NDV2; - } + /* + * Even if the register will be updated from the configuration in the + * descriptor when using view 2 or higher, the PROT bit won't be set + * properly. This bit can be modified only by using the channel + * configuration register. + */ + at_xdmac_chan_write(atchan, AT_XDMAC_CC, first->lld.mbr_cfg); reg |= AT_XDMAC_CNDC_NDDUP | AT_XDMAC_CNDC_NDSUP @@ -448,6 +456,15 @@ static struct at_xdmac_desc *at_xdmac_alloc_desc(struct dma_chan *chan, return desc; } +void at_xdmac_init_used_desc(struct at_xdmac_desc *desc) +{ + memset(&desc->lld, 0, sizeof(desc->lld)); + INIT_LIST_HEAD(&desc->descs_list); + desc->direction = DMA_TRANS_NONE; + desc->xfer_size = 0; + desc->active_xfer = false; +} + /* Call must be protected by lock. 
*/ static struct at_xdmac_desc *at_xdmac_get_desc(struct at_xdmac_chan *atchan) { @@ -459,12 +476,39 @@ static struct at_xdmac_desc *at_xdmac_get_desc(struct at_xdmac_chan *atchan) desc = list_first_entry(&atchan->free_descs_list, struct at_xdmac_desc, desc_node); list_del(&desc->desc_node); - desc->active_xfer = false; + at_xdmac_init_used_desc(desc); } return desc; } +static void at_xdmac_queue_desc(struct dma_chan *chan, + struct at_xdmac_desc *prev, + struct at_xdmac_desc *desc) +{ + if (!prev || !desc) + return; + + prev->lld.mbr_nda = desc->tx_dma_desc.phys; + prev->lld.mbr_ubc |= AT_XDMAC_MBR_UBC_NDE; + + dev_dbg(chan2dev(chan), "%s: chain lld: prev=0x%p, mbr_nda=%pad\n", + __func__, prev, &prev->lld.mbr_nda); +} + +static inline void at_xdmac_increment_block_count(struct dma_chan *chan, + struct at_xdmac_desc *desc) +{ + if (!desc) + return; + + desc->lld.mbr_bc++; + + dev_dbg(chan2dev(chan), + "%s: incrementing the block count of the desc 0x%p\n", + __func__, desc); +} + static struct dma_chan *at_xdmac_xlate(struct of_phandle_args *dma_spec, struct of_dma *of_dma) { @@ -591,12 +635,12 @@ at_xdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len, enum dma_transfer_direction direction, unsigned long flags, void *context) { - struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); - struct at_xdmac_desc *first = NULL, *prev = NULL; - struct scatterlist *sg; - int i; - unsigned int xfer_size = 0; - unsigned long irqflags; + struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); + struct at_xdmac_desc *first = NULL, *prev = NULL; + struct scatterlist *sg; + int i; + unsigned int xfer_size = 0; + unsigned long irqflags; struct dma_async_tx_descriptor *ret = NULL; if (!sgl) @@ -655,7 +699,6 @@ at_xdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV2 /* next descriptor view */ | AT_XDMAC_MBR_UBC_NDEN /* next descriptor dst parameter update */ | AT_XDMAC_MBR_UBC_NSEN /* next descriptor src parameter update */ - | (i == sg_len - 1 ? 0 : AT_XDMAC_MBR_UBC_NDE) /* descriptor fetch */ | (len >> fixed_dwidth); /* microblock length */ desc->lld.mbr_cfg = (atchan->cfg & ~AT_XDMAC_CC_DWIDTH_MASK) | AT_XDMAC_CC_DWIDTH(fixed_dwidth); @@ -664,12 +707,8 @@ at_xdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc); /* Chain lld. */ - if (prev) { - prev->lld.mbr_nda = desc->tx_dma_desc.phys; - dev_dbg(chan2dev(chan), - "%s: chain lld: prev=0x%p, mbr_nda=%pad\n", - __func__, prev, &prev->lld.mbr_nda); - } + if (prev) + at_xdmac_queue_desc(chan, prev, desc); prev = desc; if (!first) @@ -749,7 +788,6 @@ at_xdmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV1 | AT_XDMAC_MBR_UBC_NDEN | AT_XDMAC_MBR_UBC_NSEN - | AT_XDMAC_MBR_UBC_NDE | period_len >> at_xdmac_get_dwidth(desc->lld.mbr_cfg); dev_dbg(chan2dev(chan), @@ -757,12 +795,8 @@ at_xdmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc); /* Chain lld. 
*/ - if (prev) { - prev->lld.mbr_nda = desc->tx_dma_desc.phys; - dev_dbg(chan2dev(chan), - "%s: chain lld: prev=0x%p, mbr_nda=%pad\n", - __func__, prev, &prev->lld.mbr_nda); - } + if (prev) + at_xdmac_queue_desc(chan, prev, desc); prev = desc; if (!first) @@ -773,10 +807,7 @@ at_xdmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, list_add_tail(&desc->desc_node, &first->descs_list); } - prev->lld.mbr_nda = first->tx_dma_desc.phys; - dev_dbg(chan2dev(chan), - "%s: chain lld: prev=0x%p, mbr_nda=%pad\n", - __func__, prev, &prev->lld.mbr_nda); + at_xdmac_queue_desc(chan, prev, first); first->tx_dma_desc.flags = flags; first->xfer_size = buf_len; first->direction = direction; @@ -784,6 +815,217 @@ at_xdmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, return &first->tx_dma_desc; } +static inline u32 at_xdmac_align_width(struct dma_chan *chan, dma_addr_t addr) +{ + u32 width; + + /* + * Check address alignment to select the greater data width we + * can use. + * + * Some XDMAC implementations don't provide dword transfer, in + * this case selecting dword has the same behavior as + * selecting word transfers. + */ + if (!(addr & 7)) { + width = AT_XDMAC_CC_DWIDTH_DWORD; + dev_dbg(chan2dev(chan), "%s: dwidth: double word\n", __func__); + } else if (!(addr & 3)) { + width = AT_XDMAC_CC_DWIDTH_WORD; + dev_dbg(chan2dev(chan), "%s: dwidth: word\n", __func__); + } else if (!(addr & 1)) { + width = AT_XDMAC_CC_DWIDTH_HALFWORD; + dev_dbg(chan2dev(chan), "%s: dwidth: half word\n", __func__); + } else { + width = AT_XDMAC_CC_DWIDTH_BYTE; + dev_dbg(chan2dev(chan), "%s: dwidth: byte\n", __func__); + } + + return width; +} + +static struct at_xdmac_desc * +at_xdmac_interleaved_queue_desc(struct dma_chan *chan, + struct at_xdmac_chan *atchan, + struct at_xdmac_desc *prev, + dma_addr_t src, dma_addr_t dst, + struct dma_interleaved_template *xt, + struct data_chunk *chunk) +{ + struct at_xdmac_desc *desc; + u32 dwidth; + unsigned long flags; + size_t ublen; + /* + * WARNING: The channel configuration is set here since there is no + * dmaengine_slave_config call in this case. Moreover we don't know the + * direction, it involves we can't dynamically set the source and dest + * interface so we have to use the same one. Only interface 0 allows EBI + * access. Hopefully we can access DDR through both ports (at least on + * SAMA5D4x), so we can use the same interface for source and dest, + * that solves the fact we don't know the direction. 
+ */ + u32 chan_cc = AT_XDMAC_CC_DIF(0) + | AT_XDMAC_CC_SIF(0) + | AT_XDMAC_CC_MBSIZE_SIXTEEN + | AT_XDMAC_CC_TYPE_MEM_TRAN; + + dwidth = at_xdmac_align_width(chan, src | dst | chunk->size); + if (chunk->size >= (AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth)) { + dev_dbg(chan2dev(chan), + "%s: chunk too big (%d, max size %lu)...\n", + __func__, chunk->size, + AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth); + return NULL; + } + + if (prev) + dev_dbg(chan2dev(chan), + "Adding items at the end of desc 0x%p\n", prev); + + if (xt->src_inc) { + if (xt->src_sgl) + chan_cc |= AT_XDMAC_CC_SAM_UBS_AM; + else + chan_cc |= AT_XDMAC_CC_SAM_INCREMENTED_AM; + } + + if (xt->dst_inc) { + if (xt->dst_sgl) + chan_cc |= AT_XDMAC_CC_DAM_UBS_AM; + else + chan_cc |= AT_XDMAC_CC_DAM_INCREMENTED_AM; + } + + spin_lock_irqsave(&atchan->lock, flags); + desc = at_xdmac_get_desc(atchan); + spin_unlock_irqrestore(&atchan->lock, flags); + if (!desc) { + dev_err(chan2dev(chan), "can't get descriptor\n"); + return NULL; + } + + chan_cc |= AT_XDMAC_CC_DWIDTH(dwidth); + + ublen = chunk->size >> dwidth; + + desc->lld.mbr_sa = src; + desc->lld.mbr_da = dst; + desc->lld.mbr_sus = dmaengine_get_src_icg(xt, chunk); + desc->lld.mbr_dus = dmaengine_get_dst_icg(xt, chunk); + + desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV3 + | AT_XDMAC_MBR_UBC_NDEN + | AT_XDMAC_MBR_UBC_NSEN + | ublen; + desc->lld.mbr_cfg = chan_cc; + + dev_dbg(chan2dev(chan), + "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n", + __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, + desc->lld.mbr_ubc, desc->lld.mbr_cfg); + + /* Chain lld. */ + if (prev) + at_xdmac_queue_desc(chan, prev, desc); + + return desc; +} + +static struct dma_async_tx_descriptor * +at_xdmac_prep_interleaved(struct dma_chan *chan, + struct dma_interleaved_template *xt, + unsigned long flags) +{ + struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); + struct at_xdmac_desc *prev = NULL, *first = NULL; + dma_addr_t dst_addr, src_addr; + size_t src_skip = 0, dst_skip = 0, len = 0; + struct data_chunk *chunk; + int i; + + if (!xt || !xt->numf || (xt->dir != DMA_MEM_TO_MEM)) + return NULL; + + /* + * TODO: Handle the case where we have to repeat a chain of + * descriptors... + */ + if ((xt->numf > 1) && (xt->frame_size > 1)) + return NULL; + + dev_dbg(chan2dev(chan), "%s: src=%pad, dest=%pad, numf=%d, frame_size=%d, flags=0x%lx\n", + __func__, &xt->src_start, &xt->dst_start, xt->numf, + xt->frame_size, flags); + + src_addr = xt->src_start; + dst_addr = xt->dst_start; + + if (xt->numf > 1) { + first = at_xdmac_interleaved_queue_desc(chan, atchan, + NULL, + src_addr, dst_addr, + xt, xt->sgl); + + /* Length of the block is (BLEN+1) microblocks. 
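
(Illustrative aside, not part of the patch: the interleaved path above consumes whatever dma_interleaved_template the client hands in. A minimal client-side sketch of a single-chunk, multi-frame request follows; it assumes the caller already holds a channel whose capability mask includes DMA_INTERLEAVE, and the helper name queue_2d_copy and its parameters are invented for illustration.)

	#include <linux/dmaengine.h>
	#include <linux/slab.h>

	/* Hypothetical helper: copy "lines" rows of line_len bytes from a packed
	 * source into a destination whose rows are separated by line_gap bytes. */
	static int queue_2d_copy(struct dma_chan *chan, dma_addr_t src, dma_addr_t dst,
				 size_t line_len, size_t line_gap, size_t lines)
	{
		struct dma_interleaved_template *xt;
		struct dma_async_tx_descriptor *tx;
		dma_cookie_t cookie;
		int ret = 0;

		/* One data_chunk per frame; numf repeats the frame "lines" times. */
		xt = kzalloc(sizeof(*xt) + sizeof(struct data_chunk), GFP_KERNEL);
		if (!xt)
			return -ENOMEM;

		xt->src_start = src;
		xt->dst_start = dst;
		xt->dir = DMA_MEM_TO_MEM;
		xt->src_inc = true;
		xt->dst_inc = true;
		xt->src_sgl = false;		/* source rows are contiguous */
		xt->dst_sgl = true;		/* destination rows have a gap */
		xt->numf = lines;
		xt->frame_size = 1;
		xt->sgl[0].size = line_len;
		xt->sgl[0].icg = line_gap;

		tx = dmaengine_prep_interleaved_dma(chan, xt,
						    DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
		if (!tx) {
			ret = -EINVAL;
			goto out;
		}

		cookie = dmaengine_submit(tx);
		dma_async_issue_pending(chan);
		ret = dma_submit_error(cookie);
	out:
		kfree(xt);
		return ret;
	}

With numf > 1 and frame_size == 1 such a template maps onto the block-count handling above; a template with frame_size > 1 and numf == 1 instead walks the per-chunk loop further down.
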
*/ + for (i = 0; i < xt->numf - 1; i++) + at_xdmac_increment_block_count(chan, first); + + dev_dbg(chan2dev(chan), "%s: add desc 0x%p to descs_list 0x%p\n", + __func__, first, first); + list_add_tail(&first->desc_node, &first->descs_list); + } else { + for (i = 0; i < xt->frame_size; i++) { + size_t src_icg = 0, dst_icg = 0; + struct at_xdmac_desc *desc; + + chunk = xt->sgl + i; + + dst_icg = dmaengine_get_dst_icg(xt, chunk); + src_icg = dmaengine_get_src_icg(xt, chunk); + + src_skip = chunk->size + src_icg; + dst_skip = chunk->size + dst_icg; + + dev_dbg(chan2dev(chan), + "%s: chunk size=%d, src icg=%d, dst icg=%d\n", + __func__, chunk->size, src_icg, dst_icg); + + desc = at_xdmac_interleaved_queue_desc(chan, atchan, + prev, + src_addr, dst_addr, + xt, chunk); + if (!desc) { + list_splice_init(&first->descs_list, + &atchan->free_descs_list); + return NULL; + } + + if (!first) + first = desc; + + dev_dbg(chan2dev(chan), "%s: add desc 0x%p to descs_list 0x%p\n", + __func__, desc, first); + list_add_tail(&desc->desc_node, &first->descs_list); + + if (xt->src_sgl) + src_addr += src_skip; + + if (xt->dst_sgl) + dst_addr += dst_skip; + + len += chunk->size; + prev = desc; + } + } + + first->tx_dma_desc.cookie = -EBUSY; + first->tx_dma_desc.flags = flags; + first->xfer_size = len; + + return &first->tx_dma_desc; +} + static struct dma_async_tx_descriptor * at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, size_t len, unsigned long flags) @@ -815,24 +1057,7 @@ at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, if (unlikely(!len)) return NULL; - /* - * Check address alignment to select the greater data width we can use. - * Some XDMAC implementations don't provide dword transfer, in this - * case selecting dword has the same behavior as selecting word transfers. - */ - if (!((src_addr | dst_addr) & 7)) { - dwidth = AT_XDMAC_CC_DWIDTH_DWORD; - dev_dbg(chan2dev(chan), "%s: dwidth: double word\n", __func__); - } else if (!((src_addr | dst_addr) & 3)) { - dwidth = AT_XDMAC_CC_DWIDTH_WORD; - dev_dbg(chan2dev(chan), "%s: dwidth: word\n", __func__); - } else if (!((src_addr | dst_addr) & 1)) { - dwidth = AT_XDMAC_CC_DWIDTH_HALFWORD; - dev_dbg(chan2dev(chan), "%s: dwidth: half word\n", __func__); - } else { - dwidth = AT_XDMAC_CC_DWIDTH_BYTE; - dev_dbg(chan2dev(chan), "%s: dwidth: byte\n", __func__); - } + dwidth = at_xdmac_align_width(chan, src_addr | dst_addr); /* Prepare descriptors. */ while (remaining_size) { @@ -862,19 +1087,9 @@ at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, dev_dbg(chan2dev(chan), "%s: xfer_size=%zu\n", __func__, xfer_size); /* Check remaining length and change data width if needed. 
*/ - if (!((src_addr | dst_addr | xfer_size) & 7)) { - dwidth = AT_XDMAC_CC_DWIDTH_DWORD; - dev_dbg(chan2dev(chan), "%s: dwidth: double word\n", __func__); - } else if (!((src_addr | dst_addr | xfer_size) & 3)) { - dwidth = AT_XDMAC_CC_DWIDTH_WORD; - dev_dbg(chan2dev(chan), "%s: dwidth: word\n", __func__); - } else if (!((src_addr | dst_addr | xfer_size) & 1)) { - dwidth = AT_XDMAC_CC_DWIDTH_HALFWORD; - dev_dbg(chan2dev(chan), "%s: dwidth: half word\n", __func__); - } else if ((src_addr | dst_addr | xfer_size) & 1) { - dwidth = AT_XDMAC_CC_DWIDTH_BYTE; - dev_dbg(chan2dev(chan), "%s: dwidth: byte\n", __func__); - } + dwidth = at_xdmac_align_width(chan, + src_addr | dst_addr | xfer_size); + chan_cc &= ~AT_XDMAC_CC_DWIDTH_MASK; chan_cc |= AT_XDMAC_CC_DWIDTH(dwidth); ublen = xfer_size >> dwidth; @@ -885,7 +1100,6 @@ at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV2 | AT_XDMAC_MBR_UBC_NDEN | AT_XDMAC_MBR_UBC_NSEN - | (remaining_size ? AT_XDMAC_MBR_UBC_NDE : 0) | ublen; desc->lld.mbr_cfg = chan_cc; @@ -894,12 +1108,8 @@ at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc, desc->lld.mbr_cfg); /* Chain lld. */ - if (prev) { - prev->lld.mbr_nda = desc->tx_dma_desc.phys; - dev_dbg(chan2dev(chan), - "%s: chain lld: prev=0x%p, mbr_nda=0x%08x\n", - __func__, prev, prev->lld.mbr_nda); - } + if (prev) + at_xdmac_queue_desc(chan, prev, desc); prev = desc; if (!first) @@ -916,6 +1126,255 @@ at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, return &first->tx_dma_desc; } +static struct at_xdmac_desc *at_xdmac_memset_create_desc(struct dma_chan *chan, + struct at_xdmac_chan *atchan, + dma_addr_t dst_addr, + size_t len, + int value) +{ + struct at_xdmac_desc *desc; + unsigned long flags; + size_t ublen; + u32 dwidth; + /* + * WARNING: The channel configuration is set here since there is no + * dmaengine_slave_config call in this case. Moreover we don't know the + * direction, it involves we can't dynamically set the source and dest + * interface so we have to use the same one. Only interface 0 allows EBI + * access. Hopefully we can access DDR through both ports (at least on + * SAMA5D4x), so we can use the same interface for source and dest, + * that solves the fact we don't know the direction. 
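
(Illustrative aside, not part of the patch: once the controller advertises DMA_MEMSET, a client can fill a DMA-mapped buffer through the device_prep_dma_memset hook registered below. A minimal sketch, assuming the caller already owns a suitable channel and a mapped buffer; the helper name dma_fill_zero is invented for illustration.)

	#include <linux/dmaengine.h>

	static int dma_fill_zero(struct dma_chan *chan, dma_addr_t buf, size_t len)
	{
		struct dma_async_tx_descriptor *tx;
		dma_cookie_t cookie;

		if (!dma_has_cap(DMA_MEMSET, chan->device->cap_mask))
			return -ENODEV;

		/* value is the fill pattern; 0 clears the buffer */
		tx = chan->device->device_prep_dma_memset(chan, buf, 0, len,
							   DMA_CTRL_ACK |
							   DMA_PREP_INTERRUPT);
		if (!tx)
			return -EINVAL;

		cookie = dmaengine_submit(tx);
		dma_async_issue_pending(chan);

		return dma_submit_error(cookie);
	}
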
+ */ + u32 chan_cc = AT_XDMAC_CC_DAM_UBS_AM + | AT_XDMAC_CC_SAM_INCREMENTED_AM + | AT_XDMAC_CC_DIF(0) + | AT_XDMAC_CC_SIF(0) + | AT_XDMAC_CC_MBSIZE_SIXTEEN + | AT_XDMAC_CC_MEMSET_HW_MODE + | AT_XDMAC_CC_TYPE_MEM_TRAN; + + dwidth = at_xdmac_align_width(chan, dst_addr); + + if (len >= (AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth)) { + dev_err(chan2dev(chan), + "%s: Transfer too large, aborting...\n", + __func__); + return NULL; + } + + spin_lock_irqsave(&atchan->lock, flags); + desc = at_xdmac_get_desc(atchan); + spin_unlock_irqrestore(&atchan->lock, flags); + if (!desc) { + dev_err(chan2dev(chan), "can't get descriptor\n"); + return NULL; + } + + chan_cc |= AT_XDMAC_CC_DWIDTH(dwidth); + + ublen = len >> dwidth; + + desc->lld.mbr_da = dst_addr; + desc->lld.mbr_ds = value; + desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV3 + | AT_XDMAC_MBR_UBC_NDEN + | AT_XDMAC_MBR_UBC_NSEN + | ublen; + desc->lld.mbr_cfg = chan_cc; + + dev_dbg(chan2dev(chan), + "%s: lld: mbr_da=%pad, mbr_ds=%pad, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n", + __func__, &desc->lld.mbr_da, &desc->lld.mbr_ds, desc->lld.mbr_ubc, + desc->lld.mbr_cfg); + + return desc; +} + +struct dma_async_tx_descriptor * +at_xdmac_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value, + size_t len, unsigned long flags) +{ + struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); + struct at_xdmac_desc *desc; + + dev_dbg(chan2dev(chan), "%s: dest=%pad, len=%d, pattern=0x%x, flags=0x%lx\n", + __func__, &dest, len, value, flags); + + if (unlikely(!len)) + return NULL; + + desc = at_xdmac_memset_create_desc(chan, atchan, dest, len, value); + list_add_tail(&desc->desc_node, &desc->descs_list); + + desc->tx_dma_desc.cookie = -EBUSY; + desc->tx_dma_desc.flags = flags; + desc->xfer_size = len; + + return &desc->tx_dma_desc; +} + +static struct dma_async_tx_descriptor * +at_xdmac_prep_dma_memset_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, int value, + unsigned long flags) +{ + struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); + struct at_xdmac_desc *desc, *pdesc = NULL, + *ppdesc = NULL, *first = NULL; + struct scatterlist *sg, *psg = NULL, *ppsg = NULL; + size_t stride = 0, pstride = 0, len = 0; + int i; + + if (!sgl) + return NULL; + + dev_dbg(chan2dev(chan), "%s: sg_len=%d, value=0x%x, flags=0x%lx\n", + __func__, sg_len, value, flags); + + /* Prepare descriptors. */ + for_each_sg(sgl, sg, sg_len, i) { + dev_dbg(chan2dev(chan), "%s: dest=%pad, len=%d, pattern=0x%x, flags=0x%lx\n", + __func__, &sg_dma_address(sg), sg_dma_len(sg), + value, flags); + desc = at_xdmac_memset_create_desc(chan, atchan, + sg_dma_address(sg), + sg_dma_len(sg), + value); + if (!desc && first) + list_splice_init(&first->descs_list, + &atchan->free_descs_list); + + if (!first) + first = desc; + + /* Update our strides */ + pstride = stride; + if (psg) + stride = sg_dma_address(sg) - + (sg_dma_address(psg) + sg_dma_len(psg)); + + /* + * The scatterlist API gives us only the address and + * length of each elements. + * + * Unfortunately, we don't have the stride, which we + * will need to compute. + * + * That make us end up in a situation like this one: + * len stride len stride len + * +-------+ +-------+ +-------+ + * | N-2 | | N-1 | | N | + * +-------+ +-------+ +-------+ + * + * We need all these three elements (N-2, N-1 and N) + * to actually take the decision on whether we need to + * queue N-1 or reuse N-2. + * + * We will only consider N if it is the last element. 
+ */ + if (ppdesc && pdesc) { + if ((stride == pstride) && + (sg_dma_len(ppsg) == sg_dma_len(psg))) { + dev_dbg(chan2dev(chan), + "%s: desc 0x%p can be merged with desc 0x%p\n", + __func__, pdesc, ppdesc); + + /* + * Increment the block count of the + * N-2 descriptor + */ + at_xdmac_increment_block_count(chan, ppdesc); + ppdesc->lld.mbr_dus = stride; + + /* + * Put back the N-1 descriptor in the + * free descriptor list + */ + list_add_tail(&pdesc->desc_node, + &atchan->free_descs_list); + + /* + * Make our N-1 descriptor pointer + * point to the N-2 since they were + * actually merged. + */ + pdesc = ppdesc; + + /* + * Rule out the case where we don't have + * pstride computed yet (our second sg + * element) + * + * We also want to catch the case where there + * would be a negative stride, + */ + } else if (pstride || + sg_dma_address(sg) < sg_dma_address(psg)) { + /* + * Queue the N-1 descriptor after the + * N-2 + */ + at_xdmac_queue_desc(chan, ppdesc, pdesc); + + /* + * Add the N-1 descriptor to the list + * of the descriptors used for this + * transfer + */ + list_add_tail(&desc->desc_node, + &first->descs_list); + dev_dbg(chan2dev(chan), + "%s: add desc 0x%p to descs_list 0x%p\n", + __func__, desc, first); + } + } + + /* + * If we are the last element, just see if we have the + * same size than the previous element. + * + * If so, we can merge it with the previous descriptor + * since we don't care about the stride anymore. + */ + if ((i == (sg_len - 1)) && + sg_dma_len(psg) == sg_dma_len(sg)) { + dev_dbg(chan2dev(chan), + "%s: desc 0x%p can be merged with desc 0x%p\n", + __func__, desc, pdesc); + + /* + * Increment the block count of the N-1 + * descriptor + */ + at_xdmac_increment_block_count(chan, pdesc); + pdesc->lld.mbr_dus = stride; + + /* + * Put back the N descriptor in the free + * descriptor list + */ + list_add_tail(&desc->desc_node, + &atchan->free_descs_list); + } + + /* Update our descriptors */ + ppdesc = pdesc; + pdesc = desc; + + /* Update our scatter pointers */ + ppsg = psg; + psg = sg; + + len += sg_dma_len(sg); + } + + first->tx_dma_desc.cookie = -EBUSY; + first->tx_dma_desc.flags = flags; + first->xfer_size = len; + + return &first->tx_dma_desc; +} + static enum dma_status at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie, struct dma_tx_state *txstate) @@ -925,8 +1384,8 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie, struct at_xdmac_desc *desc, *_desc; struct list_head *descs_list; enum dma_status ret; - int residue; - u32 cur_nda, mask, value; + int residue, retry; + u32 cur_nda, check_nda, cur_ubc, mask, value; u8 dwidth = 0; unsigned long flags; @@ -963,7 +1422,42 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie, cpu_relax(); } + /* + * When processing the residue, we need to read two registers but we + * can't do it in an atomic way. AT_XDMAC_CNDA is used to find where + * we stand in the descriptor list and AT_XDMAC_CUBC is used + * to know how many data are remaining for the current descriptor. + * Since the dma channel is not paused to not loose data, between the + * AT_XDMAC_CNDA and AT_XDMAC_CUBC read, we may have change of + * descriptor. + * For that reason, after reading AT_XDMAC_CUBC, we check if we are + * still using the same descriptor by reading a second time + * AT_XDMAC_CNDA. If AT_XDMAC_CNDA has changed, it means we have to + * read again AT_XDMAC_CUBC. + * Memory barriers are used to ensure the read order of the registers. 
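
(Illustrative aside, not part of the patch: the retry loop below exists so that the residue reported through dmaengine_tx_status() stays meaningful without pausing the channel. From the client side it is consumed as in this minimal sketch; the helper name bytes_left is invented for illustration.)

	#include <linux/dmaengine.h>

	static u32 bytes_left(struct dma_chan *chan, dma_cookie_t cookie)
	{
		struct dma_tx_state state;
		enum dma_status status;

		status = dmaengine_tx_status(chan, cookie, &state);
		if (status == DMA_COMPLETE)
			return 0;

		/* residue: bytes still to be transferred for this cookie */
		return state.residue;
	}
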
+ * A max number of retries is set because unlikely it can never ends if + * we are transferring a lot of data with small buffers. + */ cur_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc; + rmb(); + cur_ubc = at_xdmac_chan_read(atchan, AT_XDMAC_CUBC); + for (retry = 0; retry < AT_XDMAC_RESIDUE_MAX_RETRIES; retry++) { + rmb(); + check_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc; + + if (likely(cur_nda == check_nda)) + break; + + cur_nda = check_nda; + rmb(); + cur_ubc = at_xdmac_chan_read(atchan, AT_XDMAC_CUBC); + } + + if (unlikely(retry >= AT_XDMAC_RESIDUE_MAX_RETRIES)) { + ret = DMA_ERROR; + goto spin_unlock; + } + /* * Remove size of all microblocks already transferred and the current * one. Then add the remaining size to transfer of the current @@ -976,7 +1470,7 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie, if ((desc->lld.mbr_nda & 0xfffffffc) == cur_nda) break; } - residue += at_xdmac_chan_read(atchan, AT_XDMAC_CUBC) << dwidth; + residue += cur_ubc << dwidth; dma_set_residue(txstate, residue); @@ -1230,6 +1724,7 @@ static int at_xdmac_device_terminate_all(struct dma_chan *chan) list_for_each_entry_safe(desc, _desc, &atchan->xfers_list, xfer_node) at_xdmac_remove_xfer(atchan, desc); + clear_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status); clear_bit(AT_XDMAC_CHAN_IS_CYCLIC, &atchan->status); spin_unlock_irqrestore(&atchan->lock, flags); @@ -1362,6 +1857,8 @@ static int atmel_xdmac_resume(struct device *dev) atchan = to_at_xdmac_chan(chan); at_xdmac_chan_write(atchan, AT_XDMAC_CC, atchan->save_cc); if (at_xdmac_chan_is_cyclic(atchan)) { + if (at_xdmac_chan_is_paused(atchan)) + at_xdmac_device_resume(chan); at_xdmac_chan_write(atchan, AT_XDMAC_CNDA, atchan->save_cnda); at_xdmac_chan_write(atchan, AT_XDMAC_CNDC, atchan->save_cndc); at_xdmac_chan_write(atchan, AT_XDMAC_CIE, atchan->save_cim); @@ -1446,7 +1943,10 @@ static int at_xdmac_probe(struct platform_device *pdev) } dma_cap_set(DMA_CYCLIC, atxdmac->dma.cap_mask); + dma_cap_set(DMA_INTERLEAVE, atxdmac->dma.cap_mask); dma_cap_set(DMA_MEMCPY, atxdmac->dma.cap_mask); + dma_cap_set(DMA_MEMSET, atxdmac->dma.cap_mask); + dma_cap_set(DMA_MEMSET_SG, atxdmac->dma.cap_mask); dma_cap_set(DMA_SLAVE, atxdmac->dma.cap_mask); /* * Without DMA_PRIVATE the driver is not able to allocate more than @@ -1459,7 +1959,10 @@ static int at_xdmac_probe(struct platform_device *pdev) atxdmac->dma.device_tx_status = at_xdmac_tx_status; atxdmac->dma.device_issue_pending = at_xdmac_issue_pending; atxdmac->dma.device_prep_dma_cyclic = at_xdmac_prep_dma_cyclic; + atxdmac->dma.device_prep_interleaved_dma = at_xdmac_prep_interleaved; atxdmac->dma.device_prep_dma_memcpy = at_xdmac_prep_dma_memcpy; + atxdmac->dma.device_prep_dma_memset = at_xdmac_prep_dma_memset; + atxdmac->dma.device_prep_dma_memset_sg = at_xdmac_prep_dma_memset_sg; atxdmac->dma.device_prep_slave_sg = at_xdmac_prep_slave_sg; atxdmac->dma.device_config = at_xdmac_device_config; atxdmac->dma.device_pause = at_xdmac_device_pause; diff --git a/kernel/drivers/dma/bcm2835-dma.c b/kernel/drivers/dma/bcm2835-dma.c index c92d6a70c..996c4b00d 100644 --- a/kernel/drivers/dma/bcm2835-dma.c +++ b/kernel/drivers/dma/bcm2835-dma.c @@ -31,6 +31,7 @@ */ #include <linux/dmaengine.h> #include <linux/dma-mapping.h> +#include <linux/dmapool.h> #include <linux/err.h> #include <linux/init.h> #include <linux/interrupt.h> @@ -62,6 +63,11 @@ struct bcm2835_dma_cb { uint32_t pad[2]; }; +struct bcm2835_cb_entry { + struct bcm2835_dma_cb *cb; + dma_addr_t paddr; +}; + struct 
bcm2835_chan { struct virt_dma_chan vc; struct list_head node; @@ -72,18 +78,18 @@ struct bcm2835_chan { int ch; struct bcm2835_desc *desc; + struct dma_pool *cb_pool; void __iomem *chan_base; int irq_number; }; struct bcm2835_desc { + struct bcm2835_chan *c; struct virt_dma_desc vd; enum dma_transfer_direction dir; - unsigned int control_block_size; - struct bcm2835_dma_cb *control_block_base; - dma_addr_t control_block_base_phys; + struct bcm2835_cb_entry *cb_list; unsigned int frames; size_t size; @@ -143,10 +149,13 @@ static inline struct bcm2835_desc *to_bcm2835_dma_desc( static void bcm2835_dma_desc_free(struct virt_dma_desc *vd) { struct bcm2835_desc *desc = container_of(vd, struct bcm2835_desc, vd); - dma_free_coherent(desc->vd.tx.chan->device->dev, - desc->control_block_size, - desc->control_block_base, - desc->control_block_base_phys); + int i; + + for (i = 0; i < desc->frames; i++) + dma_pool_free(desc->c->cb_pool, desc->cb_list[i].cb, + desc->cb_list[i].paddr); + + kfree(desc->cb_list); kfree(desc); } @@ -199,7 +208,7 @@ static void bcm2835_dma_start_desc(struct bcm2835_chan *c) c->desc = d = to_bcm2835_dma_desc(&vd->tx); - writel(d->control_block_base_phys, c->chan_base + BCM2835_DMA_ADDR); + writel(d->cb_list[0].paddr, c->chan_base + BCM2835_DMA_ADDR); writel(BCM2835_DMA_ACTIVE, c->chan_base + BCM2835_DMA_CS); } @@ -232,9 +241,16 @@ static irqreturn_t bcm2835_dma_callback(int irq, void *data) static int bcm2835_dma_alloc_chan_resources(struct dma_chan *chan) { struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); + struct device *dev = c->vc.chan.device->dev; + + dev_dbg(dev, "Allocating DMA channel %d\n", c->ch); - dev_dbg(c->vc.chan.device->dev, - "Allocating DMA channel %d\n", c->ch); + c->cb_pool = dma_pool_create(dev_name(dev), dev, + sizeof(struct bcm2835_dma_cb), 0, 0); + if (!c->cb_pool) { + dev_err(dev, "unable to allocate descriptor pool\n"); + return -ENOMEM; + } return request_irq(c->irq_number, bcm2835_dma_callback, 0, "DMA IRQ", c); @@ -246,6 +262,7 @@ static void bcm2835_dma_free_chan_resources(struct dma_chan *chan) vchan_free_chan_resources(&c->vc); free_irq(c->irq_number, c); + dma_pool_destroy(c->cb_pool); dev_dbg(c->vc.chan.device->dev, "Freeing DMA channel %u\n", c->ch); } @@ -261,8 +278,7 @@ static size_t bcm2835_dma_desc_size_pos(struct bcm2835_desc *d, dma_addr_t addr) size_t size; for (size = i = 0; i < d->frames; i++) { - struct bcm2835_dma_cb *control_block = - &d->control_block_base[i]; + struct bcm2835_dma_cb *control_block = d->cb_list[i].cb; size_t this_size = control_block->length; dma_addr_t dma; @@ -343,6 +359,7 @@ static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic( dma_addr_t dev_addr; unsigned int es, sync_type; unsigned int frame; + int i; /* Grab configuration */ if (!is_slave_direction(direction)) { @@ -374,27 +391,31 @@ static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic( if (!d) return NULL; + d->c = c; d->dir = direction; d->frames = buf_len / period_len; - /* Allocate memory for control blocks */ - d->control_block_size = d->frames * sizeof(struct bcm2835_dma_cb); - d->control_block_base = dma_zalloc_coherent(chan->device->dev, - d->control_block_size, &d->control_block_base_phys, - GFP_NOWAIT); - - if (!d->control_block_base) { + d->cb_list = kcalloc(d->frames, sizeof(*d->cb_list), GFP_KERNEL); + if (!d->cb_list) { kfree(d); return NULL; } + /* Allocate memory for control blocks */ + for (i = 0; i < d->frames; i++) { + struct bcm2835_cb_entry *cb_entry = &d->cb_list[i]; + + cb_entry->cb = 
dma_pool_zalloc(c->cb_pool, GFP_ATOMIC, + &cb_entry->paddr); + if (!cb_entry->cb) + goto error_cb; + } /* * Iterate over all frames, create a control block * for each frame and link them together. */ for (frame = 0; frame < d->frames; frame++) { - struct bcm2835_dma_cb *control_block = - &d->control_block_base[frame]; + struct bcm2835_dma_cb *control_block = d->cb_list[frame].cb; /* Setup adresses */ if (d->dir == DMA_DEV_TO_MEM) { @@ -428,12 +449,21 @@ static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic( * This DMA engine driver currently only supports cyclic DMA. * Therefore, wrap around at number of frames. */ - control_block->next = d->control_block_base_phys + - sizeof(struct bcm2835_dma_cb) - * ((frame + 1) % d->frames); + control_block->next = d->cb_list[((frame + 1) % d->frames)].paddr; } return vchan_tx_prep(&c->vc, &d->vd, flags); +error_cb: + i--; + for (; i >= 0; i--) { + struct bcm2835_cb_entry *cb_entry = &d->cb_list[i]; + + dma_pool_free(c->cb_pool, cb_entry->cb, cb_entry->paddr); + } + + kfree(d->cb_list); + kfree(d); + return NULL; } static int bcm2835_dma_slave_config(struct dma_chan *chan, diff --git a/kernel/drivers/dma/coh901318.c b/kernel/drivers/dma/coh901318.c index fd22dd369..c340ca9bd 100644 --- a/kernel/drivers/dma/coh901318.c +++ b/kernel/drivers/dma/coh901318.c @@ -2730,7 +2730,7 @@ static int __init coh901318_probe(struct platform_device *pdev) * This controller can only access address at even 32bit boundaries, * i.e. 2^2 */ - base->dma_memcpy.copy_align = 2; + base->dma_memcpy.copy_align = DMAENGINE_ALIGN_4_BYTES; err = dma_async_device_register(&base->dma_memcpy); if (err) diff --git a/kernel/drivers/dma/dma-axi-dmac.c b/kernel/drivers/dma/dma-axi-dmac.c new file mode 100644 index 000000000..5b2395e7e --- /dev/null +++ b/kernel/drivers/dma/dma-axi-dmac.c @@ -0,0 +1,691 @@ +/* + * Driver for the Analog Devices AXI-DMAC core + * + * Copyright 2013-2015 Analog Devices Inc. + * Author: Lars-Peter Clausen <lars@metafoo.de> + * + * Licensed under the GPL-2. + */ + +#include <linux/clk.h> +#include <linux/device.h> +#include <linux/dma-mapping.h> +#include <linux/dmaengine.h> +#include <linux/err.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_dma.h> +#include <linux/platform_device.h> +#include <linux/slab.h> + +#include <dt-bindings/dma/axi-dmac.h> + +#include "dmaengine.h" +#include "virt-dma.h" + +/* + * The AXI-DMAC is a soft IP core that is used in FPGA designs. The core has + * various instantiation parameters which decided the exact feature set support + * by the core. + * + * Each channel of the core has a source interface and a destination interface. + * The number of channels and the type of the channel interfaces is selected at + * configuration time. A interface can either be a connected to a central memory + * interconnect, which allows access to system memory, or it can be connected to + * a dedicated bus which is directly connected to a data port on a peripheral. + * Given that those are configuration options of the core that are selected when + * it is instantiated this means that they can not be changed by software at + * runtime. By extension this means that each channel is uni-directional. It can + * either be device to memory or memory to device, but not both. 
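
(Illustrative aside, not part of the patch: a device-to-memory channel of this core can be driven as a cyclic ring buffer, for example to stream samples from a converter into memory. A minimal client sketch, assuming the channel, the buffer and its DMA address already exist; the helper name start_capture is invented for illustration.)

	#include <linux/dmaengine.h>

	static int start_capture(struct dma_chan *chan, dma_addr_t buf,
				 size_t buf_len, size_t period_len,
				 dma_async_tx_callback period_cb, void *cb_arg)
	{
		struct dma_async_tx_descriptor *tx;

		/* The device side has no address to program, only the memory side. */
		tx = dmaengine_prep_dma_cyclic(chan, buf, buf_len, period_len,
					       DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT);
		if (!tx)
			return -EINVAL;

		tx->callback = period_cb;	/* called once per completed period */
		tx->callback_param = cb_arg;

		dmaengine_submit(tx);
		dma_async_issue_pending(chan);

		return 0;
	}

Leaving the callback NULL lets this driver switch the core into hardware cyclic mode further down, so no end-of-transfer interrupts are taken at all.
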
Also since the + * device side is a dedicated data bus only connected to a single peripheral + * there is no address than can or needs to be configured for the device side. + */ + +#define AXI_DMAC_REG_IRQ_MASK 0x80 +#define AXI_DMAC_REG_IRQ_PENDING 0x84 +#define AXI_DMAC_REG_IRQ_SOURCE 0x88 + +#define AXI_DMAC_REG_CTRL 0x400 +#define AXI_DMAC_REG_TRANSFER_ID 0x404 +#define AXI_DMAC_REG_START_TRANSFER 0x408 +#define AXI_DMAC_REG_FLAGS 0x40c +#define AXI_DMAC_REG_DEST_ADDRESS 0x410 +#define AXI_DMAC_REG_SRC_ADDRESS 0x414 +#define AXI_DMAC_REG_X_LENGTH 0x418 +#define AXI_DMAC_REG_Y_LENGTH 0x41c +#define AXI_DMAC_REG_DEST_STRIDE 0x420 +#define AXI_DMAC_REG_SRC_STRIDE 0x424 +#define AXI_DMAC_REG_TRANSFER_DONE 0x428 +#define AXI_DMAC_REG_ACTIVE_TRANSFER_ID 0x42c +#define AXI_DMAC_REG_STATUS 0x430 +#define AXI_DMAC_REG_CURRENT_SRC_ADDR 0x434 +#define AXI_DMAC_REG_CURRENT_DEST_ADDR 0x438 + +#define AXI_DMAC_CTRL_ENABLE BIT(0) +#define AXI_DMAC_CTRL_PAUSE BIT(1) + +#define AXI_DMAC_IRQ_SOT BIT(0) +#define AXI_DMAC_IRQ_EOT BIT(1) + +#define AXI_DMAC_FLAG_CYCLIC BIT(0) + +struct axi_dmac_sg { + dma_addr_t src_addr; + dma_addr_t dest_addr; + unsigned int x_len; + unsigned int y_len; + unsigned int dest_stride; + unsigned int src_stride; + unsigned int id; +}; + +struct axi_dmac_desc { + struct virt_dma_desc vdesc; + bool cyclic; + + unsigned int num_submitted; + unsigned int num_completed; + unsigned int num_sgs; + struct axi_dmac_sg sg[]; +}; + +struct axi_dmac_chan { + struct virt_dma_chan vchan; + + struct axi_dmac_desc *next_desc; + struct list_head active_descs; + enum dma_transfer_direction direction; + + unsigned int src_width; + unsigned int dest_width; + unsigned int src_type; + unsigned int dest_type; + + unsigned int max_length; + unsigned int align_mask; + + bool hw_cyclic; + bool hw_2d; +}; + +struct axi_dmac { + void __iomem *base; + int irq; + + struct clk *clk; + + struct dma_device dma_dev; + struct axi_dmac_chan chan; + + struct device_dma_parameters dma_parms; +}; + +static struct axi_dmac *chan_to_axi_dmac(struct axi_dmac_chan *chan) +{ + return container_of(chan->vchan.chan.device, struct axi_dmac, + dma_dev); +} + +static struct axi_dmac_chan *to_axi_dmac_chan(struct dma_chan *c) +{ + return container_of(c, struct axi_dmac_chan, vchan.chan); +} + +static struct axi_dmac_desc *to_axi_dmac_desc(struct virt_dma_desc *vdesc) +{ + return container_of(vdesc, struct axi_dmac_desc, vdesc); +} + +static void axi_dmac_write(struct axi_dmac *axi_dmac, unsigned int reg, + unsigned int val) +{ + writel(val, axi_dmac->base + reg); +} + +static int axi_dmac_read(struct axi_dmac *axi_dmac, unsigned int reg) +{ + return readl(axi_dmac->base + reg); +} + +static int axi_dmac_src_is_mem(struct axi_dmac_chan *chan) +{ + return chan->src_type == AXI_DMAC_BUS_TYPE_AXI_MM; +} + +static int axi_dmac_dest_is_mem(struct axi_dmac_chan *chan) +{ + return chan->dest_type == AXI_DMAC_BUS_TYPE_AXI_MM; +} + +static bool axi_dmac_check_len(struct axi_dmac_chan *chan, unsigned int len) +{ + if (len == 0 || len > chan->max_length) + return false; + if ((len & chan->align_mask) != 0) /* Not aligned */ + return false; + return true; +} + +static bool axi_dmac_check_addr(struct axi_dmac_chan *chan, dma_addr_t addr) +{ + if ((addr & chan->align_mask) != 0) /* Not aligned */ + return false; + return true; +} + +static void axi_dmac_start_transfer(struct axi_dmac_chan *chan) +{ + struct axi_dmac *dmac = chan_to_axi_dmac(chan); + struct virt_dma_desc *vdesc; + struct axi_dmac_desc *desc; + struct axi_dmac_sg *sg; + 
unsigned int flags = 0; + unsigned int val; + + val = axi_dmac_read(dmac, AXI_DMAC_REG_START_TRANSFER); + if (val) /* Queue is full, wait for the next SOT IRQ */ + return; + + desc = chan->next_desc; + + if (!desc) { + vdesc = vchan_next_desc(&chan->vchan); + if (!vdesc) + return; + list_move_tail(&vdesc->node, &chan->active_descs); + desc = to_axi_dmac_desc(vdesc); + } + sg = &desc->sg[desc->num_submitted]; + + desc->num_submitted++; + if (desc->num_submitted == desc->num_sgs) + chan->next_desc = NULL; + else + chan->next_desc = desc; + + sg->id = axi_dmac_read(dmac, AXI_DMAC_REG_TRANSFER_ID); + + if (axi_dmac_dest_is_mem(chan)) { + axi_dmac_write(dmac, AXI_DMAC_REG_DEST_ADDRESS, sg->dest_addr); + axi_dmac_write(dmac, AXI_DMAC_REG_DEST_STRIDE, sg->dest_stride); + } + + if (axi_dmac_src_is_mem(chan)) { + axi_dmac_write(dmac, AXI_DMAC_REG_SRC_ADDRESS, sg->src_addr); + axi_dmac_write(dmac, AXI_DMAC_REG_SRC_STRIDE, sg->src_stride); + } + + /* + * If the hardware supports cyclic transfers and there is no callback to + * call, enable hw cyclic mode to avoid unnecessary interrupts. + */ + if (chan->hw_cyclic && desc->cyclic && !desc->vdesc.tx.callback) + flags |= AXI_DMAC_FLAG_CYCLIC; + + axi_dmac_write(dmac, AXI_DMAC_REG_X_LENGTH, sg->x_len - 1); + axi_dmac_write(dmac, AXI_DMAC_REG_Y_LENGTH, sg->y_len - 1); + axi_dmac_write(dmac, AXI_DMAC_REG_FLAGS, flags); + axi_dmac_write(dmac, AXI_DMAC_REG_START_TRANSFER, 1); +} + +static struct axi_dmac_desc *axi_dmac_active_desc(struct axi_dmac_chan *chan) +{ + return list_first_entry_or_null(&chan->active_descs, + struct axi_dmac_desc, vdesc.node); +} + +static void axi_dmac_transfer_done(struct axi_dmac_chan *chan, + unsigned int completed_transfers) +{ + struct axi_dmac_desc *active; + struct axi_dmac_sg *sg; + + active = axi_dmac_active_desc(chan); + if (!active) + return; + + if (active->cyclic) { + vchan_cyclic_callback(&active->vdesc); + } else { + do { + sg = &active->sg[active->num_completed]; + if (!(BIT(sg->id) & completed_transfers)) + break; + active->num_completed++; + if (active->num_completed == active->num_sgs) { + list_del(&active->vdesc.node); + vchan_cookie_complete(&active->vdesc); + active = axi_dmac_active_desc(chan); + } + } while (active); + } +} + +static irqreturn_t axi_dmac_interrupt_handler(int irq, void *devid) +{ + struct axi_dmac *dmac = devid; + unsigned int pending; + + pending = axi_dmac_read(dmac, AXI_DMAC_REG_IRQ_PENDING); + axi_dmac_write(dmac, AXI_DMAC_REG_IRQ_PENDING, pending); + + spin_lock(&dmac->chan.vchan.lock); + /* One or more transfers have finished */ + if (pending & AXI_DMAC_IRQ_EOT) { + unsigned int completed; + + completed = axi_dmac_read(dmac, AXI_DMAC_REG_TRANSFER_DONE); + axi_dmac_transfer_done(&dmac->chan, completed); + } + /* Space has become available in the descriptor queue */ + if (pending & AXI_DMAC_IRQ_SOT) + axi_dmac_start_transfer(&dmac->chan); + spin_unlock(&dmac->chan.vchan.lock); + + return IRQ_HANDLED; +} + +static int axi_dmac_terminate_all(struct dma_chan *c) +{ + struct axi_dmac_chan *chan = to_axi_dmac_chan(c); + struct axi_dmac *dmac = chan_to_axi_dmac(chan); + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&chan->vchan.lock, flags); + axi_dmac_write(dmac, AXI_DMAC_REG_CTRL, 0); + chan->next_desc = NULL; + vchan_get_all_descriptors(&chan->vchan, &head); + list_splice_tail_init(&chan->active_descs, &head); + spin_unlock_irqrestore(&chan->vchan.lock, flags); + + vchan_dma_desc_free_list(&chan->vchan, &head); + + return 0; +} + +static void axi_dmac_issue_pending(struct 
dma_chan *c) +{ + struct axi_dmac_chan *chan = to_axi_dmac_chan(c); + struct axi_dmac *dmac = chan_to_axi_dmac(chan); + unsigned long flags; + + axi_dmac_write(dmac, AXI_DMAC_REG_CTRL, AXI_DMAC_CTRL_ENABLE); + + spin_lock_irqsave(&chan->vchan.lock, flags); + if (vchan_issue_pending(&chan->vchan)) + axi_dmac_start_transfer(chan); + spin_unlock_irqrestore(&chan->vchan.lock, flags); +} + +static struct axi_dmac_desc *axi_dmac_alloc_desc(unsigned int num_sgs) +{ + struct axi_dmac_desc *desc; + + desc = kzalloc(sizeof(struct axi_dmac_desc) + + sizeof(struct axi_dmac_sg) * num_sgs, GFP_NOWAIT); + if (!desc) + return NULL; + + desc->num_sgs = num_sgs; + + return desc; +} + +static struct dma_async_tx_descriptor *axi_dmac_prep_slave_sg( + struct dma_chan *c, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct axi_dmac_chan *chan = to_axi_dmac_chan(c); + struct axi_dmac_desc *desc; + struct scatterlist *sg; + unsigned int i; + + if (direction != chan->direction) + return NULL; + + desc = axi_dmac_alloc_desc(sg_len); + if (!desc) + return NULL; + + for_each_sg(sgl, sg, sg_len, i) { + if (!axi_dmac_check_addr(chan, sg_dma_address(sg)) || + !axi_dmac_check_len(chan, sg_dma_len(sg))) { + kfree(desc); + return NULL; + } + + if (direction == DMA_DEV_TO_MEM) + desc->sg[i].dest_addr = sg_dma_address(sg); + else + desc->sg[i].src_addr = sg_dma_address(sg); + desc->sg[i].x_len = sg_dma_len(sg); + desc->sg[i].y_len = 1; + } + + desc->cyclic = false; + + return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags); +} + +static struct dma_async_tx_descriptor *axi_dmac_prep_dma_cyclic( + struct dma_chan *c, dma_addr_t buf_addr, size_t buf_len, + size_t period_len, enum dma_transfer_direction direction, + unsigned long flags) +{ + struct axi_dmac_chan *chan = to_axi_dmac_chan(c); + struct axi_dmac_desc *desc; + unsigned int num_periods, i; + + if (direction != chan->direction) + return NULL; + + if (!axi_dmac_check_len(chan, buf_len) || + !axi_dmac_check_addr(chan, buf_addr)) + return NULL; + + if (period_len == 0 || buf_len % period_len) + return NULL; + + num_periods = buf_len / period_len; + + desc = axi_dmac_alloc_desc(num_periods); + if (!desc) + return NULL; + + for (i = 0; i < num_periods; i++) { + if (direction == DMA_DEV_TO_MEM) + desc->sg[i].dest_addr = buf_addr; + else + desc->sg[i].src_addr = buf_addr; + desc->sg[i].x_len = period_len; + desc->sg[i].y_len = 1; + buf_addr += period_len; + } + + desc->cyclic = true; + + return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags); +} + +static struct dma_async_tx_descriptor *axi_dmac_prep_interleaved( + struct dma_chan *c, struct dma_interleaved_template *xt, + unsigned long flags) +{ + struct axi_dmac_chan *chan = to_axi_dmac_chan(c); + struct axi_dmac_desc *desc; + size_t dst_icg, src_icg; + + if (xt->frame_size != 1) + return NULL; + + if (xt->dir != chan->direction) + return NULL; + + if (axi_dmac_src_is_mem(chan)) { + if (!xt->src_inc || !axi_dmac_check_addr(chan, xt->src_start)) + return NULL; + } + + if (axi_dmac_dest_is_mem(chan)) { + if (!xt->dst_inc || !axi_dmac_check_addr(chan, xt->dst_start)) + return NULL; + } + + dst_icg = dmaengine_get_dst_icg(xt, &xt->sgl[0]); + src_icg = dmaengine_get_src_icg(xt, &xt->sgl[0]); + + if (chan->hw_2d) { + if (!axi_dmac_check_len(chan, xt->sgl[0].size) || + !axi_dmac_check_len(chan, xt->numf)) + return NULL; + if (xt->sgl[0].size + dst_icg > chan->max_length || + xt->sgl[0].size + src_icg > chan->max_length) + return NULL; + } else { 
+ if (dst_icg != 0 || src_icg != 0) + return NULL; + if (chan->max_length / xt->sgl[0].size < xt->numf) + return NULL; + if (!axi_dmac_check_len(chan, xt->sgl[0].size * xt->numf)) + return NULL; + } + + desc = axi_dmac_alloc_desc(1); + if (!desc) + return NULL; + + if (axi_dmac_src_is_mem(chan)) { + desc->sg[0].src_addr = xt->src_start; + desc->sg[0].src_stride = xt->sgl[0].size + src_icg; + } + + if (axi_dmac_dest_is_mem(chan)) { + desc->sg[0].dest_addr = xt->dst_start; + desc->sg[0].dest_stride = xt->sgl[0].size + dst_icg; + } + + if (chan->hw_2d) { + desc->sg[0].x_len = xt->sgl[0].size; + desc->sg[0].y_len = xt->numf; + } else { + desc->sg[0].x_len = xt->sgl[0].size * xt->numf; + desc->sg[0].y_len = 1; + } + + return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags); +} + +static void axi_dmac_free_chan_resources(struct dma_chan *c) +{ + vchan_free_chan_resources(to_virt_chan(c)); +} + +static void axi_dmac_desc_free(struct virt_dma_desc *vdesc) +{ + kfree(container_of(vdesc, struct axi_dmac_desc, vdesc)); +} + +/* + * The configuration stored in the devicetree matches the configuration + * parameters of the peripheral instance and allows the driver to know which + * features are implemented and how it should behave. + */ +static int axi_dmac_parse_chan_dt(struct device_node *of_chan, + struct axi_dmac_chan *chan) +{ + u32 val; + int ret; + + ret = of_property_read_u32(of_chan, "reg", &val); + if (ret) + return ret; + + /* We only support 1 channel for now */ + if (val != 0) + return -EINVAL; + + ret = of_property_read_u32(of_chan, "adi,source-bus-type", &val); + if (ret) + return ret; + if (val > AXI_DMAC_BUS_TYPE_FIFO) + return -EINVAL; + chan->src_type = val; + + ret = of_property_read_u32(of_chan, "adi,destination-bus-type", &val); + if (ret) + return ret; + if (val > AXI_DMAC_BUS_TYPE_FIFO) + return -EINVAL; + chan->dest_type = val; + + ret = of_property_read_u32(of_chan, "adi,source-bus-width", &val); + if (ret) + return ret; + chan->src_width = val / 8; + + ret = of_property_read_u32(of_chan, "adi,destination-bus-width", &val); + if (ret) + return ret; + chan->dest_width = val / 8; + + ret = of_property_read_u32(of_chan, "adi,length-width", &val); + if (ret) + return ret; + + if (val >= 32) + chan->max_length = UINT_MAX; + else + chan->max_length = (1ULL << val) - 1; + + chan->align_mask = max(chan->dest_width, chan->src_width) - 1; + + if (axi_dmac_dest_is_mem(chan) && axi_dmac_src_is_mem(chan)) + chan->direction = DMA_MEM_TO_MEM; + else if (!axi_dmac_dest_is_mem(chan) && axi_dmac_src_is_mem(chan)) + chan->direction = DMA_MEM_TO_DEV; + else if (axi_dmac_dest_is_mem(chan) && !axi_dmac_src_is_mem(chan)) + chan->direction = DMA_DEV_TO_MEM; + else + chan->direction = DMA_DEV_TO_DEV; + + chan->hw_cyclic = of_property_read_bool(of_chan, "adi,cyclic"); + chan->hw_2d = of_property_read_bool(of_chan, "adi,2d"); + + return 0; +} + +static int axi_dmac_probe(struct platform_device *pdev) +{ + struct device_node *of_channels, *of_chan; + struct dma_device *dma_dev; + struct axi_dmac *dmac; + struct resource *res; + int ret; + + dmac = devm_kzalloc(&pdev->dev, sizeof(*dmac), GFP_KERNEL); + if (!dmac) + return -ENOMEM; + + dmac->irq = platform_get_irq(pdev, 0); + if (dmac->irq <= 0) + return -EINVAL; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + dmac->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(dmac->base)) + return PTR_ERR(dmac->base); + + dmac->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(dmac->clk)) + return PTR_ERR(dmac->clk); + + 
INIT_LIST_HEAD(&dmac->chan.active_descs); + + of_channels = of_get_child_by_name(pdev->dev.of_node, "adi,channels"); + if (of_channels == NULL) + return -ENODEV; + + for_each_child_of_node(of_channels, of_chan) { + ret = axi_dmac_parse_chan_dt(of_chan, &dmac->chan); + if (ret) { + of_node_put(of_chan); + of_node_put(of_channels); + return -EINVAL; + } + } + of_node_put(of_channels); + + pdev->dev.dma_parms = &dmac->dma_parms; + dma_set_max_seg_size(&pdev->dev, dmac->chan.max_length); + + dma_dev = &dmac->dma_dev; + dma_cap_set(DMA_SLAVE, dma_dev->cap_mask); + dma_cap_set(DMA_CYCLIC, dma_dev->cap_mask); + dma_dev->device_free_chan_resources = axi_dmac_free_chan_resources; + dma_dev->device_tx_status = dma_cookie_status; + dma_dev->device_issue_pending = axi_dmac_issue_pending; + dma_dev->device_prep_slave_sg = axi_dmac_prep_slave_sg; + dma_dev->device_prep_dma_cyclic = axi_dmac_prep_dma_cyclic; + dma_dev->device_prep_interleaved_dma = axi_dmac_prep_interleaved; + dma_dev->device_terminate_all = axi_dmac_terminate_all; + dma_dev->dev = &pdev->dev; + dma_dev->chancnt = 1; + dma_dev->src_addr_widths = BIT(dmac->chan.src_width); + dma_dev->dst_addr_widths = BIT(dmac->chan.dest_width); + dma_dev->directions = BIT(dmac->chan.direction); + dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR; + INIT_LIST_HEAD(&dma_dev->channels); + + dmac->chan.vchan.desc_free = axi_dmac_desc_free; + vchan_init(&dmac->chan.vchan, dma_dev); + + ret = clk_prepare_enable(dmac->clk); + if (ret < 0) + return ret; + + axi_dmac_write(dmac, AXI_DMAC_REG_IRQ_MASK, 0x00); + + ret = dma_async_device_register(dma_dev); + if (ret) + goto err_clk_disable; + + ret = of_dma_controller_register(pdev->dev.of_node, + of_dma_xlate_by_chan_id, dma_dev); + if (ret) + goto err_unregister_device; + + ret = request_irq(dmac->irq, axi_dmac_interrupt_handler, 0, + dev_name(&pdev->dev), dmac); + if (ret) + goto err_unregister_of; + + platform_set_drvdata(pdev, dmac); + + return 0; + +err_unregister_of: + of_dma_controller_free(pdev->dev.of_node); +err_unregister_device: + dma_async_device_unregister(&dmac->dma_dev); +err_clk_disable: + clk_disable_unprepare(dmac->clk); + + return ret; +} + +static int axi_dmac_remove(struct platform_device *pdev) +{ + struct axi_dmac *dmac = platform_get_drvdata(pdev); + + of_dma_controller_free(pdev->dev.of_node); + free_irq(dmac->irq, dmac); + tasklet_kill(&dmac->chan.vchan.task); + dma_async_device_unregister(&dmac->dma_dev); + clk_disable_unprepare(dmac->clk); + + return 0; +} + +static const struct of_device_id axi_dmac_of_match_table[] = { + { .compatible = "adi,axi-dmac-1.00.a" }, + { }, +}; + +static struct platform_driver axi_dmac_driver = { + .driver = { + .name = "dma-axi-dmac", + .of_match_table = axi_dmac_of_match_table, + }, + .probe = axi_dmac_probe, + .remove = axi_dmac_remove, +}; +module_platform_driver(axi_dmac_driver); + +MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>"); +MODULE_DESCRIPTION("DMA controller driver for the AXI-DMAC controller"); +MODULE_LICENSE("GPL v2"); diff --git a/kernel/drivers/dma/dma-jz4780.c b/kernel/drivers/dma/dma-jz4780.c index 26d2f0e09..dade7c47f 100644 --- a/kernel/drivers/dma/dma-jz4780.c +++ b/kernel/drivers/dma/dma-jz4780.c @@ -145,7 +145,8 @@ struct jz4780_dma_dev { struct jz4780_dma_chan chan[JZ_DMA_NR_CHANNELS]; }; -struct jz4780_dma_data { +struct jz4780_dma_filter_data { + struct device_node *of_node; uint32_t transfer_type; int channel; }; @@ -214,11 +215,25 @@ static void jz4780_dma_desc_free(struct virt_dma_desc *vdesc) 
kfree(desc); } -static uint32_t jz4780_dma_transfer_size(unsigned long val, int *ord) +static uint32_t jz4780_dma_transfer_size(unsigned long val, uint32_t *shift) { - *ord = ffs(val) - 1; + int ord = ffs(val) - 1; - switch (*ord) { + /* + * 8 byte transfer sizes unsupported so fall back on 4. If it's larger + * than the maximum, just limit it. It is perfectly safe to fall back + * in this way since we won't exceed the maximum burst size supported + * by the device, the only effect is reduced efficiency. This is better + * than refusing to perform the request at all. + */ + if (ord == 3) + ord = 2; + else if (ord > 7) + ord = 7; + + *shift = ord; + + switch (ord) { case 0: return JZ_DMA_SIZE_1_BYTE; case 1: @@ -231,20 +246,17 @@ static uint32_t jz4780_dma_transfer_size(unsigned long val, int *ord) return JZ_DMA_SIZE_32_BYTE; case 6: return JZ_DMA_SIZE_64_BYTE; - case 7: - return JZ_DMA_SIZE_128_BYTE; default: - return -EINVAL; + return JZ_DMA_SIZE_128_BYTE; } } -static uint32_t jz4780_dma_setup_hwdesc(struct jz4780_dma_chan *jzchan, +static int jz4780_dma_setup_hwdesc(struct jz4780_dma_chan *jzchan, struct jz4780_dma_hwdesc *desc, dma_addr_t addr, size_t len, enum dma_transfer_direction direction) { struct dma_slave_config *config = &jzchan->config; uint32_t width, maxburst, tsz; - int ord; if (direction == DMA_MEM_TO_DEV) { desc->dcm = JZ_DMA_DCM_SAI; @@ -271,8 +283,8 @@ static uint32_t jz4780_dma_setup_hwdesc(struct jz4780_dma_chan *jzchan, * divisible by the transfer size, and we must not use more than the * maximum burst specified by the user. */ - tsz = jz4780_dma_transfer_size(addr | len | (width * maxburst), &ord); - jzchan->transfer_shift = ord; + tsz = jz4780_dma_transfer_size(addr | len | (width * maxburst), + &jzchan->transfer_shift); switch (width) { case DMA_SLAVE_BUSWIDTH_1_BYTE: @@ -289,12 +301,14 @@ static uint32_t jz4780_dma_setup_hwdesc(struct jz4780_dma_chan *jzchan, desc->dcm |= width << JZ_DMA_DCM_SP_SHIFT; desc->dcm |= width << JZ_DMA_DCM_DP_SHIFT; - desc->dtc = len >> ord; + desc->dtc = len >> jzchan->transfer_shift; + return 0; } static struct dma_async_tx_descriptor *jz4780_dma_prep_slave_sg( struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len, - enum dma_transfer_direction direction, unsigned long flags) + enum dma_transfer_direction direction, unsigned long flags, + void *context) { struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan); struct jz4780_dma_desc *desc; @@ -307,12 +321,11 @@ static struct dma_async_tx_descriptor *jz4780_dma_prep_slave_sg( for (i = 0; i < sg_len; i++) { err = jz4780_dma_setup_hwdesc(jzchan, &desc->desc[i], - sg_dma_address(&sgl[i]), - sg_dma_len(&sgl[i]), - direction); + sg_dma_address(&sgl[i]), + sg_dma_len(&sgl[i]), + direction); if (err < 0) - return ERR_PTR(err); - + return NULL; desc->desc[i].dcm |= JZ_DMA_DCM_TIE; @@ -354,9 +367,9 @@ static struct dma_async_tx_descriptor *jz4780_dma_prep_dma_cyclic( for (i = 0; i < periods; i++) { err = jz4780_dma_setup_hwdesc(jzchan, &desc->desc[i], buf_addr, - period_len, direction); + period_len, direction); if (err < 0) - return ERR_PTR(err); + return NULL; buf_addr += period_len; @@ -390,15 +403,13 @@ struct dma_async_tx_descriptor *jz4780_dma_prep_dma_memcpy( struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan); struct jz4780_dma_desc *desc; uint32_t tsz; - int ord; desc = jz4780_dma_desc_alloc(jzchan, 1, DMA_MEMCPY); if (!desc) return NULL; - tsz = jz4780_dma_transfer_size(dest | src | len, &ord); - if (tsz < 0) - return ERR_PTR(tsz); + tsz = 
jz4780_dma_transfer_size(dest | src | len, + &jzchan->transfer_shift); desc->desc[0].dsa = src; desc->desc[0].dta = dest; @@ -407,7 +418,7 @@ struct dma_async_tx_descriptor *jz4780_dma_prep_dma_memcpy( tsz << JZ_DMA_DCM_TSZ_SHIFT | JZ_DMA_WIDTH_32_BIT << JZ_DMA_DCM_SP_SHIFT | JZ_DMA_WIDTH_32_BIT << JZ_DMA_DCM_DP_SHIFT; - desc->desc[0].dtc = len >> ord; + desc->desc[0].dtc = len >> jzchan->transfer_shift; return vchan_tx_prep(&jzchan->vchan, &desc->vdesc, flags); } @@ -484,8 +495,9 @@ static void jz4780_dma_issue_pending(struct dma_chan *chan) spin_unlock_irqrestore(&jzchan->vchan.lock, flags); } -static int jz4780_dma_terminate_all(struct jz4780_dma_chan *jzchan) +static int jz4780_dma_terminate_all(struct dma_chan *chan) { + struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan); struct jz4780_dma_dev *jzdma = jz4780_dma_chan_parent(jzchan); unsigned long flags; LIST_HEAD(head); @@ -507,9 +519,11 @@ static int jz4780_dma_terminate_all(struct jz4780_dma_chan *jzchan) return 0; } -static int jz4780_dma_slave_config(struct jz4780_dma_chan *jzchan, - const struct dma_slave_config *config) +static int jz4780_dma_config(struct dma_chan *chan, + struct dma_slave_config *config) { + struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan); + if ((config->src_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES) || (config->dst_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES)) return -EINVAL; @@ -567,8 +581,8 @@ static enum dma_status jz4780_dma_tx_status(struct dma_chan *chan, txstate->residue = 0; if (vdesc && jzchan->desc && vdesc == &jzchan->desc->vdesc - && jzchan->desc->status & (JZ_DMA_DCS_AR | JZ_DMA_DCS_HLT)) - status = DMA_ERROR; + && jzchan->desc->status & (JZ_DMA_DCS_AR | JZ_DMA_DCS_HLT)) + status = DMA_ERROR; spin_unlock_irqrestore(&jzchan->vchan.lock, flags); return status; @@ -671,7 +685,10 @@ static bool jz4780_dma_filter_fn(struct dma_chan *chan, void *param) { struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan); struct jz4780_dma_dev *jzdma = jz4780_dma_chan_parent(jzchan); - struct jz4780_dma_data *data = param; + struct jz4780_dma_filter_data *data = param; + + if (jzdma->dma_device.dev->of_node != data->of_node) + return false; if (data->channel > -1) { if (data->channel != jzchan->id) @@ -690,11 +707,12 @@ static struct dma_chan *jz4780_of_dma_xlate(struct of_phandle_args *dma_spec, { struct jz4780_dma_dev *jzdma = ofdma->of_dma_data; dma_cap_mask_t mask = jzdma->dma_device.cap_mask; - struct jz4780_dma_data data; + struct jz4780_dma_filter_data data; if (dma_spec->args_count != 2) return NULL; + data.of_node = ofdma->of_node; data.transfer_type = dma_spec->args[0]; data.channel = dma_spec->args[1]; @@ -713,9 +731,14 @@ static struct dma_chan *jz4780_of_dma_xlate(struct of_phandle_args *dma_spec, data.channel); return NULL; } - } - return dma_request_channel(mask, jz4780_dma_filter_fn, &data); + jzdma->chan[data.channel].transfer_type = data.transfer_type; + + return dma_get_slave_channel( + &jzdma->chan[data.channel].vchan.chan); + } else { + return dma_request_channel(mask, jz4780_dma_filter_fn, &data); + } } static int jz4780_dma_probe(struct platform_device *pdev) @@ -743,23 +766,26 @@ static int jz4780_dma_probe(struct platform_device *pdev) if (IS_ERR(jzdma->base)) return PTR_ERR(jzdma->base); - jzdma->irq = platform_get_irq(pdev, 0); - if (jzdma->irq < 0) { + ret = platform_get_irq(pdev, 0); + if (ret < 0) { dev_err(dev, "failed to get IRQ: %d\n", ret); - return jzdma->irq; + return ret; } - ret = devm_request_irq(dev, jzdma->irq, jz4780_dma_irq_handler, 0, - dev_name(dev), 
jzdma); + jzdma->irq = ret; + + ret = request_irq(jzdma->irq, jz4780_dma_irq_handler, 0, dev_name(dev), + jzdma); if (ret) { dev_err(dev, "failed to request IRQ %u!\n", jzdma->irq); - return -EINVAL; + return ret; } jzdma->clk = devm_clk_get(dev, NULL); if (IS_ERR(jzdma->clk)) { dev_err(dev, "failed to get clock\n"); - return PTR_ERR(jzdma->clk); + ret = PTR_ERR(jzdma->clk); + goto err_free_irq; } clk_prepare_enable(jzdma->clk); @@ -775,13 +801,13 @@ static int jz4780_dma_probe(struct platform_device *pdev) dma_cap_set(DMA_CYCLIC, dd->cap_mask); dd->dev = dev; - dd->copy_align = 2; /* 2^2 = 4 byte alignment */ + dd->copy_align = DMAENGINE_ALIGN_4_BYTES; dd->device_alloc_chan_resources = jz4780_dma_alloc_chan_resources; dd->device_free_chan_resources = jz4780_dma_free_chan_resources; dd->device_prep_slave_sg = jz4780_dma_prep_slave_sg; dd->device_prep_dma_cyclic = jz4780_dma_prep_dma_cyclic; dd->device_prep_dma_memcpy = jz4780_dma_prep_dma_memcpy; - dd->device_config = jz4780_dma_slave_config; + dd->device_config = jz4780_dma_config; dd->device_terminate_all = jz4780_dma_terminate_all; dd->device_tx_status = jz4780_dma_tx_status; dd->device_issue_pending = jz4780_dma_issue_pending; @@ -790,7 +816,6 @@ static int jz4780_dma_probe(struct platform_device *pdev) dd->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); dd->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; - /* * Enable DMA controller, mark all channels as not programmable. * Also set the FMSC bit - it increases MSC performance, so it makes @@ -832,15 +857,24 @@ err_unregister_dev: err_disable_clk: clk_disable_unprepare(jzdma->clk); + +err_free_irq: + free_irq(jzdma->irq, jzdma); return ret; } static int jz4780_dma_remove(struct platform_device *pdev) { struct jz4780_dma_dev *jzdma = platform_get_drvdata(pdev); + int i; of_dma_controller_free(pdev->dev.of_node); - devm_free_irq(&pdev->dev, jzdma->irq, jzdma); + + free_irq(jzdma->irq, jzdma); + + for (i = 0; i < JZ_DMA_NR_CHANNELS; i++) + tasklet_kill(&jzdma->chan[i].vchan.task); + dma_async_device_unregister(&jzdma->dma_device); return 0; } diff --git a/kernel/drivers/dma/dmaengine.c b/kernel/drivers/dma/dmaengine.c index 3ddfd1f6c..3ecec1445 100644 --- a/kernel/drivers/dma/dmaengine.c +++ b/kernel/drivers/dma/dmaengine.c @@ -267,6 +267,13 @@ static void dma_chan_put(struct dma_chan *chan) /* This channel is not in use anymore, free it */ if (!chan->client_count && chan->device->device_free_chan_resources) chan->device->device_free_chan_resources(chan); + + /* If the channel is used via a DMA request router, free the mapping */ + if (chan->router && chan->router->route_free) { + chan->router->route_free(chan->router->dev, chan->route_data); + chan->router = NULL; + chan->route_data = NULL; + } } enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie) @@ -536,7 +543,7 @@ static struct dma_chan *private_candidate(const dma_cap_mask_t *mask, } /** - * dma_request_slave_channel - try to get specific channel exclusively + * dma_get_slave_channel - try to get specific channel exclusively * @chan: target channel */ struct dma_chan *dma_get_slave_channel(struct dma_chan *chan) @@ -547,10 +554,18 @@ struct dma_chan *dma_get_slave_channel(struct dma_chan *chan) mutex_lock(&dma_list_mutex); if (chan->client_count == 0) { + struct dma_device *device = chan->device; + + dma_cap_set(DMA_PRIVATE, device->cap_mask); + device->privatecnt++; err = dma_chan_get(chan); - if (err) + if (err) { pr_debug("%s: failed to get %s: (%d)\n", __func__, dma_chan_name(chan), err); + chan 
= NULL; + if (--device->privatecnt == 0) + dma_cap_clear(DMA_PRIVATE, device->cap_mask); + } } else chan = NULL; @@ -648,7 +663,7 @@ struct dma_chan *__dma_request_channel(const dma_cap_mask_t *mask, EXPORT_SYMBOL_GPL(__dma_request_channel); /** - * dma_request_slave_channel - try to allocate an exclusive slave channel + * dma_request_slave_channel_reason - try to allocate an exclusive slave channel * @dev: pointer to client device structure * @name: slave channel name * @@ -682,6 +697,10 @@ struct dma_chan *dma_request_slave_channel(struct device *dev, struct dma_chan *ch = dma_request_slave_channel_reason(dev, name); if (IS_ERR(ch)) return NULL; + + dma_cap_set(DMA_PRIVATE, ch->device->cap_mask); + ch->device->privatecnt++; + return ch; } EXPORT_SYMBOL_GPL(dma_request_slave_channel); @@ -836,6 +855,8 @@ int dma_async_device_register(struct dma_device *device) !device->device_prep_dma_pq); BUG_ON(dma_has_cap(DMA_PQ_VAL, device->cap_mask) && !device->device_prep_dma_pq_val); + BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) && + !device->device_prep_dma_memset); BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) && !device->device_prep_dma_interrupt); BUG_ON(dma_has_cap(DMA_SG, device->cap_mask) && @@ -1053,11 +1074,9 @@ static void dmaengine_destroy_unmap_pool(void) for (i = 0; i < ARRAY_SIZE(unmap_pool); i++) { struct dmaengine_unmap_pool *p = &unmap_pool[i]; - if (p->pool) - mempool_destroy(p->pool); + mempool_destroy(p->pool); p->pool = NULL; - if (p->cache) - kmem_cache_destroy(p->cache); + kmem_cache_destroy(p->cache); p->cache = NULL; } } diff --git a/kernel/drivers/dma/dmatest.c b/kernel/drivers/dma/dmatest.c index 220ee4963..b8576fd6b 100644 --- a/kernel/drivers/dma/dmatest.c +++ b/kernel/drivers/dma/dmatest.c @@ -120,7 +120,7 @@ static struct dmatest_info { static int dmatest_run_set(const char *val, const struct kernel_param *kp); static int dmatest_run_get(char *val, const struct kernel_param *kp); -static struct kernel_param_ops run_ops = { +static const struct kernel_param_ops run_ops = { .set = dmatest_run_set, .get = dmatest_run_get, }; @@ -195,7 +195,7 @@ static int dmatest_wait_get(char *val, const struct kernel_param *kp) return param_get_bool(val, kp); } -static struct kernel_param_ops wait_ops = { +static const struct kernel_param_ops wait_ops = { .get = dmatest_wait_get, .set = param_set_bool, }; diff --git a/kernel/drivers/dma/dw/Kconfig b/kernel/drivers/dma/dw/Kconfig index 36e02f0f6..e00c9b022 100644 --- a/kernel/drivers/dma/dw/Kconfig +++ b/kernel/drivers/dma/dw/Kconfig @@ -6,6 +6,9 @@ config DW_DMAC_CORE tristate select DMA_ENGINE +config DW_DMAC_BIG_ENDIAN_IO + bool + config DW_DMAC tristate "Synopsys DesignWare AHB DMA platform driver" select DW_DMAC_CORE @@ -23,6 +26,3 @@ config DW_DMAC_PCI Support the Synopsys DesignWare AHB DMA controller on the platfroms that enumerate it as a PCI device. For example, Intel Medfield has integrated this GPDMA controller. 
- -config DW_DMAC_BIG_ENDIAN_IO - bool diff --git a/kernel/drivers/dma/dw/core.c b/kernel/drivers/dma/dw/core.c index 1022c2e1a..4f099ea29 100644 --- a/kernel/drivers/dma/dw/core.c +++ b/kernel/drivers/dma/dw/core.c @@ -163,7 +163,7 @@ static void dwc_initialize(struct dw_dma_chan *dwc) /*----------------------------------------------------------------------*/ -static inline unsigned int dwc_fast_fls(unsigned long long v) +static inline unsigned int dwc_fast_ffs(unsigned long long v) { /* * We can be a lot more clever here, but this should take care @@ -536,16 +536,17 @@ EXPORT_SYMBOL(dw_dma_get_dst_addr); /* Called with dwc->lock held and all DMAC interrupts disabled */ static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc, - u32 status_err, u32 status_xfer) + u32 status_block, u32 status_err, u32 status_xfer) { unsigned long flags; - if (dwc->mask) { + if (status_block & dwc->mask) { void (*callback)(void *param); void *callback_param; dev_vdbg(chan2dev(&dwc->chan), "new cyclic period llp 0x%08x\n", channel_readl(dwc, LLP)); + dma_writel(dw, CLEAR.BLOCK, dwc->mask); callback = dwc->cdesc->period_callback; callback_param = dwc->cdesc->period_callback_param; @@ -577,6 +578,7 @@ static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc, channel_writel(dwc, CTL_LO, 0); channel_writel(dwc, CTL_HI, 0); + dma_writel(dw, CLEAR.BLOCK, dwc->mask); dma_writel(dw, CLEAR.ERROR, dwc->mask); dma_writel(dw, CLEAR.XFER, dwc->mask); @@ -585,6 +587,9 @@ static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc, spin_unlock_irqrestore(&dwc->lock, flags); } + + /* Re-enable interrupts */ + channel_set_bit(dw, MASK.BLOCK, dwc->mask); } /* ------------------------------------------------------------------------- */ @@ -593,10 +598,12 @@ static void dw_dma_tasklet(unsigned long data) { struct dw_dma *dw = (struct dw_dma *)data; struct dw_dma_chan *dwc; + u32 status_block; u32 status_xfer; u32 status_err; int i; + status_block = dma_readl(dw, RAW.BLOCK); status_xfer = dma_readl(dw, RAW.XFER); status_err = dma_readl(dw, RAW.ERROR); @@ -605,16 +612,15 @@ static void dw_dma_tasklet(unsigned long data) for (i = 0; i < dw->dma.chancnt; i++) { dwc = &dw->chan[i]; if (test_bit(DW_DMA_IS_CYCLIC, &dwc->flags)) - dwc_handle_cyclic(dw, dwc, status_err, status_xfer); + dwc_handle_cyclic(dw, dwc, status_block, status_err, + status_xfer); else if (status_err & (1 << i)) dwc_handle_error(dw, dwc); else if (status_xfer & (1 << i)) dwc_scan_descriptors(dw, dwc); } - /* - * Re-enable interrupts. - */ + /* Re-enable interrupts */ channel_set_bit(dw, MASK.XFER, dw->all_chan_mask); channel_set_bit(dw, MASK.ERROR, dw->all_chan_mask); } @@ -635,6 +641,7 @@ static irqreturn_t dw_dma_interrupt(int irq, void *dev_id) * softirq handler. 
*/ channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask); + channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask); channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask); status = dma_readl(dw, STATUS_INT); @@ -645,6 +652,7 @@ static irqreturn_t dw_dma_interrupt(int irq, void *dev_id) /* Try to recover */ channel_clear_bit(dw, MASK.XFER, (1 << 8) - 1); + channel_clear_bit(dw, MASK.BLOCK, (1 << 8) - 1); channel_clear_bit(dw, MASK.SRC_TRAN, (1 << 8) - 1); channel_clear_bit(dw, MASK.DST_TRAN, (1 << 8) - 1); channel_clear_bit(dw, MASK.ERROR, (1 << 8) - 1); @@ -712,7 +720,7 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, dw->data_width[dwc->dst_master]); src_width = dst_width = min_t(unsigned int, data_width, - dwc_fast_fls(src | dest | len)); + dwc_fast_ffs(src | dest | len)); ctllo = DWC_DEFAULT_CTLLO(chan) | DWC_CTLL_DST_WIDTH(dst_width) @@ -791,7 +799,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, switch (direction) { case DMA_MEM_TO_DEV: - reg_width = __fls(sconfig->dst_addr_width); + reg_width = __ffs(sconfig->dst_addr_width); reg = sconfig->dst_addr; ctllo = (DWC_DEFAULT_CTLLO(chan) | DWC_CTLL_DST_WIDTH(reg_width) @@ -811,7 +819,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, len = sg_dma_len(sg); mem_width = min_t(unsigned int, - data_width, dwc_fast_fls(mem | len)); + data_width, dwc_fast_ffs(mem | len)); slave_sg_todev_fill_desc: desc = dwc_desc_get(dwc); @@ -848,7 +856,7 @@ slave_sg_todev_fill_desc: } break; case DMA_DEV_TO_MEM: - reg_width = __fls(sconfig->src_addr_width); + reg_width = __ffs(sconfig->src_addr_width); reg = sconfig->src_addr; ctllo = (DWC_DEFAULT_CTLLO(chan) | DWC_CTLL_SRC_WIDTH(reg_width) @@ -868,7 +876,7 @@ slave_sg_todev_fill_desc: len = sg_dma_len(sg); mem_width = min_t(unsigned int, - data_width, dwc_fast_fls(mem | len)); + data_width, dwc_fast_ffs(mem | len)); slave_sg_fromdev_fill_desc: desc = dwc_desc_get(dwc); @@ -1111,6 +1119,7 @@ static void dw_dma_off(struct dw_dma *dw) dma_writel(dw, CFG, 0); channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask); + channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask); channel_clear_bit(dw, MASK.SRC_TRAN, dw->all_chan_mask); channel_clear_bit(dw, MASK.DST_TRAN, dw->all_chan_mask); channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask); @@ -1216,6 +1225,7 @@ static void dwc_free_chan_resources(struct dma_chan *chan) /* Disable interrupts */ channel_clear_bit(dw, MASK.XFER, dwc->mask); + channel_clear_bit(dw, MASK.BLOCK, dwc->mask); channel_clear_bit(dw, MASK.ERROR, dwc->mask); spin_unlock_irqrestore(&dwc->lock, flags); @@ -1245,7 +1255,7 @@ static void dwc_free_chan_resources(struct dma_chan *chan) int dw_dma_cyclic_start(struct dma_chan *chan) { struct dw_dma_chan *dwc = to_dw_dma_chan(chan); - struct dw_dma *dw = to_dw_dma(dwc->chan.device); + struct dw_dma *dw = to_dw_dma(chan->device); unsigned long flags; if (!test_bit(DW_DMA_IS_CYCLIC, &dwc->flags)) { @@ -1255,25 +1265,10 @@ int dw_dma_cyclic_start(struct dma_chan *chan) spin_lock_irqsave(&dwc->lock, flags); - /* Assert channel is idle */ - if (dma_readl(dw, CH_EN) & dwc->mask) { - dev_err(chan2dev(&dwc->chan), - "%s: BUG: Attempted to start non-idle channel\n", - __func__); - dwc_dump_chan_regs(dwc); - spin_unlock_irqrestore(&dwc->lock, flags); - return -EBUSY; - } - - dma_writel(dw, CLEAR.ERROR, dwc->mask); - dma_writel(dw, CLEAR.XFER, dwc->mask); + /* Enable interrupts to perform cyclic transfer */ + channel_set_bit(dw, MASK.BLOCK, dwc->mask); - /* Setup DMAC channel registers */ - 
channel_writel(dwc, LLP, dwc->cdesc->desc[0]->txd.phys); - channel_writel(dwc, CTL_LO, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN); - channel_writel(dwc, CTL_HI, 0); - - channel_set_bit(dw, CH_EN, dwc->mask); + dwc_dostart(dwc, dwc->cdesc->desc[0]); spin_unlock_irqrestore(&dwc->lock, flags); @@ -1479,6 +1474,7 @@ void dw_dma_cyclic_free(struct dma_chan *chan) dwc_chan_disable(dw, dwc); + dma_writel(dw, CLEAR.BLOCK, dwc->mask); dma_writel(dw, CLEAR.ERROR, dwc->mask); dma_writel(dw, CLEAR.XFER, dwc->mask); @@ -1499,9 +1495,8 @@ EXPORT_SYMBOL(dw_dma_cyclic_free); int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) { struct dw_dma *dw; - bool autocfg; + bool autocfg = false; unsigned int dw_params; - unsigned int nr_channels; unsigned int max_blk_size = 0; int err; int i; @@ -1515,33 +1510,42 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) pm_runtime_get_sync(chip->dev); - dw_params = dma_read_byaddr(chip->regs, DW_PARAMS); - autocfg = dw_params >> DW_PARAMS_EN & 0x1; + if (!pdata) { + dw_params = dma_read_byaddr(chip->regs, DW_PARAMS); + dev_dbg(chip->dev, "DW_PARAMS: 0x%08x\n", dw_params); - dev_dbg(chip->dev, "DW_PARAMS: 0x%08x\n", dw_params); + autocfg = dw_params >> DW_PARAMS_EN & 1; + if (!autocfg) { + err = -EINVAL; + goto err_pdata; + } - if (!pdata && autocfg) { pdata = devm_kzalloc(chip->dev, sizeof(*pdata), GFP_KERNEL); if (!pdata) { err = -ENOMEM; goto err_pdata; } + /* Get hardware configuration parameters */ + pdata->nr_channels = (dw_params >> DW_PARAMS_NR_CHAN & 7) + 1; + pdata->nr_masters = (dw_params >> DW_PARAMS_NR_MASTER & 3) + 1; + for (i = 0; i < pdata->nr_masters; i++) { + pdata->data_width[i] = + (dw_params >> DW_PARAMS_DATA_WIDTH(i) & 3) + 2; + } + max_blk_size = dma_readl(dw, MAX_BLK_SIZE); + /* Fill platform data with the default values */ pdata->is_private = true; + pdata->is_memcpy = true; pdata->chan_allocation_order = CHAN_ALLOCATION_ASCENDING; pdata->chan_priority = CHAN_PRIORITY_ASCENDING; - } else if (!pdata || pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS) { + } else if (pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS) { err = -EINVAL; goto err_pdata; } - if (autocfg) - nr_channels = (dw_params >> DW_PARAMS_NR_CHAN & 0x7) + 1; - else - nr_channels = pdata->nr_channels; - - dw->chan = devm_kcalloc(chip->dev, nr_channels, sizeof(*dw->chan), + dw->chan = devm_kcalloc(chip->dev, pdata->nr_channels, sizeof(*dw->chan), GFP_KERNEL); if (!dw->chan) { err = -ENOMEM; @@ -1549,29 +1553,16 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) } /* Get hardware configuration parameters */ - if (autocfg) { - max_blk_size = dma_readl(dw, MAX_BLK_SIZE); - - dw->nr_masters = (dw_params >> DW_PARAMS_NR_MASTER & 3) + 1; - for (i = 0; i < dw->nr_masters; i++) { - dw->data_width[i] = - (dw_params >> DW_PARAMS_DATA_WIDTH(i) & 3) + 2; - } - } else { - dw->nr_masters = pdata->nr_masters; - for (i = 0; i < dw->nr_masters; i++) - dw->data_width[i] = pdata->data_width[i]; - } + dw->nr_masters = pdata->nr_masters; + for (i = 0; i < dw->nr_masters; i++) + dw->data_width[i] = pdata->data_width[i]; /* Calculate all channel mask before DMA setup */ - dw->all_chan_mask = (1 << nr_channels) - 1; + dw->all_chan_mask = (1 << pdata->nr_channels) - 1; /* Force dma off, just in case */ dw_dma_off(dw); - /* Disable BLOCK interrupts as well */ - channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask); - /* Create a pool of consistent memory blocks for hardware descriptors */ dw->desc_pool = 
dmam_pool_create("dw_dmac_desc_pool", chip->dev, sizeof(struct dw_desc), 4, 0); @@ -1589,9 +1580,8 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) goto err_pdata; INIT_LIST_HEAD(&dw->dma.channels); - for (i = 0; i < nr_channels; i++) { + for (i = 0; i < pdata->nr_channels; i++) { struct dw_dma_chan *dwc = &dw->chan[i]; - int r = nr_channels - i - 1; dwc->chan.device = &dw->dma; dma_cookie_init(&dwc->chan); @@ -1603,7 +1593,7 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) /* 7 is highest priority & 0 is lowest. */ if (pdata->chan_priority == CHAN_PRIORITY_ASCENDING) - dwc->priority = r; + dwc->priority = pdata->nr_channels - i - 1; else dwc->priority = i; @@ -1622,6 +1612,7 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) /* Hardware configuration */ if (autocfg) { unsigned int dwc_params; + unsigned int r = DW_DMA_MAX_NR_CHANNELS - i - 1; void __iomem *addr = chip->regs + r * sizeof(u32); dwc_params = dma_read_byaddr(addr, DWC_PARAMS); @@ -1656,10 +1647,13 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) dma_writel(dw, CLEAR.DST_TRAN, dw->all_chan_mask); dma_writel(dw, CLEAR.ERROR, dw->all_chan_mask); - dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask); + /* Set capabilities */ dma_cap_set(DMA_SLAVE, dw->dma.cap_mask); if (pdata->is_private) dma_cap_set(DMA_PRIVATE, dw->dma.cap_mask); + if (pdata->is_memcpy) + dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask); + dw->dma.dev = chip->dev; dw->dma.device_alloc_chan_resources = dwc_alloc_chan_resources; dw->dma.device_free_chan_resources = dwc_free_chan_resources; @@ -1687,7 +1681,7 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) goto err_dma_register; dev_info(chip->dev, "DesignWare DMA Controller, %d channels\n", - nr_channels); + pdata->nr_channels); pm_runtime_put_sync_suspend(chip->dev); @@ -1746,4 +1740,4 @@ EXPORT_SYMBOL_GPL(dw_dma_enable); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("Synopsys DesignWare DMA Controller core driver"); MODULE_AUTHOR("Haavard Skinnemoen (Atmel)"); -MODULE_AUTHOR("Viresh Kumar <viresh.linux@gmail.com>"); +MODULE_AUTHOR("Viresh Kumar <vireshk@kernel.org>"); diff --git a/kernel/drivers/dma/dw/pci.c b/kernel/drivers/dma/dw/pci.c index b144706b3..4c30fdd09 100644 --- a/kernel/drivers/dma/dw/pci.c +++ b/kernel/drivers/dma/dw/pci.c @@ -15,12 +15,6 @@ #include "internal.h" -static struct dw_dma_platform_data dw_pci_pdata = { - .is_private = 1, - .chan_allocation_order = CHAN_ALLOCATION_ASCENDING, - .chan_priority = CHAN_PRIORITY_ASCENDING, -}; - static int dw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *pid) { struct dw_dma_chip *chip; @@ -101,19 +95,19 @@ static const struct dev_pm_ops dw_pci_dev_pm_ops = { static const struct pci_device_id dw_pci_id_table[] = { /* Medfield */ - { PCI_VDEVICE(INTEL, 0x0827), (kernel_ulong_t)&dw_pci_pdata }, - { PCI_VDEVICE(INTEL, 0x0830), (kernel_ulong_t)&dw_pci_pdata }, + { PCI_VDEVICE(INTEL, 0x0827) }, + { PCI_VDEVICE(INTEL, 0x0830) }, /* BayTrail */ - { PCI_VDEVICE(INTEL, 0x0f06), (kernel_ulong_t)&dw_pci_pdata }, - { PCI_VDEVICE(INTEL, 0x0f40), (kernel_ulong_t)&dw_pci_pdata }, + { PCI_VDEVICE(INTEL, 0x0f06) }, + { PCI_VDEVICE(INTEL, 0x0f40) }, /* Braswell */ - { PCI_VDEVICE(INTEL, 0x2286), (kernel_ulong_t)&dw_pci_pdata }, - { PCI_VDEVICE(INTEL, 0x22c0), (kernel_ulong_t)&dw_pci_pdata }, + { PCI_VDEVICE(INTEL, 0x2286) }, + { PCI_VDEVICE(INTEL, 0x22c0) }, /* Haswell */ - { PCI_VDEVICE(INTEL, 0x9c60), 
(kernel_ulong_t)&dw_pci_pdata }, + { PCI_VDEVICE(INTEL, 0x9c60) }, { } }; MODULE_DEVICE_TABLE(pci, dw_pci_id_table); diff --git a/kernel/drivers/dma/dw/platform.c b/kernel/drivers/dma/dw/platform.c index b2c3ae071..68a481575 100644 --- a/kernel/drivers/dma/dw/platform.c +++ b/kernel/drivers/dma/dw/platform.c @@ -155,6 +155,7 @@ static int dw_probe(struct platform_device *pdev) struct dw_dma_chip *chip; struct device *dev = &pdev->dev; struct resource *mem; + const struct acpi_device_id *id; struct dw_dma_platform_data *pdata; int err; @@ -178,6 +179,11 @@ static int dw_probe(struct platform_device *pdev) pdata = dev_get_platdata(dev); if (!pdata) pdata = dw_dma_parse_dt(pdev); + if (!pdata && has_acpi_companion(dev)) { + id = acpi_match_device(dev->driver->acpi_match_table, dev); + if (id) + pdata = (struct dw_dma_platform_data *)id->driver_data; + } chip->dev = dev; @@ -246,8 +252,17 @@ MODULE_DEVICE_TABLE(of, dw_dma_of_id_table); #endif #ifdef CONFIG_ACPI +static struct dw_dma_platform_data dw_dma_acpi_pdata = { + .nr_channels = 8, + .is_private = true, + .chan_allocation_order = CHAN_ALLOCATION_ASCENDING, + .chan_priority = CHAN_PRIORITY_ASCENDING, + .block_size = 4095, + .nr_masters = 2, +}; + static const struct acpi_device_id dw_dma_acpi_id_table[] = { - { "INTL9C60", 0 }, + { "INTL9C60", (kernel_ulong_t)&dw_dma_acpi_pdata }, { } }; MODULE_DEVICE_TABLE(acpi, dw_dma_acpi_id_table); diff --git a/kernel/drivers/dma/edma.c b/kernel/drivers/dma/edma.c index bf09db7ca..16fe773fb 100644 --- a/kernel/drivers/dma/edma.c +++ b/kernel/drivers/dma/edma.c @@ -25,28 +25,93 @@ #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/of.h> +#include <linux/of_dma.h> +#include <linux/of_irq.h> +#include <linux/of_address.h> +#include <linux/of_device.h> +#include <linux/pm_runtime.h> #include <linux/platform_data/edma.h> #include "dmaengine.h" #include "virt-dma.h" -/* - * This will go away when the private EDMA API is folded - * into this driver and the platform device(s) are - * instantiated in the arch code. We can only get away - * with this simplification because DA8XX may not be built - * in the same kernel image with other DaVinci parts. This - * avoids having to sprinkle dmaengine driver platform devices - * and data throughout all the existing board files. 
- */ -#ifdef CONFIG_ARCH_DAVINCI_DA8XX -#define EDMA_CTLRS 2 -#define EDMA_CHANS 32 -#else -#define EDMA_CTLRS 1 -#define EDMA_CHANS 64 -#endif /* CONFIG_ARCH_DAVINCI_DA8XX */ +/* Offsets matching "struct edmacc_param" */ +#define PARM_OPT 0x00 +#define PARM_SRC 0x04 +#define PARM_A_B_CNT 0x08 +#define PARM_DST 0x0c +#define PARM_SRC_DST_BIDX 0x10 +#define PARM_LINK_BCNTRLD 0x14 +#define PARM_SRC_DST_CIDX 0x18 +#define PARM_CCNT 0x1c + +#define PARM_SIZE 0x20 + +/* Offsets for EDMA CC global channel registers and their shadows */ +#define SH_ER 0x00 /* 64 bits */ +#define SH_ECR 0x08 /* 64 bits */ +#define SH_ESR 0x10 /* 64 bits */ +#define SH_CER 0x18 /* 64 bits */ +#define SH_EER 0x20 /* 64 bits */ +#define SH_EECR 0x28 /* 64 bits */ +#define SH_EESR 0x30 /* 64 bits */ +#define SH_SER 0x38 /* 64 bits */ +#define SH_SECR 0x40 /* 64 bits */ +#define SH_IER 0x50 /* 64 bits */ +#define SH_IECR 0x58 /* 64 bits */ +#define SH_IESR 0x60 /* 64 bits */ +#define SH_IPR 0x68 /* 64 bits */ +#define SH_ICR 0x70 /* 64 bits */ +#define SH_IEVAL 0x78 +#define SH_QER 0x80 +#define SH_QEER 0x84 +#define SH_QEECR 0x88 +#define SH_QEESR 0x8c +#define SH_QSER 0x90 +#define SH_QSECR 0x94 +#define SH_SIZE 0x200 + +/* Offsets for EDMA CC global registers */ +#define EDMA_REV 0x0000 +#define EDMA_CCCFG 0x0004 +#define EDMA_QCHMAP 0x0200 /* 8 registers */ +#define EDMA_DMAQNUM 0x0240 /* 8 registers (4 on OMAP-L1xx) */ +#define EDMA_QDMAQNUM 0x0260 +#define EDMA_QUETCMAP 0x0280 +#define EDMA_QUEPRI 0x0284 +#define EDMA_EMR 0x0300 /* 64 bits */ +#define EDMA_EMCR 0x0308 /* 64 bits */ +#define EDMA_QEMR 0x0310 +#define EDMA_QEMCR 0x0314 +#define EDMA_CCERR 0x0318 +#define EDMA_CCERRCLR 0x031c +#define EDMA_EEVAL 0x0320 +#define EDMA_DRAE 0x0340 /* 4 x 64 bits*/ +#define EDMA_QRAE 0x0380 /* 4 registers */ +#define EDMA_QUEEVTENTRY 0x0400 /* 2 x 16 registers */ +#define EDMA_QSTAT 0x0600 /* 2 registers */ +#define EDMA_QWMTHRA 0x0620 +#define EDMA_QWMTHRB 0x0624 +#define EDMA_CCSTAT 0x0640 + +#define EDMA_M 0x1000 /* global channel registers */ +#define EDMA_ECR 0x1008 +#define EDMA_ECRH 0x100C +#define EDMA_SHADOW0 0x2000 /* 4 shadow regions */ +#define EDMA_PARM 0x4000 /* PaRAM entries */ + +#define PARM_OFFSET(param_no) (EDMA_PARM + ((param_no) << 5)) + +#define EDMA_DCHMAP 0x0100 /* 64 registers */ + +/* CCCFG register */ +#define GET_NUM_DMACH(x) (x & 0x7) /* bits 0-2 */ +#define GET_NUM_QDMACH(x) ((x & 0x70) >> 4) /* bits 4-6 */ +#define GET_NUM_PAENTRY(x) ((x & 0x7000) >> 12) /* bits 12-14 */ +#define GET_NUM_EVQUE(x) ((x & 0x70000) >> 16) /* bits 16-18 */ +#define GET_NUM_REGN(x) ((x & 0x300000) >> 20) /* bits 20-21 */ +#define CHMAP_EXIST BIT(24) /* * Max of 20 segments per channel to conserve PaRAM slots @@ -59,6 +124,37 @@ #define EDMA_MAX_SLOTS MAX_NR_SG #define EDMA_DESCRIPTORS 16 +#define EDMA_CHANNEL_ANY -1 /* for edma_alloc_channel() */ +#define EDMA_SLOT_ANY -1 /* for edma_alloc_slot() */ +#define EDMA_CONT_PARAMS_ANY 1001 +#define EDMA_CONT_PARAMS_FIXED_EXACT 1002 +#define EDMA_CONT_PARAMS_FIXED_NOT_EXACT 1003 + +/* PaRAM slots are laid out like this */ +struct edmacc_param { + u32 opt; + u32 src; + u32 a_b_cnt; + u32 dst; + u32 src_dst_bidx; + u32 link_bcntrld; + u32 src_dst_cidx; + u32 ccnt; +} __packed; + +/* fields in edmacc_param.opt */ +#define SAM BIT(0) +#define DAM BIT(1) +#define SYNCDIM BIT(2) +#define STATIC BIT(3) +#define EDMA_FWID (0x07 << 8) +#define TCCMODE BIT(11) +#define EDMA_TCC(t) ((t) << 12) +#define TCINTEN BIT(20) +#define ITCINTEN BIT(21) +#define TCCHEN BIT(22) +#define 
ITCCHEN BIT(23) + struct edma_pset { u32 len; dma_addr_t addr; @@ -105,26 +201,524 @@ struct edma_desc { struct edma_cc; +struct edma_tc { + struct device_node *node; + u16 id; +}; + struct edma_chan { struct virt_dma_chan vchan; struct list_head node; struct edma_desc *edesc; struct edma_cc *ecc; + struct edma_tc *tc; int ch_num; bool alloced; + bool hw_triggered; int slot[EDMA_MAX_SLOTS]; int missed; struct dma_slave_config cfg; }; struct edma_cc { - int ctlr; + struct device *dev; + struct edma_soc_info *info; + void __iomem *base; + int id; + bool legacy_mode; + + /* eDMA3 resource information */ + unsigned num_channels; + unsigned num_qchannels; + unsigned num_region; + unsigned num_slots; + unsigned num_tc; + bool chmap_exist; + enum dma_event_q default_queue; + + /* + * The slot_inuse bit for each PaRAM slot is clear unless the slot is + * in use by Linux or if it is allocated to be used by DSP. + */ + unsigned long *slot_inuse; + struct dma_device dma_slave; - struct edma_chan slave_chans[EDMA_CHANS]; - int num_slave_chans; + struct dma_device *dma_memcpy; + struct edma_chan *slave_chans; + struct edma_tc *tc_list; int dummy_slot; }; +/* dummy param set used to (re)initialize parameter RAM slots */ +static const struct edmacc_param dummy_paramset = { + .link_bcntrld = 0xffff, + .ccnt = 1, +}; + +#define EDMA_BINDING_LEGACY 0 +#define EDMA_BINDING_TPCC 1 +static const struct of_device_id edma_of_ids[] = { + { + .compatible = "ti,edma3", + .data = (void *)EDMA_BINDING_LEGACY, + }, + { + .compatible = "ti,edma3-tpcc", + .data = (void *)EDMA_BINDING_TPCC, + }, + {} +}; + +static const struct of_device_id edma_tptc_of_ids[] = { + { .compatible = "ti,edma3-tptc", }, + {} +}; + +static inline unsigned int edma_read(struct edma_cc *ecc, int offset) +{ + return (unsigned int)__raw_readl(ecc->base + offset); +} + +static inline void edma_write(struct edma_cc *ecc, int offset, int val) +{ + __raw_writel(val, ecc->base + offset); +} + +static inline void edma_modify(struct edma_cc *ecc, int offset, unsigned and, + unsigned or) +{ + unsigned val = edma_read(ecc, offset); + + val &= and; + val |= or; + edma_write(ecc, offset, val); +} + +static inline void edma_and(struct edma_cc *ecc, int offset, unsigned and) +{ + unsigned val = edma_read(ecc, offset); + + val &= and; + edma_write(ecc, offset, val); +} + +static inline void edma_or(struct edma_cc *ecc, int offset, unsigned or) +{ + unsigned val = edma_read(ecc, offset); + + val |= or; + edma_write(ecc, offset, val); +} + +static inline unsigned int edma_read_array(struct edma_cc *ecc, int offset, + int i) +{ + return edma_read(ecc, offset + (i << 2)); +} + +static inline void edma_write_array(struct edma_cc *ecc, int offset, int i, + unsigned val) +{ + edma_write(ecc, offset + (i << 2), val); +} + +static inline void edma_modify_array(struct edma_cc *ecc, int offset, int i, + unsigned and, unsigned or) +{ + edma_modify(ecc, offset + (i << 2), and, or); +} + +static inline void edma_or_array(struct edma_cc *ecc, int offset, int i, + unsigned or) +{ + edma_or(ecc, offset + (i << 2), or); +} + +static inline void edma_or_array2(struct edma_cc *ecc, int offset, int i, int j, + unsigned or) +{ + edma_or(ecc, offset + ((i * 2 + j) << 2), or); +} + +static inline void edma_write_array2(struct edma_cc *ecc, int offset, int i, + int j, unsigned val) +{ + edma_write(ecc, offset + ((i * 2 + j) << 2), val); +} + +static inline unsigned int edma_shadow0_read(struct edma_cc *ecc, int offset) +{ + return edma_read(ecc, EDMA_SHADOW0 + offset); +} + +static 
inline unsigned int edma_shadow0_read_array(struct edma_cc *ecc, + int offset, int i) +{ + return edma_read(ecc, EDMA_SHADOW0 + offset + (i << 2)); +} + +static inline void edma_shadow0_write(struct edma_cc *ecc, int offset, + unsigned val) +{ + edma_write(ecc, EDMA_SHADOW0 + offset, val); +} + +static inline void edma_shadow0_write_array(struct edma_cc *ecc, int offset, + int i, unsigned val) +{ + edma_write(ecc, EDMA_SHADOW0 + offset + (i << 2), val); +} + +static inline unsigned int edma_param_read(struct edma_cc *ecc, int offset, + int param_no) +{ + return edma_read(ecc, EDMA_PARM + offset + (param_no << 5)); +} + +static inline void edma_param_write(struct edma_cc *ecc, int offset, + int param_no, unsigned val) +{ + edma_write(ecc, EDMA_PARM + offset + (param_no << 5), val); +} + +static inline void edma_param_modify(struct edma_cc *ecc, int offset, + int param_no, unsigned and, unsigned or) +{ + edma_modify(ecc, EDMA_PARM + offset + (param_no << 5), and, or); +} + +static inline void edma_param_and(struct edma_cc *ecc, int offset, int param_no, + unsigned and) +{ + edma_and(ecc, EDMA_PARM + offset + (param_no << 5), and); +} + +static inline void edma_param_or(struct edma_cc *ecc, int offset, int param_no, + unsigned or) +{ + edma_or(ecc, EDMA_PARM + offset + (param_no << 5), or); +} + +static inline void set_bits(int offset, int len, unsigned long *p) +{ + for (; len > 0; len--) + set_bit(offset + (len - 1), p); +} + +static inline void clear_bits(int offset, int len, unsigned long *p) +{ + for (; len > 0; len--) + clear_bit(offset + (len - 1), p); +} + +static void edma_assign_priority_to_queue(struct edma_cc *ecc, int queue_no, + int priority) +{ + int bit = queue_no * 4; + + edma_modify(ecc, EDMA_QUEPRI, ~(0x7 << bit), ((priority & 0x7) << bit)); +} + +static void edma_set_chmap(struct edma_chan *echan, int slot) +{ + struct edma_cc *ecc = echan->ecc; + int channel = EDMA_CHAN_SLOT(echan->ch_num); + + if (ecc->chmap_exist) { + slot = EDMA_CHAN_SLOT(slot); + edma_write_array(ecc, EDMA_DCHMAP, channel, (slot << 5)); + } +} + +static void edma_setup_interrupt(struct edma_chan *echan, bool enable) +{ + struct edma_cc *ecc = echan->ecc; + int channel = EDMA_CHAN_SLOT(echan->ch_num); + + if (enable) { + edma_shadow0_write_array(ecc, SH_ICR, channel >> 5, + BIT(channel & 0x1f)); + edma_shadow0_write_array(ecc, SH_IESR, channel >> 5, + BIT(channel & 0x1f)); + } else { + edma_shadow0_write_array(ecc, SH_IECR, channel >> 5, + BIT(channel & 0x1f)); + } +} + +/* + * paRAM slot management functions + */ +static void edma_write_slot(struct edma_cc *ecc, unsigned slot, + const struct edmacc_param *param) +{ + slot = EDMA_CHAN_SLOT(slot); + if (slot >= ecc->num_slots) + return; + memcpy_toio(ecc->base + PARM_OFFSET(slot), param, PARM_SIZE); +} + +static void edma_read_slot(struct edma_cc *ecc, unsigned slot, + struct edmacc_param *param) +{ + slot = EDMA_CHAN_SLOT(slot); + if (slot >= ecc->num_slots) + return; + memcpy_fromio(param, ecc->base + PARM_OFFSET(slot), PARM_SIZE); +} + +/** + * edma_alloc_slot - allocate DMA parameter RAM + * @ecc: pointer to edma_cc struct + * @slot: specific slot to allocate; negative for "any unused slot" + * + * This allocates a parameter RAM slot, initializing it to hold a + * dummy transfer. Slots allocated using this routine have not been + * mapped to a hardware DMA channel, and will normally be used by + * linking to them from a slot associated with a DMA channel. 
+ * + * Normal use is to pass EDMA_SLOT_ANY as the @slot, but specific + * slots may be allocated on behalf of DSP firmware. + * + * Returns the number of the slot, else negative errno. + */ +static int edma_alloc_slot(struct edma_cc *ecc, int slot) +{ + if (slot > 0) { + slot = EDMA_CHAN_SLOT(slot); + /* Requesting entry paRAM slot for a HW triggered channel. */ + if (ecc->chmap_exist && slot < ecc->num_channels) + slot = EDMA_SLOT_ANY; + } + + if (slot < 0) { + if (ecc->chmap_exist) + slot = 0; + else + slot = ecc->num_channels; + for (;;) { + slot = find_next_zero_bit(ecc->slot_inuse, + ecc->num_slots, + slot); + if (slot == ecc->num_slots) + return -ENOMEM; + if (!test_and_set_bit(slot, ecc->slot_inuse)) + break; + } + } else if (slot >= ecc->num_slots) { + return -EINVAL; + } else if (test_and_set_bit(slot, ecc->slot_inuse)) { + return -EBUSY; + } + + edma_write_slot(ecc, slot, &dummy_paramset); + + return EDMA_CTLR_CHAN(ecc->id, slot); +} + +static void edma_free_slot(struct edma_cc *ecc, unsigned slot) +{ + slot = EDMA_CHAN_SLOT(slot); + if (slot >= ecc->num_slots) + return; + + edma_write_slot(ecc, slot, &dummy_paramset); + clear_bit(slot, ecc->slot_inuse); +} + +/** + * edma_link - link one parameter RAM slot to another + * @ecc: pointer to edma_cc struct + * @from: parameter RAM slot originating the link + * @to: parameter RAM slot which is the link target + * + * The originating slot should not be part of any active DMA transfer. + */ +static void edma_link(struct edma_cc *ecc, unsigned from, unsigned to) +{ + if (unlikely(EDMA_CTLR(from) != EDMA_CTLR(to))) + dev_warn(ecc->dev, "Ignoring eDMA instance for linking\n"); + + from = EDMA_CHAN_SLOT(from); + to = EDMA_CHAN_SLOT(to); + if (from >= ecc->num_slots || to >= ecc->num_slots) + return; + + edma_param_modify(ecc, PARM_LINK_BCNTRLD, from, 0xffff0000, + PARM_OFFSET(to)); +} + +/** + * edma_get_position - returns the current transfer point + * @ecc: pointer to edma_cc struct + * @slot: parameter RAM slot being examined + * @dst: true selects the dest position, false the source + * + * Returns the position of the current active slot + */ +static dma_addr_t edma_get_position(struct edma_cc *ecc, unsigned slot, + bool dst) +{ + u32 offs; + + slot = EDMA_CHAN_SLOT(slot); + offs = PARM_OFFSET(slot); + offs += dst ? PARM_DST : PARM_SRC; + + return edma_read(ecc, offs); +} + +/* + * Channels with event associations will be triggered by their hardware + * events, and channels without such associations will be triggered by + * software. (At this writing there is no interface for using software + * triggers except with channels that don't support hardware triggers.) 
+ */ +static void edma_start(struct edma_chan *echan) +{ + struct edma_cc *ecc = echan->ecc; + int channel = EDMA_CHAN_SLOT(echan->ch_num); + int j = (channel >> 5); + unsigned int mask = BIT(channel & 0x1f); + + if (!echan->hw_triggered) { + /* EDMA channels without event association */ + dev_dbg(ecc->dev, "ESR%d %08x\n", j, + edma_shadow0_read_array(ecc, SH_ESR, j)); + edma_shadow0_write_array(ecc, SH_ESR, j, mask); + } else { + /* EDMA channel with event association */ + dev_dbg(ecc->dev, "ER%d %08x\n", j, + edma_shadow0_read_array(ecc, SH_ER, j)); + /* Clear any pending event or error */ + edma_write_array(ecc, EDMA_ECR, j, mask); + edma_write_array(ecc, EDMA_EMCR, j, mask); + /* Clear any SER */ + edma_shadow0_write_array(ecc, SH_SECR, j, mask); + edma_shadow0_write_array(ecc, SH_EESR, j, mask); + dev_dbg(ecc->dev, "EER%d %08x\n", j, + edma_shadow0_read_array(ecc, SH_EER, j)); + } +} + +static void edma_stop(struct edma_chan *echan) +{ + struct edma_cc *ecc = echan->ecc; + int channel = EDMA_CHAN_SLOT(echan->ch_num); + int j = (channel >> 5); + unsigned int mask = BIT(channel & 0x1f); + + edma_shadow0_write_array(ecc, SH_EECR, j, mask); + edma_shadow0_write_array(ecc, SH_ECR, j, mask); + edma_shadow0_write_array(ecc, SH_SECR, j, mask); + edma_write_array(ecc, EDMA_EMCR, j, mask); + + /* clear possibly pending completion interrupt */ + edma_shadow0_write_array(ecc, SH_ICR, j, mask); + + dev_dbg(ecc->dev, "EER%d %08x\n", j, + edma_shadow0_read_array(ecc, SH_EER, j)); + + /* REVISIT: consider guarding against inappropriate event + * chaining by overwriting with dummy_paramset. + */ +} + +/* + * Temporarily disable EDMA hardware events on the specified channel, + * preventing them from triggering new transfers + */ +static void edma_pause(struct edma_chan *echan) +{ + int channel = EDMA_CHAN_SLOT(echan->ch_num); + unsigned int mask = BIT(channel & 0x1f); + + edma_shadow0_write_array(echan->ecc, SH_EECR, channel >> 5, mask); +} + +/* Re-enable EDMA hardware events on the specified channel. 
*/ +static void edma_resume(struct edma_chan *echan) +{ + int channel = EDMA_CHAN_SLOT(echan->ch_num); + unsigned int mask = BIT(channel & 0x1f); + + edma_shadow0_write_array(echan->ecc, SH_EESR, channel >> 5, mask); +} + +static void edma_trigger_channel(struct edma_chan *echan) +{ + struct edma_cc *ecc = echan->ecc; + int channel = EDMA_CHAN_SLOT(echan->ch_num); + unsigned int mask = BIT(channel & 0x1f); + + edma_shadow0_write_array(ecc, SH_ESR, (channel >> 5), mask); + + dev_dbg(ecc->dev, "ESR%d %08x\n", (channel >> 5), + edma_shadow0_read_array(ecc, SH_ESR, (channel >> 5))); +} + +static void edma_clean_channel(struct edma_chan *echan) +{ + struct edma_cc *ecc = echan->ecc; + int channel = EDMA_CHAN_SLOT(echan->ch_num); + int j = (channel >> 5); + unsigned int mask = BIT(channel & 0x1f); + + dev_dbg(ecc->dev, "EMR%d %08x\n", j, edma_read_array(ecc, EDMA_EMR, j)); + edma_shadow0_write_array(ecc, SH_ECR, j, mask); + /* Clear the corresponding EMR bits */ + edma_write_array(ecc, EDMA_EMCR, j, mask); + /* Clear any SER */ + edma_shadow0_write_array(ecc, SH_SECR, j, mask); + edma_write(ecc, EDMA_CCERRCLR, BIT(16) | BIT(1) | BIT(0)); +} + +/* Move channel to a specific event queue */ +static void edma_assign_channel_eventq(struct edma_chan *echan, + enum dma_event_q eventq_no) +{ + struct edma_cc *ecc = echan->ecc; + int channel = EDMA_CHAN_SLOT(echan->ch_num); + int bit = (channel & 0x7) * 4; + + /* default to low priority queue */ + if (eventq_no == EVENTQ_DEFAULT) + eventq_no = ecc->default_queue; + if (eventq_no >= ecc->num_tc) + return; + + eventq_no &= 7; + edma_modify_array(ecc, EDMA_DMAQNUM, (channel >> 3), ~(0x7 << bit), + eventq_no << bit); +} + +static int edma_alloc_channel(struct edma_chan *echan, + enum dma_event_q eventq_no) +{ + struct edma_cc *ecc = echan->ecc; + int channel = EDMA_CHAN_SLOT(echan->ch_num); + + /* ensure access through shadow region 0 */ + edma_or_array2(ecc, EDMA_DRAE, 0, channel >> 5, BIT(channel & 0x1f)); + + /* ensure no events are pending */ + edma_stop(echan); + + edma_setup_interrupt(echan, true); + + edma_assign_channel_eventq(echan, eventq_no); + + return 0; +} + +static void edma_free_channel(struct edma_chan *echan) +{ + /* ensure no events are pending */ + edma_stop(echan); + /* REVISIT should probably take out of shadow region 0 */ + edma_setup_interrupt(echan, false); +} + static inline struct edma_cc *to_edma_cc(struct dma_device *d) { return container_of(d, struct edma_cc, dma_slave); @@ -135,8 +729,7 @@ static inline struct edma_chan *to_edma_chan(struct dma_chan *c) return container_of(c, struct edma_chan, vchan.chan); } -static inline struct edma_desc -*to_edma_desc(struct dma_async_tx_descriptor *tx) +static inline struct edma_desc *to_edma_desc(struct dma_async_tx_descriptor *tx) { return container_of(tx, struct edma_desc, vdesc.tx); } @@ -149,20 +742,17 @@ static void edma_desc_free(struct virt_dma_desc *vdesc) /* Dispatch a queued descriptor to the controller (caller holds lock) */ static void edma_execute(struct edma_chan *echan) { + struct edma_cc *ecc = echan->ecc; struct virt_dma_desc *vdesc; struct edma_desc *edesc; struct device *dev = echan->vchan.chan.device->dev; int i, j, left, nslots; - /* If either we processed all psets or we're still not started */ - if (!echan->edesc || - echan->edesc->pset_nr == echan->edesc->processed) { - /* Get next vdesc */ + if (!echan->edesc) { + /* Setup is needed for the first transfer */ vdesc = vchan_next_desc(&echan->vchan); - if (!vdesc) { - echan->edesc = NULL; + if (!vdesc) return; - } 
list_del(&vdesc->node); echan->edesc = to_edma_desc(&vdesc->tx); } @@ -177,32 +767,32 @@ static void edma_execute(struct edma_chan *echan) /* Write descriptor PaRAM set(s) */ for (i = 0; i < nslots; i++) { j = i + edesc->processed; - edma_write_slot(echan->slot[i], &edesc->pset[j].param); + edma_write_slot(ecc, echan->slot[i], &edesc->pset[j].param); edesc->sg_len += edesc->pset[j].len; - dev_vdbg(echan->vchan.chan.device->dev, - "\n pset[%d]:\n" - " chnum\t%d\n" - " slot\t%d\n" - " opt\t%08x\n" - " src\t%08x\n" - " dst\t%08x\n" - " abcnt\t%08x\n" - " ccnt\t%08x\n" - " bidx\t%08x\n" - " cidx\t%08x\n" - " lkrld\t%08x\n", - j, echan->ch_num, echan->slot[i], - edesc->pset[j].param.opt, - edesc->pset[j].param.src, - edesc->pset[j].param.dst, - edesc->pset[j].param.a_b_cnt, - edesc->pset[j].param.ccnt, - edesc->pset[j].param.src_dst_bidx, - edesc->pset[j].param.src_dst_cidx, - edesc->pset[j].param.link_bcntrld); + dev_vdbg(dev, + "\n pset[%d]:\n" + " chnum\t%d\n" + " slot\t%d\n" + " opt\t%08x\n" + " src\t%08x\n" + " dst\t%08x\n" + " abcnt\t%08x\n" + " ccnt\t%08x\n" + " bidx\t%08x\n" + " cidx\t%08x\n" + " lkrld\t%08x\n", + j, echan->ch_num, echan->slot[i], + edesc->pset[j].param.opt, + edesc->pset[j].param.src, + edesc->pset[j].param.dst, + edesc->pset[j].param.a_b_cnt, + edesc->pset[j].param.ccnt, + edesc->pset[j].param.src_dst_bidx, + edesc->pset[j].param.src_dst_cidx, + edesc->pset[j].param.link_bcntrld); /* Link to the previous slot if not the last set */ if (i != (nslots - 1)) - edma_link(echan->slot[i], echan->slot[i+1]); + edma_link(ecc, echan->slot[i], echan->slot[i + 1]); } edesc->processed += nslots; @@ -214,34 +804,32 @@ static void edma_execute(struct edma_chan *echan) */ if (edesc->processed == edesc->pset_nr) { if (edesc->cyclic) - edma_link(echan->slot[nslots-1], echan->slot[1]); + edma_link(ecc, echan->slot[nslots - 1], echan->slot[1]); else - edma_link(echan->slot[nslots-1], + edma_link(ecc, echan->slot[nslots - 1], echan->ecc->dummy_slot); } - if (edesc->processed <= MAX_NR_SG) { + if (echan->missed) { + /* + * This happens due to setup times between intermediate + * transfers in long SG lists which have to be broken up into + * transfers of MAX_NR_SG + */ + dev_dbg(dev, "missed event on channel %d\n", echan->ch_num); + edma_clean_channel(echan); + edma_stop(echan); + edma_start(echan); + edma_trigger_channel(echan); + echan->missed = 0; + } else if (edesc->processed <= MAX_NR_SG) { dev_dbg(dev, "first transfer starting on channel %d\n", echan->ch_num); - edma_start(echan->ch_num); + edma_start(echan); } else { dev_dbg(dev, "chan: %d: completed %d elements, resuming\n", echan->ch_num, edesc->processed); - edma_resume(echan->ch_num); - } - - /* - * This happens due to setup times between intermediate transfers - * in long SG lists which have to be broken up into transfers of - * MAX_NR_SG - */ - if (echan->missed) { - dev_dbg(dev, "missed event on channel %d\n", echan->ch_num); - edma_clean_channel(echan->ch_num); - edma_stop(echan->ch_num); - edma_start(echan->ch_num); - edma_trigger_channel(echan->ch_num); - echan->missed = 0; + edma_resume(echan); } } @@ -259,20 +847,16 @@ static int edma_terminate_all(struct dma_chan *chan) * echan->edesc is NULL and exit.) 
*/ if (echan->edesc) { - int cyclic = echan->edesc->cyclic; - + edma_stop(echan); + /* Move the cyclic channel back to default queue */ + if (!echan->tc && echan->edesc->cyclic) + edma_assign_channel_eventq(echan, EVENTQ_DEFAULT); /* * free the running request descriptor * since it is not in any of the vdesc lists */ edma_desc_free(&echan->edesc->vdesc); - echan->edesc = NULL; - edma_stop(echan->ch_num); - /* Move the cyclic channel back to default queue */ - if (cyclic) - edma_assign_channel_eventq(echan->ch_num, - EVENTQ_DEFAULT); } vchan_get_all_descriptors(&echan->vchan, &head); @@ -300,11 +884,10 @@ static int edma_dma_pause(struct dma_chan *chan) { struct edma_chan *echan = to_edma_chan(chan); - /* Pause/Resume only allowed with cyclic mode */ - if (!echan->edesc || !echan->edesc->cyclic) + if (!echan->edesc) return -EINVAL; - edma_pause(echan->ch_num); + edma_pause(echan); return 0; } @@ -312,11 +895,7 @@ static int edma_dma_resume(struct dma_chan *chan) { struct edma_chan *echan = to_edma_chan(chan); - /* Pause/Resume only allowed with cyclic mode */ - if (!echan->edesc->cyclic) - return -EINVAL; - - edma_resume(echan->ch_num); + edma_resume(echan); return 0; } @@ -332,19 +911,17 @@ static int edma_dma_resume(struct dma_chan *chan) * @direction: Direction of the transfer */ static int edma_config_pset(struct dma_chan *chan, struct edma_pset *epset, - dma_addr_t src_addr, dma_addr_t dst_addr, u32 burst, - enum dma_slave_buswidth dev_width, unsigned int dma_length, - enum dma_transfer_direction direction) + dma_addr_t src_addr, dma_addr_t dst_addr, u32 burst, + unsigned int acnt, unsigned int dma_length, + enum dma_transfer_direction direction) { struct edma_chan *echan = to_edma_chan(chan); struct device *dev = chan->device->dev; struct edmacc_param *param = &epset->param; - int acnt, bcnt, ccnt, cidx; + int bcnt, ccnt, cidx; int src_bidx, dst_bidx, src_cidx, dst_cidx; int absync; - acnt = dev_width; - /* src/dst_maxburst == 0 is the same case as src/dst_maxburst == 1 */ if (!burst) burst = 1; @@ -480,8 +1057,8 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg( return NULL; } - edesc = kzalloc(sizeof(*edesc) + sg_len * - sizeof(edesc->pset[0]), GFP_ATOMIC); + edesc = kzalloc(sizeof(*edesc) + sg_len * sizeof(edesc->pset[0]), + GFP_ATOMIC); if (!edesc) { dev_err(dev, "%s: Failed to allocate a descriptor\n", __func__); return NULL; @@ -498,8 +1075,7 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg( for (i = 0; i < nslots; i++) { if (echan->slot[i] < 0) { echan->slot[i] = - edma_alloc_slot(EDMA_CTLR(echan->ch_num), - EDMA_SLOT_ANY); + edma_alloc_slot(echan->ecc, EDMA_SLOT_ANY); if (echan->slot[i] < 0) { kfree(edesc); dev_err(dev, "%s: Failed to allocate slot\n", @@ -546,36 +1122,98 @@ static struct dma_async_tx_descriptor *edma_prep_dma_memcpy( struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, size_t len, unsigned long tx_flags) { - int ret; + int ret, nslots; struct edma_desc *edesc; struct device *dev = chan->device->dev; struct edma_chan *echan = to_edma_chan(chan); + unsigned int width, pset_len; if (unlikely(!echan || !len)) return NULL; - edesc = kzalloc(sizeof(*edesc) + sizeof(edesc->pset[0]), GFP_ATOMIC); + if (len < SZ_64K) { + /* + * Transfer size less than 64K can be handled with one paRAM + * slot and with one burst. + * ACNT = length + */ + width = len; + pset_len = len; + nslots = 1; + } else { + /* + * Transfer size bigger than 64K will be handled with maximum of + * two paRAM slots. + * slot1: (full_length / 32767) times 32767 bytes bursts. 
+ * ACNT = 32767, length1: (full_length / 32767) * 32767 + * slot2: the remaining amount of data after slot1. + * ACNT = full_length - length1, length2 = ACNT + * + * When the full_length is multibple of 32767 one slot can be + * used to complete the transfer. + */ + width = SZ_32K - 1; + pset_len = rounddown(len, width); + /* One slot is enough for lengths multiple of (SZ_32K -1) */ + if (unlikely(pset_len == len)) + nslots = 1; + else + nslots = 2; + } + + edesc = kzalloc(sizeof(*edesc) + nslots * sizeof(edesc->pset[0]), + GFP_ATOMIC); if (!edesc) { dev_dbg(dev, "Failed to allocate a descriptor\n"); return NULL; } - edesc->pset_nr = 1; + edesc->pset_nr = nslots; + edesc->residue = edesc->residue_stat = len; + edesc->direction = DMA_MEM_TO_MEM; + edesc->echan = echan; ret = edma_config_pset(chan, &edesc->pset[0], src, dest, 1, - DMA_SLAVE_BUSWIDTH_4_BYTES, len, DMA_MEM_TO_MEM); - if (ret < 0) + width, pset_len, DMA_MEM_TO_MEM); + if (ret < 0) { + kfree(edesc); return NULL; + } edesc->absync = ret; - /* - * Enable intermediate transfer chaining to re-trigger channel - * on completion of every TR, and enable transfer-completion - * interrupt on completion of the whole transfer. - */ edesc->pset[0].param.opt |= ITCCHEN; - edesc->pset[0].param.opt |= TCINTEN; + if (nslots == 1) { + /* Enable transfer complete interrupt */ + edesc->pset[0].param.opt |= TCINTEN; + } else { + /* Enable transfer complete chaining for the first slot */ + edesc->pset[0].param.opt |= TCCHEN; + + if (echan->slot[1] < 0) { + echan->slot[1] = edma_alloc_slot(echan->ecc, + EDMA_SLOT_ANY); + if (echan->slot[1] < 0) { + kfree(edesc); + dev_err(dev, "%s: Failed to allocate slot\n", + __func__); + return NULL; + } + } + dest += pset_len; + src += pset_len; + pset_len = width = len % (SZ_32K - 1); + + ret = edma_config_pset(chan, &edesc->pset[1], src, dest, 1, + width, pset_len, DMA_MEM_TO_MEM); + if (ret < 0) { + kfree(edesc); + return NULL; + } + + edesc->pset[1].param.opt |= ITCCHEN; + edesc->pset[1].param.opt |= TCINTEN; + } return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags); } @@ -634,8 +1272,8 @@ static struct dma_async_tx_descriptor *edma_prep_dma_cyclic( if (nslots > MAX_NR_SG) return NULL; - edesc = kzalloc(sizeof(*edesc) + nslots * - sizeof(edesc->pset[0]), GFP_ATOMIC); + edesc = kzalloc(sizeof(*edesc) + nslots * sizeof(edesc->pset[0]), + GFP_ATOMIC); if (!edesc) { dev_err(dev, "%s: Failed to allocate a descriptor\n", __func__); return NULL; @@ -654,8 +1292,7 @@ static struct dma_async_tx_descriptor *edma_prep_dma_cyclic( /* Allocate a PaRAM slot, if needed */ if (echan->slot[i] < 0) { echan->slot[i] = - edma_alloc_slot(EDMA_CTLR(echan->ch_num), - EDMA_SLOT_ANY); + edma_alloc_slot(echan->ecc, EDMA_SLOT_ANY); if (echan->slot[i] < 0) { kfree(edesc); dev_err(dev, "%s: Failed to allocate slot\n", @@ -716,128 +1353,281 @@ static struct dma_async_tx_descriptor *edma_prep_dma_cyclic( } /* Place the cyclic channel to highest priority queue */ - edma_assign_channel_eventq(echan->ch_num, EVENTQ_0); + if (!echan->tc) + edma_assign_channel_eventq(echan, EVENTQ_0); return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags); } -static void edma_callback(unsigned ch_num, u16 ch_status, void *data) +static void edma_completion_handler(struct edma_chan *echan) { - struct edma_chan *echan = data; struct device *dev = echan->vchan.chan.device->dev; - struct edma_desc *edesc; - struct edmacc_param p; + struct edma_desc *edesc = echan->edesc; - edesc = echan->edesc; + if (!edesc) + return; - /* Pause the channel for 
non-cyclic */ - if (!edesc || (edesc && !edesc->cyclic)) - edma_pause(echan->ch_num); - - switch (ch_status) { - case EDMA_DMA_COMPLETE: - spin_lock(&echan->vchan.lock); - - if (edesc) { - if (edesc->cyclic) { - vchan_cyclic_callback(&edesc->vdesc); - } else if (edesc->processed == edesc->pset_nr) { - dev_dbg(dev, "Transfer complete, stopping channel %d\n", ch_num); - edesc->residue = 0; - edma_stop(echan->ch_num); - vchan_cookie_complete(&edesc->vdesc); - edma_execute(echan); - } else { - dev_dbg(dev, "Intermediate transfer complete on channel %d\n", ch_num); - - /* Update statistics for tx_status */ - edesc->residue -= edesc->sg_len; - edesc->residue_stat = edesc->residue; - edesc->processed_stat = edesc->processed; - - edma_execute(echan); - } + spin_lock(&echan->vchan.lock); + if (edesc->cyclic) { + vchan_cyclic_callback(&edesc->vdesc); + spin_unlock(&echan->vchan.lock); + return; + } else if (edesc->processed == edesc->pset_nr) { + edesc->residue = 0; + edma_stop(echan); + vchan_cookie_complete(&edesc->vdesc); + echan->edesc = NULL; + + dev_dbg(dev, "Transfer completed on channel %d\n", + echan->ch_num); + } else { + dev_dbg(dev, "Sub transfer completed on channel %d\n", + echan->ch_num); + + edma_pause(echan); + + /* Update statistics for tx_status */ + edesc->residue -= edesc->sg_len; + edesc->residue_stat = edesc->residue; + edesc->processed_stat = edesc->processed; + } + edma_execute(echan); + + spin_unlock(&echan->vchan.lock); +} + +/* eDMA interrupt handler */ +static irqreturn_t dma_irq_handler(int irq, void *data) +{ + struct edma_cc *ecc = data; + int ctlr; + u32 sh_ier; + u32 sh_ipr; + u32 bank; + + ctlr = ecc->id; + if (ctlr < 0) + return IRQ_NONE; + + dev_vdbg(ecc->dev, "dma_irq_handler\n"); + + sh_ipr = edma_shadow0_read_array(ecc, SH_IPR, 0); + if (!sh_ipr) { + sh_ipr = edma_shadow0_read_array(ecc, SH_IPR, 1); + if (!sh_ipr) + return IRQ_NONE; + sh_ier = edma_shadow0_read_array(ecc, SH_IER, 1); + bank = 1; + } else { + sh_ier = edma_shadow0_read_array(ecc, SH_IER, 0); + bank = 0; + } + + do { + u32 slot; + u32 channel; + + slot = __ffs(sh_ipr); + sh_ipr &= ~(BIT(slot)); + + if (sh_ier & BIT(slot)) { + channel = (bank << 5) | slot; + /* Clear the corresponding IPR bits */ + edma_shadow0_write_array(ecc, SH_ICR, bank, BIT(slot)); + edma_completion_handler(&ecc->slave_chans[channel]); } + } while (sh_ipr); - spin_unlock(&echan->vchan.lock); + edma_shadow0_write(ecc, SH_IEVAL, 1); + return IRQ_HANDLED; +} + +static void edma_error_handler(struct edma_chan *echan) +{ + struct edma_cc *ecc = echan->ecc; + struct device *dev = echan->vchan.chan.device->dev; + struct edmacc_param p; - break; - case EDMA_DMA_CC_ERROR: - spin_lock(&echan->vchan.lock); + if (!echan->edesc) + return; - edma_read_slot(EDMA_CHAN_SLOT(echan->slot[0]), &p); + spin_lock(&echan->vchan.lock); + edma_read_slot(ecc, echan->slot[0], &p); + /* + * Issue later based on missed flag which will be sure + * to happen as: + * (1) we finished transmitting an intermediate slot and + * edma_execute is coming up. + * (2) or we finished current transfer and issue will + * call edma_execute. + * + * Important note: issuing can be dangerous here and + * lead to some nasty recursion when we are in a NULL + * slot. So we avoid doing so and set the missed flag. 
+ */ + if (p.a_b_cnt == 0 && p.ccnt == 0) { + dev_dbg(dev, "Error on null slot, setting miss\n"); + echan->missed = 1; + } else { /* - * Issue later based on missed flag which will be sure - * to happen as: - * (1) we finished transmitting an intermediate slot and - * edma_execute is coming up. - * (2) or we finished current transfer and issue will - * call edma_execute. - * - * Important note: issuing can be dangerous here and - * lead to some nasty recursion when we are in a NULL - * slot. So we avoid doing so and set the missed flag. + * The slot is already programmed but the event got + * missed, so its safe to issue it here. */ - if (p.a_b_cnt == 0 && p.ccnt == 0) { - dev_dbg(dev, "Error occurred, looks like slot is null, just setting miss\n"); - echan->missed = 1; - } else { - /* - * The slot is already programmed but the event got - * missed, so its safe to issue it here. - */ - dev_dbg(dev, "Error occurred but slot is non-null, TRIGGERING\n"); - edma_clean_channel(echan->ch_num); - edma_stop(echan->ch_num); - edma_start(echan->ch_num); - edma_trigger_channel(echan->ch_num); + dev_dbg(dev, "Missed event, TRIGGERING\n"); + edma_clean_channel(echan); + edma_stop(echan); + edma_start(echan); + edma_trigger_channel(echan); + } + spin_unlock(&echan->vchan.lock); +} + +static inline bool edma_error_pending(struct edma_cc *ecc) +{ + if (edma_read_array(ecc, EDMA_EMR, 0) || + edma_read_array(ecc, EDMA_EMR, 1) || + edma_read(ecc, EDMA_QEMR) || edma_read(ecc, EDMA_CCERR)) + return true; + + return false; +} + +/* eDMA error interrupt handler */ +static irqreturn_t dma_ccerr_handler(int irq, void *data) +{ + struct edma_cc *ecc = data; + int i, j; + int ctlr; + unsigned int cnt = 0; + unsigned int val; + + ctlr = ecc->id; + if (ctlr < 0) + return IRQ_NONE; + + dev_vdbg(ecc->dev, "dma_ccerr_handler\n"); + + if (!edma_error_pending(ecc)) + return IRQ_NONE; + + while (1) { + /* Event missed register(s) */ + for (j = 0; j < 2; j++) { + unsigned long emr; + + val = edma_read_array(ecc, EDMA_EMR, j); + if (!val) + continue; + + dev_dbg(ecc->dev, "EMR%d 0x%08x\n", j, val); + emr = val; + for (i = find_next_bit(&emr, 32, 0); i < 32; + i = find_next_bit(&emr, 32, i + 1)) { + int k = (j << 5) + i; + + /* Clear the corresponding EMR bits */ + edma_write_array(ecc, EDMA_EMCR, j, BIT(i)); + /* Clear any SER */ + edma_shadow0_write_array(ecc, SH_SECR, j, + BIT(i)); + edma_error_handler(&ecc->slave_chans[k]); + } } - spin_unlock(&echan->vchan.lock); + val = edma_read(ecc, EDMA_QEMR); + if (val) { + dev_dbg(ecc->dev, "QEMR 0x%02x\n", val); + /* Not reported, just clear the interrupt reason. */ + edma_write(ecc, EDMA_QEMCR, val); + edma_shadow0_write(ecc, SH_QSECR, val); + } + + val = edma_read(ecc, EDMA_CCERR); + if (val) { + dev_warn(ecc->dev, "CCERR 0x%08x\n", val); + /* Not reported, just clear the interrupt reason. 
*/ + edma_write(ecc, EDMA_CCERRCLR, val); + } - break; - default: - break; + if (!edma_error_pending(ecc)) + break; + cnt++; + if (cnt > 10) + break; } + edma_write(ecc, EDMA_EEVAL, 1); + return IRQ_HANDLED; +} + +static void edma_tc_set_pm_state(struct edma_tc *tc, bool enable) +{ + struct platform_device *tc_pdev; + int ret; + + if (!IS_ENABLED(CONFIG_OF) || !tc) + return; + + tc_pdev = of_find_device_by_node(tc->node); + if (!tc_pdev) { + pr_err("%s: TPTC device is not found\n", __func__); + return; + } + if (!pm_runtime_enabled(&tc_pdev->dev)) + pm_runtime_enable(&tc_pdev->dev); + + if (enable) + ret = pm_runtime_get_sync(&tc_pdev->dev); + else + ret = pm_runtime_put_sync(&tc_pdev->dev); + + if (ret < 0) + pr_err("%s: pm_runtime_%s_sync() failed for %s\n", __func__, + enable ? "get" : "put", dev_name(&tc_pdev->dev)); } /* Alloc channel resources */ static int edma_alloc_chan_resources(struct dma_chan *chan) { struct edma_chan *echan = to_edma_chan(chan); - struct device *dev = chan->device->dev; + struct edma_cc *ecc = echan->ecc; + struct device *dev = ecc->dev; + enum dma_event_q eventq_no = EVENTQ_DEFAULT; int ret; - int a_ch_num; - LIST_HEAD(descs); - - a_ch_num = edma_alloc_channel(echan->ch_num, edma_callback, - echan, EVENTQ_DEFAULT); - if (a_ch_num < 0) { - ret = -ENODEV; - goto err_no_chan; + if (echan->tc) { + eventq_no = echan->tc->id; + } else if (ecc->tc_list) { + /* memcpy channel */ + echan->tc = &ecc->tc_list[ecc->info->default_queue]; + eventq_no = echan->tc->id; } - if (a_ch_num != echan->ch_num) { - dev_err(dev, "failed to allocate requested channel %u:%u\n", - EDMA_CTLR(echan->ch_num), + ret = edma_alloc_channel(echan, eventq_no); + if (ret) + return ret; + + echan->slot[0] = edma_alloc_slot(ecc, echan->ch_num); + if (echan->slot[0] < 0) { + dev_err(dev, "Entry slot allocation failed for channel %u\n", EDMA_CHAN_SLOT(echan->ch_num)); - ret = -ENODEV; - goto err_wrong_chan; + goto err_slot; } + /* Set up channel -> slot mapping for the entry slot */ + edma_set_chmap(echan, echan->slot[0]); echan->alloced = true; - echan->slot[0] = echan->ch_num; - dev_dbg(dev, "allocated channel %d for %u:%u\n", echan->ch_num, - EDMA_CTLR(echan->ch_num), EDMA_CHAN_SLOT(echan->ch_num)); + dev_dbg(dev, "Got eDMA channel %d for virt channel %d (%s trigger)\n", + EDMA_CHAN_SLOT(echan->ch_num), chan->chan_id, + echan->hw_triggered ? 
"HW" : "SW"); + + edma_tc_set_pm_state(echan->tc, true); return 0; -err_wrong_chan: - edma_free_channel(a_ch_num); -err_no_chan: +err_slot: + edma_free_channel(echan); return ret; } @@ -845,29 +1635,37 @@ err_no_chan: static void edma_free_chan_resources(struct dma_chan *chan) { struct edma_chan *echan = to_edma_chan(chan); - struct device *dev = chan->device->dev; + struct device *dev = echan->ecc->dev; int i; /* Terminate transfers */ - edma_stop(echan->ch_num); + edma_stop(echan); vchan_free_chan_resources(&echan->vchan); /* Free EDMA PaRAM slots */ - for (i = 1; i < EDMA_MAX_SLOTS; i++) { + for (i = 0; i < EDMA_MAX_SLOTS; i++) { if (echan->slot[i] >= 0) { - edma_free_slot(echan->slot[i]); + edma_free_slot(echan->ecc, echan->slot[i]); echan->slot[i] = -1; } } + /* Set entry slot to the dummy slot */ + edma_set_chmap(echan, echan->ecc->dummy_slot); + /* Free EDMA channel */ if (echan->alloced) { - edma_free_channel(echan->ch_num); + edma_free_channel(echan); echan->alloced = false; } - dev_dbg(dev, "freeing channel for %u\n", echan->ch_num); + edma_tc_set_pm_state(echan->tc, false); + echan->tc = NULL; + echan->hw_triggered = false; + + dev_dbg(dev, "Free eDMA channel %d for virt channel %d\n", + EDMA_CHAN_SLOT(echan->ch_num), chan->chan_id); } /* Send pending descriptor to hardware */ @@ -893,7 +1691,7 @@ static u32 edma_residue(struct edma_desc *edesc) * We always read the dst/src position from the first RamPar * pset. That's the one which is active now. */ - pos = edma_get_position(edesc->echan->slot[0], dst); + pos = edma_get_position(edesc->echan->ecc, edesc->echan->slot[0], dst); /* * Cyclic is simple. Just subtract pset[0].addr from pos. @@ -954,19 +1752,99 @@ static enum dma_status edma_tx_status(struct dma_chan *chan, return ret; } -static void __init edma_chan_init(struct edma_cc *ecc, - struct dma_device *dma, - struct edma_chan *echans) +static bool edma_is_memcpy_channel(int ch_num, s32 *memcpy_channels) +{ + if (!memcpy_channels) + return false; + while (*memcpy_channels != -1) { + if (*memcpy_channels == ch_num) + return true; + memcpy_channels++; + } + return false; +} + +#define EDMA_DMA_BUSWIDTHS (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \ + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES)) + +static void edma_dma_init(struct edma_cc *ecc, bool legacy_mode) { + struct dma_device *s_ddev = &ecc->dma_slave; + struct dma_device *m_ddev = NULL; + s32 *memcpy_channels = ecc->info->memcpy_channels; int i, j; - for (i = 0; i < EDMA_CHANS; i++) { - struct edma_chan *echan = &echans[i]; - echan->ch_num = EDMA_CTLR_CHAN(ecc->ctlr, i); + dma_cap_zero(s_ddev->cap_mask); + dma_cap_set(DMA_SLAVE, s_ddev->cap_mask); + dma_cap_set(DMA_CYCLIC, s_ddev->cap_mask); + if (ecc->legacy_mode && !memcpy_channels) { + dev_warn(ecc->dev, + "Legacy memcpy is enabled, things might not work\n"); + + dma_cap_set(DMA_MEMCPY, s_ddev->cap_mask); + s_ddev->device_prep_dma_memcpy = edma_prep_dma_memcpy; + s_ddev->directions = BIT(DMA_MEM_TO_MEM); + } + + s_ddev->device_prep_slave_sg = edma_prep_slave_sg; + s_ddev->device_prep_dma_cyclic = edma_prep_dma_cyclic; + s_ddev->device_alloc_chan_resources = edma_alloc_chan_resources; + s_ddev->device_free_chan_resources = edma_free_chan_resources; + s_ddev->device_issue_pending = edma_issue_pending; + s_ddev->device_tx_status = edma_tx_status; + s_ddev->device_config = edma_slave_config; + s_ddev->device_pause = edma_dma_pause; + s_ddev->device_resume = edma_dma_resume; + s_ddev->device_terminate_all = 
edma_terminate_all; + + s_ddev->src_addr_widths = EDMA_DMA_BUSWIDTHS; + s_ddev->dst_addr_widths = EDMA_DMA_BUSWIDTHS; + s_ddev->directions |= (BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV)); + s_ddev->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + + s_ddev->dev = ecc->dev; + INIT_LIST_HEAD(&s_ddev->channels); + + if (memcpy_channels) { + m_ddev = devm_kzalloc(ecc->dev, sizeof(*m_ddev), GFP_KERNEL); + ecc->dma_memcpy = m_ddev; + + dma_cap_zero(m_ddev->cap_mask); + dma_cap_set(DMA_MEMCPY, m_ddev->cap_mask); + + m_ddev->device_prep_dma_memcpy = edma_prep_dma_memcpy; + m_ddev->device_alloc_chan_resources = edma_alloc_chan_resources; + m_ddev->device_free_chan_resources = edma_free_chan_resources; + m_ddev->device_issue_pending = edma_issue_pending; + m_ddev->device_tx_status = edma_tx_status; + m_ddev->device_config = edma_slave_config; + m_ddev->device_pause = edma_dma_pause; + m_ddev->device_resume = edma_dma_resume; + m_ddev->device_terminate_all = edma_terminate_all; + + m_ddev->src_addr_widths = EDMA_DMA_BUSWIDTHS; + m_ddev->dst_addr_widths = EDMA_DMA_BUSWIDTHS; + m_ddev->directions = BIT(DMA_MEM_TO_MEM); + m_ddev->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + + m_ddev->dev = ecc->dev; + INIT_LIST_HEAD(&m_ddev->channels); + } else if (!ecc->legacy_mode) { + dev_info(ecc->dev, "memcpy is disabled\n"); + } + + for (i = 0; i < ecc->num_channels; i++) { + struct edma_chan *echan = &ecc->slave_chans[i]; + echan->ch_num = EDMA_CTLR_CHAN(ecc->id, i); echan->ecc = ecc; echan->vchan.desc_free = edma_desc_free; - vchan_init(&echan->vchan, dma); + if (m_ddev && edma_is_memcpy_channel(i, memcpy_channels)) + vchan_init(&echan->vchan, m_ddev); + else + vchan_init(&echan->vchan, s_ddev); INIT_LIST_HEAD(&echan->node); for (j = 0; j < EDMA_MAX_SLOTS; j++) @@ -974,85 +1852,493 @@ static void __init edma_chan_init(struct edma_cc *ecc, } } -#define EDMA_DMA_BUSWIDTHS (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \ - BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \ - BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | \ - BIT(DMA_SLAVE_BUSWIDTH_4_BYTES)) - -static void edma_dma_init(struct edma_cc *ecc, struct dma_device *dma, - struct device *dev) +static int edma_setup_from_hw(struct device *dev, struct edma_soc_info *pdata, + struct edma_cc *ecc) { - dma->device_prep_slave_sg = edma_prep_slave_sg; - dma->device_prep_dma_cyclic = edma_prep_dma_cyclic; - dma->device_prep_dma_memcpy = edma_prep_dma_memcpy; - dma->device_alloc_chan_resources = edma_alloc_chan_resources; - dma->device_free_chan_resources = edma_free_chan_resources; - dma->device_issue_pending = edma_issue_pending; - dma->device_tx_status = edma_tx_status; - dma->device_config = edma_slave_config; - dma->device_pause = edma_dma_pause; - dma->device_resume = edma_dma_resume; - dma->device_terminate_all = edma_terminate_all; + int i; + u32 value, cccfg; + s8 (*queue_priority_map)[2]; + + /* Decode the eDMA3 configuration from CCCFG register */ + cccfg = edma_read(ecc, EDMA_CCCFG); - dma->src_addr_widths = EDMA_DMA_BUSWIDTHS; - dma->dst_addr_widths = EDMA_DMA_BUSWIDTHS; - dma->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); - dma->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + value = GET_NUM_REGN(cccfg); + ecc->num_region = BIT(value); - dma->dev = dev; + value = GET_NUM_DMACH(cccfg); + ecc->num_channels = BIT(value + 1); + + value = GET_NUM_QDMACH(cccfg); + ecc->num_qchannels = value * 2; + + value = GET_NUM_PAENTRY(cccfg); + ecc->num_slots = BIT(value + 4); + + value = GET_NUM_EVQUE(cccfg); + ecc->num_tc = value + 1; + + ecc->chmap_exist = (cccfg & 
CHMAP_EXIST) ? true : false; + + dev_dbg(dev, "eDMA3 CC HW configuration (cccfg: 0x%08x):\n", cccfg); + dev_dbg(dev, "num_region: %u\n", ecc->num_region); + dev_dbg(dev, "num_channels: %u\n", ecc->num_channels); + dev_dbg(dev, "num_qchannels: %u\n", ecc->num_qchannels); + dev_dbg(dev, "num_slots: %u\n", ecc->num_slots); + dev_dbg(dev, "num_tc: %u\n", ecc->num_tc); + dev_dbg(dev, "chmap_exist: %s\n", ecc->chmap_exist ? "yes" : "no"); + + /* Nothing need to be done if queue priority is provided */ + if (pdata->queue_priority_mapping) + return 0; /* - * code using dma memcpy must make sure alignment of - * length is at dma->copy_align boundary. + * Configure TC/queue priority as follows: + * Q0 - priority 0 + * Q1 - priority 1 + * Q2 - priority 2 + * ... + * The meaning of priority numbers: 0 highest priority, 7 lowest + * priority. So Q0 is the highest priority queue and the last queue has + * the lowest priority. */ - dma->copy_align = DMA_SLAVE_BUSWIDTH_4_BYTES; + queue_priority_map = devm_kcalloc(dev, ecc->num_tc + 1, sizeof(s8), + GFP_KERNEL); + if (!queue_priority_map) + return -ENOMEM; + + for (i = 0; i < ecc->num_tc; i++) { + queue_priority_map[i][0] = i; + queue_priority_map[i][1] = i; + } + queue_priority_map[i][0] = -1; + queue_priority_map[i][1] = -1; + + pdata->queue_priority_mapping = queue_priority_map; + /* Default queue has the lowest priority */ + pdata->default_queue = i - 1; - INIT_LIST_HEAD(&dma->channels); + return 0; } +#if IS_ENABLED(CONFIG_OF) +static int edma_xbar_event_map(struct device *dev, struct edma_soc_info *pdata, + size_t sz) +{ + const char pname[] = "ti,edma-xbar-event-map"; + struct resource res; + void __iomem *xbar; + s16 (*xbar_chans)[2]; + size_t nelm = sz / sizeof(s16); + u32 shift, offset, mux; + int ret, i; + + xbar_chans = devm_kcalloc(dev, nelm + 2, sizeof(s16), GFP_KERNEL); + if (!xbar_chans) + return -ENOMEM; + + ret = of_address_to_resource(dev->of_node, 1, &res); + if (ret) + return -ENOMEM; + + xbar = devm_ioremap(dev, res.start, resource_size(&res)); + if (!xbar) + return -ENOMEM; + + ret = of_property_read_u16_array(dev->of_node, pname, (u16 *)xbar_chans, + nelm); + if (ret) + return -EIO; + + /* Invalidate last entry for the other user of this mess */ + nelm >>= 1; + xbar_chans[nelm][0] = -1; + xbar_chans[nelm][1] = -1; + + for (i = 0; i < nelm; i++) { + shift = (xbar_chans[i][1] & 0x03) << 3; + offset = xbar_chans[i][1] & 0xfffffffc; + mux = readl(xbar + offset); + mux &= ~(0xff << shift); + mux |= xbar_chans[i][0] << shift; + writel(mux, (xbar + offset)); + } + + pdata->xbar_chans = (const s16 (*)[2]) xbar_chans; + return 0; +} + +static struct edma_soc_info *edma_setup_info_from_dt(struct device *dev, + bool legacy_mode) +{ + struct edma_soc_info *info; + struct property *prop; + size_t sz; + int ret; + + info = devm_kzalloc(dev, sizeof(struct edma_soc_info), GFP_KERNEL); + if (!info) + return ERR_PTR(-ENOMEM); + + if (legacy_mode) { + prop = of_find_property(dev->of_node, "ti,edma-xbar-event-map", + &sz); + if (prop) { + ret = edma_xbar_event_map(dev, info, sz); + if (ret) + return ERR_PTR(ret); + } + return info; + } + + /* Get the list of channels allocated to be used for memcpy */ + prop = of_find_property(dev->of_node, "ti,edma-memcpy-channels", &sz); + if (prop) { + const char pname[] = "ti,edma-memcpy-channels"; + size_t nelm = sz / sizeof(s32); + s32 *memcpy_ch; + + memcpy_ch = devm_kcalloc(dev, nelm + 1, sizeof(s32), + GFP_KERNEL); + if (!memcpy_ch) + return ERR_PTR(-ENOMEM); + + ret = of_property_read_u32_array(dev->of_node, 
pname, + (u32 *)memcpy_ch, nelm); + if (ret) + return ERR_PTR(ret); + + memcpy_ch[nelm] = -1; + info->memcpy_channels = memcpy_ch; + } + + prop = of_find_property(dev->of_node, "ti,edma-reserved-slot-ranges", + &sz); + if (prop) { + const char pname[] = "ti,edma-reserved-slot-ranges"; + u32 (*tmp)[2]; + s16 (*rsv_slots)[2]; + size_t nelm = sz / sizeof(*tmp); + struct edma_rsv_info *rsv_info; + int i; + + if (!nelm) + return info; + + tmp = kcalloc(nelm, sizeof(*tmp), GFP_KERNEL); + if (!tmp) + return ERR_PTR(-ENOMEM); + + rsv_info = devm_kzalloc(dev, sizeof(*rsv_info), GFP_KERNEL); + if (!rsv_info) { + kfree(tmp); + return ERR_PTR(-ENOMEM); + } + + rsv_slots = devm_kcalloc(dev, nelm + 1, sizeof(*rsv_slots), + GFP_KERNEL); + if (!rsv_slots) { + kfree(tmp); + return ERR_PTR(-ENOMEM); + } + + ret = of_property_read_u32_array(dev->of_node, pname, + (u32 *)tmp, nelm * 2); + if (ret) { + kfree(tmp); + return ERR_PTR(ret); + } + + for (i = 0; i < nelm; i++) { + rsv_slots[i][0] = tmp[i][0]; + rsv_slots[i][1] = tmp[i][1]; + } + rsv_slots[nelm][0] = -1; + rsv_slots[nelm][1] = -1; + + info->rsv = rsv_info; + info->rsv->rsv_slots = (const s16 (*)[2])rsv_slots; + + kfree(tmp); + } + + return info; +} + +static struct dma_chan *of_edma_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct edma_cc *ecc = ofdma->of_dma_data; + struct dma_chan *chan = NULL; + struct edma_chan *echan; + int i; + + if (!ecc || dma_spec->args_count < 1) + return NULL; + + for (i = 0; i < ecc->num_channels; i++) { + echan = &ecc->slave_chans[i]; + if (echan->ch_num == dma_spec->args[0]) { + chan = &echan->vchan.chan; + break; + } + } + + if (!chan) + return NULL; + + if (echan->ecc->legacy_mode && dma_spec->args_count == 1) + goto out; + + if (!echan->ecc->legacy_mode && dma_spec->args_count == 2 && + dma_spec->args[1] < echan->ecc->num_tc) { + echan->tc = &echan->ecc->tc_list[dma_spec->args[1]]; + goto out; + } + + return NULL; +out: + /* The channel is going to be used as HW synchronized */ + echan->hw_triggered = true; + return dma_get_slave_channel(chan); +} +#else +static struct edma_soc_info *edma_setup_info_from_dt(struct device *dev, + bool legacy_mode) +{ + return ERR_PTR(-EINVAL); +} + +static struct dma_chan *of_edma_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + return NULL; +} +#endif + static int edma_probe(struct platform_device *pdev) { - struct edma_cc *ecc; + struct edma_soc_info *info = pdev->dev.platform_data; + s8 (*queue_priority_mapping)[2]; + int i, off, ln; + const s16 (*rsv_slots)[2]; + const s16 (*xbar_chans)[2]; + int irq; + char *irq_name; + struct resource *mem; + struct device_node *node = pdev->dev.of_node; + struct device *dev = &pdev->dev; + struct edma_cc *ecc; + bool legacy_mode = true; int ret; - ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (node) { + const struct of_device_id *match; + + match = of_match_node(edma_of_ids, node); + if (match && (u32)match->data == EDMA_BINDING_TPCC) + legacy_mode = false; + + info = edma_setup_info_from_dt(dev, legacy_mode); + if (IS_ERR(info)) { + dev_err(dev, "failed to get DT data\n"); + return PTR_ERR(info); + } + } + + if (!info) + return -ENODEV; + + pm_runtime_enable(dev); + ret = pm_runtime_get_sync(dev); + if (ret < 0) { + dev_err(dev, "pm_runtime_get_sync() failed\n"); + return ret; + } + + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); if (ret) return ret; - ecc = devm_kzalloc(&pdev->dev, sizeof(*ecc), GFP_KERNEL); + ecc = devm_kzalloc(dev, sizeof(*ecc), GFP_KERNEL); if 
(!ecc) { - dev_err(&pdev->dev, "Can't allocate controller\n"); + dev_err(dev, "Can't allocate controller\n"); return -ENOMEM; } - ecc->ctlr = pdev->id; - ecc->dummy_slot = edma_alloc_slot(ecc->ctlr, EDMA_SLOT_ANY); + ecc->dev = dev; + ecc->id = pdev->id; + ecc->legacy_mode = legacy_mode; + /* When booting with DT the pdev->id is -1 */ + if (ecc->id < 0) + ecc->id = 0; + + mem = platform_get_resource_byname(pdev, IORESOURCE_MEM, "edma3_cc"); + if (!mem) { + dev_dbg(dev, "mem resource not found, using index 0\n"); + mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!mem) { + dev_err(dev, "no mem resource?\n"); + return -ENODEV; + } + } + ecc->base = devm_ioremap_resource(dev, mem); + if (IS_ERR(ecc->base)) + return PTR_ERR(ecc->base); + + platform_set_drvdata(pdev, ecc); + + /* Get eDMA3 configuration from IP */ + ret = edma_setup_from_hw(dev, info, ecc); + if (ret) + return ret; + + /* Allocate memory based on the information we got from the IP */ + ecc->slave_chans = devm_kcalloc(dev, ecc->num_channels, + sizeof(*ecc->slave_chans), GFP_KERNEL); + if (!ecc->slave_chans) + return -ENOMEM; + + ecc->slot_inuse = devm_kcalloc(dev, BITS_TO_LONGS(ecc->num_slots), + sizeof(unsigned long), GFP_KERNEL); + if (!ecc->slot_inuse) + return -ENOMEM; + + ecc->default_queue = info->default_queue; + + for (i = 0; i < ecc->num_slots; i++) + edma_write_slot(ecc, i, &dummy_paramset); + + if (info->rsv) { + /* Set the reserved slots in inuse list */ + rsv_slots = info->rsv->rsv_slots; + if (rsv_slots) { + for (i = 0; rsv_slots[i][0] != -1; i++) { + off = rsv_slots[i][0]; + ln = rsv_slots[i][1]; + set_bits(off, ln, ecc->slot_inuse); + } + } + } + + /* Clear the xbar mapped channels in unused list */ + xbar_chans = info->xbar_chans; + if (xbar_chans) { + for (i = 0; xbar_chans[i][1] != -1; i++) { + off = xbar_chans[i][1]; + } + } + + irq = platform_get_irq_byname(pdev, "edma3_ccint"); + if (irq < 0 && node) + irq = irq_of_parse_and_map(node, 0); + + if (irq >= 0) { + irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_ccint", + dev_name(dev)); + ret = devm_request_irq(dev, irq, dma_irq_handler, 0, irq_name, + ecc); + if (ret) { + dev_err(dev, "CCINT (%d) failed --> %d\n", irq, ret); + return ret; + } + } + + irq = platform_get_irq_byname(pdev, "edma3_ccerrint"); + if (irq < 0 && node) + irq = irq_of_parse_and_map(node, 2); + + if (irq >= 0) { + irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_ccerrint", + dev_name(dev)); + ret = devm_request_irq(dev, irq, dma_ccerr_handler, 0, irq_name, + ecc); + if (ret) { + dev_err(dev, "CCERRINT (%d) failed --> %d\n", irq, ret); + return ret; + } + } + + ecc->dummy_slot = edma_alloc_slot(ecc, EDMA_SLOT_ANY); if (ecc->dummy_slot < 0) { - dev_err(&pdev->dev, "Can't allocate PaRAM dummy slot\n"); + dev_err(dev, "Can't allocate PaRAM dummy slot\n"); return ecc->dummy_slot; } - dma_cap_zero(ecc->dma_slave.cap_mask); - dma_cap_set(DMA_SLAVE, ecc->dma_slave.cap_mask); - dma_cap_set(DMA_CYCLIC, ecc->dma_slave.cap_mask); - dma_cap_set(DMA_MEMCPY, ecc->dma_slave.cap_mask); + queue_priority_mapping = info->queue_priority_mapping; + + if (!ecc->legacy_mode) { + int lowest_priority = 0; + struct of_phandle_args tc_args; + + ecc->tc_list = devm_kcalloc(dev, ecc->num_tc, + sizeof(*ecc->tc_list), GFP_KERNEL); + if (!ecc->tc_list) + return -ENOMEM; + + for (i = 0;; i++) { + ret = of_parse_phandle_with_fixed_args(node, "ti,tptcs", + 1, i, &tc_args); + if (ret || i == ecc->num_tc) + break; + + ecc->tc_list[i].node = tc_args.np; + ecc->tc_list[i].id = i; + queue_priority_mapping[i][1] = 
tc_args.args[0]; + if (queue_priority_mapping[i][1] > lowest_priority) { + lowest_priority = queue_priority_mapping[i][1]; + info->default_queue = i; + } + } + } + + /* Event queue priority mapping */ + for (i = 0; queue_priority_mapping[i][0] != -1; i++) + edma_assign_priority_to_queue(ecc, queue_priority_mapping[i][0], + queue_priority_mapping[i][1]); - edma_dma_init(ecc, &ecc->dma_slave, &pdev->dev); + for (i = 0; i < ecc->num_region; i++) { + edma_write_array2(ecc, EDMA_DRAE, i, 0, 0x0); + edma_write_array2(ecc, EDMA_DRAE, i, 1, 0x0); + edma_write_array(ecc, EDMA_QRAE, i, 0x0); + } + ecc->info = info; - edma_chan_init(ecc, &ecc->dma_slave, ecc->slave_chans); + /* Init the dma device and channels */ + edma_dma_init(ecc, legacy_mode); + + for (i = 0; i < ecc->num_channels; i++) { + /* Assign all channels to the default queue */ + edma_assign_channel_eventq(&ecc->slave_chans[i], + info->default_queue); + /* Set entry slot to the dummy slot */ + edma_set_chmap(&ecc->slave_chans[i], ecc->dummy_slot); + } ret = dma_async_device_register(&ecc->dma_slave); - if (ret) + if (ret) { + dev_err(dev, "slave ddev registration failed (%d)\n", ret); goto err_reg1; + } - platform_set_drvdata(pdev, ecc); + if (ecc->dma_memcpy) { + ret = dma_async_device_register(ecc->dma_memcpy); + if (ret) { + dev_err(dev, "memcpy ddev registration failed (%d)\n", + ret); + dma_async_device_unregister(&ecc->dma_slave); + goto err_reg1; + } + } - dev_info(&pdev->dev, "TI EDMA DMA engine driver\n"); + if (node) + of_dma_controller_register(node, of_edma_xlate, ecc); + + dev_info(dev, "TI EDMA DMA engine driver\n"); return 0; err_reg1: - edma_free_slot(ecc->dummy_slot); + edma_free_slot(ecc, ecc->dummy_slot); return ret; } @@ -1061,33 +2347,112 @@ static int edma_remove(struct platform_device *pdev) struct device *dev = &pdev->dev; struct edma_cc *ecc = dev_get_drvdata(dev); + if (dev->of_node) + of_dma_controller_free(dev->of_node); dma_async_device_unregister(&ecc->dma_slave); - edma_free_slot(ecc->dummy_slot); + if (ecc->dma_memcpy) + dma_async_device_unregister(ecc->dma_memcpy); + edma_free_slot(ecc, ecc->dummy_slot); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int edma_pm_suspend(struct device *dev) +{ + struct edma_cc *ecc = dev_get_drvdata(dev); + struct edma_chan *echan = ecc->slave_chans; + int i; + + for (i = 0; i < ecc->num_channels; i++) { + if (echan[i].alloced) { + edma_setup_interrupt(&echan[i], false); + edma_tc_set_pm_state(echan[i].tc, false); + } + } return 0; } +static int edma_pm_resume(struct device *dev) +{ + struct edma_cc *ecc = dev_get_drvdata(dev); + struct edma_chan *echan = ecc->slave_chans; + int i; + s8 (*queue_priority_mapping)[2]; + + queue_priority_mapping = ecc->info->queue_priority_mapping; + + /* Event queue priority mapping */ + for (i = 0; queue_priority_mapping[i][0] != -1; i++) + edma_assign_priority_to_queue(ecc, queue_priority_mapping[i][0], + queue_priority_mapping[i][1]); + + for (i = 0; i < ecc->num_channels; i++) { + if (echan[i].alloced) { + /* ensure access through shadow region 0 */ + edma_or_array2(ecc, EDMA_DRAE, 0, i >> 5, + BIT(i & 0x1f)); + + edma_setup_interrupt(&echan[i], true); + + /* Set up channel -> slot mapping for the entry slot */ + edma_set_chmap(&echan[i], echan[i].slot[0]); + + edma_tc_set_pm_state(echan[i].tc, true); + } + } + + return 0; +} +#endif + +static const struct dev_pm_ops edma_pm_ops = { + SET_LATE_SYSTEM_SLEEP_PM_OPS(edma_pm_suspend, edma_pm_resume) +}; + static struct platform_driver edma_driver = { .probe = edma_probe, .remove = 
edma_remove, .driver = { - .name = "edma-dma-engine", + .name = "edma", + .pm = &edma_pm_ops, + .of_match_table = edma_of_ids, + }, +}; + +static struct platform_driver edma_tptc_driver = { + .driver = { + .name = "edma3-tptc", + .of_match_table = edma_tptc_of_ids, }, }; bool edma_filter_fn(struct dma_chan *chan, void *param) { + bool match = false; + if (chan->device->dev->driver == &edma_driver.driver) { struct edma_chan *echan = to_edma_chan(chan); unsigned ch_req = *(unsigned *)param; - return ch_req == echan->ch_num; + if (ch_req == echan->ch_num) { + /* The channel is going to be used as HW synchronized */ + echan->hw_triggered = true; + match = true; + } } - return false; + return match; } EXPORT_SYMBOL(edma_filter_fn); static int edma_init(void) { + int ret; + + ret = platform_driver_register(&edma_tptc_driver); + if (ret) + return ret; + return platform_driver_register(&edma_driver); } subsys_initcall(edma_init); @@ -1095,6 +2460,7 @@ subsys_initcall(edma_init); static void __exit edma_exit(void) { platform_driver_unregister(&edma_driver); + platform_driver_unregister(&edma_tptc_driver); } module_exit(edma_exit); diff --git a/kernel/drivers/dma/ep93xx_dma.c b/kernel/drivers/dma/ep93xx_dma.c index 24e5290fa..57ff46284 100644 --- a/kernel/drivers/dma/ep93xx_dma.c +++ b/kernel/drivers/dma/ep93xx_dma.c @@ -1364,7 +1364,7 @@ static int __init ep93xx_dma_probe(struct platform_device *pdev) return ret; } -static struct platform_device_id ep93xx_dma_driver_ids[] = { +static const struct platform_device_id ep93xx_dma_driver_ids[] = { { "ep93xx-dma-m2p", 0 }, { "ep93xx-dma-m2m", 1 }, { }, diff --git a/kernel/drivers/dma/fsl-edma.c b/kernel/drivers/dma/fsl-edma.c index 09e2842d1..915eec3cc 100644 --- a/kernel/drivers/dma/fsl-edma.c +++ b/kernel/drivers/dma/fsl-edma.c @@ -881,10 +881,6 @@ static int fsl_edma_probe(struct platform_device *pdev) } - ret = fsl_edma_irq_init(pdev, fsl_edma); - if (ret) - return ret; - fsl_edma->big_endian = of_property_read_bool(np, "big-endian"); INIT_LIST_HEAD(&fsl_edma->dma_dev.channels); @@ -900,6 +896,11 @@ static int fsl_edma_probe(struct platform_device *pdev) fsl_edma_chan_mux(fsl_chan, 0, false); } + edma_writel(fsl_edma, ~0, fsl_edma->membase + EDMA_INTR); + ret = fsl_edma_irq_init(pdev, fsl_edma); + if (ret) + return ret; + dma_cap_set(DMA_PRIVATE, fsl_edma->dma_dev.cap_mask); dma_cap_set(DMA_SLAVE, fsl_edma->dma_dev.cap_mask); dma_cap_set(DMA_CYCLIC, fsl_edma->dma_dev.cap_mask); diff --git a/kernel/drivers/dma/fsldma.c b/kernel/drivers/dma/fsldma.c index 300f821f1..2209f75fd 100644 --- a/kernel/drivers/dma/fsldma.c +++ b/kernel/drivers/dma/fsldma.c @@ -1512,6 +1512,7 @@ static const struct of_device_id fsldma_of_ids[] = { { .compatible = "fsl,elo-dma", }, {} }; +MODULE_DEVICE_TABLE(of, fsldma_of_ids); static struct platform_driver fsldma_of_driver = { .driver = { diff --git a/kernel/drivers/dma/hsu/Kconfig b/kernel/drivers/dma/hsu/Kconfig index 2810dca70..c70841731 100644 --- a/kernel/drivers/dma/hsu/Kconfig +++ b/kernel/drivers/dma/hsu/Kconfig @@ -5,10 +5,5 @@ config HSU_DMA select DMA_VIRTUAL_CHANNELS config HSU_DMA_PCI - tristate "High Speed UART DMA PCI driver" - depends on PCI - select HSU_DMA - help - Support the High Speed UART DMA on the platfroms that - enumerate it as a PCI device. For example, Intel Medfield - has integrated this HSU DMA controller. 
+ tristate + depends on HSU_DMA && PCI diff --git a/kernel/drivers/dma/hsu/hsu.c b/kernel/drivers/dma/hsu/hsu.c index f42f71e37..823ad728a 100644 --- a/kernel/drivers/dma/hsu/hsu.c +++ b/kernel/drivers/dma/hsu/hsu.c @@ -99,21 +99,13 @@ static void hsu_dma_chan_start(struct hsu_dma_chan *hsuc) static void hsu_dma_stop_channel(struct hsu_dma_chan *hsuc) { - unsigned long flags; - - spin_lock_irqsave(&hsuc->lock, flags); hsu_chan_disable(hsuc); hsu_chan_writel(hsuc, HSU_CH_DCR, 0); - spin_unlock_irqrestore(&hsuc->lock, flags); } static void hsu_dma_start_channel(struct hsu_dma_chan *hsuc) { - unsigned long flags; - - spin_lock_irqsave(&hsuc->lock, flags); hsu_dma_chan_start(hsuc); - spin_unlock_irqrestore(&hsuc->lock, flags); } static void hsu_dma_start_transfer(struct hsu_dma_chan *hsuc) @@ -139,9 +131,9 @@ static u32 hsu_dma_chan_get_sr(struct hsu_dma_chan *hsuc) unsigned long flags; u32 sr; - spin_lock_irqsave(&hsuc->lock, flags); + spin_lock_irqsave(&hsuc->vchan.lock, flags); sr = hsu_chan_readl(hsuc, HSU_CH_SR); - spin_unlock_irqrestore(&hsuc->lock, flags); + spin_unlock_irqrestore(&hsuc->vchan.lock, flags); return sr; } @@ -154,7 +146,7 @@ irqreturn_t hsu_dma_irq(struct hsu_dma_chip *chip, unsigned short nr) u32 sr; /* Sanity check */ - if (nr >= chip->pdata->nr_channels) + if (nr >= chip->hsu->nr_channels) return IRQ_NONE; hsuc = &chip->hsu->chan[nr]; @@ -273,14 +265,11 @@ static size_t hsu_dma_active_desc_size(struct hsu_dma_chan *hsuc) struct hsu_dma_desc *desc = hsuc->desc; size_t bytes = hsu_dma_desc_size(desc); int i; - unsigned long flags; - spin_lock_irqsave(&hsuc->lock, flags); i = desc->active % HSU_DMA_CHAN_NR_DESC; do { bytes += hsu_chan_readl(hsuc, HSU_CH_DxTSR(i)); } while (--i >= 0); - spin_unlock_irqrestore(&hsuc->lock, flags); return bytes; } @@ -327,24 +316,6 @@ static int hsu_dma_slave_config(struct dma_chan *chan, return 0; } -static void hsu_dma_chan_deactivate(struct hsu_dma_chan *hsuc) -{ - unsigned long flags; - - spin_lock_irqsave(&hsuc->lock, flags); - hsu_chan_disable(hsuc); - spin_unlock_irqrestore(&hsuc->lock, flags); -} - -static void hsu_dma_chan_activate(struct hsu_dma_chan *hsuc) -{ - unsigned long flags; - - spin_lock_irqsave(&hsuc->lock, flags); - hsu_chan_enable(hsuc); - spin_unlock_irqrestore(&hsuc->lock, flags); -} - static int hsu_dma_pause(struct dma_chan *chan) { struct hsu_dma_chan *hsuc = to_hsu_dma_chan(chan); @@ -352,7 +323,7 @@ static int hsu_dma_pause(struct dma_chan *chan) spin_lock_irqsave(&hsuc->vchan.lock, flags); if (hsuc->desc && hsuc->desc->status == DMA_IN_PROGRESS) { - hsu_dma_chan_deactivate(hsuc); + hsu_chan_disable(hsuc); hsuc->desc->status = DMA_PAUSED; } spin_unlock_irqrestore(&hsuc->vchan.lock, flags); @@ -368,7 +339,7 @@ static int hsu_dma_resume(struct dma_chan *chan) spin_lock_irqsave(&hsuc->vchan.lock, flags); if (hsuc->desc && hsuc->desc->status == DMA_PAUSED) { hsuc->desc->status = DMA_IN_PROGRESS; - hsu_dma_chan_activate(hsuc); + hsu_chan_enable(hsuc); } spin_unlock_irqrestore(&hsuc->vchan.lock, flags); @@ -404,7 +375,6 @@ static void hsu_dma_free_chan_resources(struct dma_chan *chan) int hsu_dma_probe(struct hsu_dma_chip *chip) { struct hsu_dma *hsu; - struct hsu_dma_platform_data *pdata = chip->pdata; void __iomem *addr = chip->regs + chip->offset; unsigned short i; int ret; @@ -415,25 +385,16 @@ int hsu_dma_probe(struct hsu_dma_chip *chip) chip->hsu = hsu; - if (!pdata) { - pdata = devm_kzalloc(chip->dev, sizeof(*pdata), GFP_KERNEL); - if (!pdata) - return -ENOMEM; - - chip->pdata = pdata; + /* Calculate nr_channels 
from the IO space length */ + hsu->nr_channels = (chip->length - chip->offset) / HSU_DMA_CHAN_LENGTH; - /* Guess nr_channels from the IO space length */ - pdata->nr_channels = (chip->length - chip->offset) / - HSU_DMA_CHAN_LENGTH; - } - - hsu->chan = devm_kcalloc(chip->dev, pdata->nr_channels, + hsu->chan = devm_kcalloc(chip->dev, hsu->nr_channels, sizeof(*hsu->chan), GFP_KERNEL); if (!hsu->chan) return -ENOMEM; INIT_LIST_HEAD(&hsu->dma.channels); - for (i = 0; i < pdata->nr_channels; i++) { + for (i = 0; i < hsu->nr_channels; i++) { struct hsu_dma_chan *hsuc = &hsu->chan[i]; hsuc->vchan.desc_free = hsu_dma_desc_free; @@ -441,8 +402,6 @@ int hsu_dma_probe(struct hsu_dma_chip *chip) hsuc->direction = (i & 0x1) ? DMA_DEV_TO_MEM : DMA_MEM_TO_DEV; hsuc->reg = addr + i * HSU_DMA_CHAN_LENGTH; - - spin_lock_init(&hsuc->lock); } dma_cap_set(DMA_SLAVE, hsu->dma.cap_mask); @@ -471,7 +430,7 @@ int hsu_dma_probe(struct hsu_dma_chip *chip) if (ret) return ret; - dev_info(chip->dev, "Found HSU DMA, %d channels\n", pdata->nr_channels); + dev_info(chip->dev, "Found HSU DMA, %d channels\n", hsu->nr_channels); return 0; } EXPORT_SYMBOL_GPL(hsu_dma_probe); @@ -483,7 +442,7 @@ int hsu_dma_remove(struct hsu_dma_chip *chip) dma_async_device_unregister(&hsu->dma); - for (i = 0; i < chip->pdata->nr_channels; i++) { + for (i = 0; i < hsu->nr_channels; i++) { struct hsu_dma_chan *hsuc = &hsu->chan[i]; tasklet_kill(&hsuc->vchan.task); diff --git a/kernel/drivers/dma/hsu/hsu.h b/kernel/drivers/dma/hsu/hsu.h index 0275233cf..f06579c6d 100644 --- a/kernel/drivers/dma/hsu/hsu.h +++ b/kernel/drivers/dma/hsu/hsu.h @@ -78,7 +78,6 @@ struct hsu_dma_chan { struct virt_dma_chan vchan; void __iomem *reg; - spinlock_t lock; /* hardware configuration */ enum dma_transfer_direction direction; @@ -108,6 +107,7 @@ struct hsu_dma { /* channels */ struct hsu_dma_chan *chan; + unsigned short nr_channels; }; static inline struct hsu_dma *to_hsu_dma(struct dma_device *ddev) diff --git a/kernel/drivers/dma/hsu/pci.c b/kernel/drivers/dma/hsu/pci.c index 77879e6dd..e2db76bd5 100644 --- a/kernel/drivers/dma/hsu/pci.c +++ b/kernel/drivers/dma/hsu/pci.c @@ -31,7 +31,7 @@ static irqreturn_t hsu_pci_irq(int irq, void *dev) irqreturn_t ret = IRQ_NONE; dmaisr = readl(chip->regs + HSU_PCI_DMAISR); - for (i = 0; i < chip->pdata->nr_channels; i++) { + for (i = 0; i < chip->hsu->nr_channels; i++) { if (dmaisr & 0x1) ret |= hsu_dma_irq(chip, i); dmaisr >>= 1; diff --git a/kernel/drivers/dma/idma64.c b/kernel/drivers/dma/idma64.c new file mode 100644 index 000000000..7d56b47e4 --- /dev/null +++ b/kernel/drivers/dma/idma64.c @@ -0,0 +1,712 @@ +/* + * Core driver for the Intel integrated DMA 64-bit + * + * Copyright (C) 2015 Intel Corporation + * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/bitops.h> +#include <linux/delay.h> +#include <linux/dmaengine.h> +#include <linux/dma-mapping.h> +#include <linux/dmapool.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/slab.h> + +#include "idma64.h" + +/* Platform driver name */ +#define DRV_NAME "idma64" + +/* For now we support only two channels */ +#define IDMA64_NR_CHAN 2 + +/* ---------------------------------------------------------------------- */ + +static struct device *chan2dev(struct dma_chan *chan) +{ + return &chan->dev->device; +} + +/* ---------------------------------------------------------------------- */ + +static void idma64_off(struct idma64 *idma64) +{ + unsigned short count = 100; + + dma_writel(idma64, CFG, 0); + + channel_clear_bit(idma64, MASK(XFER), idma64->all_chan_mask); + channel_clear_bit(idma64, MASK(BLOCK), idma64->all_chan_mask); + channel_clear_bit(idma64, MASK(SRC_TRAN), idma64->all_chan_mask); + channel_clear_bit(idma64, MASK(DST_TRAN), idma64->all_chan_mask); + channel_clear_bit(idma64, MASK(ERROR), idma64->all_chan_mask); + + do { + cpu_relax(); + } while (dma_readl(idma64, CFG) & IDMA64_CFG_DMA_EN && --count); +} + +static void idma64_on(struct idma64 *idma64) +{ + dma_writel(idma64, CFG, IDMA64_CFG_DMA_EN); +} + +/* ---------------------------------------------------------------------- */ + +static void idma64_chan_init(struct idma64 *idma64, struct idma64_chan *idma64c) +{ + u32 cfghi = IDMA64C_CFGH_SRC_PER(1) | IDMA64C_CFGH_DST_PER(0); + u32 cfglo = 0; + + /* Set default burst alignment */ + cfglo |= IDMA64C_CFGL_DST_BURST_ALIGN | IDMA64C_CFGL_SRC_BURST_ALIGN; + + channel_writel(idma64c, CFG_LO, cfglo); + channel_writel(idma64c, CFG_HI, cfghi); + + /* Enable interrupts */ + channel_set_bit(idma64, MASK(XFER), idma64c->mask); + channel_set_bit(idma64, MASK(ERROR), idma64c->mask); + + /* + * Enforce the controller to be turned on. + * + * The iDMA is turned off in ->probe() and loses context during system + * suspend / resume cycle. That's why we have to enable it each time we + * use it. 
+ */ + idma64_on(idma64); +} + +static void idma64_chan_stop(struct idma64 *idma64, struct idma64_chan *idma64c) +{ + channel_clear_bit(idma64, CH_EN, idma64c->mask); +} + +static void idma64_chan_start(struct idma64 *idma64, struct idma64_chan *idma64c) +{ + struct idma64_desc *desc = idma64c->desc; + struct idma64_hw_desc *hw = &desc->hw[0]; + + channel_writeq(idma64c, SAR, 0); + channel_writeq(idma64c, DAR, 0); + + channel_writel(idma64c, CTL_HI, IDMA64C_CTLH_BLOCK_TS(~0UL)); + channel_writel(idma64c, CTL_LO, IDMA64C_CTLL_LLP_S_EN | IDMA64C_CTLL_LLP_D_EN); + + channel_writeq(idma64c, LLP, hw->llp); + + channel_set_bit(idma64, CH_EN, idma64c->mask); +} + +static void idma64_stop_transfer(struct idma64_chan *idma64c) +{ + struct idma64 *idma64 = to_idma64(idma64c->vchan.chan.device); + + idma64_chan_stop(idma64, idma64c); +} + +static void idma64_start_transfer(struct idma64_chan *idma64c) +{ + struct idma64 *idma64 = to_idma64(idma64c->vchan.chan.device); + struct virt_dma_desc *vdesc; + + /* Get the next descriptor */ + vdesc = vchan_next_desc(&idma64c->vchan); + if (!vdesc) { + idma64c->desc = NULL; + return; + } + + list_del(&vdesc->node); + idma64c->desc = to_idma64_desc(vdesc); + + /* Configure the channel */ + idma64_chan_init(idma64, idma64c); + + /* Start the channel with a new descriptor */ + idma64_chan_start(idma64, idma64c); +} + +/* ---------------------------------------------------------------------- */ + +static void idma64_chan_irq(struct idma64 *idma64, unsigned short c, + u32 status_err, u32 status_xfer) +{ + struct idma64_chan *idma64c = &idma64->chan[c]; + struct idma64_desc *desc; + unsigned long flags; + + spin_lock_irqsave(&idma64c->vchan.lock, flags); + desc = idma64c->desc; + if (desc) { + if (status_err & (1 << c)) { + dma_writel(idma64, CLEAR(ERROR), idma64c->mask); + desc->status = DMA_ERROR; + } else if (status_xfer & (1 << c)) { + dma_writel(idma64, CLEAR(XFER), idma64c->mask); + desc->status = DMA_COMPLETE; + vchan_cookie_complete(&desc->vdesc); + idma64_start_transfer(idma64c); + } + + /* idma64_start_transfer() updates idma64c->desc */ + if (idma64c->desc == NULL || desc->status == DMA_ERROR) + idma64_stop_transfer(idma64c); + } + spin_unlock_irqrestore(&idma64c->vchan.lock, flags); +} + +static irqreturn_t idma64_irq(int irq, void *dev) +{ + struct idma64 *idma64 = dev; + u32 status = dma_readl(idma64, STATUS_INT); + u32 status_xfer; + u32 status_err; + unsigned short i; + + dev_vdbg(idma64->dma.dev, "%s: status=%#x\n", __func__, status); + + /* Check if we have any interrupt from the DMA controller */ + if (!status) + return IRQ_NONE; + + /* Disable interrupts */ + channel_clear_bit(idma64, MASK(XFER), idma64->all_chan_mask); + channel_clear_bit(idma64, MASK(ERROR), idma64->all_chan_mask); + + status_xfer = dma_readl(idma64, RAW(XFER)); + status_err = dma_readl(idma64, RAW(ERROR)); + + for (i = 0; i < idma64->dma.chancnt; i++) + idma64_chan_irq(idma64, i, status_err, status_xfer); + + /* Re-enable interrupts */ + channel_set_bit(idma64, MASK(XFER), idma64->all_chan_mask); + channel_set_bit(idma64, MASK(ERROR), idma64->all_chan_mask); + + return IRQ_HANDLED; +} + +/* ---------------------------------------------------------------------- */ + +static struct idma64_desc *idma64_alloc_desc(unsigned int ndesc) +{ + struct idma64_desc *desc; + + desc = kzalloc(sizeof(*desc), GFP_NOWAIT); + if (!desc) + return NULL; + + desc->hw = kcalloc(ndesc, sizeof(*desc->hw), GFP_NOWAIT); + if (!desc->hw) { + kfree(desc); + return NULL; + } + + return desc; +} + +static 
void idma64_desc_free(struct idma64_chan *idma64c, + struct idma64_desc *desc) +{ + struct idma64_hw_desc *hw; + + if (desc->ndesc) { + unsigned int i = desc->ndesc; + + do { + hw = &desc->hw[--i]; + dma_pool_free(idma64c->pool, hw->lli, hw->llp); + } while (i); + } + + kfree(desc->hw); + kfree(desc); +} + +static void idma64_vdesc_free(struct virt_dma_desc *vdesc) +{ + struct idma64_chan *idma64c = to_idma64_chan(vdesc->tx.chan); + + idma64_desc_free(idma64c, to_idma64_desc(vdesc)); +} + +static u64 idma64_hw_desc_fill(struct idma64_hw_desc *hw, + struct dma_slave_config *config, + enum dma_transfer_direction direction, u64 llp) +{ + struct idma64_lli *lli = hw->lli; + u64 sar, dar; + u32 ctlhi = IDMA64C_CTLH_BLOCK_TS(hw->len); + u32 ctllo = IDMA64C_CTLL_LLP_S_EN | IDMA64C_CTLL_LLP_D_EN; + u32 src_width, dst_width; + + if (direction == DMA_MEM_TO_DEV) { + sar = hw->phys; + dar = config->dst_addr; + ctllo |= IDMA64C_CTLL_DST_FIX | IDMA64C_CTLL_SRC_INC | + IDMA64C_CTLL_FC_M2P; + src_width = __ffs(sar | hw->len | 4); + dst_width = __ffs(config->dst_addr_width); + } else { /* DMA_DEV_TO_MEM */ + sar = config->src_addr; + dar = hw->phys; + ctllo |= IDMA64C_CTLL_DST_INC | IDMA64C_CTLL_SRC_FIX | + IDMA64C_CTLL_FC_P2M; + src_width = __ffs(config->src_addr_width); + dst_width = __ffs(dar | hw->len | 4); + } + + lli->sar = sar; + lli->dar = dar; + + lli->ctlhi = ctlhi; + lli->ctllo = ctllo | + IDMA64C_CTLL_SRC_MSIZE(config->src_maxburst) | + IDMA64C_CTLL_DST_MSIZE(config->dst_maxburst) | + IDMA64C_CTLL_DST_WIDTH(dst_width) | + IDMA64C_CTLL_SRC_WIDTH(src_width); + + lli->llp = llp; + return hw->llp; +} + +static void idma64_desc_fill(struct idma64_chan *idma64c, + struct idma64_desc *desc) +{ + struct dma_slave_config *config = &idma64c->config; + struct idma64_hw_desc *hw = &desc->hw[desc->ndesc - 1]; + struct idma64_lli *lli = hw->lli; + u64 llp = 0; + unsigned int i = desc->ndesc; + + /* Fill the hardware descriptors and link them to a list */ + do { + hw = &desc->hw[--i]; + llp = idma64_hw_desc_fill(hw, config, desc->direction, llp); + desc->length += hw->len; + } while (i); + + /* Trigger interrupt after last block */ + lli->ctllo |= IDMA64C_CTLL_INT_EN; +} + +static struct dma_async_tx_descriptor *idma64_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct idma64_chan *idma64c = to_idma64_chan(chan); + struct idma64_desc *desc; + struct scatterlist *sg; + unsigned int i; + + desc = idma64_alloc_desc(sg_len); + if (!desc) + return NULL; + + for_each_sg(sgl, sg, sg_len, i) { + struct idma64_hw_desc *hw = &desc->hw[i]; + + /* Allocate DMA capable memory for hardware descriptor */ + hw->lli = dma_pool_alloc(idma64c->pool, GFP_NOWAIT, &hw->llp); + if (!hw->lli) { + desc->ndesc = i; + idma64_desc_free(idma64c, desc); + return NULL; + } + + hw->phys = sg_dma_address(sg); + hw->len = sg_dma_len(sg); + } + + desc->ndesc = sg_len; + desc->direction = direction; + desc->status = DMA_IN_PROGRESS; + + idma64_desc_fill(idma64c, desc); + return vchan_tx_prep(&idma64c->vchan, &desc->vdesc, flags); +} + +static void idma64_issue_pending(struct dma_chan *chan) +{ + struct idma64_chan *idma64c = to_idma64_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&idma64c->vchan.lock, flags); + if (vchan_issue_pending(&idma64c->vchan) && !idma64c->desc) + idma64_start_transfer(idma64c); + spin_unlock_irqrestore(&idma64c->vchan.lock, flags); +} + +static size_t idma64_active_desc_size(struct 
idma64_chan *idma64c) +{ + struct idma64_desc *desc = idma64c->desc; + struct idma64_hw_desc *hw; + size_t bytes = desc->length; + u64 llp = channel_readq(idma64c, LLP); + u32 ctlhi = channel_readl(idma64c, CTL_HI); + unsigned int i = 0; + + do { + hw = &desc->hw[i]; + if (hw->llp == llp) + break; + bytes -= hw->len; + } while (++i < desc->ndesc); + + if (!i) + return bytes; + + /* The current chunk is not fully transferred yet */ + bytes += desc->hw[--i].len; + + return bytes - IDMA64C_CTLH_BLOCK_TS(ctlhi); +} + +static enum dma_status idma64_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, struct dma_tx_state *state) +{ + struct idma64_chan *idma64c = to_idma64_chan(chan); + struct virt_dma_desc *vdesc; + enum dma_status status; + size_t bytes; + unsigned long flags; + + status = dma_cookie_status(chan, cookie, state); + if (status == DMA_COMPLETE) + return status; + + spin_lock_irqsave(&idma64c->vchan.lock, flags); + vdesc = vchan_find_desc(&idma64c->vchan, cookie); + if (idma64c->desc && cookie == idma64c->desc->vdesc.tx.cookie) { + bytes = idma64_active_desc_size(idma64c); + dma_set_residue(state, bytes); + status = idma64c->desc->status; + } else if (vdesc) { + bytes = to_idma64_desc(vdesc)->length; + dma_set_residue(state, bytes); + } + spin_unlock_irqrestore(&idma64c->vchan.lock, flags); + + return status; +} + +static void convert_burst(u32 *maxburst) +{ + if (*maxburst) + *maxburst = __fls(*maxburst); + else + *maxburst = 0; +} + +static int idma64_slave_config(struct dma_chan *chan, + struct dma_slave_config *config) +{ + struct idma64_chan *idma64c = to_idma64_chan(chan); + + /* Check if chan will be configured for slave transfers */ + if (!is_slave_direction(config->direction)) + return -EINVAL; + + memcpy(&idma64c->config, config, sizeof(idma64c->config)); + + convert_burst(&idma64c->config.src_maxburst); + convert_burst(&idma64c->config.dst_maxburst); + + return 0; +} + +static void idma64_chan_deactivate(struct idma64_chan *idma64c, bool drain) +{ + unsigned short count = 100; + u32 cfglo; + + cfglo = channel_readl(idma64c, CFG_LO); + if (drain) + cfglo |= IDMA64C_CFGL_CH_DRAIN; + else + cfglo &= ~IDMA64C_CFGL_CH_DRAIN; + + channel_writel(idma64c, CFG_LO, cfglo | IDMA64C_CFGL_CH_SUSP); + do { + udelay(1); + cfglo = channel_readl(idma64c, CFG_LO); + } while (!(cfglo & IDMA64C_CFGL_FIFO_EMPTY) && --count); +} + +static void idma64_chan_activate(struct idma64_chan *idma64c) +{ + u32 cfglo; + + cfglo = channel_readl(idma64c, CFG_LO); + channel_writel(idma64c, CFG_LO, cfglo & ~IDMA64C_CFGL_CH_SUSP); +} + +static int idma64_pause(struct dma_chan *chan) +{ + struct idma64_chan *idma64c = to_idma64_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&idma64c->vchan.lock, flags); + if (idma64c->desc && idma64c->desc->status == DMA_IN_PROGRESS) { + idma64_chan_deactivate(idma64c, false); + idma64c->desc->status = DMA_PAUSED; + } + spin_unlock_irqrestore(&idma64c->vchan.lock, flags); + + return 0; +} + +static int idma64_resume(struct dma_chan *chan) +{ + struct idma64_chan *idma64c = to_idma64_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&idma64c->vchan.lock, flags); + if (idma64c->desc && idma64c->desc->status == DMA_PAUSED) { + idma64c->desc->status = DMA_IN_PROGRESS; + idma64_chan_activate(idma64c); + } + spin_unlock_irqrestore(&idma64c->vchan.lock, flags); + + return 0; +} + +static int idma64_terminate_all(struct dma_chan *chan) +{ + struct idma64_chan *idma64c = to_idma64_chan(chan); + unsigned long flags; + LIST_HEAD(head); + + 
spin_lock_irqsave(&idma64c->vchan.lock, flags); + idma64_chan_deactivate(idma64c, true); + idma64_stop_transfer(idma64c); + if (idma64c->desc) { + idma64_vdesc_free(&idma64c->desc->vdesc); + idma64c->desc = NULL; + } + vchan_get_all_descriptors(&idma64c->vchan, &head); + spin_unlock_irqrestore(&idma64c->vchan.lock, flags); + + vchan_dma_desc_free_list(&idma64c->vchan, &head); + return 0; +} + +static int idma64_alloc_chan_resources(struct dma_chan *chan) +{ + struct idma64_chan *idma64c = to_idma64_chan(chan); + + /* Create a pool of consistent memory blocks for hardware descriptors */ + idma64c->pool = dma_pool_create(dev_name(chan2dev(chan)), + chan->device->dev, + sizeof(struct idma64_lli), 8, 0); + if (!idma64c->pool) { + dev_err(chan2dev(chan), "No memory for descriptors\n"); + return -ENOMEM; + } + + return 0; +} + +static void idma64_free_chan_resources(struct dma_chan *chan) +{ + struct idma64_chan *idma64c = to_idma64_chan(chan); + + vchan_free_chan_resources(to_virt_chan(chan)); + dma_pool_destroy(idma64c->pool); + idma64c->pool = NULL; +} + +/* ---------------------------------------------------------------------- */ + +#define IDMA64_BUSWIDTHS \ + BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \ + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) + +static int idma64_probe(struct idma64_chip *chip) +{ + struct idma64 *idma64; + unsigned short nr_chan = IDMA64_NR_CHAN; + unsigned short i; + int ret; + + idma64 = devm_kzalloc(chip->dev, sizeof(*idma64), GFP_KERNEL); + if (!idma64) + return -ENOMEM; + + idma64->regs = chip->regs; + chip->idma64 = idma64; + + idma64->chan = devm_kcalloc(chip->dev, nr_chan, sizeof(*idma64->chan), + GFP_KERNEL); + if (!idma64->chan) + return -ENOMEM; + + idma64->all_chan_mask = (1 << nr_chan) - 1; + + /* Turn off iDMA controller */ + idma64_off(idma64); + + ret = devm_request_irq(chip->dev, chip->irq, idma64_irq, IRQF_SHARED, + dev_name(chip->dev), idma64); + if (ret) + return ret; + + INIT_LIST_HEAD(&idma64->dma.channels); + for (i = 0; i < nr_chan; i++) { + struct idma64_chan *idma64c = &idma64->chan[i]; + + idma64c->vchan.desc_free = idma64_vdesc_free; + vchan_init(&idma64c->vchan, &idma64->dma); + + idma64c->regs = idma64->regs + i * IDMA64_CH_LENGTH; + idma64c->mask = BIT(i); + } + + dma_cap_set(DMA_SLAVE, idma64->dma.cap_mask); + dma_cap_set(DMA_PRIVATE, idma64->dma.cap_mask); + + idma64->dma.device_alloc_chan_resources = idma64_alloc_chan_resources; + idma64->dma.device_free_chan_resources = idma64_free_chan_resources; + + idma64->dma.device_prep_slave_sg = idma64_prep_slave_sg; + + idma64->dma.device_issue_pending = idma64_issue_pending; + idma64->dma.device_tx_status = idma64_tx_status; + + idma64->dma.device_config = idma64_slave_config; + idma64->dma.device_pause = idma64_pause; + idma64->dma.device_resume = idma64_resume; + idma64->dma.device_terminate_all = idma64_terminate_all; + + idma64->dma.src_addr_widths = IDMA64_BUSWIDTHS; + idma64->dma.dst_addr_widths = IDMA64_BUSWIDTHS; + idma64->dma.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + idma64->dma.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + + idma64->dma.dev = chip->dev; + + ret = dma_async_device_register(&idma64->dma); + if (ret) + return ret; + + dev_info(chip->dev, "Found Intel integrated DMA 64-bit\n"); + return 0; +} + +static int idma64_remove(struct idma64_chip *chip) +{ + struct idma64 *idma64 = chip->idma64; + unsigned short i; + + dma_async_device_unregister(&idma64->dma); + + /* + * Explicitly call devm_request_irq() to avoid the side effects 
with + * the scheduled tasklets. + */ + devm_free_irq(chip->dev, chip->irq, idma64); + + for (i = 0; i < idma64->dma.chancnt; i++) { + struct idma64_chan *idma64c = &idma64->chan[i]; + + tasklet_kill(&idma64c->vchan.task); + } + + return 0; +} + +/* ---------------------------------------------------------------------- */ + +static int idma64_platform_probe(struct platform_device *pdev) +{ + struct idma64_chip *chip; + struct device *dev = &pdev->dev; + struct resource *mem; + int ret; + + chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL); + if (!chip) + return -ENOMEM; + + chip->irq = platform_get_irq(pdev, 0); + if (chip->irq < 0) + return chip->irq; + + mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + chip->regs = devm_ioremap_resource(dev, mem); + if (IS_ERR(chip->regs)) + return PTR_ERR(chip->regs); + + ret = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (ret) + return ret; + + chip->dev = dev; + + ret = idma64_probe(chip); + if (ret) + return ret; + + platform_set_drvdata(pdev, chip); + return 0; +} + +static int idma64_platform_remove(struct platform_device *pdev) +{ + struct idma64_chip *chip = platform_get_drvdata(pdev); + + return idma64_remove(chip); +} + +#ifdef CONFIG_PM_SLEEP + +static int idma64_pm_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct idma64_chip *chip = platform_get_drvdata(pdev); + + idma64_off(chip->idma64); + return 0; +} + +static int idma64_pm_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct idma64_chip *chip = platform_get_drvdata(pdev); + + idma64_on(chip->idma64); + return 0; +} + +#endif /* CONFIG_PM_SLEEP */ + +static const struct dev_pm_ops idma64_dev_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(idma64_pm_suspend, idma64_pm_resume) +}; + +static struct platform_driver idma64_platform_driver = { + .probe = idma64_platform_probe, + .remove = idma64_platform_remove, + .driver = { + .name = DRV_NAME, + .pm = &idma64_dev_pm_ops, + }, +}; + +module_platform_driver(idma64_platform_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("iDMA64 core driver"); +MODULE_AUTHOR("Andy Shevchenko <andriy.shevchenko@linux.intel.com>"); +MODULE_ALIAS("platform:" DRV_NAME); diff --git a/kernel/drivers/dma/idma64.h b/kernel/drivers/dma/idma64.h new file mode 100644 index 000000000..f6aeff0af --- /dev/null +++ b/kernel/drivers/dma/idma64.h @@ -0,0 +1,229 @@ +/* + * Driver for the Intel integrated DMA 64-bit + * + * Copyright (C) 2015 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef __DMA_IDMA64_H__ +#define __DMA_IDMA64_H__ + +#include <linux/device.h> +#include <linux/io.h> +#include <linux/spinlock.h> +#include <linux/types.h> + +#include <asm-generic/io-64-nonatomic-lo-hi.h> + +#include "virt-dma.h" + +/* Channel registers */ + +#define IDMA64_CH_SAR 0x00 /* Source Address Register */ +#define IDMA64_CH_DAR 0x08 /* Destination Address Register */ +#define IDMA64_CH_LLP 0x10 /* Linked List Pointer */ +#define IDMA64_CH_CTL_LO 0x18 /* Control Register Low */ +#define IDMA64_CH_CTL_HI 0x1c /* Control Register High */ +#define IDMA64_CH_SSTAT 0x20 +#define IDMA64_CH_DSTAT 0x28 +#define IDMA64_CH_SSTATAR 0x30 +#define IDMA64_CH_DSTATAR 0x38 +#define IDMA64_CH_CFG_LO 0x40 /* Configuration Register Low */ +#define IDMA64_CH_CFG_HI 0x44 /* Configuration Register High */ +#define IDMA64_CH_SGR 0x48 +#define IDMA64_CH_DSR 0x50 + +#define IDMA64_CH_LENGTH 0x58 + +/* Bitfields in CTL_LO */ +#define IDMA64C_CTLL_INT_EN (1 << 0) /* irqs enabled? */ +#define IDMA64C_CTLL_DST_WIDTH(x) ((x) << 1) /* bytes per element */ +#define IDMA64C_CTLL_SRC_WIDTH(x) ((x) << 4) +#define IDMA64C_CTLL_DST_INC (0 << 8) /* DAR update/not */ +#define IDMA64C_CTLL_DST_FIX (1 << 8) +#define IDMA64C_CTLL_SRC_INC (0 << 10) /* SAR update/not */ +#define IDMA64C_CTLL_SRC_FIX (1 << 10) +#define IDMA64C_CTLL_DST_MSIZE(x) ((x) << 11) /* burst, #elements */ +#define IDMA64C_CTLL_SRC_MSIZE(x) ((x) << 14) +#define IDMA64C_CTLL_FC_M2P (1 << 20) /* mem-to-periph */ +#define IDMA64C_CTLL_FC_P2M (2 << 20) /* periph-to-mem */ +#define IDMA64C_CTLL_LLP_D_EN (1 << 27) /* dest block chain */ +#define IDMA64C_CTLL_LLP_S_EN (1 << 28) /* src block chain */ + +/* Bitfields in CTL_HI */ +#define IDMA64C_CTLH_BLOCK_TS(x) ((x) & ((1 << 17) - 1)) +#define IDMA64C_CTLH_DONE (1 << 17) + +/* Bitfields in CFG_LO */ +#define IDMA64C_CFGL_DST_BURST_ALIGN (1 << 0) /* dst burst align */ +#define IDMA64C_CFGL_SRC_BURST_ALIGN (1 << 1) /* src burst align */ +#define IDMA64C_CFGL_CH_SUSP (1 << 8) +#define IDMA64C_CFGL_FIFO_EMPTY (1 << 9) +#define IDMA64C_CFGL_CH_DRAIN (1 << 10) /* drain FIFO */ +#define IDMA64C_CFGL_DST_OPT_BL (1 << 20) /* optimize dst burst length */ +#define IDMA64C_CFGL_SRC_OPT_BL (1 << 21) /* optimize src burst length */ + +/* Bitfields in CFG_HI */ +#define IDMA64C_CFGH_SRC_PER(x) ((x) << 0) /* src peripheral */ +#define IDMA64C_CFGH_DST_PER(x) ((x) << 4) /* dst peripheral */ +#define IDMA64C_CFGH_RD_ISSUE_THD(x) ((x) << 8) +#define IDMA64C_CFGH_RW_ISSUE_THD(x) ((x) << 18) + +/* Interrupt registers */ + +#define IDMA64_INT_XFER 0x00 +#define IDMA64_INT_BLOCK 0x08 +#define IDMA64_INT_SRC_TRAN 0x10 +#define IDMA64_INT_DST_TRAN 0x18 +#define IDMA64_INT_ERROR 0x20 + +#define IDMA64_RAW(x) (0x2c0 + IDMA64_INT_##x) /* r */ +#define IDMA64_STATUS(x) (0x2e8 + IDMA64_INT_##x) /* r (raw & mask) */ +#define IDMA64_MASK(x) (0x310 + IDMA64_INT_##x) /* rw (set = irq enabled) */ +#define IDMA64_CLEAR(x) (0x338 + IDMA64_INT_##x) /* w (ack, affects "raw") */ + +/* Common registers */ + +#define IDMA64_STATUS_INT 0x360 /* r */ +#define IDMA64_CFG 0x398 +#define IDMA64_CH_EN 0x3a0 + +/* Bitfields in CFG */ +#define IDMA64_CFG_DMA_EN (1 << 0) + +/* Hardware descriptor for Linked LIst transfers */ +struct idma64_lli { + u64 sar; + u64 dar; + u64 llp; + u32 ctllo; + u32 ctlhi; + u32 sstat; + u32 dstat; +}; + +struct idma64_hw_desc { + struct idma64_lli *lli; + dma_addr_t llp; + dma_addr_t phys; + unsigned int len; +}; + +struct idma64_desc { + struct virt_dma_desc vdesc; + enum dma_transfer_direction direction; + struct 
idma64_hw_desc *hw; + unsigned int ndesc; + size_t length; + enum dma_status status; +}; + +static inline struct idma64_desc *to_idma64_desc(struct virt_dma_desc *vdesc) +{ + return container_of(vdesc, struct idma64_desc, vdesc); +} + +struct idma64_chan { + struct virt_dma_chan vchan; + + void __iomem *regs; + + /* hardware configuration */ + enum dma_transfer_direction direction; + unsigned int mask; + struct dma_slave_config config; + + void *pool; + struct idma64_desc *desc; +}; + +static inline struct idma64_chan *to_idma64_chan(struct dma_chan *chan) +{ + return container_of(chan, struct idma64_chan, vchan.chan); +} + +#define channel_set_bit(idma64, reg, mask) \ + dma_writel(idma64, reg, ((mask) << 8) | (mask)) +#define channel_clear_bit(idma64, reg, mask) \ + dma_writel(idma64, reg, ((mask) << 8) | 0) + +static inline u32 idma64c_readl(struct idma64_chan *idma64c, int offset) +{ + return readl(idma64c->regs + offset); +} + +static inline void idma64c_writel(struct idma64_chan *idma64c, int offset, + u32 value) +{ + writel(value, idma64c->regs + offset); +} + +#define channel_readl(idma64c, reg) \ + idma64c_readl(idma64c, IDMA64_CH_##reg) +#define channel_writel(idma64c, reg, value) \ + idma64c_writel(idma64c, IDMA64_CH_##reg, (value)) + +static inline u64 idma64c_readq(struct idma64_chan *idma64c, int offset) +{ + return lo_hi_readq(idma64c->regs + offset); +} + +static inline void idma64c_writeq(struct idma64_chan *idma64c, int offset, + u64 value) +{ + lo_hi_writeq(value, idma64c->regs + offset); +} + +#define channel_readq(idma64c, reg) \ + idma64c_readq(idma64c, IDMA64_CH_##reg) +#define channel_writeq(idma64c, reg, value) \ + idma64c_writeq(idma64c, IDMA64_CH_##reg, (value)) + +struct idma64 { + struct dma_device dma; + + void __iomem *regs; + + /* channels */ + unsigned short all_chan_mask; + struct idma64_chan *chan; +}; + +static inline struct idma64 *to_idma64(struct dma_device *ddev) +{ + return container_of(ddev, struct idma64, dma); +} + +static inline u32 idma64_readl(struct idma64 *idma64, int offset) +{ + return readl(idma64->regs + offset); +} + +static inline void idma64_writel(struct idma64 *idma64, int offset, u32 value) +{ + writel(value, idma64->regs + offset); +} + +#define dma_readl(idma64, reg) \ + idma64_readl(idma64, IDMA64_##reg) +#define dma_writel(idma64, reg, value) \ + idma64_writel(idma64, IDMA64_##reg, (value)) + +/** + * struct idma64_chip - representation of iDMA 64-bit controller hardware + * @dev: struct device of the DMA controller + * @irq: irq line + * @regs: memory mapped I/O space + * @idma64: struct idma64 that is filed by idma64_probe() + */ +struct idma64_chip { + struct device *dev; + int irq; + void __iomem *regs; + struct idma64 *idma64; +}; + +#endif /* __DMA_IDMA64_H__ */ diff --git a/kernel/drivers/dma/imx-dma.c b/kernel/drivers/dma/imx-dma.c index eed405976..48d85f8b9 100644 --- a/kernel/drivers/dma/imx-dma.c +++ b/kernel/drivers/dma/imx-dma.c @@ -193,7 +193,7 @@ struct imxdma_filter_data { int request; }; -static struct platform_device_id imx_dma_devtype[] = { +static const struct platform_device_id imx_dma_devtype[] = { { .name = "imx1-dma", .driver_data = IMX1_DMA, @@ -1083,8 +1083,12 @@ static int __init imxdma_probe(struct platform_device *pdev) if (IS_ERR(imxdma->dma_ahb)) return PTR_ERR(imxdma->dma_ahb); - clk_prepare_enable(imxdma->dma_ipg); - clk_prepare_enable(imxdma->dma_ahb); + ret = clk_prepare_enable(imxdma->dma_ipg); + if (ret) + return ret; + ret = clk_prepare_enable(imxdma->dma_ahb); + if (ret) + goto 
disable_dma_ipg_clk; /* reset DMA module */ imx_dmav1_writel(imxdma, DCR_DRST, DMA_DCR); @@ -1094,20 +1098,20 @@ static int __init imxdma_probe(struct platform_device *pdev) dma_irq_handler, 0, "DMA", imxdma); if (ret) { dev_warn(imxdma->dev, "Can't register IRQ for DMA\n"); - goto err; + goto disable_dma_ahb_clk; } irq_err = platform_get_irq(pdev, 1); if (irq_err < 0) { ret = irq_err; - goto err; + goto disable_dma_ahb_clk; } ret = devm_request_irq(&pdev->dev, irq_err, imxdma_err_handler, 0, "DMA", imxdma); if (ret) { dev_warn(imxdma->dev, "Can't register ERRIRQ for DMA\n"); - goto err; + goto disable_dma_ahb_clk; } } @@ -1144,7 +1148,7 @@ static int __init imxdma_probe(struct platform_device *pdev) dev_warn(imxdma->dev, "Can't register IRQ %d " "for DMA channel %d\n", irq + i, i); - goto err; + goto disable_dma_ahb_clk; } init_timer(&imxdmac->watchdog); imxdmac->watchdog.function = &imxdma_watchdog; @@ -1183,14 +1187,14 @@ static int __init imxdma_probe(struct platform_device *pdev) platform_set_drvdata(pdev, imxdma); - imxdma->dma_device.copy_align = 2; /* 2^2 = 4 bytes alignment */ + imxdma->dma_device.copy_align = DMAENGINE_ALIGN_4_BYTES; imxdma->dma_device.dev->dma_parms = &imxdma->dma_parms; dma_set_max_seg_size(imxdma->dma_device.dev, 0xffffff); ret = dma_async_device_register(&imxdma->dma_device); if (ret) { dev_err(&pdev->dev, "unable to register\n"); - goto err; + goto disable_dma_ahb_clk; } if (pdev->dev.of_node) { @@ -1206,9 +1210,10 @@ static int __init imxdma_probe(struct platform_device *pdev) err_of_dma_controller: dma_async_device_unregister(&imxdma->dma_device); -err: - clk_disable_unprepare(imxdma->dma_ipg); +disable_dma_ahb_clk: clk_disable_unprepare(imxdma->dma_ahb); +disable_dma_ipg_clk: + clk_disable_unprepare(imxdma->dma_ipg); return ret; } diff --git a/kernel/drivers/dma/imx-sdma.c b/kernel/drivers/dma/imx-sdma.c index 62bbd7933..0f6fd42f5 100644 --- a/kernel/drivers/dma/imx-sdma.c +++ b/kernel/drivers/dma/imx-sdma.c @@ -35,12 +35,16 @@ #include <linux/platform_device.h> #include <linux/dmaengine.h> #include <linux/of.h> +#include <linux/of_address.h> #include <linux/of_device.h> #include <linux/of_dma.h> #include <asm/irq.h> #include <linux/platform_data/dma-imx-sdma.h> #include <linux/platform_data/dma-imx.h> +#include <linux/regmap.h> +#include <linux/mfd/syscon.h> +#include <linux/mfd/syscon/imx6q-iomuxc-gpr.h> #include "dmaengine.h" @@ -124,6 +128,56 @@ #define CHANGE_ENDIANNESS 0x80 /* + * p_2_p watermark_level description + * Bits Name Description + * 0-7 Lower WML Lower watermark level + * 8 PS 1: Pad Swallowing + * 0: No Pad Swallowing + * 9 PA 1: Pad Adding + * 0: No Pad Adding + * 10 SPDIF If this bit is set both source + * and destination are on SPBA + * 11 Source Bit(SP) 1: Source on SPBA + * 0: Source on AIPS + * 12 Destination Bit(DP) 1: Destination on SPBA + * 0: Destination on AIPS + * 13-15 --------- MUST BE 0 + * 16-23 Higher WML HWML + * 24-27 N Total number of samples after + * which Pad adding/Swallowing + * must be done. It must be odd. 
+ * 28 Lower WML Event(LWE) SDMA events reg to check for + * LWML event mask + * 0: LWE in EVENTS register + * 1: LWE in EVENTS2 register + * 29 Higher WML Event(HWE) SDMA events reg to check for + * HWML event mask + * 0: HWE in EVENTS register + * 1: HWE in EVENTS2 register + * 30 --------- MUST BE 0 + * 31 CONT 1: Amount of samples to be + * transferred is unknown and + * script will keep on + * transferring samples as long as + * both events are detected and + * script must be manually stopped + * by the application + * 0: The amount of samples to be + * transferred is equal to the + * count field of mode word + */ +#define SDMA_WATERMARK_LEVEL_LWML 0xFF +#define SDMA_WATERMARK_LEVEL_PS BIT(8) +#define SDMA_WATERMARK_LEVEL_PA BIT(9) +#define SDMA_WATERMARK_LEVEL_SPDIF BIT(10) +#define SDMA_WATERMARK_LEVEL_SP BIT(11) +#define SDMA_WATERMARK_LEVEL_DP BIT(12) +#define SDMA_WATERMARK_LEVEL_HWML (0xFF << 16) +#define SDMA_WATERMARK_LEVEL_LWE BIT(28) +#define SDMA_WATERMARK_LEVEL_HWE BIT(29) +#define SDMA_WATERMARK_LEVEL_CONT BIT(31) + +/* * Mode/Count of data node descriptors - IPCv2 */ struct sdma_mode_count { @@ -259,8 +313,9 @@ struct sdma_channel { struct sdma_buffer_descriptor *bd; dma_addr_t bd_phys; unsigned int pc_from_device, pc_to_device; + unsigned int device_to_device; unsigned long flags; - dma_addr_t per_address; + dma_addr_t per_address, per_address2; unsigned long event_mask[2]; unsigned long watermark_level; u32 shp_addr, per_addr; @@ -328,6 +383,8 @@ struct sdma_engine { u32 script_number; struct sdma_script_start_addrs *script_addrs; const struct sdma_driver_data *drvdata; + u32 spba_start_addr; + u32 spba_end_addr; }; static struct sdma_driver_data sdma_imx31 = { @@ -420,7 +477,7 @@ static struct sdma_driver_data sdma_imx6q = { .script_addrs = &sdma_script_imx6q, }; -static struct platform_device_id sdma_devtypes[] = { +static const struct platform_device_id sdma_devtypes[] = { { .name = "imx25-sdma", .driver_data = (unsigned long)&sdma_imx25, @@ -705,6 +762,7 @@ static void sdma_get_pc(struct sdma_channel *sdmac, sdmac->pc_from_device = 0; sdmac->pc_to_device = 0; + sdmac->device_to_device = 0; switch (peripheral_type) { case IMX_DMATYPE_MEMORY: @@ -780,6 +838,7 @@ static void sdma_get_pc(struct sdma_channel *sdmac, sdmac->pc_from_device = per_2_emi; sdmac->pc_to_device = emi_2_per; + sdmac->device_to_device = per_2_per; } static int sdma_load_context(struct sdma_channel *sdmac) @@ -792,11 +851,12 @@ static int sdma_load_context(struct sdma_channel *sdmac) int ret; unsigned long flags; - if (sdmac->direction == DMA_DEV_TO_MEM) { + if (sdmac->direction == DMA_DEV_TO_MEM) load_address = sdmac->pc_from_device; - } else { + else if (sdmac->direction == DMA_DEV_TO_DEV) + load_address = sdmac->device_to_device; + else load_address = sdmac->pc_to_device; - } if (load_address < 0) return load_address; @@ -851,6 +911,46 @@ static int sdma_disable_channel(struct dma_chan *chan) return 0; } +static void sdma_set_watermarklevel_for_p2p(struct sdma_channel *sdmac) +{ + struct sdma_engine *sdma = sdmac->sdma; + + int lwml = sdmac->watermark_level & SDMA_WATERMARK_LEVEL_LWML; + int hwml = (sdmac->watermark_level & SDMA_WATERMARK_LEVEL_HWML) >> 16; + + set_bit(sdmac->event_id0 % 32, &sdmac->event_mask[1]); + set_bit(sdmac->event_id1 % 32, &sdmac->event_mask[0]); + + if (sdmac->event_id0 > 31) + sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_LWE; + + if (sdmac->event_id1 > 31) + sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_HWE; + + /* + * If LWML(src_maxburst) > HWML(dst_maxburst), we 
need + * swap LWML and HWML of INFO(A.3.2.5.1), also need swap + * r0(event_mask[1]) and r1(event_mask[0]). + */ + if (lwml > hwml) { + sdmac->watermark_level &= ~(SDMA_WATERMARK_LEVEL_LWML | + SDMA_WATERMARK_LEVEL_HWML); + sdmac->watermark_level |= hwml; + sdmac->watermark_level |= lwml << 16; + swap(sdmac->event_mask[0], sdmac->event_mask[1]); + } + + if (sdmac->per_address2 >= sdma->spba_start_addr && + sdmac->per_address2 <= sdma->spba_end_addr) + sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_SP; + + if (sdmac->per_address >= sdma->spba_start_addr && + sdmac->per_address <= sdma->spba_end_addr) + sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_DP; + + sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_CONT; +} + static int sdma_config_channel(struct dma_chan *chan) { struct sdma_channel *sdmac = to_sdma_chan(chan); @@ -869,6 +969,12 @@ static int sdma_config_channel(struct dma_chan *chan) sdma_event_enable(sdmac, sdmac->event_id0); } + if (sdmac->event_id1) { + if (sdmac->event_id1 >= sdmac->sdma->drvdata->num_events) + return -EINVAL; + sdma_event_enable(sdmac, sdmac->event_id1); + } + switch (sdmac->peripheral_type) { case IMX_DMATYPE_DSP: sdma_config_ownership(sdmac, false, true, true); @@ -887,19 +993,17 @@ static int sdma_config_channel(struct dma_chan *chan) (sdmac->peripheral_type != IMX_DMATYPE_DSP)) { /* Handle multiple event channels differently */ if (sdmac->event_id1) { - sdmac->event_mask[1] = BIT(sdmac->event_id1 % 32); - if (sdmac->event_id1 > 31) - __set_bit(31, &sdmac->watermark_level); - sdmac->event_mask[0] = BIT(sdmac->event_id0 % 32); - if (sdmac->event_id0 > 31) - __set_bit(30, &sdmac->watermark_level); - } else { + if (sdmac->peripheral_type == IMX_DMATYPE_ASRC_SP || + sdmac->peripheral_type == IMX_DMATYPE_ASRC) + sdma_set_watermarklevel_for_p2p(sdmac); + } else __set_bit(sdmac->event_id0, sdmac->event_mask); - } + /* Watermark Level */ sdmac->watermark_level |= sdmac->watermark_level; /* Address */ sdmac->shp_addr = sdmac->per_address; + sdmac->per_addr = sdmac->per_address2; } else { sdmac->watermark_level = 0; /* FIXME: M3_BASE_ADDRESS */ } @@ -987,17 +1091,22 @@ static int sdma_alloc_chan_resources(struct dma_chan *chan) sdmac->peripheral_type = data->peripheral_type; sdmac->event_id0 = data->dma_request; + sdmac->event_id1 = data->dma_request2; - clk_enable(sdmac->sdma->clk_ipg); - clk_enable(sdmac->sdma->clk_ahb); + ret = clk_enable(sdmac->sdma->clk_ipg); + if (ret) + return ret; + ret = clk_enable(sdmac->sdma->clk_ahb); + if (ret) + goto disable_clk_ipg; ret = sdma_request_channel(sdmac); if (ret) - return ret; + goto disable_clk_ahb; ret = sdma_set_channel_priority(sdmac, prio); if (ret) - return ret; + goto disable_clk_ahb; dma_async_tx_descriptor_init(&sdmac->desc, chan); sdmac->desc.tx_submit = sdma_tx_submit; @@ -1005,6 +1114,12 @@ static int sdma_alloc_chan_resources(struct dma_chan *chan) sdmac->desc.flags = DMA_CTRL_ACK; return 0; + +disable_clk_ahb: + clk_disable(sdmac->sdma->clk_ahb); +disable_clk_ipg: + clk_disable(sdmac->sdma->clk_ipg); + return ret; } static void sdma_free_chan_resources(struct dma_chan *chan) @@ -1221,6 +1336,14 @@ static int sdma_config(struct dma_chan *chan, sdmac->watermark_level = dmaengine_cfg->src_maxburst * dmaengine_cfg->src_addr_width; sdmac->word_size = dmaengine_cfg->src_addr_width; + } else if (dmaengine_cfg->direction == DMA_DEV_TO_DEV) { + sdmac->per_address2 = dmaengine_cfg->src_addr; + sdmac->per_address = dmaengine_cfg->dst_addr; + sdmac->watermark_level = dmaengine_cfg->src_maxburst & + 
SDMA_WATERMARK_LEVEL_LWML; + sdmac->watermark_level |= (dmaengine_cfg->dst_maxburst << 16) & + SDMA_WATERMARK_LEVEL_HWML; + sdmac->word_size = dmaengine_cfg->dst_addr_width; } else { sdmac->per_address = dmaengine_cfg->dst_addr; sdmac->watermark_level = dmaengine_cfg->dst_maxburst * @@ -1337,6 +1460,72 @@ err_firmware: release_firmware(fw); } +#define EVENT_REMAP_CELLS 3 + +static int sdma_event_remap(struct sdma_engine *sdma) +{ + struct device_node *np = sdma->dev->of_node; + struct device_node *gpr_np = of_parse_phandle(np, "gpr", 0); + struct property *event_remap; + struct regmap *gpr; + char propname[] = "fsl,sdma-event-remap"; + u32 reg, val, shift, num_map, i; + int ret = 0; + + if (IS_ERR(np) || IS_ERR(gpr_np)) + goto out; + + event_remap = of_find_property(np, propname, NULL); + num_map = event_remap ? (event_remap->length / sizeof(u32)) : 0; + if (!num_map) { + dev_dbg(sdma->dev, "no event needs to be remapped\n"); + goto out; + } else if (num_map % EVENT_REMAP_CELLS) { + dev_err(sdma->dev, "the property %s must modulo %d\n", + propname, EVENT_REMAP_CELLS); + ret = -EINVAL; + goto out; + } + + gpr = syscon_node_to_regmap(gpr_np); + if (IS_ERR(gpr)) { + dev_err(sdma->dev, "failed to get gpr regmap\n"); + ret = PTR_ERR(gpr); + goto out; + } + + for (i = 0; i < num_map; i += EVENT_REMAP_CELLS) { + ret = of_property_read_u32_index(np, propname, i, ®); + if (ret) { + dev_err(sdma->dev, "failed to read property %s index %d\n", + propname, i); + goto out; + } + + ret = of_property_read_u32_index(np, propname, i + 1, &shift); + if (ret) { + dev_err(sdma->dev, "failed to read property %s index %d\n", + propname, i + 1); + goto out; + } + + ret = of_property_read_u32_index(np, propname, i + 2, &val); + if (ret) { + dev_err(sdma->dev, "failed to read property %s index %d\n", + propname, i + 2); + goto out; + } + + regmap_update_bits(gpr, reg, BIT(shift), val << shift); + } + +out: + if (!IS_ERR(gpr_np)) + of_node_put(gpr_np); + + return ret; +} + static int sdma_get_firmware(struct sdma_engine *sdma, const char *fw_name) { @@ -1354,8 +1543,12 @@ static int sdma_init(struct sdma_engine *sdma) int i, ret; dma_addr_t ccb_phys; - clk_enable(sdma->clk_ipg); - clk_enable(sdma->clk_ahb); + ret = clk_enable(sdma->clk_ipg); + if (ret) + return ret; + ret = clk_enable(sdma->clk_ahb); + if (ret) + goto disable_clk_ipg; /* Be sure SDMA has not started yet */ writel_relaxed(0, sdma->regs + SDMA_H_C0PTR); @@ -1411,8 +1604,9 @@ static int sdma_init(struct sdma_engine *sdma) return 0; err_dma_alloc: - clk_disable(sdma->clk_ipg); clk_disable(sdma->clk_ahb); +disable_clk_ipg: + clk_disable(sdma->clk_ipg); dev_err(sdma->dev, "initialisation failed with %d\n", ret); return ret; } @@ -1444,6 +1638,14 @@ static struct dma_chan *sdma_xlate(struct of_phandle_args *dma_spec, data.dma_request = dma_spec->args[0]; data.peripheral_type = dma_spec->args[1]; data.priority = dma_spec->args[2]; + /* + * init dma_request2 to zero, which is not used by the dts. + * For P2P, dma_request2 is init from dma_request_channel(), + * chan->private will point to the imx_dma_data, and in + * device_alloc_chan_resources(), imx_dma_data.dma_request2 will + * be set to sdmac->event_id1. 
+ */ + data.dma_request2 = 0; return dma_request_channel(mask, sdma_filter_fn, &data); } @@ -1453,10 +1655,12 @@ static int sdma_probe(struct platform_device *pdev) const struct of_device_id *of_id = of_match_device(sdma_dt_ids, &pdev->dev); struct device_node *np = pdev->dev.of_node; + struct device_node *spba_bus; const char *fw_name; int ret; int irq; struct resource *iores; + struct resource spba_res; struct sdma_platform_data *pdata = dev_get_platdata(&pdev->dev); int i; struct sdma_engine *sdma; @@ -1551,6 +1755,10 @@ static int sdma_probe(struct platform_device *pdev) if (ret) goto err_init; + ret = sdma_event_remap(sdma); + if (ret) + goto err_init; + if (sdma->drvdata->script_addrs) sdma_add_scripts(sdma, sdma->drvdata->script_addrs); if (pdata && pdata->script_addrs) @@ -1608,9 +1816,15 @@ static int sdma_probe(struct platform_device *pdev) dev_err(&pdev->dev, "failed to register controller\n"); goto err_register; } - } - dev_info(sdma->dev, "initialized\n"); + spba_bus = of_find_compatible_node(NULL, NULL, "fsl,spba-bus"); + ret = of_address_to_resource(spba_bus, 0, &spba_res); + if (!ret) { + sdma->spba_start_addr = spba_res.start; + sdma->spba_end_addr = spba_res.end; + } + of_node_put(spba_bus); + } return 0; @@ -1636,7 +1850,6 @@ static int sdma_remove(struct platform_device *pdev) } platform_set_drvdata(pdev, NULL); - dev_info(&pdev->dev, "Removed...\n"); return 0; } diff --git a/kernel/drivers/dma/ioat/Makefile b/kernel/drivers/dma/ioat/Makefile index 0ff7270af..cf5fedbe2 100644 --- a/kernel/drivers/dma/ioat/Makefile +++ b/kernel/drivers/dma/ioat/Makefile @@ -1,2 +1,2 @@ obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o -ioatdma-y := pci.o dma.o dma_v2.o dma_v3.o dca.o +ioatdma-y := init.o dma.o prep.o dca.o sysfs.o diff --git a/kernel/drivers/dma/ioat/dca.c b/kernel/drivers/dma/ioat/dca.c index ea1e107ae..2cb7c308d 100644 --- a/kernel/drivers/dma/ioat/dca.c +++ b/kernel/drivers/dma/ioat/dca.c @@ -31,7 +31,6 @@ #include "dma.h" #include "registers.h" -#include "dma_v2.h" /* * Bit 7 of a tag map entry is the "valid" bit, if it is set then bits 0:6 @@ -71,14 +70,6 @@ static inline int dca2_tag_map_valid(u8 *tag_map) #define APICID_BIT(x) (DCA_TAG_MAP_VALID | (x)) #define IOAT_TAG_MAP_LEN 8 -static u8 ioat_tag_map_BNB[IOAT_TAG_MAP_LEN] = { - 1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2), }; -static u8 ioat_tag_map_SCNB[IOAT_TAG_MAP_LEN] = { - 1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2), }; -static u8 ioat_tag_map_CNB[IOAT_TAG_MAP_LEN] = { - 1, APICID_BIT(1), APICID_BIT(3), APICID_BIT(4), APICID_BIT(2), }; -static u8 ioat_tag_map_UNISYS[IOAT_TAG_MAP_LEN] = { 0 }; - /* pack PCI B/D/F into a u16 */ static inline u16 dcaid_from_pcidev(struct pci_dev *pci) { @@ -126,96 +117,6 @@ struct ioat_dca_priv { struct ioat_dca_slot req_slots[0]; }; -/* 5000 series chipset DCA Port Requester ID Table Entry Format - * [15:8] PCI-Express Bus Number - * [7:3] PCI-Express Device Number - * [2:0] PCI-Express Function Number - * - * 5000 series chipset DCA control register format - * [7:1] Reserved (0) - * [0] Ignore Function Number - */ - -static int ioat_dca_add_requester(struct dca_provider *dca, struct device *dev) -{ - struct ioat_dca_priv *ioatdca = dca_priv(dca); - struct pci_dev *pdev; - int i; - u16 id; - - /* This implementation only supports PCI-Express */ - if (!dev_is_pci(dev)) - return -ENODEV; - pdev = to_pci_dev(dev); - id = dcaid_from_pcidev(pdev); - - if (ioatdca->requester_count == ioatdca->max_requesters) - return -ENODEV; - - for (i = 0; i < ioatdca->max_requesters; i++) { - if 
(ioatdca->req_slots[i].pdev == NULL) { - /* found an empty slot */ - ioatdca->requester_count++; - ioatdca->req_slots[i].pdev = pdev; - ioatdca->req_slots[i].rid = id; - writew(id, ioatdca->dca_base + (i * 4)); - /* make sure the ignore function bit is off */ - writeb(0, ioatdca->dca_base + (i * 4) + 2); - return i; - } - } - /* Error, ioatdma->requester_count is out of whack */ - return -EFAULT; -} - -static int ioat_dca_remove_requester(struct dca_provider *dca, - struct device *dev) -{ - struct ioat_dca_priv *ioatdca = dca_priv(dca); - struct pci_dev *pdev; - int i; - - /* This implementation only supports PCI-Express */ - if (!dev_is_pci(dev)) - return -ENODEV; - pdev = to_pci_dev(dev); - - for (i = 0; i < ioatdca->max_requesters; i++) { - if (ioatdca->req_slots[i].pdev == pdev) { - writew(0, ioatdca->dca_base + (i * 4)); - ioatdca->req_slots[i].pdev = NULL; - ioatdca->req_slots[i].rid = 0; - ioatdca->requester_count--; - return i; - } - } - return -ENODEV; -} - -static u8 ioat_dca_get_tag(struct dca_provider *dca, - struct device *dev, - int cpu) -{ - struct ioat_dca_priv *ioatdca = dca_priv(dca); - int i, apic_id, bit, value; - u8 entry, tag; - - tag = 0; - apic_id = cpu_physical_id(cpu); - - for (i = 0; i < IOAT_TAG_MAP_LEN; i++) { - entry = ioatdca->tag_map[i]; - if (entry & DCA_TAG_MAP_VALID) { - bit = entry & ~DCA_TAG_MAP_VALID; - value = (apic_id & (1 << bit)) ? 1 : 0; - } else { - value = entry ? 1 : 0; - } - tag |= (value << i); - } - return tag; -} - static int ioat_dca_dev_managed(struct dca_provider *dca, struct device *dev) { @@ -231,260 +132,7 @@ static int ioat_dca_dev_managed(struct dca_provider *dca, return 0; } -static struct dca_ops ioat_dca_ops = { - .add_requester = ioat_dca_add_requester, - .remove_requester = ioat_dca_remove_requester, - .get_tag = ioat_dca_get_tag, - .dev_managed = ioat_dca_dev_managed, -}; - - -struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase) -{ - struct dca_provider *dca; - struct ioat_dca_priv *ioatdca; - u8 *tag_map = NULL; - int i; - int err; - u8 version; - u8 max_requesters; - - if (!system_has_dca_enabled(pdev)) - return NULL; - - /* I/OAT v1 systems must have a known tag_map to support DCA */ - switch (pdev->vendor) { - case PCI_VENDOR_ID_INTEL: - switch (pdev->device) { - case PCI_DEVICE_ID_INTEL_IOAT: - tag_map = ioat_tag_map_BNB; - break; - case PCI_DEVICE_ID_INTEL_IOAT_CNB: - tag_map = ioat_tag_map_CNB; - break; - case PCI_DEVICE_ID_INTEL_IOAT_SCNB: - tag_map = ioat_tag_map_SCNB; - break; - } - break; - case PCI_VENDOR_ID_UNISYS: - switch (pdev->device) { - case PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR: - tag_map = ioat_tag_map_UNISYS; - break; - } - break; - } - if (tag_map == NULL) - return NULL; - - version = readb(iobase + IOAT_VER_OFFSET); - if (version == IOAT_VER_3_0) - max_requesters = IOAT3_DCA_MAX_REQ; - else - max_requesters = IOAT_DCA_MAX_REQ; - - dca = alloc_dca_provider(&ioat_dca_ops, - sizeof(*ioatdca) + - (sizeof(struct ioat_dca_slot) * max_requesters)); - if (!dca) - return NULL; - - ioatdca = dca_priv(dca); - ioatdca->max_requesters = max_requesters; - ioatdca->dca_base = iobase + 0x54; - - /* copy over the APIC ID to DCA tag mapping */ - for (i = 0; i < IOAT_TAG_MAP_LEN; i++) - ioatdca->tag_map[i] = tag_map[i]; - - err = register_dca_provider(dca, &pdev->dev); - if (err) { - free_dca_provider(dca); - return NULL; - } - - return dca; -} - - -static int ioat2_dca_add_requester(struct dca_provider *dca, struct device *dev) -{ - struct ioat_dca_priv *ioatdca = dca_priv(dca); - struct pci_dev 
*pdev; - int i; - u16 id; - u16 global_req_table; - - /* This implementation only supports PCI-Express */ - if (!dev_is_pci(dev)) - return -ENODEV; - pdev = to_pci_dev(dev); - id = dcaid_from_pcidev(pdev); - - if (ioatdca->requester_count == ioatdca->max_requesters) - return -ENODEV; - - for (i = 0; i < ioatdca->max_requesters; i++) { - if (ioatdca->req_slots[i].pdev == NULL) { - /* found an empty slot */ - ioatdca->requester_count++; - ioatdca->req_slots[i].pdev = pdev; - ioatdca->req_slots[i].rid = id; - global_req_table = - readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET); - writel(id | IOAT_DCA_GREQID_VALID, - ioatdca->iobase + global_req_table + (i * 4)); - return i; - } - } - /* Error, ioatdma->requester_count is out of whack */ - return -EFAULT; -} - -static int ioat2_dca_remove_requester(struct dca_provider *dca, - struct device *dev) -{ - struct ioat_dca_priv *ioatdca = dca_priv(dca); - struct pci_dev *pdev; - int i; - u16 global_req_table; - - /* This implementation only supports PCI-Express */ - if (!dev_is_pci(dev)) - return -ENODEV; - pdev = to_pci_dev(dev); - - for (i = 0; i < ioatdca->max_requesters; i++) { - if (ioatdca->req_slots[i].pdev == pdev) { - global_req_table = - readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET); - writel(0, ioatdca->iobase + global_req_table + (i * 4)); - ioatdca->req_slots[i].pdev = NULL; - ioatdca->req_slots[i].rid = 0; - ioatdca->requester_count--; - return i; - } - } - return -ENODEV; -} - -static u8 ioat2_dca_get_tag(struct dca_provider *dca, - struct device *dev, - int cpu) -{ - u8 tag; - - tag = ioat_dca_get_tag(dca, dev, cpu); - tag = (~tag) & 0x1F; - return tag; -} - -static struct dca_ops ioat2_dca_ops = { - .add_requester = ioat2_dca_add_requester, - .remove_requester = ioat2_dca_remove_requester, - .get_tag = ioat2_dca_get_tag, - .dev_managed = ioat_dca_dev_managed, -}; - -static int ioat2_dca_count_dca_slots(void __iomem *iobase, u16 dca_offset) -{ - int slots = 0; - u32 req; - u16 global_req_table; - - global_req_table = readw(iobase + dca_offset + IOAT_DCA_GREQID_OFFSET); - if (global_req_table == 0) - return 0; - do { - req = readl(iobase + global_req_table + (slots * sizeof(u32))); - slots++; - } while ((req & IOAT_DCA_GREQID_LASTID) == 0); - - return slots; -} - -struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase) -{ - struct dca_provider *dca; - struct ioat_dca_priv *ioatdca; - int slots; - int i; - int err; - u32 tag_map; - u16 dca_offset; - u16 csi_fsb_control; - u16 pcie_control; - u8 bit; - - if (!system_has_dca_enabled(pdev)) - return NULL; - - dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET); - if (dca_offset == 0) - return NULL; - - slots = ioat2_dca_count_dca_slots(iobase, dca_offset); - if (slots == 0) - return NULL; - - dca = alloc_dca_provider(&ioat2_dca_ops, - sizeof(*ioatdca) - + (sizeof(struct ioat_dca_slot) * slots)); - if (!dca) - return NULL; - - ioatdca = dca_priv(dca); - ioatdca->iobase = iobase; - ioatdca->dca_base = iobase + dca_offset; - ioatdca->max_requesters = slots; - - /* some bios might not know to turn these on */ - csi_fsb_control = readw(ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET); - if ((csi_fsb_control & IOAT_FSB_CAP_ENABLE_PREFETCH) == 0) { - csi_fsb_control |= IOAT_FSB_CAP_ENABLE_PREFETCH; - writew(csi_fsb_control, - ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET); - } - pcie_control = readw(ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET); - if ((pcie_control & IOAT_PCI_CAP_ENABLE_MEMWR) == 0) { - pcie_control |= IOAT_PCI_CAP_ENABLE_MEMWR; - 
writew(pcie_control, - ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET); - } - - - /* TODO version, compatibility and configuration checks */ - - /* copy out the APIC to DCA tag map */ - tag_map = readl(ioatdca->dca_base + IOAT_APICID_TAG_MAP_OFFSET); - for (i = 0; i < 5; i++) { - bit = (tag_map >> (4 * i)) & 0x0f; - if (bit < 8) - ioatdca->tag_map[i] = bit | DCA_TAG_MAP_VALID; - else - ioatdca->tag_map[i] = 0; - } - - if (!dca2_tag_map_valid(ioatdca->tag_map)) { - WARN_TAINT_ONCE(1, TAINT_FIRMWARE_WORKAROUND, - "%s %s: APICID_TAG_MAP set incorrectly by BIOS, disabling DCA\n", - dev_driver_string(&pdev->dev), - dev_name(&pdev->dev)); - free_dca_provider(dca); - return NULL; - } - - err = register_dca_provider(dca, &pdev->dev); - if (err) { - free_dca_provider(dca); - return NULL; - } - - return dca; -} - -static int ioat3_dca_add_requester(struct dca_provider *dca, struct device *dev) +static int ioat_dca_add_requester(struct dca_provider *dca, struct device *dev) { struct ioat_dca_priv *ioatdca = dca_priv(dca); struct pci_dev *pdev; @@ -518,7 +166,7 @@ static int ioat3_dca_add_requester(struct dca_provider *dca, struct device *dev) return -EFAULT; } -static int ioat3_dca_remove_requester(struct dca_provider *dca, +static int ioat_dca_remove_requester(struct dca_provider *dca, struct device *dev) { struct ioat_dca_priv *ioatdca = dca_priv(dca); @@ -545,7 +193,7 @@ static int ioat3_dca_remove_requester(struct dca_provider *dca, return -ENODEV; } -static u8 ioat3_dca_get_tag(struct dca_provider *dca, +static u8 ioat_dca_get_tag(struct dca_provider *dca, struct device *dev, int cpu) { @@ -576,14 +224,14 @@ static u8 ioat3_dca_get_tag(struct dca_provider *dca, return tag; } -static struct dca_ops ioat3_dca_ops = { - .add_requester = ioat3_dca_add_requester, - .remove_requester = ioat3_dca_remove_requester, - .get_tag = ioat3_dca_get_tag, +static struct dca_ops ioat_dca_ops = { + .add_requester = ioat_dca_add_requester, + .remove_requester = ioat_dca_remove_requester, + .get_tag = ioat_dca_get_tag, .dev_managed = ioat_dca_dev_managed, }; -static int ioat3_dca_count_dca_slots(void *iobase, u16 dca_offset) +static int ioat_dca_count_dca_slots(void *iobase, u16 dca_offset) { int slots = 0; u32 req; @@ -618,7 +266,7 @@ static inline int dca3_tag_map_invalid(u8 *tag_map) (tag_map[4] == DCA_TAG_MAP_VALID)); } -struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase) +struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase) { struct dca_provider *dca; struct ioat_dca_priv *ioatdca; @@ -645,11 +293,11 @@ struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase) if (dca_offset == 0) return NULL; - slots = ioat3_dca_count_dca_slots(iobase, dca_offset); + slots = ioat_dca_count_dca_slots(iobase, dca_offset); if (slots == 0) return NULL; - dca = alloc_dca_provider(&ioat3_dca_ops, + dca = alloc_dca_provider(&ioat_dca_ops, sizeof(*ioatdca) + (sizeof(struct ioat_dca_slot) * slots)); if (!dca) diff --git a/kernel/drivers/dma/ioat/dma.c b/kernel/drivers/dma/ioat/dma.c index ee0aa9f4c..1d5df2ef1 100644 --- a/kernel/drivers/dma/ioat/dma.c +++ b/kernel/drivers/dma/ioat/dma.c @@ -1,6 +1,6 @@ /* * Intel I/OAT DMA Linux driver - * Copyright(c) 2004 - 2009 Intel Corporation. + * Copyright(c) 2004 - 2015 Intel Corporation. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -31,31 +31,23 @@ #include <linux/dma-mapping.h> #include <linux/workqueue.h> #include <linux/prefetch.h> -#include <linux/i7300_idle.h> #include "dma.h" #include "registers.h" #include "hw.h" #include "../dmaengine.h" -int ioat_pending_level = 4; -module_param(ioat_pending_level, int, 0644); -MODULE_PARM_DESC(ioat_pending_level, - "high-water mark for pushing ioat descriptors (default: 4)"); - -/* internal functions */ -static void ioat1_cleanup(struct ioat_dma_chan *ioat); -static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat); +static void ioat_eh(struct ioatdma_chan *ioat_chan); /** * ioat_dma_do_interrupt - handler used for single vector interrupt mode * @irq: interrupt id * @data: interrupt data */ -static irqreturn_t ioat_dma_do_interrupt(int irq, void *data) +irqreturn_t ioat_dma_do_interrupt(int irq, void *data) { struct ioatdma_device *instance = data; - struct ioat_chan_common *chan; + struct ioatdma_chan *ioat_chan; unsigned long attnstatus; int bit; u8 intrctrl; @@ -72,9 +64,9 @@ static irqreturn_t ioat_dma_do_interrupt(int irq, void *data) attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET); for_each_set_bit(bit, &attnstatus, BITS_PER_LONG) { - chan = ioat_chan_by_index(instance, bit); - if (test_bit(IOAT_RUN, &chan->state)) - tasklet_schedule(&chan->cleanup_task); + ioat_chan = ioat_chan_by_index(instance, bit); + if (test_bit(IOAT_RUN, &ioat_chan->state)) + tasklet_schedule(&ioat_chan->cleanup_task); } writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET); @@ -86,1161 +78,913 @@ static irqreturn_t ioat_dma_do_interrupt(int irq, void *data) * @irq: interrupt id * @data: interrupt data */ -static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data) +irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data) { - struct ioat_chan_common *chan = data; + struct ioatdma_chan *ioat_chan = data; - if (test_bit(IOAT_RUN, &chan->state)) - tasklet_schedule(&chan->cleanup_task); + if (test_bit(IOAT_RUN, &ioat_chan->state)) + tasklet_schedule(&ioat_chan->cleanup_task); return IRQ_HANDLED; } -/* common channel initialization */ -void ioat_init_channel(struct ioatdma_device *device, struct ioat_chan_common *chan, int idx) +void ioat_stop(struct ioatdma_chan *ioat_chan) { - struct dma_device *dma = &device->common; - struct dma_chan *c = &chan->common; - unsigned long data = (unsigned long) c; - - chan->device = device; - chan->reg_base = device->reg_base + (0x80 * (idx + 1)); - spin_lock_init(&chan->cleanup_lock); - chan->common.device = dma; - dma_cookie_init(&chan->common); - list_add_tail(&chan->common.device_node, &dma->channels); - device->idx[idx] = chan; - init_timer(&chan->timer); - chan->timer.function = device->timer_fn; - chan->timer.data = data; - tasklet_init(&chan->cleanup_task, device->cleanup_fn, data); + struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; + struct pci_dev *pdev = ioat_dma->pdev; + int chan_id = chan_num(ioat_chan); + struct msix_entry *msix; + + /* 1/ stop irq from firing tasklets + * 2/ stop the tasklet from re-arming irqs + */ + clear_bit(IOAT_RUN, &ioat_chan->state); + + /* flush inflight interrupts */ + switch (ioat_dma->irq_mode) { + case IOAT_MSIX: + msix = &ioat_dma->msix_entries[chan_id]; + synchronize_irq(msix->vector); + break; + case IOAT_MSI: + case IOAT_INTX: + synchronize_irq(pdev->irq); + break; + default: + break; + } + + /* flush inflight timers */ + 
del_timer_sync(&ioat_chan->timer); + + /* flush inflight tasklet runs */ + tasklet_kill(&ioat_chan->cleanup_task); + + /* final cleanup now that everything is quiesced and can't re-arm */ + ioat_cleanup_event((unsigned long)&ioat_chan->dma_chan); } -/** - * ioat1_dma_enumerate_channels - find and initialize the device's channels - * @device: the device to be enumerated - */ -static int ioat1_enumerate_channels(struct ioatdma_device *device) +static void __ioat_issue_pending(struct ioatdma_chan *ioat_chan) { - u8 xfercap_scale; - u32 xfercap; - int i; - struct ioat_dma_chan *ioat; - struct device *dev = &device->pdev->dev; - struct dma_device *dma = &device->common; - - INIT_LIST_HEAD(&dma->channels); - dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET); - dma->chancnt &= 0x1f; /* bits [4:0] valid */ - if (dma->chancnt > ARRAY_SIZE(device->idx)) { - dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n", - dma->chancnt, ARRAY_SIZE(device->idx)); - dma->chancnt = ARRAY_SIZE(device->idx); - } - xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET); - xfercap_scale &= 0x1f; /* bits [4:0] valid */ - xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale)); - dev_dbg(dev, "%s: xfercap = %d\n", __func__, xfercap); - -#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL - if (i7300_idle_platform_probe(NULL, NULL, 1) == 0) - dma->chancnt--; -#endif - for (i = 0; i < dma->chancnt; i++) { - ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL); - if (!ioat) - break; + ioat_chan->dmacount += ioat_ring_pending(ioat_chan); + ioat_chan->issued = ioat_chan->head; + writew(ioat_chan->dmacount, + ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET); + dev_dbg(to_dev(ioat_chan), + "%s: head: %#x tail: %#x issued: %#x count: %#x\n", + __func__, ioat_chan->head, ioat_chan->tail, + ioat_chan->issued, ioat_chan->dmacount); +} + +void ioat_issue_pending(struct dma_chan *c) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); - ioat_init_channel(device, &ioat->base, i); - ioat->xfercap = xfercap; - spin_lock_init(&ioat->desc_lock); - INIT_LIST_HEAD(&ioat->free_desc); - INIT_LIST_HEAD(&ioat->used_desc); + if (ioat_ring_pending(ioat_chan)) { + spin_lock_bh(&ioat_chan->prep_lock); + __ioat_issue_pending(ioat_chan); + spin_unlock_bh(&ioat_chan->prep_lock); } - dma->chancnt = i; - return i; } /** - * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended - * descriptors to hw - * @chan: DMA channel handle + * ioat_update_pending - log pending descriptors + * @ioat: ioat+ channel + * + * Check if the number of unsubmitted descriptors has exceeded the + * watermark. 
Called with prep_lock held */ -static inline void -__ioat1_dma_memcpy_issue_pending(struct ioat_dma_chan *ioat) +static void ioat_update_pending(struct ioatdma_chan *ioat_chan) { - void __iomem *reg_base = ioat->base.reg_base; - - dev_dbg(to_dev(&ioat->base), "%s: pending: %d\n", - __func__, ioat->pending); - ioat->pending = 0; - writeb(IOAT_CHANCMD_APPEND, reg_base + IOAT1_CHANCMD_OFFSET); + if (ioat_ring_pending(ioat_chan) > ioat_pending_level) + __ioat_issue_pending(ioat_chan); } -static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan) +static void __ioat_start_null_desc(struct ioatdma_chan *ioat_chan) { - struct ioat_dma_chan *ioat = to_ioat_chan(chan); + struct ioat_ring_ent *desc; + struct ioat_dma_descriptor *hw; - if (ioat->pending > 0) { - spin_lock_bh(&ioat->desc_lock); - __ioat1_dma_memcpy_issue_pending(ioat); - spin_unlock_bh(&ioat->desc_lock); + if (ioat_ring_space(ioat_chan) < 1) { + dev_err(to_dev(ioat_chan), + "Unable to start null desc - ring full\n"); + return; } + + dev_dbg(to_dev(ioat_chan), + "%s: head: %#x tail: %#x issued: %#x\n", + __func__, ioat_chan->head, ioat_chan->tail, ioat_chan->issued); + desc = ioat_get_ring_ent(ioat_chan, ioat_chan->head); + + hw = desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = 1; + hw->ctl_f.compl_write = 1; + /* set size to non-zero value (channel returns error when size is 0) */ + hw->size = NULL_DESC_BUFFER_SIZE; + hw->src_addr = 0; + hw->dst_addr = 0; + async_tx_ack(&desc->txd); + ioat_set_chainaddr(ioat_chan, desc->txd.phys); + dump_desc_dbg(ioat_chan, desc); + /* make sure descriptors are written before we submit */ + wmb(); + ioat_chan->head += 1; + __ioat_issue_pending(ioat_chan); } -/** - * ioat1_reset_channel - restart a channel - * @ioat: IOAT DMA channel handle - */ -static void ioat1_reset_channel(struct ioat_dma_chan *ioat) +void ioat_start_null_desc(struct ioatdma_chan *ioat_chan) { - struct ioat_chan_common *chan = &ioat->base; - void __iomem *reg_base = chan->reg_base; - u32 chansts, chanerr; - - dev_warn(to_dev(chan), "reset\n"); - chanerr = readl(reg_base + IOAT_CHANERR_OFFSET); - chansts = *chan->completion & IOAT_CHANSTS_STATUS; - if (chanerr) { - dev_err(to_dev(chan), - "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n", - chan_num(chan), chansts, chanerr); - writel(chanerr, reg_base + IOAT_CHANERR_OFFSET); + spin_lock_bh(&ioat_chan->prep_lock); + if (!test_bit(IOAT_CHAN_DOWN, &ioat_chan->state)) + __ioat_start_null_desc(ioat_chan); + spin_unlock_bh(&ioat_chan->prep_lock); +} + +static void __ioat_restart_chan(struct ioatdma_chan *ioat_chan) +{ + /* set the tail to be re-issued */ + ioat_chan->issued = ioat_chan->tail; + ioat_chan->dmacount = 0; + mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT); + + dev_dbg(to_dev(ioat_chan), + "%s: head: %#x tail: %#x issued: %#x count: %#x\n", + __func__, ioat_chan->head, ioat_chan->tail, + ioat_chan->issued, ioat_chan->dmacount); + + if (ioat_ring_pending(ioat_chan)) { + struct ioat_ring_ent *desc; + + desc = ioat_get_ring_ent(ioat_chan, ioat_chan->tail); + ioat_set_chainaddr(ioat_chan, desc->txd.phys); + __ioat_issue_pending(ioat_chan); + } else + __ioat_start_null_desc(ioat_chan); +} + +static int ioat_quiesce(struct ioatdma_chan *ioat_chan, unsigned long tmo) +{ + unsigned long end = jiffies + tmo; + int err = 0; + u32 status; + + status = ioat_chansts(ioat_chan); + if (is_ioat_active(status) || is_ioat_idle(status)) + ioat_suspend(ioat_chan); + while (is_ioat_active(status) || is_ioat_idle(status)) { + if (tmo && 
time_after(jiffies, end)) { + err = -ETIMEDOUT; + break; + } + status = ioat_chansts(ioat_chan); + cpu_relax(); } - /* - * whack it upside the head with a reset - * and wait for things to settle out. - * force the pending count to a really big negative - * to make sure no one forces an issue_pending - * while we're waiting. - */ + return err; +} - ioat->pending = INT_MIN; - writeb(IOAT_CHANCMD_RESET, - reg_base + IOAT_CHANCMD_OFFSET(chan->device->version)); - set_bit(IOAT_RESET_PENDING, &chan->state); - mod_timer(&chan->timer, jiffies + RESET_DELAY); +static int ioat_reset_sync(struct ioatdma_chan *ioat_chan, unsigned long tmo) +{ + unsigned long end = jiffies + tmo; + int err = 0; + + ioat_reset(ioat_chan); + while (ioat_reset_pending(ioat_chan)) { + if (end && time_after(jiffies, end)) { + err = -ETIMEDOUT; + break; + } + cpu_relax(); + } + + return err; } -static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) +static dma_cookie_t ioat_tx_submit_unlock(struct dma_async_tx_descriptor *tx) + __releases(&ioat_chan->prep_lock) { struct dma_chan *c = tx->chan; - struct ioat_dma_chan *ioat = to_ioat_chan(c); - struct ioat_desc_sw *desc = tx_to_ioat_desc(tx); - struct ioat_chan_common *chan = &ioat->base; - struct ioat_desc_sw *first; - struct ioat_desc_sw *chain_tail; + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); dma_cookie_t cookie; - spin_lock_bh(&ioat->desc_lock); - /* cookie incr and addition to used_list must be atomic */ cookie = dma_cookie_assign(tx); - dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie); + dev_dbg(to_dev(ioat_chan), "%s: cookie: %d\n", __func__, cookie); + + if (!test_and_set_bit(IOAT_CHAN_ACTIVE, &ioat_chan->state)) + mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT); - /* write address into NextDescriptor field of last desc in chain */ - first = to_ioat_desc(desc->tx_list.next); - chain_tail = to_ioat_desc(ioat->used_desc.prev); - /* make descriptor updates globally visible before chaining */ + /* make descriptor updates visible before advancing ioat->head, + * this is purposefully not smp_wmb() since we are also + * publishing the descriptor updates to a dma device + */ wmb(); - chain_tail->hw->next = first->txd.phys; - list_splice_tail_init(&desc->tx_list, &ioat->used_desc); - dump_desc_dbg(ioat, chain_tail); - dump_desc_dbg(ioat, first); - if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state)) - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + ioat_chan->head += ioat_chan->produce; - ioat->active += desc->hw->tx_cnt; - ioat->pending += desc->hw->tx_cnt; - if (ioat->pending >= ioat_pending_level) - __ioat1_dma_memcpy_issue_pending(ioat); - spin_unlock_bh(&ioat->desc_lock); + ioat_update_pending(ioat_chan); + spin_unlock_bh(&ioat_chan->prep_lock); return cookie; } -/** - * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair - * @ioat: the channel supplying the memory pool for the descriptors - * @flags: allocation flags - */ -static struct ioat_desc_sw * -ioat_dma_alloc_descriptor(struct ioat_dma_chan *ioat, gfp_t flags) +static struct ioat_ring_ent * +ioat_alloc_ring_ent(struct dma_chan *chan, gfp_t flags) { - struct ioat_dma_descriptor *desc; - struct ioat_desc_sw *desc_sw; - struct ioatdma_device *ioatdma_device; + struct ioat_dma_descriptor *hw; + struct ioat_ring_ent *desc; + struct ioatdma_device *ioat_dma; dma_addr_t phys; - ioatdma_device = ioat->base.device; - desc = pci_pool_alloc(ioatdma_device->dma_pool, flags, &phys); - if (unlikely(!desc)) + ioat_dma = 
to_ioatdma_device(chan->device); + hw = pci_pool_alloc(ioat_dma->dma_pool, flags, &phys); + if (!hw) return NULL; + memset(hw, 0, sizeof(*hw)); - desc_sw = kzalloc(sizeof(*desc_sw), flags); - if (unlikely(!desc_sw)) { - pci_pool_free(ioatdma_device->dma_pool, desc, phys); + desc = kmem_cache_zalloc(ioat_cache, flags); + if (!desc) { + pci_pool_free(ioat_dma->dma_pool, hw, phys); return NULL; } - memset(desc, 0, sizeof(*desc)); + dma_async_tx_descriptor_init(&desc->txd, chan); + desc->txd.tx_submit = ioat_tx_submit_unlock; + desc->hw = hw; + desc->txd.phys = phys; + return desc; +} - INIT_LIST_HEAD(&desc_sw->tx_list); - dma_async_tx_descriptor_init(&desc_sw->txd, &ioat->base.common); - desc_sw->txd.tx_submit = ioat1_tx_submit; - desc_sw->hw = desc; - desc_sw->txd.phys = phys; - set_desc_id(desc_sw, -1); +void ioat_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan) +{ + struct ioatdma_device *ioat_dma; - return desc_sw; + ioat_dma = to_ioatdma_device(chan->device); + pci_pool_free(ioat_dma->dma_pool, desc->hw, desc->txd.phys); + kmem_cache_free(ioat_cache, desc); } -static int ioat_initial_desc_count = 256; -module_param(ioat_initial_desc_count, int, 0644); -MODULE_PARM_DESC(ioat_initial_desc_count, - "ioat1: initial descriptors per channel (default: 256)"); -/** - * ioat1_dma_alloc_chan_resources - returns the number of allocated descriptors - * @chan: the channel to be filled out - */ -static int ioat1_dma_alloc_chan_resources(struct dma_chan *c) +struct ioat_ring_ent ** +ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags) { - struct ioat_dma_chan *ioat = to_ioat_chan(c); - struct ioat_chan_common *chan = &ioat->base; - struct ioat_desc_sw *desc; - u32 chanerr; + struct ioat_ring_ent **ring; + int descs = 1 << order; int i; - LIST_HEAD(tmp_list); - - /* have we already been set up? 
*/ - if (!list_empty(&ioat->free_desc)) - return ioat->desccount; - /* Setup register to interrupt and write completion status on error */ - writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET); + if (order > ioat_get_max_alloc_order()) + return NULL; - chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - if (chanerr) { - dev_err(to_dev(chan), "CHANERR = %x, clearing\n", chanerr); - writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); + /* allocate the array to hold the software ring */ + ring = kcalloc(descs, sizeof(*ring), flags); + if (!ring) + return NULL; + for (i = 0; i < descs; i++) { + ring[i] = ioat_alloc_ring_ent(c, flags); + if (!ring[i]) { + while (i--) + ioat_free_ring_ent(ring[i], c); + kfree(ring); + return NULL; + } + set_desc_id(ring[i], i); } - /* Allocate descriptors */ - for (i = 0; i < ioat_initial_desc_count; i++) { - desc = ioat_dma_alloc_descriptor(ioat, GFP_KERNEL); - if (!desc) { - dev_err(to_dev(chan), "Only %d initial descriptors\n", i); - break; - } - set_desc_id(desc, i); - list_add_tail(&desc->node, &tmp_list); + /* link descs */ + for (i = 0; i < descs-1; i++) { + struct ioat_ring_ent *next = ring[i+1]; + struct ioat_dma_descriptor *hw = ring[i]->hw; + + hw->next = next->txd.phys; } - spin_lock_bh(&ioat->desc_lock); - ioat->desccount = i; - list_splice(&tmp_list, &ioat->free_desc); - spin_unlock_bh(&ioat->desc_lock); - - /* allocate a completion writeback area */ - /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ - chan->completion = pci_pool_alloc(chan->device->completion_pool, - GFP_KERNEL, &chan->completion_dma); - memset(chan->completion, 0, sizeof(*chan->completion)); - writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF, - chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); - writel(((u64) chan->completion_dma) >> 32, - chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); - - set_bit(IOAT_RUN, &chan->state); - ioat1_dma_start_null_desc(ioat); /* give chain to dma device */ - dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n", - __func__, ioat->desccount); - return ioat->desccount; + ring[i]->hw->next = ring[0]->txd.phys; + + return ring; } -void ioat_stop(struct ioat_chan_common *chan) +static bool reshape_ring(struct ioatdma_chan *ioat_chan, int order) { - struct ioatdma_device *device = chan->device; - struct pci_dev *pdev = device->pdev; - int chan_id = chan_num(chan); - struct msix_entry *msix; + /* reshape differs from normal ring allocation in that we want + * to allocate a new software ring while only + * extending/truncating the hardware ring + */ + struct dma_chan *c = &ioat_chan->dma_chan; + const u32 curr_size = ioat_ring_size(ioat_chan); + const u16 active = ioat_ring_active(ioat_chan); + const u32 new_size = 1 << order; + struct ioat_ring_ent **ring; + u32 i; + + if (order > ioat_get_max_alloc_order()) + return false; - /* 1/ stop irq from firing tasklets - * 2/ stop the tasklet from re-arming irqs + /* double check that we have at least 1 free descriptor */ + if (active == curr_size) + return false; + + /* when shrinking, verify that we can hold the current active + * set in the new ring */ - clear_bit(IOAT_RUN, &chan->state); + if (active >= new_size) + return false; - /* flush inflight interrupts */ - switch (device->irq_mode) { - case IOAT_MSIX: - msix = &device->msix_entries[chan_id]; - synchronize_irq(msix->vector); - break; - case IOAT_MSI: - case IOAT_INTX: - synchronize_irq(pdev->irq); - break; - default: - break; - } + /* allocate the array to hold the software ring */ + ring = kcalloc(new_size, 
sizeof(*ring), GFP_NOWAIT); + if (!ring) + return false; - /* flush inflight timers */ - del_timer_sync(&chan->timer); + /* allocate/trim descriptors as needed */ + if (new_size > curr_size) { + /* copy current descriptors to the new ring */ + for (i = 0; i < curr_size; i++) { + u16 curr_idx = (ioat_chan->tail+i) & (curr_size-1); + u16 new_idx = (ioat_chan->tail+i) & (new_size-1); - /* flush inflight tasklet runs */ - tasklet_kill(&chan->cleanup_task); + ring[new_idx] = ioat_chan->ring[curr_idx]; + set_desc_id(ring[new_idx], new_idx); + } - /* final cleanup now that everything is quiesced and can't re-arm */ - device->cleanup_fn((unsigned long) &chan->common); -} + /* add new descriptors to the ring */ + for (i = curr_size; i < new_size; i++) { + u16 new_idx = (ioat_chan->tail+i) & (new_size-1); -/** - * ioat1_dma_free_chan_resources - release all the descriptors - * @chan: the channel to be cleaned - */ -static void ioat1_dma_free_chan_resources(struct dma_chan *c) -{ - struct ioat_dma_chan *ioat = to_ioat_chan(c); - struct ioat_chan_common *chan = &ioat->base; - struct ioatdma_device *ioatdma_device = chan->device; - struct ioat_desc_sw *desc, *_desc; - int in_use_descs = 0; - - /* Before freeing channel resources first check - * if they have been previously allocated for this channel. - */ - if (ioat->desccount == 0) - return; + ring[new_idx] = ioat_alloc_ring_ent(c, GFP_NOWAIT); + if (!ring[new_idx]) { + while (i--) { + u16 new_idx = (ioat_chan->tail+i) & + (new_size-1); + + ioat_free_ring_ent(ring[new_idx], c); + } + kfree(ring); + return false; + } + set_desc_id(ring[new_idx], new_idx); + } - ioat_stop(chan); + /* hw link new descriptors */ + for (i = curr_size-1; i < new_size; i++) { + u16 new_idx = (ioat_chan->tail+i) & (new_size-1); + struct ioat_ring_ent *next = + ring[(new_idx+1) & (new_size-1)]; + struct ioat_dma_descriptor *hw = ring[new_idx]->hw; - /* Delay 100ms after reset to allow internal DMA logic to quiesce - * before removing DMA descriptor resources. 
- */ - writeb(IOAT_CHANCMD_RESET, - chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version)); - mdelay(100); - - spin_lock_bh(&ioat->desc_lock); - list_for_each_entry_safe(desc, _desc, &ioat->used_desc, node) { - dev_dbg(to_dev(chan), "%s: freeing %d from used list\n", - __func__, desc_id(desc)); - dump_desc_dbg(ioat, desc); - in_use_descs++; - list_del(&desc->node); - pci_pool_free(ioatdma_device->dma_pool, desc->hw, - desc->txd.phys); - kfree(desc); - } - list_for_each_entry_safe(desc, _desc, - &ioat->free_desc, node) { - list_del(&desc->node); - pci_pool_free(ioatdma_device->dma_pool, desc->hw, - desc->txd.phys); - kfree(desc); + hw->next = next->txd.phys; + } + } else { + struct ioat_dma_descriptor *hw; + struct ioat_ring_ent *next; + + /* copy current descriptors to the new ring, dropping the + * removed descriptors + */ + for (i = 0; i < new_size; i++) { + u16 curr_idx = (ioat_chan->tail+i) & (curr_size-1); + u16 new_idx = (ioat_chan->tail+i) & (new_size-1); + + ring[new_idx] = ioat_chan->ring[curr_idx]; + set_desc_id(ring[new_idx], new_idx); + } + + /* free deleted descriptors */ + for (i = new_size; i < curr_size; i++) { + struct ioat_ring_ent *ent; + + ent = ioat_get_ring_ent(ioat_chan, ioat_chan->tail+i); + ioat_free_ring_ent(ent, c); + } + + /* fix up hardware ring */ + hw = ring[(ioat_chan->tail+new_size-1) & (new_size-1)]->hw; + next = ring[(ioat_chan->tail+new_size) & (new_size-1)]; + hw->next = next->txd.phys; } - spin_unlock_bh(&ioat->desc_lock); - pci_pool_free(ioatdma_device->completion_pool, - chan->completion, - chan->completion_dma); + dev_dbg(to_dev(ioat_chan), "%s: allocated %d descriptors\n", + __func__, new_size); - /* one is ok since we left it on there on purpose */ - if (in_use_descs > 1) - dev_err(to_dev(chan), "Freeing %d in use descriptors!\n", - in_use_descs - 1); + kfree(ioat_chan->ring); + ioat_chan->ring = ring; + ioat_chan->alloc_order = order; - chan->last_completion = 0; - chan->completion_dma = 0; - ioat->pending = 0; - ioat->desccount = 0; + return true; } /** - * ioat1_dma_get_next_descriptor - return the next available descriptor - * @ioat: IOAT DMA channel handle - * - * Gets the next descriptor from the chain, and must be called with the - * channel's desc_lock held. Allocates more descriptors if the channel - * has run out. + * ioat_check_space_lock - verify space and grab ring producer lock + * @ioat: ioat,3 channel (ring) to operate on + * @num_descs: allocation length */ -static struct ioat_desc_sw * -ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat) +int ioat_check_space_lock(struct ioatdma_chan *ioat_chan, int num_descs) + __acquires(&ioat_chan->prep_lock) { - struct ioat_desc_sw *new; + bool retry; - if (!list_empty(&ioat->free_desc)) { - new = to_ioat_desc(ioat->free_desc.next); - list_del(&new->node); - } else { - /* try to get another desc */ - new = ioat_dma_alloc_descriptor(ioat, GFP_ATOMIC); - if (!new) { - dev_err(to_dev(&ioat->base), "alloc failed\n"); - return NULL; - } + retry: + spin_lock_bh(&ioat_chan->prep_lock); + /* never allow the last descriptor to be consumed, we need at + * least one free at all times to allow for on-the-fly ring + * resizing. 
+ */ + if (likely(ioat_ring_space(ioat_chan) > num_descs)) { + dev_dbg(to_dev(ioat_chan), "%s: num_descs: %d (%x:%x:%x)\n", + __func__, num_descs, ioat_chan->head, + ioat_chan->tail, ioat_chan->issued); + ioat_chan->produce = num_descs; + return 0; /* with ioat->prep_lock held */ + } + retry = test_and_set_bit(IOAT_RESHAPE_PENDING, &ioat_chan->state); + spin_unlock_bh(&ioat_chan->prep_lock); + + /* is another cpu already trying to expand the ring? */ + if (retry) + goto retry; + + spin_lock_bh(&ioat_chan->cleanup_lock); + spin_lock_bh(&ioat_chan->prep_lock); + retry = reshape_ring(ioat_chan, ioat_chan->alloc_order + 1); + clear_bit(IOAT_RESHAPE_PENDING, &ioat_chan->state); + spin_unlock_bh(&ioat_chan->prep_lock); + spin_unlock_bh(&ioat_chan->cleanup_lock); + + /* if we were able to expand the ring retry the allocation */ + if (retry) + goto retry; + + dev_dbg_ratelimited(to_dev(ioat_chan), + "%s: ring full! num_descs: %d (%x:%x:%x)\n", + __func__, num_descs, ioat_chan->head, + ioat_chan->tail, ioat_chan->issued); + + /* progress reclaim in the allocation failure case we may be + * called under bh_disabled so we need to trigger the timer + * event directly + */ + if (time_is_before_jiffies(ioat_chan->timer.expires) + && timer_pending(&ioat_chan->timer)) { + mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT); + ioat_timer_event((unsigned long)ioat_chan); } - dev_dbg(to_dev(&ioat->base), "%s: allocated: %d\n", - __func__, desc_id(new)); - prefetch(new->hw); - return new; + + return -ENOMEM; } -static struct dma_async_tx_descriptor * -ioat1_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest, - dma_addr_t dma_src, size_t len, unsigned long flags) +static bool desc_has_ext(struct ioat_ring_ent *desc) { - struct ioat_dma_chan *ioat = to_ioat_chan(c); - struct ioat_desc_sw *desc; - size_t copy; - LIST_HEAD(chain); - dma_addr_t src = dma_src; - dma_addr_t dest = dma_dest; - size_t total_len = len; - struct ioat_dma_descriptor *hw = NULL; - int tx_cnt = 0; - - spin_lock_bh(&ioat->desc_lock); - desc = ioat1_dma_get_next_descriptor(ioat); - do { - if (!desc) - break; - - tx_cnt++; - copy = min_t(size_t, len, ioat->xfercap); + struct ioat_dma_descriptor *hw = desc->hw; - hw = desc->hw; - hw->size = copy; - hw->ctl = 0; - hw->src_addr = src; - hw->dst_addr = dest; + if (hw->ctl_f.op == IOAT_OP_XOR || + hw->ctl_f.op == IOAT_OP_XOR_VAL) { + struct ioat_xor_descriptor *xor = desc->xor; - list_add_tail(&desc->node, &chain); + if (src_cnt_to_sw(xor->ctl_f.src_cnt) > 5) + return true; + } else if (hw->ctl_f.op == IOAT_OP_PQ || + hw->ctl_f.op == IOAT_OP_PQ_VAL) { + struct ioat_pq_descriptor *pq = desc->pq; - len -= copy; - dest += copy; - src += copy; - if (len) { - struct ioat_desc_sw *next; - - async_tx_ack(&desc->txd); - next = ioat1_dma_get_next_descriptor(ioat); - hw->next = next ? 
next->txd.phys : 0; - dump_desc_dbg(ioat, desc); - desc = next; - } else - hw->next = 0; - } while (len); - - if (!desc) { - struct ioat_chan_common *chan = &ioat->base; - - dev_err(to_dev(chan), - "chan%d - get_next_desc failed\n", chan_num(chan)); - list_splice(&chain, &ioat->free_desc); - spin_unlock_bh(&ioat->desc_lock); - return NULL; + if (src_cnt_to_sw(pq->ctl_f.src_cnt) > 3) + return true; } - spin_unlock_bh(&ioat->desc_lock); - desc->txd.flags = flags; - desc->len = total_len; - list_splice(&chain, &desc->tx_list); - hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); - hw->ctl_f.compl_write = 1; - hw->tx_cnt = tx_cnt; - dump_desc_dbg(ioat, desc); - - return &desc->txd; + return false; } -static void ioat1_cleanup_event(unsigned long data) +static void +ioat_free_sed(struct ioatdma_device *ioat_dma, struct ioat_sed_ent *sed) { - struct ioat_dma_chan *ioat = to_ioat_chan((void *) data); - struct ioat_chan_common *chan = &ioat->base; - - ioat1_cleanup(ioat); - if (!test_bit(IOAT_RUN, &chan->state)) + if (!sed) return; - writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); + + dma_pool_free(ioat_dma->sed_hw_pool[sed->hw_pool], sed->hw, sed->dma); + kmem_cache_free(ioat_sed_cache, sed); } -dma_addr_t ioat_get_current_completion(struct ioat_chan_common *chan) +static u64 ioat_get_current_completion(struct ioatdma_chan *ioat_chan) { - dma_addr_t phys_complete; + u64 phys_complete; u64 completion; - completion = *chan->completion; + completion = *ioat_chan->completion; phys_complete = ioat_chansts_to_addr(completion); - dev_dbg(to_dev(chan), "%s: phys_complete: %#llx\n", __func__, + dev_dbg(to_dev(ioat_chan), "%s: phys_complete: %#llx\n", __func__, (unsigned long long) phys_complete); - if (is_ioat_halted(completion)) { - u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - dev_err(to_dev(chan), "Channel halted, chanerr = %x\n", - chanerr); - - /* TODO do something to salvage the situation */ - } - return phys_complete; } -bool ioat_cleanup_preamble(struct ioat_chan_common *chan, - dma_addr_t *phys_complete) +static bool ioat_cleanup_preamble(struct ioatdma_chan *ioat_chan, + u64 *phys_complete) { - *phys_complete = ioat_get_current_completion(chan); - if (*phys_complete == chan->last_completion) + *phys_complete = ioat_get_current_completion(ioat_chan); + if (*phys_complete == ioat_chan->last_completion) return false; - clear_bit(IOAT_COMPLETION_ACK, &chan->state); - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + + clear_bit(IOAT_COMPLETION_ACK, &ioat_chan->state); + mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT); return true; } -static void __cleanup(struct ioat_dma_chan *ioat, dma_addr_t phys_complete) +static void +desc_get_errstat(struct ioatdma_chan *ioat_chan, struct ioat_ring_ent *desc) { - struct ioat_chan_common *chan = &ioat->base; - struct list_head *_desc, *n; - struct dma_async_tx_descriptor *tx; + struct ioat_dma_descriptor *hw = desc->hw; - dev_dbg(to_dev(chan), "%s: phys_complete: %llx\n", - __func__, (unsigned long long) phys_complete); - list_for_each_safe(_desc, n, &ioat->used_desc) { - struct ioat_desc_sw *desc; + switch (hw->ctl_f.op) { + case IOAT_OP_PQ_VAL: + case IOAT_OP_PQ_VAL_16S: + { + struct ioat_pq_descriptor *pq = desc->pq; - prefetch(n); - desc = list_entry(_desc, typeof(*desc), node); - tx = &desc->txd; - /* - * Incoming DMA requests may use multiple descriptors, - * due to exceeding xfercap, perhaps. If so, only the - * last one will have a cookie, and require unmapping. 
- */ - dump_desc_dbg(ioat, desc); - if (tx->cookie) { - dma_cookie_complete(tx); - dma_descriptor_unmap(tx); - ioat->active -= desc->hw->tx_cnt; - if (tx->callback) { - tx->callback(tx->callback_param); - tx->callback = NULL; - } - } + /* check if there's error written */ + if (!pq->dwbes_f.wbes) + return; - if (tx->phys != phys_complete) { - /* - * a completed entry, but not the last, so clean - * up if the client is done with the descriptor - */ - if (async_tx_test_ack(tx)) - list_move_tail(&desc->node, &ioat->free_desc); - } else { - /* - * last used desc. Do not remove, so we can - * append from it. - */ - - /* if nothing else is pending, cancel the - * completion timeout - */ - if (n == &ioat->used_desc) { - dev_dbg(to_dev(chan), - "%s cancel completion timeout\n", - __func__); - clear_bit(IOAT_COMPLETION_PENDING, &chan->state); - } + /* need to set a chanerr var for checking to clear later */ - /* TODO check status bits? */ - break; - } - } + if (pq->dwbes_f.p_val_err) + *desc->result |= SUM_CHECK_P_RESULT; + + if (pq->dwbes_f.q_val_err) + *desc->result |= SUM_CHECK_Q_RESULT; - chan->last_completion = phys_complete; + return; + } + default: + return; + } } /** - * ioat1_cleanup - cleanup up finished descriptors - * @chan: ioat channel to be cleaned up - * - * To prevent lock contention we defer cleanup when the locks are - * contended with a terminal timeout that forces cleanup and catches - * completion notification errors. + * __cleanup - reclaim used descriptors + * @ioat: channel (ring) to clean */ -static void ioat1_cleanup(struct ioat_dma_chan *ioat) +static void __cleanup(struct ioatdma_chan *ioat_chan, dma_addr_t phys_complete) { - struct ioat_chan_common *chan = &ioat->base; - dma_addr_t phys_complete; - - prefetch(chan->completion); - - if (!spin_trylock_bh(&chan->cleanup_lock)) - return; + struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; + struct ioat_ring_ent *desc; + bool seen_current = false; + int idx = ioat_chan->tail, i; + u16 active; - if (!ioat_cleanup_preamble(chan, &phys_complete)) { - spin_unlock_bh(&chan->cleanup_lock); - return; - } + dev_dbg(to_dev(ioat_chan), "%s: head: %#x tail: %#x issued: %#x\n", + __func__, ioat_chan->head, ioat_chan->tail, ioat_chan->issued); - if (!spin_trylock_bh(&ioat->desc_lock)) { - spin_unlock_bh(&chan->cleanup_lock); + /* + * At restart of the channel, the completion address and the + * channel status will be 0 due to starting a new chain. Since + * it's new chain and the first descriptor "fails", there is + * nothing to clean up. We do not want to reap the entire submitted + * chain due to this 0 address value and then BUG. 
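[Editor's aside, not part of the patch: the cleanup loop below and the check-space/reshape paths earlier in this hunk all lean on the circ_buf-style occupancy arithmetic that the reworked struct ioatdma_chan (head, issued, tail, alloc_order) and the ioat_ring_active()/ioat_ring_pending()/ioat_ring_space() helpers in the dma.h hunk further down provide. A minimal standalone sketch of that arithmetic follows; the numeric values are made up for illustration and the CIRC_CNT macro is reproduced from linux/circ_buf.h semantics rather than included from the kernel headers.]

#include <stdio.h>

/* circ_buf-style occupancy count, same shape as the ring helpers in this diff */
#define CIRC_CNT(head, tail, size)	(((head) - (tail)) & ((size) - 1))

int main(void)
{
	/* hypothetical snapshot of one channel's bookkeeping fields */
	unsigned short head = 37;	/* next slot to allocate */
	unsigned short issued = 35;	/* last slot handed to hardware */
	unsigned short tail = 30;	/* next slot to clean up */
	unsigned short alloc_order = 4;	/* ring holds 1 << 4 = 16 descriptors */
	unsigned int size = 1u << alloc_order;

	unsigned int active  = CIRC_CNT(head, tail, size);	/* in flight: 7 */
	unsigned int pending = CIRC_CNT(head, issued, size);	/* not yet issued: 2 */
	unsigned int space   = size - active;			/* free slots: 9 */

	printf("active=%u pending=%u space=%u\n", active, pending, space);
	return 0;
}

[The cleanup walk resumes from tail for at most 'active' entries, which is why a zeroed completion address, as the comment above explains, must bail out before touching the ring.]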
+ */ + if (!phys_complete) return; - } - __cleanup(ioat, phys_complete); + active = ioat_ring_active(ioat_chan); + for (i = 0; i < active && !seen_current; i++) { + struct dma_async_tx_descriptor *tx; - spin_unlock_bh(&ioat->desc_lock); - spin_unlock_bh(&chan->cleanup_lock); -} - -static void ioat1_timer_event(unsigned long data) -{ - struct ioat_dma_chan *ioat = to_ioat_chan((void *) data); - struct ioat_chan_common *chan = &ioat->base; + smp_read_barrier_depends(); + prefetch(ioat_get_ring_ent(ioat_chan, idx + i + 1)); + desc = ioat_get_ring_ent(ioat_chan, idx + i); + dump_desc_dbg(ioat_chan, desc); - dev_dbg(to_dev(chan), "%s: state: %lx\n", __func__, chan->state); + /* set err stat if we are using dwbes */ + if (ioat_dma->cap & IOAT_CAP_DWBES) + desc_get_errstat(ioat_chan, desc); - spin_lock_bh(&chan->cleanup_lock); - if (test_and_clear_bit(IOAT_RESET_PENDING, &chan->state)) { - struct ioat_desc_sw *desc; - - spin_lock_bh(&ioat->desc_lock); + tx = &desc->txd; + if (tx->cookie) { + dma_cookie_complete(tx); + dma_descriptor_unmap(tx); + if (tx->callback) { + tx->callback(tx->callback_param); + tx->callback = NULL; + } + } - /* restart active descriptors */ - desc = to_ioat_desc(ioat->used_desc.prev); - ioat_set_chainaddr(ioat, desc->txd.phys); - ioat_start(chan); + if (tx->phys == phys_complete) + seen_current = true; - ioat->pending = 0; - set_bit(IOAT_COMPLETION_PENDING, &chan->state); - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - spin_unlock_bh(&ioat->desc_lock); - } else if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) { - dma_addr_t phys_complete; + /* skip extended descriptors */ + if (desc_has_ext(desc)) { + BUG_ON(i + 1 >= active); + i++; + } - spin_lock_bh(&ioat->desc_lock); - /* if we haven't made progress and we have already - * acknowledged a pending completion once, then be more - * forceful with a restart - */ - if (ioat_cleanup_preamble(chan, &phys_complete)) - __cleanup(ioat, phys_complete); - else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) - ioat1_reset_channel(ioat); - else { - u64 status = ioat_chansts(chan); - - /* manually update the last completion address */ - if (ioat_chansts_to_addr(status) != 0) - *chan->completion = status; - - set_bit(IOAT_COMPLETION_ACK, &chan->state); - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + /* cleanup super extended descriptors */ + if (desc->sed) { + ioat_free_sed(ioat_dma, desc->sed); + desc->sed = NULL; } - spin_unlock_bh(&ioat->desc_lock); } - spin_unlock_bh(&chan->cleanup_lock); -} - -enum dma_status -ioat_dma_tx_status(struct dma_chan *c, dma_cookie_t cookie, - struct dma_tx_state *txstate) -{ - struct ioat_chan_common *chan = to_chan_common(c); - struct ioatdma_device *device = chan->device; - enum dma_status ret; - ret = dma_cookie_status(c, cookie, txstate); - if (ret == DMA_COMPLETE) - return ret; + /* finish all descriptor reads before incrementing tail */ + smp_mb(); + ioat_chan->tail = idx + i; + /* no active descs have written a completion? 
*/ + BUG_ON(active && !seen_current); + ioat_chan->last_completion = phys_complete; - device->cleanup_fn((unsigned long) c); + if (active - i == 0) { + dev_dbg(to_dev(ioat_chan), "%s: cancel completion timeout\n", + __func__); + mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT); + } - return dma_cookie_status(c, cookie, txstate); + /* 5 microsecond delay per pending descriptor */ + writew(min((5 * (active - i)), IOAT_INTRDELAY_MASK), + ioat_chan->ioat_dma->reg_base + IOAT_INTRDELAY_OFFSET); } -static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat) +static void ioat_cleanup(struct ioatdma_chan *ioat_chan) { - struct ioat_chan_common *chan = &ioat->base; - struct ioat_desc_sw *desc; - struct ioat_dma_descriptor *hw; + u64 phys_complete; - spin_lock_bh(&ioat->desc_lock); + spin_lock_bh(&ioat_chan->cleanup_lock); - desc = ioat1_dma_get_next_descriptor(ioat); + if (ioat_cleanup_preamble(ioat_chan, &phys_complete)) + __cleanup(ioat_chan, phys_complete); - if (!desc) { - dev_err(to_dev(chan), - "Unable to start null desc - get next desc failed\n"); - spin_unlock_bh(&ioat->desc_lock); - return; - } + if (is_ioat_halted(*ioat_chan->completion)) { + u32 chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); - hw = desc->hw; - hw->ctl = 0; - hw->ctl_f.null = 1; - hw->ctl_f.int_en = 1; - hw->ctl_f.compl_write = 1; - /* set size to non-zero value (channel returns error when size is 0) */ - hw->size = NULL_DESC_BUFFER_SIZE; - hw->src_addr = 0; - hw->dst_addr = 0; - async_tx_ack(&desc->txd); - hw->next = 0; - list_add_tail(&desc->node, &ioat->used_desc); - dump_desc_dbg(ioat, desc); + if (chanerr & IOAT_CHANERR_HANDLE_MASK) { + mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT); + ioat_eh(ioat_chan); + } + } - ioat_set_chainaddr(ioat, desc->txd.phys); - ioat_start(chan); - spin_unlock_bh(&ioat->desc_lock); + spin_unlock_bh(&ioat_chan->cleanup_lock); } -/* - * Perform a IOAT transaction to verify the HW works. - */ -#define IOAT_TEST_SIZE 2000 +void ioat_cleanup_event(unsigned long data) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan((void *)data); + + ioat_cleanup(ioat_chan); + if (!test_bit(IOAT_RUN, &ioat_chan->state)) + return; + writew(IOAT_CHANCTRL_RUN, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET); +} -static void ioat_dma_test_callback(void *dma_async_param) +static void ioat_restart_channel(struct ioatdma_chan *ioat_chan) { - struct completion *cmp = dma_async_param; + u64 phys_complete; + + ioat_quiesce(ioat_chan, 0); + if (ioat_cleanup_preamble(ioat_chan, &phys_complete)) + __cleanup(ioat_chan, phys_complete); - complete(cmp); + __ioat_restart_chan(ioat_chan); } -/** - * ioat_dma_self_test - Perform a IOAT transaction to verify the HW works. 
- * @device: device to be tested - */ -int ioat_dma_self_test(struct ioatdma_device *device) +static void ioat_eh(struct ioatdma_chan *ioat_chan) { - int i; - u8 *src; - u8 *dest; - struct dma_device *dma = &device->common; - struct device *dev = &device->pdev->dev; - struct dma_chan *dma_chan; + struct pci_dev *pdev = to_pdev(ioat_chan); + struct ioat_dma_descriptor *hw; struct dma_async_tx_descriptor *tx; - dma_addr_t dma_dest, dma_src; - dma_cookie_t cookie; - int err = 0; - struct completion cmp; - unsigned long tmo; - unsigned long flags; - - src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); - if (!src) - return -ENOMEM; - dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); - if (!dest) { - kfree(src); - return -ENOMEM; - } + u64 phys_complete; + struct ioat_ring_ent *desc; + u32 err_handled = 0; + u32 chanerr_int; + u32 chanerr; - /* Fill in src buffer */ - for (i = 0; i < IOAT_TEST_SIZE; i++) - src[i] = (u8)i; - - /* Start copy, using first DMA channel */ - dma_chan = container_of(dma->channels.next, struct dma_chan, - device_node); - if (dma->device_alloc_chan_resources(dma_chan) < 1) { - dev_err(dev, "selftest cannot allocate chan resource\n"); - err = -ENODEV; - goto out; - } + /* cleanup so tail points to descriptor that caused the error */ + if (ioat_cleanup_preamble(ioat_chan, &phys_complete)) + __cleanup(ioat_chan, phys_complete); - dma_src = dma_map_single(dev, src, IOAT_TEST_SIZE, DMA_TO_DEVICE); - if (dma_mapping_error(dev, dma_src)) { - dev_err(dev, "mapping src buffer failed\n"); - goto free_resources; - } - dma_dest = dma_map_single(dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE); - if (dma_mapping_error(dev, dma_dest)) { - dev_err(dev, "mapping dest buffer failed\n"); - goto unmap_src; - } - flags = DMA_PREP_INTERRUPT; - tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src, - IOAT_TEST_SIZE, flags); - if (!tx) { - dev_err(dev, "Self-test prep failed, disabling\n"); - err = -ENODEV; - goto unmap_dma; - } + chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + pci_read_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, &chanerr_int); - async_tx_ack(tx); - init_completion(&cmp); - tx->callback = ioat_dma_test_callback; - tx->callback_param = &cmp; - cookie = tx->tx_submit(tx); - if (cookie < 0) { - dev_err(dev, "Self-test setup failed, disabling\n"); - err = -ENODEV; - goto unmap_dma; - } - dma->device_issue_pending(dma_chan); + dev_dbg(to_dev(ioat_chan), "%s: error = %x:%x\n", + __func__, chanerr, chanerr_int); - tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + desc = ioat_get_ring_ent(ioat_chan, ioat_chan->tail); + hw = desc->hw; + dump_desc_dbg(ioat_chan, desc); - if (tmo == 0 || - dma->device_tx_status(dma_chan, cookie, NULL) - != DMA_COMPLETE) { - dev_err(dev, "Self-test copy timed out, disabling\n"); - err = -ENODEV; - goto unmap_dma; - } - if (memcmp(src, dest, IOAT_TEST_SIZE)) { - dev_err(dev, "Self-test copy failed compare, disabling\n"); - err = -ENODEV; - goto free_resources; + switch (hw->ctl_f.op) { + case IOAT_OP_XOR_VAL: + if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) { + *desc->result |= SUM_CHECK_P_RESULT; + err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR; + } + break; + case IOAT_OP_PQ_VAL: + case IOAT_OP_PQ_VAL_16S: + if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) { + *desc->result |= SUM_CHECK_P_RESULT; + err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR; + } + if (chanerr & IOAT_CHANERR_XOR_Q_ERR) { + *desc->result |= SUM_CHECK_Q_RESULT; + err_handled |= IOAT_CHANERR_XOR_Q_ERR; + } + break; } -unmap_dma: - 
dma_unmap_single(dev, dma_dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE); -unmap_src: - dma_unmap_single(dev, dma_src, IOAT_TEST_SIZE, DMA_TO_DEVICE); -free_resources: - dma->device_free_chan_resources(dma_chan); -out: - kfree(src); - kfree(dest); - return err; -} - -static char ioat_interrupt_style[32] = "msix"; -module_param_string(ioat_interrupt_style, ioat_interrupt_style, - sizeof(ioat_interrupt_style), 0644); -MODULE_PARM_DESC(ioat_interrupt_style, - "set ioat interrupt style: msix (default), msi, intx"); - -/** - * ioat_dma_setup_interrupts - setup interrupt handler - * @device: ioat device - */ -int ioat_dma_setup_interrupts(struct ioatdma_device *device) -{ - struct ioat_chan_common *chan; - struct pci_dev *pdev = device->pdev; - struct device *dev = &pdev->dev; - struct msix_entry *msix; - int i, j, msixcnt; - int err = -EINVAL; - u8 intrctrl = 0; - - if (!strcmp(ioat_interrupt_style, "msix")) - goto msix; - if (!strcmp(ioat_interrupt_style, "msi")) - goto msi; - if (!strcmp(ioat_interrupt_style, "intx")) - goto intx; - dev_err(dev, "invalid ioat_interrupt_style %s\n", ioat_interrupt_style); - goto err_no_irq; - -msix: - /* The number of MSI-X vectors should equal the number of channels */ - msixcnt = device->common.chancnt; - for (i = 0; i < msixcnt; i++) - device->msix_entries[i].entry = i; - - err = pci_enable_msix_exact(pdev, device->msix_entries, msixcnt); - if (err) - goto msi; - - for (i = 0; i < msixcnt; i++) { - msix = &device->msix_entries[i]; - chan = ioat_chan_by_index(device, i); - err = devm_request_irq(dev, msix->vector, - ioat_dma_do_interrupt_msix, 0, - "ioat-msix", chan); - if (err) { - for (j = 0; j < i; j++) { - msix = &device->msix_entries[j]; - chan = ioat_chan_by_index(device, j); - devm_free_irq(dev, msix->vector, chan); + /* fault on unhandled error or spurious halt */ + if (chanerr ^ err_handled || chanerr == 0) { + dev_err(to_dev(ioat_chan), "%s: fatal error (%x:%x)\n", + __func__, chanerr, err_handled); + BUG(); + } else { /* cleanup the faulty descriptor */ + tx = &desc->txd; + if (tx->cookie) { + dma_cookie_complete(tx); + dma_descriptor_unmap(tx); + if (tx->callback) { + tx->callback(tx->callback_param); + tx->callback = NULL; } - goto msi; } } - intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL; - device->irq_mode = IOAT_MSIX; - goto done; -msi: - err = pci_enable_msi(pdev); - if (err) - goto intx; + writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + pci_write_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, chanerr_int); - err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, 0, - "ioat-msi", device); - if (err) { - pci_disable_msi(pdev); - goto intx; - } - device->irq_mode = IOAT_MSI; - goto done; - -intx: - err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, - IRQF_SHARED, "ioat-intx", device); - if (err) - goto err_no_irq; - - device->irq_mode = IOAT_INTX; -done: - if (device->intr_quirk) - device->intr_quirk(device); - intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN; - writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET); - return 0; - -err_no_irq: - /* Disable all interrupt generation */ - writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET); - device->irq_mode = IOAT_NOIRQ; - dev_err(dev, "no usable interrupts\n"); - return err; -} -EXPORT_SYMBOL(ioat_dma_setup_interrupts); + /* mark faulting descriptor as complete */ + *ioat_chan->completion = desc->txd.phys; -static void ioat_disable_interrupts(struct ioatdma_device *device) -{ - /* Disable all interrupt generation */ - writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET); + 
spin_lock_bh(&ioat_chan->prep_lock); + ioat_restart_channel(ioat_chan); + spin_unlock_bh(&ioat_chan->prep_lock); } -int ioat_probe(struct ioatdma_device *device) +static void check_active(struct ioatdma_chan *ioat_chan) { - int err = -ENODEV; - struct dma_device *dma = &device->common; - struct pci_dev *pdev = device->pdev; - struct device *dev = &pdev->dev; - - /* DMA coherent memory pool for DMA descriptor allocations */ - device->dma_pool = pci_pool_create("dma_desc_pool", pdev, - sizeof(struct ioat_dma_descriptor), - 64, 0); - if (!device->dma_pool) { - err = -ENOMEM; - goto err_dma_pool; - } - - device->completion_pool = pci_pool_create("completion_pool", pdev, - sizeof(u64), SMP_CACHE_BYTES, - SMP_CACHE_BYTES); - - if (!device->completion_pool) { - err = -ENOMEM; - goto err_completion_pool; + if (ioat_ring_active(ioat_chan)) { + mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT); + return; } - device->enumerate_channels(device); - - dma_cap_set(DMA_MEMCPY, dma->cap_mask); - dma->dev = &pdev->dev; + if (test_and_clear_bit(IOAT_CHAN_ACTIVE, &ioat_chan->state)) + mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT); + else if (ioat_chan->alloc_order > ioat_get_alloc_order()) { + /* if the ring is idle, empty, and oversized try to step + * down the size + */ + reshape_ring(ioat_chan, ioat_chan->alloc_order - 1); - if (!dma->chancnt) { - dev_err(dev, "channel enumeration error\n"); - goto err_setup_interrupts; + /* keep shrinking until we get back to our minimum + * default size + */ + if (ioat_chan->alloc_order > ioat_get_alloc_order()) + mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT); } - err = ioat_dma_setup_interrupts(device); - if (err) - goto err_setup_interrupts; +} - err = device->self_test(device); - if (err) - goto err_self_test; +void ioat_timer_event(unsigned long data) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan((void *)data); + dma_addr_t phys_complete; + u64 status; - return 0; + status = ioat_chansts(ioat_chan); -err_self_test: - ioat_disable_interrupts(device); -err_setup_interrupts: - pci_pool_destroy(device->completion_pool); -err_completion_pool: - pci_pool_destroy(device->dma_pool); -err_dma_pool: - return err; -} + /* when halted due to errors check for channel + * programming errors before advancing the completion state + */ + if (is_ioat_halted(status)) { + u32 chanerr; -int ioat_register(struct ioatdma_device *device) -{ - int err = dma_async_device_register(&device->common); + chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + dev_err(to_dev(ioat_chan), "%s: Channel halted (%x)\n", + __func__, chanerr); + if (test_bit(IOAT_RUN, &ioat_chan->state)) + BUG_ON(is_ioat_bug(chanerr)); + else /* we never got off the ground */ + return; + } - if (err) { - ioat_disable_interrupts(device); - pci_pool_destroy(device->completion_pool); - pci_pool_destroy(device->dma_pool); + /* if we haven't made progress and we have already + * acknowledged a pending completion once, then be more + * forceful with a restart + */ + spin_lock_bh(&ioat_chan->cleanup_lock); + if (ioat_cleanup_preamble(ioat_chan, &phys_complete)) + __cleanup(ioat_chan, phys_complete); + else if (test_bit(IOAT_COMPLETION_ACK, &ioat_chan->state)) { + spin_lock_bh(&ioat_chan->prep_lock); + ioat_restart_channel(ioat_chan); + spin_unlock_bh(&ioat_chan->prep_lock); + spin_unlock_bh(&ioat_chan->cleanup_lock); + return; + } else { + set_bit(IOAT_COMPLETION_ACK, &ioat_chan->state); + mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT); } - return err; -} -/* ioat1_intr_quirk - fix 
up dma ctrl register to enable / disable msi */ -static void ioat1_intr_quirk(struct ioatdma_device *device) -{ - struct pci_dev *pdev = device->pdev; - u32 dmactrl; - - pci_read_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, &dmactrl); - if (pdev->msi_enabled) - dmactrl |= IOAT_PCI_DMACTRL_MSI_EN; - else - dmactrl &= ~IOAT_PCI_DMACTRL_MSI_EN; - pci_write_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, dmactrl); + if (ioat_ring_active(ioat_chan)) + mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT); + else { + spin_lock_bh(&ioat_chan->prep_lock); + check_active(ioat_chan); + spin_unlock_bh(&ioat_chan->prep_lock); + } + spin_unlock_bh(&ioat_chan->cleanup_lock); } -static ssize_t ring_size_show(struct dma_chan *c, char *page) +enum dma_status +ioat_tx_status(struct dma_chan *c, dma_cookie_t cookie, + struct dma_tx_state *txstate) { - struct ioat_dma_chan *ioat = to_ioat_chan(c); + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + enum dma_status ret; - return sprintf(page, "%d\n", ioat->desccount); -} -static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size); + ret = dma_cookie_status(c, cookie, txstate); + if (ret == DMA_COMPLETE) + return ret; -static ssize_t ring_active_show(struct dma_chan *c, char *page) -{ - struct ioat_dma_chan *ioat = to_ioat_chan(c); + ioat_cleanup(ioat_chan); - return sprintf(page, "%d\n", ioat->active); + return dma_cookie_status(c, cookie, txstate); } -static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active); -static ssize_t cap_show(struct dma_chan *c, char *page) +static int ioat_irq_reinit(struct ioatdma_device *ioat_dma) { - struct dma_device *dma = c->device; + struct pci_dev *pdev = ioat_dma->pdev; + int irq = pdev->irq, i; - return sprintf(page, "copy%s%s%s%s%s\n", - dma_has_cap(DMA_PQ, dma->cap_mask) ? " pq" : "", - dma_has_cap(DMA_PQ_VAL, dma->cap_mask) ? " pq_val" : "", - dma_has_cap(DMA_XOR, dma->cap_mask) ? " xor" : "", - dma_has_cap(DMA_XOR_VAL, dma->cap_mask) ? " xor_val" : "", - dma_has_cap(DMA_INTERRUPT, dma->cap_mask) ? 
" intr" : ""); + if (!is_bwd_ioat(pdev)) + return 0; -} -struct ioat_sysfs_entry ioat_cap_attr = __ATTR_RO(cap); - -static ssize_t version_show(struct dma_chan *c, char *page) -{ - struct dma_device *dma = c->device; - struct ioatdma_device *device = to_ioatdma_device(dma); + switch (ioat_dma->irq_mode) { + case IOAT_MSIX: + for (i = 0; i < ioat_dma->dma_dev.chancnt; i++) { + struct msix_entry *msix = &ioat_dma->msix_entries[i]; + struct ioatdma_chan *ioat_chan; - return sprintf(page, "%d.%d\n", - device->version >> 4, device->version & 0xf); -} -struct ioat_sysfs_entry ioat_version_attr = __ATTR_RO(version); - -static struct attribute *ioat1_attrs[] = { - &ring_size_attr.attr, - &ring_active_attr.attr, - &ioat_cap_attr.attr, - &ioat_version_attr.attr, - NULL, -}; - -static ssize_t -ioat_attr_show(struct kobject *kobj, struct attribute *attr, char *page) -{ - struct ioat_sysfs_entry *entry; - struct ioat_chan_common *chan; + ioat_chan = ioat_chan_by_index(ioat_dma, i); + devm_free_irq(&pdev->dev, msix->vector, ioat_chan); + } - entry = container_of(attr, struct ioat_sysfs_entry, attr); - chan = container_of(kobj, struct ioat_chan_common, kobj); + pci_disable_msix(pdev); + break; + case IOAT_MSI: + pci_disable_msi(pdev); + /* fall through */ + case IOAT_INTX: + devm_free_irq(&pdev->dev, irq, ioat_dma); + break; + default: + return 0; + } + ioat_dma->irq_mode = IOAT_NOIRQ; - if (!entry->show) - return -EIO; - return entry->show(&chan->common, page); + return ioat_dma_setup_interrupts(ioat_dma); } -const struct sysfs_ops ioat_sysfs_ops = { - .show = ioat_attr_show, -}; - -static struct kobj_type ioat1_ktype = { - .sysfs_ops = &ioat_sysfs_ops, - .default_attrs = ioat1_attrs, -}; - -void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type) +int ioat_reset_hw(struct ioatdma_chan *ioat_chan) { - struct dma_device *dma = &device->common; - struct dma_chan *c; + /* throw away whatever the channel was doing and get it + * initialized, with ioat3 specific workarounds + */ + struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; + struct pci_dev *pdev = ioat_dma->pdev; + u32 chanerr; + u16 dev_id; + int err; + + ioat_quiesce(ioat_chan, msecs_to_jiffies(100)); - list_for_each_entry(c, &dma->channels, device_node) { - struct ioat_chan_common *chan = to_chan_common(c); - struct kobject *parent = &c->dev->device.kobj; - int err; + chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET); - err = kobject_init_and_add(&chan->kobj, type, parent, "quickdata"); + if (ioat_dma->version < IOAT_VER_3_3) { + /* clear any pending errors */ + err = pci_read_config_dword(pdev, + IOAT_PCI_CHANERR_INT_OFFSET, &chanerr); if (err) { - dev_warn(to_dev(chan), - "sysfs init error (%d), continuing...\n", err); - kobject_put(&chan->kobj); - set_bit(IOAT_KOBJ_INIT_FAIL, &chan->state); + dev_err(&pdev->dev, + "channel error register unreachable\n"); + return err; } - } -} + pci_write_config_dword(pdev, + IOAT_PCI_CHANERR_INT_OFFSET, chanerr); -void ioat_kobject_del(struct ioatdma_device *device) -{ - struct dma_device *dma = &device->common; - struct dma_chan *c; - - list_for_each_entry(c, &dma->channels, device_node) { - struct ioat_chan_common *chan = to_chan_common(c); - - if (!test_bit(IOAT_KOBJ_INIT_FAIL, &chan->state)) { - kobject_del(&chan->kobj); - kobject_put(&chan->kobj); + /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit + * (workaround for spurious config parity error after restart) + */ + pci_read_config_word(pdev, 
IOAT_PCI_DEVICE_ID_OFFSET, &dev_id); + if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) { + pci_write_config_dword(pdev, + IOAT_PCI_DMAUNCERRSTS_OFFSET, + 0x10); } } -} -int ioat1_dma_probe(struct ioatdma_device *device, int dca) -{ - struct pci_dev *pdev = device->pdev; - struct dma_device *dma; - int err; + err = ioat_reset_sync(ioat_chan, msecs_to_jiffies(200)); + if (!err) + err = ioat_irq_reinit(ioat_dma); - device->intr_quirk = ioat1_intr_quirk; - device->enumerate_channels = ioat1_enumerate_channels; - device->self_test = ioat_dma_self_test; - device->timer_fn = ioat1_timer_event; - device->cleanup_fn = ioat1_cleanup_event; - dma = &device->common; - dma->device_prep_dma_memcpy = ioat1_dma_prep_memcpy; - dma->device_issue_pending = ioat1_dma_memcpy_issue_pending; - dma->device_alloc_chan_resources = ioat1_dma_alloc_chan_resources; - dma->device_free_chan_resources = ioat1_dma_free_chan_resources; - dma->device_tx_status = ioat_dma_tx_status; - - err = ioat_probe(device); - if (err) - return err; - err = ioat_register(device); if (err) - return err; - ioat_kobject_add(device, &ioat1_ktype); - - if (dca) - device->dca = ioat_dca_init(pdev, device->reg_base); + dev_err(&pdev->dev, "Failed to reset: %d\n", err); return err; } - -void ioat_dma_remove(struct ioatdma_device *device) -{ - struct dma_device *dma = &device->common; - - ioat_disable_interrupts(device); - - ioat_kobject_del(device); - - dma_async_device_unregister(dma); - - pci_pool_destroy(device->dma_pool); - pci_pool_destroy(device->completion_pool); - - INIT_LIST_HEAD(&dma->channels); -} diff --git a/kernel/drivers/dma/ioat/dma.h b/kernel/drivers/dma/ioat/dma.h index 30f5c7eed..8f4e607d5 100644 --- a/kernel/drivers/dma/ioat/dma.h +++ b/kernel/drivers/dma/ioat/dma.h @@ -18,26 +18,32 @@ #define IOATDMA_H #include <linux/dmaengine.h> -#include "hw.h" -#include "registers.h" #include <linux/init.h> #include <linux/dmapool.h> #include <linux/cache.h> #include <linux/pci_ids.h> -#include <net/tcp.h> +#include <linux/circ_buf.h> +#include <linux/interrupt.h> +#include "registers.h" +#include "hw.h" #define IOAT_DMA_VERSION "4.00" -#define IOAT_LOW_COMPLETION_MASK 0xffffffc0 #define IOAT_DMA_DCA_ANY_CPU ~0 -#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common) -#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node) -#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, txd) -#define to_dev(ioat_chan) (&(ioat_chan)->device->pdev->dev) -#define to_pdev(ioat_chan) ((ioat_chan)->device->pdev) +#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, dma_dev) +#define to_dev(ioat_chan) (&(ioat_chan)->ioat_dma->pdev->dev) +#define to_pdev(ioat_chan) ((ioat_chan)->ioat_dma->pdev) + +#define chan_num(ch) ((int)((ch)->reg_base - (ch)->ioat_dma->reg_base) / 0x80) -#define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80) +/* ioat hardware assumes at least two sources for raid operations */ +#define src_cnt_to_sw(x) ((x) + 2) +#define src_cnt_to_hw(x) ((x) - 2) +#define ndest_to_sw(x) ((x) + 1) +#define ndest_to_hw(x) ((x) - 1) +#define src16_cnt_to_sw(x) ((x) + 9) +#define src16_cnt_to_hw(x) ((x) - 9) /* * workaround for IOAT ver.3.0 null descriptor issue @@ -57,19 +63,15 @@ enum ioat_irq_mode { * @pdev: PCI-Express device * @reg_base: MMIO register space base address * @dma_pool: for allocating DMA descriptors - * @common: embedded struct dma_device + * @completion_pool: DMA buffers for completion ops + * @sed_hw_pool: DMA super descriptor pools + * @dma_dev: 
embedded struct dma_device * @version: version of ioatdma device * @msix_entries: irq handlers * @idx: per channel data * @dca: direct cache access context - * @intr_quirk: interrupt setup quirk (for ioat_v1 devices) - * @enumerate_channels: hw version specific channel enumeration - * @reset_hw: hw version specific channel (re)initialization - * @cleanup_fn: select between the v2 and v3 cleanup routines - * @timer_fn: select between the v2 and v3 timer watchdog routines - * @self_test: hardware version specific self test for each supported op type - * - * Note: the v3 cleanup routine supports raid operations + * @irq_mode: interrupt mode (INTX, MSI, MSIX) + * @cap: read DMA capabilities register */ struct ioatdma_device { struct pci_dev *pdev; @@ -78,28 +80,23 @@ struct ioatdma_device { struct pci_pool *completion_pool; #define MAX_SED_POOLS 5 struct dma_pool *sed_hw_pool[MAX_SED_POOLS]; - struct dma_device common; + struct dma_device dma_dev; u8 version; - struct msix_entry msix_entries[4]; - struct ioat_chan_common *idx[4]; +#define IOAT_MAX_CHANS 4 + struct msix_entry msix_entries[IOAT_MAX_CHANS]; + struct ioatdma_chan *idx[IOAT_MAX_CHANS]; struct dca_provider *dca; enum ioat_irq_mode irq_mode; u32 cap; - void (*intr_quirk)(struct ioatdma_device *device); - int (*enumerate_channels)(struct ioatdma_device *device); - int (*reset_hw)(struct ioat_chan_common *chan); - void (*cleanup_fn)(unsigned long data); - void (*timer_fn)(unsigned long data); - int (*self_test)(struct ioatdma_device *device); }; -struct ioat_chan_common { - struct dma_chan common; +struct ioatdma_chan { + struct dma_chan dma_chan; void __iomem *reg_base; dma_addr_t last_completion; spinlock_t cleanup_lock; unsigned long state; - #define IOAT_COMPLETION_PENDING 0 + #define IOAT_CHAN_DOWN 0 #define IOAT_COMPLETION_ACK 1 #define IOAT_RESET_PENDING 2 #define IOAT_KOBJ_INIT_FAIL 3 @@ -110,11 +107,32 @@ struct ioat_chan_common { #define COMPLETION_TIMEOUT msecs_to_jiffies(100) #define IDLE_TIMEOUT msecs_to_jiffies(2000) #define RESET_DELAY msecs_to_jiffies(100) - struct ioatdma_device *device; + struct ioatdma_device *ioat_dma; dma_addr_t completion_dma; u64 *completion; struct tasklet_struct cleanup_task; struct kobject kobj; + +/* ioat v2 / v3 channel attributes + * @xfercap_log; log2 of channel max transfer length (for fast division) + * @head: allocated index + * @issued: hardware notification point + * @tail: cleanup index + * @dmacount: identical to 'head' except for occasionally resetting to zero + * @alloc_order: log2 of the number of allocated descriptors + * @produce: number of descriptors to produce at submit time + * @ring: software ring buffer implementation of hardware ring + * @prep_lock: serializes descriptor preparation (producers) + */ + size_t xfercap_log; + u16 head; + u16 issued; + u16 tail; + u16 dmacount; + u16 alloc_order; + u16 produce; + struct ioat_ring_ent **ring; + spinlock_t prep_lock; }; struct ioat_sysfs_entry { @@ -123,28 +141,11 @@ struct ioat_sysfs_entry { }; /** - * struct ioat_dma_chan - internal representation of a DMA channel - */ -struct ioat_dma_chan { - struct ioat_chan_common base; - - size_t xfercap; /* XFERCAP register value expanded out */ - - spinlock_t desc_lock; - struct list_head free_desc; - struct list_head used_desc; - - int pending; - u16 desccount; - u16 active; -}; - -/** * struct ioat_sed_ent - wrapper around super extended hardware descriptor * @hw: hardware SED - * @sed_dma: dma address for the SED - * @list: list member + * @dma: dma address for the SED * @parent: 
point to the dma descriptor that's the parent + * @hw_pool: descriptor pool index */ struct ioat_sed_ent { struct ioat_sed_raw_descriptor *hw; @@ -153,39 +154,57 @@ struct ioat_sed_ent { unsigned int hw_pool; }; -static inline struct ioat_chan_common *to_chan_common(struct dma_chan *c) -{ - return container_of(c, struct ioat_chan_common, common); -} - -static inline struct ioat_dma_chan *to_ioat_chan(struct dma_chan *c) -{ - struct ioat_chan_common *chan = to_chan_common(c); - - return container_of(chan, struct ioat_dma_chan, base); -} - -/* wrapper around hardware descriptor format + additional software fields */ - /** - * struct ioat_desc_sw - wrapper around hardware descriptor + * struct ioat_ring_ent - wrapper around hardware descriptor * @hw: hardware DMA descriptor (for memcpy) - * @node: this descriptor will either be on the free list, - * or attached to a transaction list (tx_list) + * @xor: hardware xor descriptor + * @xor_ex: hardware xor extension descriptor + * @pq: hardware pq descriptor + * @pq_ex: hardware pq extension descriptor + * @pqu: hardware pq update descriptor + * @raw: hardware raw (un-typed) descriptor * @txd: the generic software descriptor for all engines + * @len: total transaction length for unmap + * @result: asynchronous result of validate operations * @id: identifier for debug + * @sed: pointer to super extended descriptor sw desc */ -struct ioat_desc_sw { - struct ioat_dma_descriptor *hw; - struct list_head node; + +struct ioat_ring_ent { + union { + struct ioat_dma_descriptor *hw; + struct ioat_xor_descriptor *xor; + struct ioat_xor_ext_descriptor *xor_ex; + struct ioat_pq_descriptor *pq; + struct ioat_pq_ext_descriptor *pq_ex; + struct ioat_pq_update_descriptor *pqu; + struct ioat_raw_descriptor *raw; + }; size_t len; - struct list_head tx_list; struct dma_async_tx_descriptor txd; + enum sum_check_flags *result; #ifdef DEBUG int id; #endif + struct ioat_sed_ent *sed; }; +extern const struct sysfs_ops ioat_sysfs_ops; +extern struct ioat_sysfs_entry ioat_version_attr; +extern struct ioat_sysfs_entry ioat_cap_attr; +extern int ioat_pending_level; +extern int ioat_ring_alloc_order; +extern struct kobj_type ioat_ktype; +extern struct kmem_cache *ioat_cache; +extern int ioat_ring_max_alloc_order; +extern struct kmem_cache *ioat_sed_cache; + +static inline struct ioatdma_chan *to_ioat_chan(struct dma_chan *c) +{ + return container_of(c, struct ioatdma_chan, dma_chan); +} + +/* wrapper around hardware descriptor format + additional software fields */ #ifdef DEBUG #define set_desc_id(desc, i) ((desc)->id = (i)) #define desc_id(desc) ((desc)->id) @@ -195,10 +214,10 @@ struct ioat_desc_sw { #endif static inline void -__dump_desc_dbg(struct ioat_chan_common *chan, struct ioat_dma_descriptor *hw, +__dump_desc_dbg(struct ioatdma_chan *ioat_chan, struct ioat_dma_descriptor *hw, struct dma_async_tx_descriptor *tx, int id) { - struct device *dev = to_dev(chan); + struct device *dev = to_dev(ioat_chan); dev_dbg(dev, "desc[%d]: (%#llx->%#llx) cookie: %d flags: %#x" " ctl: %#10.8x (op: %#x int_en: %d compl: %d)\n", id, @@ -208,25 +227,25 @@ __dump_desc_dbg(struct ioat_chan_common *chan, struct ioat_dma_descriptor *hw, } #define dump_desc_dbg(c, d) \ - ({ if (d) __dump_desc_dbg(&c->base, d->hw, &d->txd, desc_id(d)); 0; }) + ({ if (d) __dump_desc_dbg(c, d->hw, &d->txd, desc_id(d)); 0; }) -static inline struct ioat_chan_common * -ioat_chan_by_index(struct ioatdma_device *device, int index) +static inline struct ioatdma_chan * +ioat_chan_by_index(struct ioatdma_device 
*ioat_dma, int index) { - return device->idx[index]; + return ioat_dma->idx[index]; } -static inline u64 ioat_chansts_32(struct ioat_chan_common *chan) +static inline u64 ioat_chansts_32(struct ioatdma_chan *ioat_chan) { - u8 ver = chan->device->version; + u8 ver = ioat_chan->ioat_dma->version; u64 status; u32 status_lo; /* We need to read the low address first as this causes the * chipset to latch the upper bits for the subsequent read */ - status_lo = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_LOW(ver)); - status = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_HIGH(ver)); + status_lo = readl(ioat_chan->reg_base + IOAT_CHANSTS_OFFSET_LOW(ver)); + status = readl(ioat_chan->reg_base + IOAT_CHANSTS_OFFSET_HIGH(ver)); status <<= 32; status |= status_lo; @@ -235,16 +254,16 @@ static inline u64 ioat_chansts_32(struct ioat_chan_common *chan) #if BITS_PER_LONG == 64 -static inline u64 ioat_chansts(struct ioat_chan_common *chan) +static inline u64 ioat_chansts(struct ioatdma_chan *ioat_chan) { - u8 ver = chan->device->version; + u8 ver = ioat_chan->ioat_dma->version; u64 status; /* With IOAT v3.3 the status register is 64bit. */ if (ver >= IOAT_VER_3_3) - status = readq(chan->reg_base + IOAT_CHANSTS_OFFSET(ver)); + status = readq(ioat_chan->reg_base + IOAT_CHANSTS_OFFSET(ver)); else - status = ioat_chansts_32(chan); + status = ioat_chansts_32(ioat_chan); return status; } @@ -253,56 +272,41 @@ static inline u64 ioat_chansts(struct ioat_chan_common *chan) #define ioat_chansts ioat_chansts_32 #endif -static inline void ioat_start(struct ioat_chan_common *chan) -{ - u8 ver = chan->device->version; - - writeb(IOAT_CHANCMD_START, chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); -} - static inline u64 ioat_chansts_to_addr(u64 status) { return status & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; } -static inline u32 ioat_chanerr(struct ioat_chan_common *chan) +static inline u32 ioat_chanerr(struct ioatdma_chan *ioat_chan) { - return readl(chan->reg_base + IOAT_CHANERR_OFFSET); + return readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); } -static inline void ioat_suspend(struct ioat_chan_common *chan) +static inline void ioat_suspend(struct ioatdma_chan *ioat_chan) { - u8 ver = chan->device->version; + u8 ver = ioat_chan->ioat_dma->version; - writeb(IOAT_CHANCMD_SUSPEND, chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); + writeb(IOAT_CHANCMD_SUSPEND, + ioat_chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); } -static inline void ioat_reset(struct ioat_chan_common *chan) +static inline void ioat_reset(struct ioatdma_chan *ioat_chan) { - u8 ver = chan->device->version; + u8 ver = ioat_chan->ioat_dma->version; - writeb(IOAT_CHANCMD_RESET, chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); + writeb(IOAT_CHANCMD_RESET, + ioat_chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); } -static inline bool ioat_reset_pending(struct ioat_chan_common *chan) +static inline bool ioat_reset_pending(struct ioatdma_chan *ioat_chan) { - u8 ver = chan->device->version; + u8 ver = ioat_chan->ioat_dma->version; u8 cmd; - cmd = readb(chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); + cmd = readb(ioat_chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); return (cmd & IOAT_CHANCMD_RESET) == IOAT_CHANCMD_RESET; } -static inline void ioat_set_chainaddr(struct ioat_dma_chan *ioat, u64 addr) -{ - struct ioat_chan_common *chan = &ioat->base; - - writel(addr & 0x00000000FFFFFFFF, - chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW); - writel(addr >> 32, - chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH); -} - static inline bool is_ioat_active(unsigned long status) { return ((status & 
IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_ACTIVE); @@ -329,24 +333,111 @@ static inline bool is_ioat_bug(unsigned long err) return !!err; } -int ioat_probe(struct ioatdma_device *device); -int ioat_register(struct ioatdma_device *device); -int ioat1_dma_probe(struct ioatdma_device *dev, int dca); -int ioat_dma_self_test(struct ioatdma_device *device); -void ioat_dma_remove(struct ioatdma_device *device); +#define IOAT_MAX_ORDER 16 +#define ioat_get_alloc_order() \ + (min(ioat_ring_alloc_order, IOAT_MAX_ORDER)) +#define ioat_get_max_alloc_order() \ + (min(ioat_ring_max_alloc_order, IOAT_MAX_ORDER)) + +static inline u32 ioat_ring_size(struct ioatdma_chan *ioat_chan) +{ + return 1 << ioat_chan->alloc_order; +} + +/* count of descriptors in flight with the engine */ +static inline u16 ioat_ring_active(struct ioatdma_chan *ioat_chan) +{ + return CIRC_CNT(ioat_chan->head, ioat_chan->tail, + ioat_ring_size(ioat_chan)); +} + +/* count of descriptors pending submission to hardware */ +static inline u16 ioat_ring_pending(struct ioatdma_chan *ioat_chan) +{ + return CIRC_CNT(ioat_chan->head, ioat_chan->issued, + ioat_ring_size(ioat_chan)); +} + +static inline u32 ioat_ring_space(struct ioatdma_chan *ioat_chan) +{ + return ioat_ring_size(ioat_chan) - ioat_ring_active(ioat_chan); +} + +static inline u16 +ioat_xferlen_to_descs(struct ioatdma_chan *ioat_chan, size_t len) +{ + u16 num_descs = len >> ioat_chan->xfercap_log; + + num_descs += !!(len & ((1 << ioat_chan->xfercap_log) - 1)); + return num_descs; +} + +static inline struct ioat_ring_ent * +ioat_get_ring_ent(struct ioatdma_chan *ioat_chan, u16 idx) +{ + return ioat_chan->ring[idx & (ioat_ring_size(ioat_chan) - 1)]; +} + +static inline void +ioat_set_chainaddr(struct ioatdma_chan *ioat_chan, u64 addr) +{ + writel(addr & 0x00000000FFFFFFFF, + ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); + writel(addr >> 32, + ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); +} + +/* IOAT Prep functions */ +struct dma_async_tx_descriptor * +ioat_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, + dma_addr_t dma_src, size_t len, unsigned long flags); +struct dma_async_tx_descriptor * +ioat_prep_interrupt_lock(struct dma_chan *c, unsigned long flags); +struct dma_async_tx_descriptor * +ioat_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, + unsigned int src_cnt, size_t len, unsigned long flags); +struct dma_async_tx_descriptor * +ioat_prep_xor_val(struct dma_chan *chan, dma_addr_t *src, + unsigned int src_cnt, size_t len, + enum sum_check_flags *result, unsigned long flags); +struct dma_async_tx_descriptor * +ioat_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + unsigned long flags); +struct dma_async_tx_descriptor * +ioat_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + enum sum_check_flags *pqres, unsigned long flags); +struct dma_async_tx_descriptor * +ioat_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src, + unsigned int src_cnt, size_t len, unsigned long flags); +struct dma_async_tx_descriptor * +ioat_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src, + unsigned int src_cnt, size_t len, + enum sum_check_flags *result, unsigned long flags); + +/* IOAT Operation functions */ +irqreturn_t ioat_dma_do_interrupt(int irq, void *data); +irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data); +struct ioat_ring_ent ** +ioat_alloc_ring(struct dma_chan *c, 
int order, gfp_t flags); +void ioat_start_null_desc(struct ioatdma_chan *ioat_chan); +void ioat_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan); +int ioat_reset_hw(struct ioatdma_chan *ioat_chan); +enum dma_status +ioat_tx_status(struct dma_chan *c, dma_cookie_t cookie, + struct dma_tx_state *txstate); +void ioat_cleanup_event(unsigned long data); +void ioat_timer_event(unsigned long data); +int ioat_check_space_lock(struct ioatdma_chan *ioat_chan, int num_descs); +void ioat_issue_pending(struct dma_chan *chan); +void ioat_timer_event(unsigned long data); + +/* IOAT Init functions */ +bool is_bwd_ioat(struct pci_dev *pdev); struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase); -dma_addr_t ioat_get_current_completion(struct ioat_chan_common *chan); -void ioat_init_channel(struct ioatdma_device *device, - struct ioat_chan_common *chan, int idx); -enum dma_status ioat_dma_tx_status(struct dma_chan *c, dma_cookie_t cookie, - struct dma_tx_state *txstate); -bool ioat_cleanup_preamble(struct ioat_chan_common *chan, - dma_addr_t *phys_complete); -void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type); -void ioat_kobject_del(struct ioatdma_device *device); -int ioat_dma_setup_interrupts(struct ioatdma_device *device); -void ioat_stop(struct ioat_chan_common *chan); -extern const struct sysfs_ops ioat_sysfs_ops; -extern struct ioat_sysfs_entry ioat_version_attr; -extern struct ioat_sysfs_entry ioat_cap_attr; +void ioat_kobject_add(struct ioatdma_device *ioat_dma, struct kobj_type *type); +void ioat_kobject_del(struct ioatdma_device *ioat_dma); +int ioat_dma_setup_interrupts(struct ioatdma_device *ioat_dma); +void ioat_stop(struct ioatdma_chan *ioat_chan); #endif /* IOATDMA_H */ diff --git a/kernel/drivers/dma/ioat/dma_v2.c b/kernel/drivers/dma/ioat/dma_v2.c deleted file mode 100644 index 69c7dfcad..000000000 --- a/kernel/drivers/dma/ioat/dma_v2.c +++ /dev/null @@ -1,916 +0,0 @@ -/* - * Intel I/OAT DMA Linux driver - * Copyright(c) 2004 - 2009 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - */ - -/* - * This driver supports an Intel I/OAT DMA engine (versions >= 2), which - * does asynchronous data movement and checksumming operations. 
- */ - -#include <linux/init.h> -#include <linux/module.h> -#include <linux/slab.h> -#include <linux/pci.h> -#include <linux/interrupt.h> -#include <linux/dmaengine.h> -#include <linux/delay.h> -#include <linux/dma-mapping.h> -#include <linux/workqueue.h> -#include <linux/prefetch.h> -#include <linux/i7300_idle.h> -#include "dma.h" -#include "dma_v2.h" -#include "registers.h" -#include "hw.h" - -#include "../dmaengine.h" - -int ioat_ring_alloc_order = 8; -module_param(ioat_ring_alloc_order, int, 0644); -MODULE_PARM_DESC(ioat_ring_alloc_order, - "ioat2+: allocate 2^n descriptors per channel" - " (default: 8 max: 16)"); -static int ioat_ring_max_alloc_order = IOAT_MAX_ORDER; -module_param(ioat_ring_max_alloc_order, int, 0644); -MODULE_PARM_DESC(ioat_ring_max_alloc_order, - "ioat2+: upper limit for ring size (default: 16)"); - -void __ioat2_issue_pending(struct ioat2_dma_chan *ioat) -{ - struct ioat_chan_common *chan = &ioat->base; - - ioat->dmacount += ioat2_ring_pending(ioat); - ioat->issued = ioat->head; - writew(ioat->dmacount, chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET); - dev_dbg(to_dev(chan), - "%s: head: %#x tail: %#x issued: %#x count: %#x\n", - __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount); -} - -void ioat2_issue_pending(struct dma_chan *c) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - - if (ioat2_ring_pending(ioat)) { - spin_lock_bh(&ioat->prep_lock); - __ioat2_issue_pending(ioat); - spin_unlock_bh(&ioat->prep_lock); - } -} - -/** - * ioat2_update_pending - log pending descriptors - * @ioat: ioat2+ channel - * - * Check if the number of unsubmitted descriptors has exceeded the - * watermark. Called with prep_lock held - */ -static void ioat2_update_pending(struct ioat2_dma_chan *ioat) -{ - if (ioat2_ring_pending(ioat) > ioat_pending_level) - __ioat2_issue_pending(ioat); -} - -static void __ioat2_start_null_desc(struct ioat2_dma_chan *ioat) -{ - struct ioat_ring_ent *desc; - struct ioat_dma_descriptor *hw; - - if (ioat2_ring_space(ioat) < 1) { - dev_err(to_dev(&ioat->base), - "Unable to start null desc - ring full\n"); - return; - } - - dev_dbg(to_dev(&ioat->base), "%s: head: %#x tail: %#x issued: %#x\n", - __func__, ioat->head, ioat->tail, ioat->issued); - desc = ioat2_get_ring_ent(ioat, ioat->head); - - hw = desc->hw; - hw->ctl = 0; - hw->ctl_f.null = 1; - hw->ctl_f.int_en = 1; - hw->ctl_f.compl_write = 1; - /* set size to non-zero value (channel returns error when size is 0) */ - hw->size = NULL_DESC_BUFFER_SIZE; - hw->src_addr = 0; - hw->dst_addr = 0; - async_tx_ack(&desc->txd); - ioat2_set_chainaddr(ioat, desc->txd.phys); - dump_desc_dbg(ioat, desc); - wmb(); - ioat->head += 1; - __ioat2_issue_pending(ioat); -} - -static void ioat2_start_null_desc(struct ioat2_dma_chan *ioat) -{ - spin_lock_bh(&ioat->prep_lock); - __ioat2_start_null_desc(ioat); - spin_unlock_bh(&ioat->prep_lock); -} - -static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete) -{ - struct ioat_chan_common *chan = &ioat->base; - struct dma_async_tx_descriptor *tx; - struct ioat_ring_ent *desc; - bool seen_current = false; - u16 active; - int idx = ioat->tail, i; - - dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n", - __func__, ioat->head, ioat->tail, ioat->issued); - - active = ioat2_ring_active(ioat); - for (i = 0; i < active && !seen_current; i++) { - smp_read_barrier_depends(); - prefetch(ioat2_get_ring_ent(ioat, idx + i + 1)); - desc = ioat2_get_ring_ent(ioat, idx + i); - tx = &desc->txd; - dump_desc_dbg(ioat, desc); - if (tx->cookie) { - 
dma_descriptor_unmap(tx); - dma_cookie_complete(tx); - if (tx->callback) { - tx->callback(tx->callback_param); - tx->callback = NULL; - } - } - - if (tx->phys == phys_complete) - seen_current = true; - } - smp_mb(); /* finish all descriptor reads before incrementing tail */ - ioat->tail = idx + i; - BUG_ON(active && !seen_current); /* no active descs have written a completion? */ - - chan->last_completion = phys_complete; - if (active - i == 0) { - dev_dbg(to_dev(chan), "%s: cancel completion timeout\n", - __func__); - clear_bit(IOAT_COMPLETION_PENDING, &chan->state); - mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); - } -} - -/** - * ioat2_cleanup - clean finished descriptors (advance tail pointer) - * @chan: ioat channel to be cleaned up - */ -static void ioat2_cleanup(struct ioat2_dma_chan *ioat) -{ - struct ioat_chan_common *chan = &ioat->base; - dma_addr_t phys_complete; - - spin_lock_bh(&chan->cleanup_lock); - if (ioat_cleanup_preamble(chan, &phys_complete)) - __cleanup(ioat, phys_complete); - spin_unlock_bh(&chan->cleanup_lock); -} - -void ioat2_cleanup_event(unsigned long data) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); - struct ioat_chan_common *chan = &ioat->base; - - ioat2_cleanup(ioat); - if (!test_bit(IOAT_RUN, &chan->state)) - return; - writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); -} - -void __ioat2_restart_chan(struct ioat2_dma_chan *ioat) -{ - struct ioat_chan_common *chan = &ioat->base; - - /* set the tail to be re-issued */ - ioat->issued = ioat->tail; - ioat->dmacount = 0; - set_bit(IOAT_COMPLETION_PENDING, &chan->state); - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - - dev_dbg(to_dev(chan), - "%s: head: %#x tail: %#x issued: %#x count: %#x\n", - __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount); - - if (ioat2_ring_pending(ioat)) { - struct ioat_ring_ent *desc; - - desc = ioat2_get_ring_ent(ioat, ioat->tail); - ioat2_set_chainaddr(ioat, desc->txd.phys); - __ioat2_issue_pending(ioat); - } else - __ioat2_start_null_desc(ioat); -} - -int ioat2_quiesce(struct ioat_chan_common *chan, unsigned long tmo) -{ - unsigned long end = jiffies + tmo; - int err = 0; - u32 status; - - status = ioat_chansts(chan); - if (is_ioat_active(status) || is_ioat_idle(status)) - ioat_suspend(chan); - while (is_ioat_active(status) || is_ioat_idle(status)) { - if (tmo && time_after(jiffies, end)) { - err = -ETIMEDOUT; - break; - } - status = ioat_chansts(chan); - cpu_relax(); - } - - return err; -} - -int ioat2_reset_sync(struct ioat_chan_common *chan, unsigned long tmo) -{ - unsigned long end = jiffies + tmo; - int err = 0; - - ioat_reset(chan); - while (ioat_reset_pending(chan)) { - if (end && time_after(jiffies, end)) { - err = -ETIMEDOUT; - break; - } - cpu_relax(); - } - - return err; -} - -static void ioat2_restart_channel(struct ioat2_dma_chan *ioat) -{ - struct ioat_chan_common *chan = &ioat->base; - dma_addr_t phys_complete; - - ioat2_quiesce(chan, 0); - if (ioat_cleanup_preamble(chan, &phys_complete)) - __cleanup(ioat, phys_complete); - - __ioat2_restart_chan(ioat); -} - -static void check_active(struct ioat2_dma_chan *ioat) -{ - struct ioat_chan_common *chan = &ioat->base; - - if (ioat2_ring_active(ioat)) { - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - return; - } - - if (test_and_clear_bit(IOAT_CHAN_ACTIVE, &chan->state)) - mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); - else if (ioat->alloc_order > ioat_get_alloc_order()) { - /* if the ring is idle, empty, and oversized try to step - * down the 
size - */ - reshape_ring(ioat, ioat->alloc_order - 1); - - /* keep shrinking until we get back to our minimum - * default size - */ - if (ioat->alloc_order > ioat_get_alloc_order()) - mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); - } - -} - -void ioat2_timer_event(unsigned long data) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); - struct ioat_chan_common *chan = &ioat->base; - dma_addr_t phys_complete; - u64 status; - - status = ioat_chansts(chan); - - /* when halted due to errors check for channel - * programming errors before advancing the completion state - */ - if (is_ioat_halted(status)) { - u32 chanerr; - - chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - dev_err(to_dev(chan), "%s: Channel halted (%x)\n", - __func__, chanerr); - if (test_bit(IOAT_RUN, &chan->state)) - BUG_ON(is_ioat_bug(chanerr)); - else /* we never got off the ground */ - return; - } - - /* if we haven't made progress and we have already - * acknowledged a pending completion once, then be more - * forceful with a restart - */ - spin_lock_bh(&chan->cleanup_lock); - if (ioat_cleanup_preamble(chan, &phys_complete)) - __cleanup(ioat, phys_complete); - else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) { - spin_lock_bh(&ioat->prep_lock); - ioat2_restart_channel(ioat); - spin_unlock_bh(&ioat->prep_lock); - spin_unlock_bh(&chan->cleanup_lock); - return; - } else { - set_bit(IOAT_COMPLETION_ACK, &chan->state); - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - } - - - if (ioat2_ring_active(ioat)) - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - else { - spin_lock_bh(&ioat->prep_lock); - check_active(ioat); - spin_unlock_bh(&ioat->prep_lock); - } - spin_unlock_bh(&chan->cleanup_lock); -} - -static int ioat2_reset_hw(struct ioat_chan_common *chan) -{ - /* throw away whatever the channel was doing and get it initialized */ - u32 chanerr; - - ioat2_quiesce(chan, msecs_to_jiffies(100)); - - chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); - - return ioat2_reset_sync(chan, msecs_to_jiffies(200)); -} - -/** - * ioat2_enumerate_channels - find and initialize the device's channels - * @device: the device to be enumerated - */ -int ioat2_enumerate_channels(struct ioatdma_device *device) -{ - struct ioat2_dma_chan *ioat; - struct device *dev = &device->pdev->dev; - struct dma_device *dma = &device->common; - u8 xfercap_log; - int i; - - INIT_LIST_HEAD(&dma->channels); - dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET); - dma->chancnt &= 0x1f; /* bits [4:0] valid */ - if (dma->chancnt > ARRAY_SIZE(device->idx)) { - dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n", - dma->chancnt, ARRAY_SIZE(device->idx)); - dma->chancnt = ARRAY_SIZE(device->idx); - } - xfercap_log = readb(device->reg_base + IOAT_XFERCAP_OFFSET); - xfercap_log &= 0x1f; /* bits [4:0] valid */ - if (xfercap_log == 0) - return 0; - dev_dbg(dev, "%s: xfercap = %d\n", __func__, 1 << xfercap_log); - - /* FIXME which i/oat version is i7300? 
*/ -#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL - if (i7300_idle_platform_probe(NULL, NULL, 1) == 0) - dma->chancnt--; -#endif - for (i = 0; i < dma->chancnt; i++) { - ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL); - if (!ioat) - break; - - ioat_init_channel(device, &ioat->base, i); - ioat->xfercap_log = xfercap_log; - spin_lock_init(&ioat->prep_lock); - if (device->reset_hw(&ioat->base)) { - i = 0; - break; - } - } - dma->chancnt = i; - return i; -} - -static dma_cookie_t ioat2_tx_submit_unlock(struct dma_async_tx_descriptor *tx) -{ - struct dma_chan *c = tx->chan; - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - struct ioat_chan_common *chan = &ioat->base; - dma_cookie_t cookie; - - cookie = dma_cookie_assign(tx); - dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie); - - if (!test_and_set_bit(IOAT_CHAN_ACTIVE, &chan->state)) - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - - /* make descriptor updates visible before advancing ioat->head, - * this is purposefully not smp_wmb() since we are also - * publishing the descriptor updates to a dma device - */ - wmb(); - - ioat->head += ioat->produce; - - ioat2_update_pending(ioat); - spin_unlock_bh(&ioat->prep_lock); - - return cookie; -} - -static struct ioat_ring_ent *ioat2_alloc_ring_ent(struct dma_chan *chan, gfp_t flags) -{ - struct ioat_dma_descriptor *hw; - struct ioat_ring_ent *desc; - struct ioatdma_device *dma; - dma_addr_t phys; - - dma = to_ioatdma_device(chan->device); - hw = pci_pool_alloc(dma->dma_pool, flags, &phys); - if (!hw) - return NULL; - memset(hw, 0, sizeof(*hw)); - - desc = kmem_cache_zalloc(ioat2_cache, flags); - if (!desc) { - pci_pool_free(dma->dma_pool, hw, phys); - return NULL; - } - - dma_async_tx_descriptor_init(&desc->txd, chan); - desc->txd.tx_submit = ioat2_tx_submit_unlock; - desc->hw = hw; - desc->txd.phys = phys; - return desc; -} - -static void ioat2_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan) -{ - struct ioatdma_device *dma; - - dma = to_ioatdma_device(chan->device); - pci_pool_free(dma->dma_pool, desc->hw, desc->txd.phys); - kmem_cache_free(ioat2_cache, desc); -} - -static struct ioat_ring_ent **ioat2_alloc_ring(struct dma_chan *c, int order, gfp_t flags) -{ - struct ioat_ring_ent **ring; - int descs = 1 << order; - int i; - - if (order > ioat_get_max_alloc_order()) - return NULL; - - /* allocate the array to hold the software ring */ - ring = kcalloc(descs, sizeof(*ring), flags); - if (!ring) - return NULL; - for (i = 0; i < descs; i++) { - ring[i] = ioat2_alloc_ring_ent(c, flags); - if (!ring[i]) { - while (i--) - ioat2_free_ring_ent(ring[i], c); - kfree(ring); - return NULL; - } - set_desc_id(ring[i], i); - } - - /* link descs */ - for (i = 0; i < descs-1; i++) { - struct ioat_ring_ent *next = ring[i+1]; - struct ioat_dma_descriptor *hw = ring[i]->hw; - - hw->next = next->txd.phys; - } - ring[i]->hw->next = ring[0]->txd.phys; - - return ring; -} - -void ioat2_free_chan_resources(struct dma_chan *c); - -/* ioat2_alloc_chan_resources - allocate/initialize ioat2 descriptor ring - * @chan: channel to be initialized - */ -int ioat2_alloc_chan_resources(struct dma_chan *c) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - struct ioat_chan_common *chan = &ioat->base; - struct ioat_ring_ent **ring; - u64 status; - int order; - int i = 0; - - /* have we already been set up? 
*/ - if (ioat->ring) - return 1 << ioat->alloc_order; - - /* Setup register to interrupt and write completion status on error */ - writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET); - - /* allocate a completion writeback area */ - /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ - chan->completion = pci_pool_alloc(chan->device->completion_pool, - GFP_KERNEL, &chan->completion_dma); - if (!chan->completion) - return -ENOMEM; - - memset(chan->completion, 0, sizeof(*chan->completion)); - writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF, - chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); - writel(((u64) chan->completion_dma) >> 32, - chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); - - order = ioat_get_alloc_order(); - ring = ioat2_alloc_ring(c, order, GFP_KERNEL); - if (!ring) - return -ENOMEM; - - spin_lock_bh(&chan->cleanup_lock); - spin_lock_bh(&ioat->prep_lock); - ioat->ring = ring; - ioat->head = 0; - ioat->issued = 0; - ioat->tail = 0; - ioat->alloc_order = order; - set_bit(IOAT_RUN, &chan->state); - spin_unlock_bh(&ioat->prep_lock); - spin_unlock_bh(&chan->cleanup_lock); - - ioat2_start_null_desc(ioat); - - /* check that we got off the ground */ - do { - udelay(1); - status = ioat_chansts(chan); - } while (i++ < 20 && !is_ioat_active(status) && !is_ioat_idle(status)); - - if (is_ioat_active(status) || is_ioat_idle(status)) { - return 1 << ioat->alloc_order; - } else { - u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - - dev_WARN(to_dev(chan), - "failed to start channel chanerr: %#x\n", chanerr); - ioat2_free_chan_resources(c); - return -EFAULT; - } -} - -bool reshape_ring(struct ioat2_dma_chan *ioat, int order) -{ - /* reshape differs from normal ring allocation in that we want - * to allocate a new software ring while only - * extending/truncating the hardware ring - */ - struct ioat_chan_common *chan = &ioat->base; - struct dma_chan *c = &chan->common; - const u32 curr_size = ioat2_ring_size(ioat); - const u16 active = ioat2_ring_active(ioat); - const u32 new_size = 1 << order; - struct ioat_ring_ent **ring; - u16 i; - - if (order > ioat_get_max_alloc_order()) - return false; - - /* double check that we have at least 1 free descriptor */ - if (active == curr_size) - return false; - - /* when shrinking, verify that we can hold the current active - * set in the new ring - */ - if (active >= new_size) - return false; - - /* allocate the array to hold the software ring */ - ring = kcalloc(new_size, sizeof(*ring), GFP_NOWAIT); - if (!ring) - return false; - - /* allocate/trim descriptors as needed */ - if (new_size > curr_size) { - /* copy current descriptors to the new ring */ - for (i = 0; i < curr_size; i++) { - u16 curr_idx = (ioat->tail+i) & (curr_size-1); - u16 new_idx = (ioat->tail+i) & (new_size-1); - - ring[new_idx] = ioat->ring[curr_idx]; - set_desc_id(ring[new_idx], new_idx); - } - - /* add new descriptors to the ring */ - for (i = curr_size; i < new_size; i++) { - u16 new_idx = (ioat->tail+i) & (new_size-1); - - ring[new_idx] = ioat2_alloc_ring_ent(c, GFP_NOWAIT); - if (!ring[new_idx]) { - while (i--) { - u16 new_idx = (ioat->tail+i) & (new_size-1); - - ioat2_free_ring_ent(ring[new_idx], c); - } - kfree(ring); - return false; - } - set_desc_id(ring[new_idx], new_idx); - } - - /* hw link new descriptors */ - for (i = curr_size-1; i < new_size; i++) { - u16 new_idx = (ioat->tail+i) & (new_size-1); - struct ioat_ring_ent *next = ring[(new_idx+1) & (new_size-1)]; - struct ioat_dma_descriptor *hw = ring[new_idx]->hw; - - hw->next = 
next->txd.phys; - } - } else { - struct ioat_dma_descriptor *hw; - struct ioat_ring_ent *next; - - /* copy current descriptors to the new ring, dropping the - * removed descriptors - */ - for (i = 0; i < new_size; i++) { - u16 curr_idx = (ioat->tail+i) & (curr_size-1); - u16 new_idx = (ioat->tail+i) & (new_size-1); - - ring[new_idx] = ioat->ring[curr_idx]; - set_desc_id(ring[new_idx], new_idx); - } - - /* free deleted descriptors */ - for (i = new_size; i < curr_size; i++) { - struct ioat_ring_ent *ent; - - ent = ioat2_get_ring_ent(ioat, ioat->tail+i); - ioat2_free_ring_ent(ent, c); - } - - /* fix up hardware ring */ - hw = ring[(ioat->tail+new_size-1) & (new_size-1)]->hw; - next = ring[(ioat->tail+new_size) & (new_size-1)]; - hw->next = next->txd.phys; - } - - dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n", - __func__, new_size); - - kfree(ioat->ring); - ioat->ring = ring; - ioat->alloc_order = order; - - return true; -} - -/** - * ioat2_check_space_lock - verify space and grab ring producer lock - * @ioat: ioat2,3 channel (ring) to operate on - * @num_descs: allocation length - */ -int ioat2_check_space_lock(struct ioat2_dma_chan *ioat, int num_descs) -{ - struct ioat_chan_common *chan = &ioat->base; - bool retry; - - retry: - spin_lock_bh(&ioat->prep_lock); - /* never allow the last descriptor to be consumed, we need at - * least one free at all times to allow for on-the-fly ring - * resizing. - */ - if (likely(ioat2_ring_space(ioat) > num_descs)) { - dev_dbg(to_dev(chan), "%s: num_descs: %d (%x:%x:%x)\n", - __func__, num_descs, ioat->head, ioat->tail, ioat->issued); - ioat->produce = num_descs; - return 0; /* with ioat->prep_lock held */ - } - retry = test_and_set_bit(IOAT_RESHAPE_PENDING, &chan->state); - spin_unlock_bh(&ioat->prep_lock); - - /* is another cpu already trying to expand the ring? */ - if (retry) - goto retry; - - spin_lock_bh(&chan->cleanup_lock); - spin_lock_bh(&ioat->prep_lock); - retry = reshape_ring(ioat, ioat->alloc_order + 1); - clear_bit(IOAT_RESHAPE_PENDING, &chan->state); - spin_unlock_bh(&ioat->prep_lock); - spin_unlock_bh(&chan->cleanup_lock); - - /* if we were able to expand the ring retry the allocation */ - if (retry) - goto retry; - - if (printk_ratelimit()) - dev_dbg(to_dev(chan), "%s: ring full! 
num_descs: %d (%x:%x:%x)\n", - __func__, num_descs, ioat->head, ioat->tail, ioat->issued); - - /* progress reclaim in the allocation failure case we may be - * called under bh_disabled so we need to trigger the timer - * event directly - */ - if (time_is_before_jiffies(chan->timer.expires) - && timer_pending(&chan->timer)) { - struct ioatdma_device *device = chan->device; - - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - device->timer_fn((unsigned long) &chan->common); - } - - return -ENOMEM; -} - -struct dma_async_tx_descriptor * -ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, - dma_addr_t dma_src, size_t len, unsigned long flags) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - struct ioat_dma_descriptor *hw; - struct ioat_ring_ent *desc; - dma_addr_t dst = dma_dest; - dma_addr_t src = dma_src; - size_t total_len = len; - int num_descs, idx, i; - - num_descs = ioat2_xferlen_to_descs(ioat, len); - if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs) == 0) - idx = ioat->head; - else - return NULL; - i = 0; - do { - size_t copy = min_t(size_t, len, 1 << ioat->xfercap_log); - - desc = ioat2_get_ring_ent(ioat, idx + i); - hw = desc->hw; - - hw->size = copy; - hw->ctl = 0; - hw->src_addr = src; - hw->dst_addr = dst; - - len -= copy; - dst += copy; - src += copy; - dump_desc_dbg(ioat, desc); - } while (++i < num_descs); - - desc->txd.flags = flags; - desc->len = total_len; - hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); - hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE); - hw->ctl_f.compl_write = 1; - dump_desc_dbg(ioat, desc); - /* we leave the channel locked to ensure in order submission */ - - return &desc->txd; -} - -/** - * ioat2_free_chan_resources - release all the descriptors - * @chan: the channel to be cleaned - */ -void ioat2_free_chan_resources(struct dma_chan *c) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - struct ioat_chan_common *chan = &ioat->base; - struct ioatdma_device *device = chan->device; - struct ioat_ring_ent *desc; - const u16 total_descs = 1 << ioat->alloc_order; - int descs; - int i; - - /* Before freeing channel resources first check - * if they have been previously allocated for this channel. 
- */ - if (!ioat->ring) - return; - - ioat_stop(chan); - device->reset_hw(chan); - - spin_lock_bh(&chan->cleanup_lock); - spin_lock_bh(&ioat->prep_lock); - descs = ioat2_ring_space(ioat); - dev_dbg(to_dev(chan), "freeing %d idle descriptors\n", descs); - for (i = 0; i < descs; i++) { - desc = ioat2_get_ring_ent(ioat, ioat->head + i); - ioat2_free_ring_ent(desc, c); - } - - if (descs < total_descs) - dev_err(to_dev(chan), "Freeing %d in use descriptors!\n", - total_descs - descs); - - for (i = 0; i < total_descs - descs; i++) { - desc = ioat2_get_ring_ent(ioat, ioat->tail + i); - dump_desc_dbg(ioat, desc); - ioat2_free_ring_ent(desc, c); - } - - kfree(ioat->ring); - ioat->ring = NULL; - ioat->alloc_order = 0; - pci_pool_free(device->completion_pool, chan->completion, - chan->completion_dma); - spin_unlock_bh(&ioat->prep_lock); - spin_unlock_bh(&chan->cleanup_lock); - - chan->last_completion = 0; - chan->completion_dma = 0; - ioat->dmacount = 0; -} - -static ssize_t ring_size_show(struct dma_chan *c, char *page) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - - return sprintf(page, "%d\n", (1 << ioat->alloc_order) & ~1); -} -static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size); - -static ssize_t ring_active_show(struct dma_chan *c, char *page) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - - /* ...taken outside the lock, no need to be precise */ - return sprintf(page, "%d\n", ioat2_ring_active(ioat)); -} -static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active); - -static struct attribute *ioat2_attrs[] = { - &ring_size_attr.attr, - &ring_active_attr.attr, - &ioat_cap_attr.attr, - &ioat_version_attr.attr, - NULL, -}; - -struct kobj_type ioat2_ktype = { - .sysfs_ops = &ioat_sysfs_ops, - .default_attrs = ioat2_attrs, -}; - -int ioat2_dma_probe(struct ioatdma_device *device, int dca) -{ - struct pci_dev *pdev = device->pdev; - struct dma_device *dma; - struct dma_chan *c; - struct ioat_chan_common *chan; - int err; - - device->enumerate_channels = ioat2_enumerate_channels; - device->reset_hw = ioat2_reset_hw; - device->cleanup_fn = ioat2_cleanup_event; - device->timer_fn = ioat2_timer_event; - device->self_test = ioat_dma_self_test; - dma = &device->common; - dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock; - dma->device_issue_pending = ioat2_issue_pending; - dma->device_alloc_chan_resources = ioat2_alloc_chan_resources; - dma->device_free_chan_resources = ioat2_free_chan_resources; - dma->device_tx_status = ioat_dma_tx_status; - - err = ioat_probe(device); - if (err) - return err; - - list_for_each_entry(c, &dma->channels, device_node) { - chan = to_chan_common(c); - writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | IOAT_DMA_DCA_ANY_CPU, - chan->reg_base + IOAT_DCACTRL_OFFSET); - } - - err = ioat_register(device); - if (err) - return err; - - ioat_kobject_add(device, &ioat2_ktype); - - if (dca) - device->dca = ioat2_dca_init(pdev, device->reg_base); - - return err; -} diff --git a/kernel/drivers/dma/ioat/dma_v2.h b/kernel/drivers/dma/ioat/dma_v2.h deleted file mode 100644 index bf24ebe87..000000000 --- a/kernel/drivers/dma/ioat/dma_v2.h +++ /dev/null @@ -1,175 +0,0 @@ -/* - * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. 
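As a minimal sketch of the arithmetic the removed ioat2_dma_prep_memcpy_lock() and ioat2_check_space_lock() rely on — slicing a transfer into chunks of at most 1 << xfercap_log bytes while always leaving one ring slot free for on-the-fly resizing — the following standalone C fragment (illustrative only, not driver code; the ring_space value is a made-up placeholder) models both calculations:

    #include <assert.h>
    #include <stddef.h>
    #include <stdio.h>

    /* Illustrative model, not driver code: mirrors ioat2_xferlen_to_descs(). */
    static size_t xferlen_to_descs(size_t len, unsigned int xfercap_log)
    {
            size_t num_descs = len >> xfercap_log;

            num_descs += !!(len & ((1u << xfercap_log) - 1));   /* round up for the tail chunk */
            return num_descs;
    }

    int main(void)
    {
            unsigned int xfercap_log = 20;              /* 1 MiB per descriptor */
            size_t len = (5u << 20) + 123;              /* 5 MiB + 123 bytes */
            size_t chunk, used = 0, left = len;
            unsigned int ring_space = 8;                /* free slots, hypothetical */

            /* never consume the last slot: ">" rather than ">=", as in check_space_lock */
            if (ring_space > xferlen_to_descs(len, xfercap_log)) {
                    while (left) {
                            chunk = left < (1u << xfercap_log) ? left : (1u << xfercap_log);
                            printf("descriptor %zu: %zu bytes\n", used++, chunk);
                            left -= chunk;
                    }
            }
            assert(used == xferlen_to_descs(len, xfercap_log));
            return 0;
    }

With these values the copy splits into five full 1 MiB descriptors plus one 123-byte tail descriptor, exactly the count the round-up formula predicts.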
- * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in the - * file called COPYING. - */ -#ifndef IOATDMA_V2_H -#define IOATDMA_V2_H - -#include <linux/dmaengine.h> -#include <linux/circ_buf.h> -#include "dma.h" -#include "hw.h" - - -extern int ioat_pending_level; -extern int ioat_ring_alloc_order; - -/* - * workaround for IOAT ver.3.0 null descriptor issue - * (channel returns error when size is 0) - */ -#define NULL_DESC_BUFFER_SIZE 1 - -#define IOAT_MAX_ORDER 16 -#define ioat_get_alloc_order() \ - (min(ioat_ring_alloc_order, IOAT_MAX_ORDER)) -#define ioat_get_max_alloc_order() \ - (min(ioat_ring_max_alloc_order, IOAT_MAX_ORDER)) - -/* struct ioat2_dma_chan - ioat v2 / v3 channel attributes - * @base: common ioat channel parameters - * @xfercap_log; log2 of channel max transfer length (for fast division) - * @head: allocated index - * @issued: hardware notification point - * @tail: cleanup index - * @dmacount: identical to 'head' except for occasionally resetting to zero - * @alloc_order: log2 of the number of allocated descriptors - * @produce: number of descriptors to produce at submit time - * @ring: software ring buffer implementation of hardware ring - * @prep_lock: serializes descriptor preparation (producers) - */ -struct ioat2_dma_chan { - struct ioat_chan_common base; - size_t xfercap_log; - u16 head; - u16 issued; - u16 tail; - u16 dmacount; - u16 alloc_order; - u16 produce; - struct ioat_ring_ent **ring; - spinlock_t prep_lock; -}; - -static inline struct ioat2_dma_chan *to_ioat2_chan(struct dma_chan *c) -{ - struct ioat_chan_common *chan = to_chan_common(c); - - return container_of(chan, struct ioat2_dma_chan, base); -} - -static inline u32 ioat2_ring_size(struct ioat2_dma_chan *ioat) -{ - return 1 << ioat->alloc_order; -} - -/* count of descriptors in flight with the engine */ -static inline u16 ioat2_ring_active(struct ioat2_dma_chan *ioat) -{ - return CIRC_CNT(ioat->head, ioat->tail, ioat2_ring_size(ioat)); -} - -/* count of descriptors pending submission to hardware */ -static inline u16 ioat2_ring_pending(struct ioat2_dma_chan *ioat) -{ - return CIRC_CNT(ioat->head, ioat->issued, ioat2_ring_size(ioat)); -} - -static inline u32 ioat2_ring_space(struct ioat2_dma_chan *ioat) -{ - return ioat2_ring_size(ioat) - ioat2_ring_active(ioat); -} - -static inline u16 ioat2_xferlen_to_descs(struct ioat2_dma_chan *ioat, size_t len) -{ - u16 num_descs = len >> ioat->xfercap_log; - - num_descs += !!(len & ((1 << ioat->xfercap_log) - 1)); - return num_descs; -} - -/** - * struct ioat_ring_ent - wrapper around hardware descriptor - * @hw: hardware DMA descriptor (for memcpy) - * @fill: hardware fill descriptor - * @xor: hardware xor descriptor - * @xor_ex: hardware xor extension descriptor - * @pq: hardware pq descriptor - * @pq_ex: hardware pq extension descriptor - * @pqu: hardware pq update descriptor - * @raw: hardware raw (un-typed) descriptor - * @txd: the generic software descriptor for all engines - * @len: total transaction length for unmap - * @result: asynchronous result of validate operations - * @id: identifier for debug - */ - -struct ioat_ring_ent { - union { - struct ioat_dma_descriptor *hw; - struct ioat_xor_descriptor *xor; - struct ioat_xor_ext_descriptor *xor_ex; - struct ioat_pq_descriptor *pq; 
- struct ioat_pq_ext_descriptor *pq_ex; - struct ioat_pq_update_descriptor *pqu; - struct ioat_raw_descriptor *raw; - }; - size_t len; - struct dma_async_tx_descriptor txd; - enum sum_check_flags *result; - #ifdef DEBUG - int id; - #endif - struct ioat_sed_ent *sed; -}; - -static inline struct ioat_ring_ent * -ioat2_get_ring_ent(struct ioat2_dma_chan *ioat, u16 idx) -{ - return ioat->ring[idx & (ioat2_ring_size(ioat) - 1)]; -} - -static inline void ioat2_set_chainaddr(struct ioat2_dma_chan *ioat, u64 addr) -{ - struct ioat_chan_common *chan = &ioat->base; - - writel(addr & 0x00000000FFFFFFFF, - chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); - writel(addr >> 32, - chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); -} - -int ioat2_dma_probe(struct ioatdma_device *dev, int dca); -int ioat3_dma_probe(struct ioatdma_device *dev, int dca); -struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase); -struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase); -int ioat2_check_space_lock(struct ioat2_dma_chan *ioat, int num_descs); -int ioat2_enumerate_channels(struct ioatdma_device *device); -struct dma_async_tx_descriptor * -ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, - dma_addr_t dma_src, size_t len, unsigned long flags); -void ioat2_issue_pending(struct dma_chan *chan); -int ioat2_alloc_chan_resources(struct dma_chan *c); -void ioat2_free_chan_resources(struct dma_chan *c); -void __ioat2_restart_chan(struct ioat2_dma_chan *ioat); -bool reshape_ring(struct ioat2_dma_chan *ioat, int order); -void __ioat2_issue_pending(struct ioat2_dma_chan *ioat); -void ioat2_cleanup_event(unsigned long data); -void ioat2_timer_event(unsigned long data); -int ioat2_quiesce(struct ioat_chan_common *chan, unsigned long tmo); -int ioat2_reset_sync(struct ioat_chan_common *chan, unsigned long tmo); -extern struct kobj_type ioat2_ktype; -extern struct kmem_cache *ioat2_cache; -#endif /* IOATDMA_V2_H */ diff --git a/kernel/drivers/dma/ioat/dma_v3.c b/kernel/drivers/dma/ioat/dma_v3.c deleted file mode 100644 index 64790a45e..000000000 --- a/kernel/drivers/dma/ioat/dma_v3.c +++ /dev/null @@ -1,1717 +0,0 @@ -/* - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * BSD LICENSE - * - * Copyright(c) 2004-2009 Intel Corporation. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
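The inline helpers in the header above reduce all ring bookkeeping to three values: a power-of-two size (1 << alloc_order), an ever-incrementing head, and an ever-incrementing tail, combined through CIRC_CNT() and index masking. A minimal standalone sketch of that arithmetic (illustrative values, not driver code; the CIRC_CNT definition is repeated from linux/circ_buf.h so the sketch stands alone):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Same definition as linux/circ_buf.h, repeated here for a self-contained example. */
    #define CIRC_CNT(head, tail, size)      (((head) - (tail)) & ((size) - 1))

    int main(void)
    {
            unsigned int alloc_order = 4;           /* ring of 1 << 4 = 16 descriptors */
            uint16_t size = 1u << alloc_order;
            uint16_t head = 65534, tail = 65530;    /* u16 counters close to wrapping */

            uint16_t active = CIRC_CNT(head, tail, size);   /* descriptors in flight */
            uint16_t space = size - active;                 /* free slots */

            assert(active == 4 && space == 12);
            /* ring indices are reduced with "& (size - 1)", so counter wrap is harmless */
            printf("head slot %u, tail slot %u\n", head & (size - 1), tail & (size - 1));
            return 0;
    }

Because the size is a power of two, the masking in ioat2_get_ring_ent() and the subtraction in CIRC_CNT() keep working even after the 16-bit head and tail counters overflow.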
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/* - * Support routines for v3+ hardware - */ -#include <linux/module.h> -#include <linux/pci.h> -#include <linux/gfp.h> -#include <linux/dmaengine.h> -#include <linux/dma-mapping.h> -#include <linux/prefetch.h> -#include "../dmaengine.h" -#include "registers.h" -#include "hw.h" -#include "dma.h" -#include "dma_v2.h" - -extern struct kmem_cache *ioat3_sed_cache; - -/* ioat hardware assumes at least two sources for raid operations */ -#define src_cnt_to_sw(x) ((x) + 2) -#define src_cnt_to_hw(x) ((x) - 2) -#define ndest_to_sw(x) ((x) + 1) -#define ndest_to_hw(x) ((x) - 1) -#define src16_cnt_to_sw(x) ((x) + 9) -#define src16_cnt_to_hw(x) ((x) - 9) - -/* provide a lookup table for setting the source address in the base or - * extended descriptor of an xor or pq descriptor - */ -static const u8 xor_idx_to_desc = 0xe0; -static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 }; -static const u8 pq_idx_to_desc = 0xf8; -static const u8 pq16_idx_to_desc[] = { 0, 0, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2 }; -static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 }; -static const u8 pq16_idx_to_field[] = { 1, 4, 1, 2, 3, 4, 5, 6, 7, - 0, 1, 2, 3, 4, 5, 6 }; - -static void ioat3_eh(struct ioat2_dma_chan *ioat); - -static void xor_set_src(struct ioat_raw_descriptor *descs[2], - dma_addr_t addr, u32 offset, int idx) -{ - struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1]; - - raw->field[xor_idx_to_field[idx]] = addr + offset; -} - -static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx) -{ - struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1]; - - return raw->field[pq_idx_to_field[idx]]; -} - -static dma_addr_t pq16_get_src(struct ioat_raw_descriptor *desc[3], int idx) -{ - struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]]; - - return raw->field[pq16_idx_to_field[idx]]; -} - -static void pq_set_src(struct ioat_raw_descriptor *descs[2], - dma_addr_t addr, u32 offset, u8 coef, int idx) -{ - struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0]; - struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1]; - - raw->field[pq_idx_to_field[idx]] = addr + offset; - pq->coef[idx] = coef; -} - -static bool is_jf_ioat(struct pci_dev *pdev) -{ - switch (pdev->device) { - case 
PCI_DEVICE_ID_INTEL_IOAT_JSF0: - case PCI_DEVICE_ID_INTEL_IOAT_JSF1: - case PCI_DEVICE_ID_INTEL_IOAT_JSF2: - case PCI_DEVICE_ID_INTEL_IOAT_JSF3: - case PCI_DEVICE_ID_INTEL_IOAT_JSF4: - case PCI_DEVICE_ID_INTEL_IOAT_JSF5: - case PCI_DEVICE_ID_INTEL_IOAT_JSF6: - case PCI_DEVICE_ID_INTEL_IOAT_JSF7: - case PCI_DEVICE_ID_INTEL_IOAT_JSF8: - case PCI_DEVICE_ID_INTEL_IOAT_JSF9: - return true; - default: - return false; - } -} - -static bool is_snb_ioat(struct pci_dev *pdev) -{ - switch (pdev->device) { - case PCI_DEVICE_ID_INTEL_IOAT_SNB0: - case PCI_DEVICE_ID_INTEL_IOAT_SNB1: - case PCI_DEVICE_ID_INTEL_IOAT_SNB2: - case PCI_DEVICE_ID_INTEL_IOAT_SNB3: - case PCI_DEVICE_ID_INTEL_IOAT_SNB4: - case PCI_DEVICE_ID_INTEL_IOAT_SNB5: - case PCI_DEVICE_ID_INTEL_IOAT_SNB6: - case PCI_DEVICE_ID_INTEL_IOAT_SNB7: - case PCI_DEVICE_ID_INTEL_IOAT_SNB8: - case PCI_DEVICE_ID_INTEL_IOAT_SNB9: - return true; - default: - return false; - } -} - -static bool is_ivb_ioat(struct pci_dev *pdev) -{ - switch (pdev->device) { - case PCI_DEVICE_ID_INTEL_IOAT_IVB0: - case PCI_DEVICE_ID_INTEL_IOAT_IVB1: - case PCI_DEVICE_ID_INTEL_IOAT_IVB2: - case PCI_DEVICE_ID_INTEL_IOAT_IVB3: - case PCI_DEVICE_ID_INTEL_IOAT_IVB4: - case PCI_DEVICE_ID_INTEL_IOAT_IVB5: - case PCI_DEVICE_ID_INTEL_IOAT_IVB6: - case PCI_DEVICE_ID_INTEL_IOAT_IVB7: - case PCI_DEVICE_ID_INTEL_IOAT_IVB8: - case PCI_DEVICE_ID_INTEL_IOAT_IVB9: - return true; - default: - return false; - } - -} - -static bool is_hsw_ioat(struct pci_dev *pdev) -{ - switch (pdev->device) { - case PCI_DEVICE_ID_INTEL_IOAT_HSW0: - case PCI_DEVICE_ID_INTEL_IOAT_HSW1: - case PCI_DEVICE_ID_INTEL_IOAT_HSW2: - case PCI_DEVICE_ID_INTEL_IOAT_HSW3: - case PCI_DEVICE_ID_INTEL_IOAT_HSW4: - case PCI_DEVICE_ID_INTEL_IOAT_HSW5: - case PCI_DEVICE_ID_INTEL_IOAT_HSW6: - case PCI_DEVICE_ID_INTEL_IOAT_HSW7: - case PCI_DEVICE_ID_INTEL_IOAT_HSW8: - case PCI_DEVICE_ID_INTEL_IOAT_HSW9: - return true; - default: - return false; - } - -} - -static bool is_xeon_cb32(struct pci_dev *pdev) -{ - return is_jf_ioat(pdev) || is_snb_ioat(pdev) || is_ivb_ioat(pdev) || - is_hsw_ioat(pdev); -} - -static bool is_bwd_ioat(struct pci_dev *pdev) -{ - switch (pdev->device) { - case PCI_DEVICE_ID_INTEL_IOAT_BWD0: - case PCI_DEVICE_ID_INTEL_IOAT_BWD1: - case PCI_DEVICE_ID_INTEL_IOAT_BWD2: - case PCI_DEVICE_ID_INTEL_IOAT_BWD3: - /* even though not Atom, BDX-DE has same DMA silicon */ - case PCI_DEVICE_ID_INTEL_IOAT_BDXDE0: - case PCI_DEVICE_ID_INTEL_IOAT_BDXDE1: - case PCI_DEVICE_ID_INTEL_IOAT_BDXDE2: - case PCI_DEVICE_ID_INTEL_IOAT_BDXDE3: - return true; - default: - return false; - } -} - -static bool is_bwd_noraid(struct pci_dev *pdev) -{ - switch (pdev->device) { - case PCI_DEVICE_ID_INTEL_IOAT_BWD2: - case PCI_DEVICE_ID_INTEL_IOAT_BWD3: - case PCI_DEVICE_ID_INTEL_IOAT_BDXDE0: - case PCI_DEVICE_ID_INTEL_IOAT_BDXDE1: - case PCI_DEVICE_ID_INTEL_IOAT_BDXDE2: - case PCI_DEVICE_ID_INTEL_IOAT_BDXDE3: - return true; - default: - return false; - } - -} - -static void pq16_set_src(struct ioat_raw_descriptor *desc[3], - dma_addr_t addr, u32 offset, u8 coef, unsigned idx) -{ - struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *)desc[0]; - struct ioat_pq16a_descriptor *pq16 = - (struct ioat_pq16a_descriptor *)desc[1]; - struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]]; - - raw->field[pq16_idx_to_field[idx]] = addr + offset; - - if (idx < 8) - pq->coef[idx] = coef; - else - pq16->coef[idx - 8] = coef; -} - -static struct ioat_sed_ent * -ioat3_alloc_sed(struct ioatdma_device *device, unsigned int hw_pool) -{ - struct 
ioat_sed_ent *sed; - gfp_t flags = __GFP_ZERO | GFP_ATOMIC; - - sed = kmem_cache_alloc(ioat3_sed_cache, flags); - if (!sed) - return NULL; - - sed->hw_pool = hw_pool; - sed->hw = dma_pool_alloc(device->sed_hw_pool[hw_pool], - flags, &sed->dma); - if (!sed->hw) { - kmem_cache_free(ioat3_sed_cache, sed); - return NULL; - } - - return sed; -} - -static void ioat3_free_sed(struct ioatdma_device *device, struct ioat_sed_ent *sed) -{ - if (!sed) - return; - - dma_pool_free(device->sed_hw_pool[sed->hw_pool], sed->hw, sed->dma); - kmem_cache_free(ioat3_sed_cache, sed); -} - -static bool desc_has_ext(struct ioat_ring_ent *desc) -{ - struct ioat_dma_descriptor *hw = desc->hw; - - if (hw->ctl_f.op == IOAT_OP_XOR || - hw->ctl_f.op == IOAT_OP_XOR_VAL) { - struct ioat_xor_descriptor *xor = desc->xor; - - if (src_cnt_to_sw(xor->ctl_f.src_cnt) > 5) - return true; - } else if (hw->ctl_f.op == IOAT_OP_PQ || - hw->ctl_f.op == IOAT_OP_PQ_VAL) { - struct ioat_pq_descriptor *pq = desc->pq; - - if (src_cnt_to_sw(pq->ctl_f.src_cnt) > 3) - return true; - } - - return false; -} - -static u64 ioat3_get_current_completion(struct ioat_chan_common *chan) -{ - u64 phys_complete; - u64 completion; - - completion = *chan->completion; - phys_complete = ioat_chansts_to_addr(completion); - - dev_dbg(to_dev(chan), "%s: phys_complete: %#llx\n", __func__, - (unsigned long long) phys_complete); - - return phys_complete; -} - -static bool ioat3_cleanup_preamble(struct ioat_chan_common *chan, - u64 *phys_complete) -{ - *phys_complete = ioat3_get_current_completion(chan); - if (*phys_complete == chan->last_completion) - return false; - - clear_bit(IOAT_COMPLETION_ACK, &chan->state); - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - - return true; -} - -static void -desc_get_errstat(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc) -{ - struct ioat_dma_descriptor *hw = desc->hw; - - switch (hw->ctl_f.op) { - case IOAT_OP_PQ_VAL: - case IOAT_OP_PQ_VAL_16S: - { - struct ioat_pq_descriptor *pq = desc->pq; - - /* check if there's error written */ - if (!pq->dwbes_f.wbes) - return; - - /* need to set a chanerr var for checking to clear later */ - - if (pq->dwbes_f.p_val_err) - *desc->result |= SUM_CHECK_P_RESULT; - - if (pq->dwbes_f.q_val_err) - *desc->result |= SUM_CHECK_Q_RESULT; - - return; - } - default: - return; - } -} - -/** - * __cleanup - reclaim used descriptors - * @ioat: channel (ring) to clean - * - * The difference from the dma_v2.c __cleanup() is that this routine - * handles extended descriptors and dma-unmapping raid operations. - */ -static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete) -{ - struct ioat_chan_common *chan = &ioat->base; - struct ioatdma_device *device = chan->device; - struct ioat_ring_ent *desc; - bool seen_current = false; - int idx = ioat->tail, i; - u16 active; - - dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n", - __func__, ioat->head, ioat->tail, ioat->issued); - - /* - * At restart of the channel, the completion address and the - * channel status will be 0 due to starting a new chain. Since - * it's new chain and the first descriptor "fails", there is - * nothing to clean up. We do not want to reap the entire submitted - * chain due to this 0 address value and then BUG. 
- */ - if (!phys_complete) - return; - - active = ioat2_ring_active(ioat); - for (i = 0; i < active && !seen_current; i++) { - struct dma_async_tx_descriptor *tx; - - smp_read_barrier_depends(); - prefetch(ioat2_get_ring_ent(ioat, idx + i + 1)); - desc = ioat2_get_ring_ent(ioat, idx + i); - dump_desc_dbg(ioat, desc); - - /* set err stat if we are using dwbes */ - if (device->cap & IOAT_CAP_DWBES) - desc_get_errstat(ioat, desc); - - tx = &desc->txd; - if (tx->cookie) { - dma_cookie_complete(tx); - dma_descriptor_unmap(tx); - if (tx->callback) { - tx->callback(tx->callback_param); - tx->callback = NULL; - } - } - - if (tx->phys == phys_complete) - seen_current = true; - - /* skip extended descriptors */ - if (desc_has_ext(desc)) { - BUG_ON(i + 1 >= active); - i++; - } - - /* cleanup super extended descriptors */ - if (desc->sed) { - ioat3_free_sed(device, desc->sed); - desc->sed = NULL; - } - } - smp_mb(); /* finish all descriptor reads before incrementing tail */ - ioat->tail = idx + i; - BUG_ON(active && !seen_current); /* no active descs have written a completion? */ - chan->last_completion = phys_complete; - - if (active - i == 0) { - dev_dbg(to_dev(chan), "%s: cancel completion timeout\n", - __func__); - clear_bit(IOAT_COMPLETION_PENDING, &chan->state); - mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); - } - /* 5 microsecond delay per pending descriptor */ - writew(min((5 * (active - i)), IOAT_INTRDELAY_MASK), - chan->device->reg_base + IOAT_INTRDELAY_OFFSET); -} - -static void ioat3_cleanup(struct ioat2_dma_chan *ioat) -{ - struct ioat_chan_common *chan = &ioat->base; - u64 phys_complete; - - spin_lock_bh(&chan->cleanup_lock); - - if (ioat3_cleanup_preamble(chan, &phys_complete)) - __cleanup(ioat, phys_complete); - - if (is_ioat_halted(*chan->completion)) { - u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - - if (chanerr & IOAT_CHANERR_HANDLE_MASK) { - mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); - ioat3_eh(ioat); - } - } - - spin_unlock_bh(&chan->cleanup_lock); -} - -static void ioat3_cleanup_event(unsigned long data) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); - struct ioat_chan_common *chan = &ioat->base; - - ioat3_cleanup(ioat); - if (!test_bit(IOAT_RUN, &chan->state)) - return; - writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); -} - -static void ioat3_restart_channel(struct ioat2_dma_chan *ioat) -{ - struct ioat_chan_common *chan = &ioat->base; - u64 phys_complete; - - ioat2_quiesce(chan, 0); - if (ioat3_cleanup_preamble(chan, &phys_complete)) - __cleanup(ioat, phys_complete); - - __ioat2_restart_chan(ioat); -} - -static void ioat3_eh(struct ioat2_dma_chan *ioat) -{ - struct ioat_chan_common *chan = &ioat->base; - struct pci_dev *pdev = to_pdev(chan); - struct ioat_dma_descriptor *hw; - struct dma_async_tx_descriptor *tx; - u64 phys_complete; - struct ioat_ring_ent *desc; - u32 err_handled = 0; - u32 chanerr_int; - u32 chanerr; - - /* cleanup so tail points to descriptor that caused the error */ - if (ioat3_cleanup_preamble(chan, &phys_complete)) - __cleanup(ioat, phys_complete); - - chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - pci_read_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, &chanerr_int); - - dev_dbg(to_dev(chan), "%s: error = %x:%x\n", - __func__, chanerr, chanerr_int); - - desc = ioat2_get_ring_ent(ioat, ioat->tail); - hw = desc->hw; - dump_desc_dbg(ioat, desc); - - switch (hw->ctl_f.op) { - case IOAT_OP_XOR_VAL: - if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) { - *desc->result |= 
SUM_CHECK_P_RESULT; - err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR; - } - break; - case IOAT_OP_PQ_VAL: - case IOAT_OP_PQ_VAL_16S: - if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) { - *desc->result |= SUM_CHECK_P_RESULT; - err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR; - } - if (chanerr & IOAT_CHANERR_XOR_Q_ERR) { - *desc->result |= SUM_CHECK_Q_RESULT; - err_handled |= IOAT_CHANERR_XOR_Q_ERR; - } - break; - } - - /* fault on unhandled error or spurious halt */ - if (chanerr ^ err_handled || chanerr == 0) { - dev_err(to_dev(chan), "%s: fatal error (%x:%x)\n", - __func__, chanerr, err_handled); - BUG(); - } else { /* cleanup the faulty descriptor */ - tx = &desc->txd; - if (tx->cookie) { - dma_cookie_complete(tx); - dma_descriptor_unmap(tx); - if (tx->callback) { - tx->callback(tx->callback_param); - tx->callback = NULL; - } - } - } - - writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); - pci_write_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, chanerr_int); - - /* mark faulting descriptor as complete */ - *chan->completion = desc->txd.phys; - - spin_lock_bh(&ioat->prep_lock); - ioat3_restart_channel(ioat); - spin_unlock_bh(&ioat->prep_lock); -} - -static void check_active(struct ioat2_dma_chan *ioat) -{ - struct ioat_chan_common *chan = &ioat->base; - - if (ioat2_ring_active(ioat)) { - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - return; - } - - if (test_and_clear_bit(IOAT_CHAN_ACTIVE, &chan->state)) - mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); - else if (ioat->alloc_order > ioat_get_alloc_order()) { - /* if the ring is idle, empty, and oversized try to step - * down the size - */ - reshape_ring(ioat, ioat->alloc_order - 1); - - /* keep shrinking until we get back to our minimum - * default size - */ - if (ioat->alloc_order > ioat_get_alloc_order()) - mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); - } - -} - -static void ioat3_timer_event(unsigned long data) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); - struct ioat_chan_common *chan = &ioat->base; - dma_addr_t phys_complete; - u64 status; - - status = ioat_chansts(chan); - - /* when halted due to errors check for channel - * programming errors before advancing the completion state - */ - if (is_ioat_halted(status)) { - u32 chanerr; - - chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - dev_err(to_dev(chan), "%s: Channel halted (%x)\n", - __func__, chanerr); - if (test_bit(IOAT_RUN, &chan->state)) - BUG_ON(is_ioat_bug(chanerr)); - else /* we never got off the ground */ - return; - } - - /* if we haven't made progress and we have already - * acknowledged a pending completion once, then be more - * forceful with a restart - */ - spin_lock_bh(&chan->cleanup_lock); - if (ioat_cleanup_preamble(chan, &phys_complete)) - __cleanup(ioat, phys_complete); - else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) { - spin_lock_bh(&ioat->prep_lock); - ioat3_restart_channel(ioat); - spin_unlock_bh(&ioat->prep_lock); - spin_unlock_bh(&chan->cleanup_lock); - return; - } else { - set_bit(IOAT_COMPLETION_ACK, &chan->state); - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - } - - - if (ioat2_ring_active(ioat)) - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - else { - spin_lock_bh(&ioat->prep_lock); - check_active(ioat); - spin_unlock_bh(&ioat->prep_lock); - } - spin_unlock_bh(&chan->cleanup_lock); -} - -static enum dma_status -ioat3_tx_status(struct dma_chan *c, dma_cookie_t cookie, - struct dma_tx_state *txstate) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - enum dma_status ret; - - 
ret = dma_cookie_status(c, cookie, txstate); - if (ret == DMA_COMPLETE) - return ret; - - ioat3_cleanup(ioat); - - return dma_cookie_status(c, cookie, txstate); -} - -static struct dma_async_tx_descriptor * -__ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result, - dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt, - size_t len, unsigned long flags) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - struct ioat_ring_ent *compl_desc; - struct ioat_ring_ent *desc; - struct ioat_ring_ent *ext; - size_t total_len = len; - struct ioat_xor_descriptor *xor; - struct ioat_xor_ext_descriptor *xor_ex = NULL; - struct ioat_dma_descriptor *hw; - int num_descs, with_ext, idx, i; - u32 offset = 0; - u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR; - - BUG_ON(src_cnt < 2); - - num_descs = ioat2_xferlen_to_descs(ioat, len); - /* we need 2x the number of descriptors to cover greater than 5 - * sources - */ - if (src_cnt > 5) { - with_ext = 1; - num_descs *= 2; - } else - with_ext = 0; - - /* completion writes from the raid engine may pass completion - * writes from the legacy engine, so we need one extra null - * (legacy) descriptor to ensure all completion writes arrive in - * order. - */ - if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs+1) == 0) - idx = ioat->head; - else - return NULL; - i = 0; - do { - struct ioat_raw_descriptor *descs[2]; - size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); - int s; - - desc = ioat2_get_ring_ent(ioat, idx + i); - xor = desc->xor; - - /* save a branch by unconditionally retrieving the - * extended descriptor xor_set_src() knows to not write - * to it in the single descriptor case - */ - ext = ioat2_get_ring_ent(ioat, idx + i + 1); - xor_ex = ext->xor_ex; - - descs[0] = (struct ioat_raw_descriptor *) xor; - descs[1] = (struct ioat_raw_descriptor *) xor_ex; - for (s = 0; s < src_cnt; s++) - xor_set_src(descs, src[s], offset, s); - xor->size = xfer_size; - xor->dst_addr = dest + offset; - xor->ctl = 0; - xor->ctl_f.op = op; - xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt); - - len -= xfer_size; - offset += xfer_size; - dump_desc_dbg(ioat, desc); - } while ((i += 1 + with_ext) < num_descs); - - /* last xor descriptor carries the unmap parameters and fence bit */ - desc->txd.flags = flags; - desc->len = total_len; - if (result) - desc->result = result; - xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE); - - /* completion descriptor carries interrupt bit */ - compl_desc = ioat2_get_ring_ent(ioat, idx + i); - compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT; - hw = compl_desc->hw; - hw->ctl = 0; - hw->ctl_f.null = 1; - hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); - hw->ctl_f.compl_write = 1; - hw->size = NULL_DESC_BUFFER_SIZE; - dump_desc_dbg(ioat, compl_desc); - - /* we leave the channel locked to ensure in order submission */ - return &compl_desc->txd; -} - -static struct dma_async_tx_descriptor * -ioat3_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, - unsigned int src_cnt, size_t len, unsigned long flags) -{ - return __ioat3_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags); -} - -static struct dma_async_tx_descriptor * -ioat3_prep_xor_val(struct dma_chan *chan, dma_addr_t *src, - unsigned int src_cnt, size_t len, - enum sum_check_flags *result, unsigned long flags) -{ - /* the cleanup routine only sets bits on validate failure, it - * does not clear bits on validate success... 
so clear it here - */ - *result = 0; - - return __ioat3_prep_xor_lock(chan, result, src[0], &src[1], - src_cnt - 1, len, flags); -} - -static void -dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct ioat_ring_ent *ext) -{ - struct device *dev = to_dev(&ioat->base); - struct ioat_pq_descriptor *pq = desc->pq; - struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL; - struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex }; - int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt); - int i; - - dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" - " sz: %#10.8x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'" - " src_cnt: %d)\n", - desc_id(desc), (unsigned long long) desc->txd.phys, - (unsigned long long) (pq_ex ? pq_ex->next : pq->next), - desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, pq->ctl_f.int_en, - pq->ctl_f.compl_write, - pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q", - pq->ctl_f.src_cnt); - for (i = 0; i < src_cnt; i++) - dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i, - (unsigned long long) pq_get_src(descs, i), pq->coef[i]); - dev_dbg(dev, "\tP: %#llx\n", pq->p_addr); - dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr); - dev_dbg(dev, "\tNEXT: %#llx\n", pq->next); -} - -static void dump_pq16_desc_dbg(struct ioat2_dma_chan *ioat, - struct ioat_ring_ent *desc) -{ - struct device *dev = to_dev(&ioat->base); - struct ioat_pq_descriptor *pq = desc->pq; - struct ioat_raw_descriptor *descs[] = { (void *)pq, - (void *)pq, - (void *)pq }; - int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt); - int i; - - if (desc->sed) { - descs[1] = (void *)desc->sed->hw; - descs[2] = (void *)desc->sed->hw + 64; - } - - dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" - " sz: %#x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'" - " src_cnt: %d)\n", - desc_id(desc), (unsigned long long) desc->txd.phys, - (unsigned long long) pq->next, - desc->txd.flags, pq->size, pq->ctl, - pq->ctl_f.op, pq->ctl_f.int_en, - pq->ctl_f.compl_write, - pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q", - pq->ctl_f.src_cnt); - for (i = 0; i < src_cnt; i++) { - dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i, - (unsigned long long) pq16_get_src(descs, i), - pq->coef[i]); - } - dev_dbg(dev, "\tP: %#llx\n", pq->p_addr); - dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr); -} - -static struct dma_async_tx_descriptor * -__ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, - const dma_addr_t *dst, const dma_addr_t *src, - unsigned int src_cnt, const unsigned char *scf, - size_t len, unsigned long flags) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - struct ioat_chan_common *chan = &ioat->base; - struct ioatdma_device *device = chan->device; - struct ioat_ring_ent *compl_desc; - struct ioat_ring_ent *desc; - struct ioat_ring_ent *ext; - size_t total_len = len; - struct ioat_pq_descriptor *pq; - struct ioat_pq_ext_descriptor *pq_ex = NULL; - struct ioat_dma_descriptor *hw; - u32 offset = 0; - u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ; - int i, s, idx, with_ext, num_descs; - int cb32 = (device->version < IOAT_VER_3_3) ? 
1 : 0; - - dev_dbg(to_dev(chan), "%s\n", __func__); - /* the engine requires at least two sources (we provide - * at least 1 implied source in the DMA_PREP_CONTINUE case) - */ - BUG_ON(src_cnt + dmaf_continue(flags) < 2); - - num_descs = ioat2_xferlen_to_descs(ioat, len); - /* we need 2x the number of descriptors to cover greater than 3 - * sources (we need 1 extra source in the q-only continuation - * case and 3 extra sources in the p+q continuation case. - */ - if (src_cnt + dmaf_p_disabled_continue(flags) > 3 || - (dmaf_continue(flags) && !dmaf_p_disabled_continue(flags))) { - with_ext = 1; - num_descs *= 2; - } else - with_ext = 0; - - /* completion writes from the raid engine may pass completion - * writes from the legacy engine, so we need one extra null - * (legacy) descriptor to ensure all completion writes arrive in - * order. - */ - if (likely(num_descs) && - ioat2_check_space_lock(ioat, num_descs + cb32) == 0) - idx = ioat->head; - else - return NULL; - i = 0; - do { - struct ioat_raw_descriptor *descs[2]; - size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); - - desc = ioat2_get_ring_ent(ioat, idx + i); - pq = desc->pq; - - /* save a branch by unconditionally retrieving the - * extended descriptor pq_set_src() knows to not write - * to it in the single descriptor case - */ - ext = ioat2_get_ring_ent(ioat, idx + i + with_ext); - pq_ex = ext->pq_ex; - - descs[0] = (struct ioat_raw_descriptor *) pq; - descs[1] = (struct ioat_raw_descriptor *) pq_ex; - - for (s = 0; s < src_cnt; s++) - pq_set_src(descs, src[s], offset, scf[s], s); - - /* see the comment for dma_maxpq in include/linux/dmaengine.h */ - if (dmaf_p_disabled_continue(flags)) - pq_set_src(descs, dst[1], offset, 1, s++); - else if (dmaf_continue(flags)) { - pq_set_src(descs, dst[0], offset, 0, s++); - pq_set_src(descs, dst[1], offset, 1, s++); - pq_set_src(descs, dst[1], offset, 0, s++); - } - pq->size = xfer_size; - pq->p_addr = dst[0] + offset; - pq->q_addr = dst[1] + offset; - pq->ctl = 0; - pq->ctl_f.op = op; - /* we turn on descriptor write back error status */ - if (device->cap & IOAT_CAP_DWBES) - pq->ctl_f.wb_en = result ? 
1 : 0; - pq->ctl_f.src_cnt = src_cnt_to_hw(s); - pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P); - pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q); - - len -= xfer_size; - offset += xfer_size; - } while ((i += 1 + with_ext) < num_descs); - - /* last pq descriptor carries the unmap parameters and fence bit */ - desc->txd.flags = flags; - desc->len = total_len; - if (result) - desc->result = result; - pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE); - dump_pq_desc_dbg(ioat, desc, ext); - - if (!cb32) { - pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); - pq->ctl_f.compl_write = 1; - compl_desc = desc; - } else { - /* completion descriptor carries interrupt bit */ - compl_desc = ioat2_get_ring_ent(ioat, idx + i); - compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT; - hw = compl_desc->hw; - hw->ctl = 0; - hw->ctl_f.null = 1; - hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); - hw->ctl_f.compl_write = 1; - hw->size = NULL_DESC_BUFFER_SIZE; - dump_desc_dbg(ioat, compl_desc); - } - - - /* we leave the channel locked to ensure in order submission */ - return &compl_desc->txd; -} - -static struct dma_async_tx_descriptor * -__ioat3_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result, - const dma_addr_t *dst, const dma_addr_t *src, - unsigned int src_cnt, const unsigned char *scf, - size_t len, unsigned long flags) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - struct ioat_chan_common *chan = &ioat->base; - struct ioatdma_device *device = chan->device; - struct ioat_ring_ent *desc; - size_t total_len = len; - struct ioat_pq_descriptor *pq; - u32 offset = 0; - u8 op; - int i, s, idx, num_descs; - - /* this function is only called with 9-16 sources */ - op = result ? IOAT_OP_PQ_VAL_16S : IOAT_OP_PQ_16S; - - dev_dbg(to_dev(chan), "%s\n", __func__); - - num_descs = ioat2_xferlen_to_descs(ioat, len); - - /* - * 16 source pq is only available on cb3.3 and has no completion - * write hw bug. - */ - if (num_descs && ioat2_check_space_lock(ioat, num_descs) == 0) - idx = ioat->head; - else - return NULL; - - i = 0; - - do { - struct ioat_raw_descriptor *descs[4]; - size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); - - desc = ioat2_get_ring_ent(ioat, idx + i); - pq = desc->pq; - - descs[0] = (struct ioat_raw_descriptor *) pq; - - desc->sed = ioat3_alloc_sed(device, (src_cnt-2) >> 3); - if (!desc->sed) { - dev_err(to_dev(chan), - "%s: no free sed entries\n", __func__); - return NULL; - } - - pq->sed_addr = desc->sed->dma; - desc->sed->parent = desc; - - descs[1] = (struct ioat_raw_descriptor *)desc->sed->hw; - descs[2] = (void *)descs[1] + 64; - - for (s = 0; s < src_cnt; s++) - pq16_set_src(descs, src[s], offset, scf[s], s); - - /* see the comment for dma_maxpq in include/linux/dmaengine.h */ - if (dmaf_p_disabled_continue(flags)) - pq16_set_src(descs, dst[1], offset, 1, s++); - else if (dmaf_continue(flags)) { - pq16_set_src(descs, dst[0], offset, 0, s++); - pq16_set_src(descs, dst[1], offset, 1, s++); - pq16_set_src(descs, dst[1], offset, 0, s++); - } - - pq->size = xfer_size; - pq->p_addr = dst[0] + offset; - pq->q_addr = dst[1] + offset; - pq->ctl = 0; - pq->ctl_f.op = op; - pq->ctl_f.src_cnt = src16_cnt_to_hw(s); - /* we turn on descriptor write back error status */ - if (device->cap & IOAT_CAP_DWBES) - pq->ctl_f.wb_en = result ? 
1 : 0; - pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P); - pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q); - - len -= xfer_size; - offset += xfer_size; - } while (++i < num_descs); - - /* last pq descriptor carries the unmap parameters and fence bit */ - desc->txd.flags = flags; - desc->len = total_len; - if (result) - desc->result = result; - pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE); - - /* with cb3.3 we should be able to do completion w/o a null desc */ - pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); - pq->ctl_f.compl_write = 1; - - dump_pq16_desc_dbg(ioat, desc); - - /* we leave the channel locked to ensure in order submission */ - return &desc->txd; -} - -static int src_cnt_flags(unsigned int src_cnt, unsigned long flags) -{ - if (dmaf_p_disabled_continue(flags)) - return src_cnt + 1; - else if (dmaf_continue(flags)) - return src_cnt + 3; - else - return src_cnt; -} - -static struct dma_async_tx_descriptor * -ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, - unsigned int src_cnt, const unsigned char *scf, size_t len, - unsigned long flags) -{ - /* specify valid address for disabled result */ - if (flags & DMA_PREP_PQ_DISABLE_P) - dst[0] = dst[1]; - if (flags & DMA_PREP_PQ_DISABLE_Q) - dst[1] = dst[0]; - - /* handle the single source multiply case from the raid6 - * recovery path - */ - if ((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1) { - dma_addr_t single_source[2]; - unsigned char single_source_coef[2]; - - BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q); - single_source[0] = src[0]; - single_source[1] = src[0]; - single_source_coef[0] = scf[0]; - single_source_coef[1] = 0; - - return src_cnt_flags(src_cnt, flags) > 8 ? - __ioat3_prep_pq16_lock(chan, NULL, dst, single_source, - 2, single_source_coef, len, - flags) : - __ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2, - single_source_coef, len, flags); - - } else { - return src_cnt_flags(src_cnt, flags) > 8 ? - __ioat3_prep_pq16_lock(chan, NULL, dst, src, src_cnt, - scf, len, flags) : - __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt, - scf, len, flags); - } -} - -static struct dma_async_tx_descriptor * -ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, - unsigned int src_cnt, const unsigned char *scf, size_t len, - enum sum_check_flags *pqres, unsigned long flags) -{ - /* specify valid address for disabled result */ - if (flags & DMA_PREP_PQ_DISABLE_P) - pq[0] = pq[1]; - if (flags & DMA_PREP_PQ_DISABLE_Q) - pq[1] = pq[0]; - - /* the cleanup routine only sets bits on validate failure, it - * does not clear bits on validate success... so clear it here - */ - *pqres = 0; - - return src_cnt_flags(src_cnt, flags) > 8 ? - __ioat3_prep_pq16_lock(chan, pqres, pq, src, src_cnt, scf, len, - flags) : - __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len, - flags); -} - -static struct dma_async_tx_descriptor * -ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src, - unsigned int src_cnt, size_t len, unsigned long flags) -{ - unsigned char scf[src_cnt]; - dma_addr_t pq[2]; - - memset(scf, 0, src_cnt); - pq[0] = dst; - flags |= DMA_PREP_PQ_DISABLE_Q; - pq[1] = dst; /* specify valid address for disabled result */ - - return src_cnt_flags(src_cnt, flags) > 8 ? 
- __ioat3_prep_pq16_lock(chan, NULL, pq, src, src_cnt, scf, len, - flags) : - __ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len, - flags); -} - -static struct dma_async_tx_descriptor * -ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src, - unsigned int src_cnt, size_t len, - enum sum_check_flags *result, unsigned long flags) -{ - unsigned char scf[src_cnt]; - dma_addr_t pq[2]; - - /* the cleanup routine only sets bits on validate failure, it - * does not clear bits on validate success... so clear it here - */ - *result = 0; - - memset(scf, 0, src_cnt); - pq[0] = src[0]; - flags |= DMA_PREP_PQ_DISABLE_Q; - pq[1] = pq[0]; /* specify valid address for disabled result */ - - return src_cnt_flags(src_cnt, flags) > 8 ? - __ioat3_prep_pq16_lock(chan, result, pq, &src[1], src_cnt - 1, - scf, len, flags) : - __ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, - scf, len, flags); -} - -static struct dma_async_tx_descriptor * -ioat3_prep_interrupt_lock(struct dma_chan *c, unsigned long flags) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - struct ioat_ring_ent *desc; - struct ioat_dma_descriptor *hw; - - if (ioat2_check_space_lock(ioat, 1) == 0) - desc = ioat2_get_ring_ent(ioat, ioat->head); - else - return NULL; - - hw = desc->hw; - hw->ctl = 0; - hw->ctl_f.null = 1; - hw->ctl_f.int_en = 1; - hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE); - hw->ctl_f.compl_write = 1; - hw->size = NULL_DESC_BUFFER_SIZE; - hw->src_addr = 0; - hw->dst_addr = 0; - - desc->txd.flags = flags; - desc->len = 1; - - dump_desc_dbg(ioat, desc); - - /* we leave the channel locked to ensure in order submission */ - return &desc->txd; -} - -static void ioat3_dma_test_callback(void *dma_async_param) -{ - struct completion *cmp = dma_async_param; - - complete(cmp); -} - -#define IOAT_NUM_SRC_TEST 6 /* must be <= 8 */ -static int ioat_xor_val_self_test(struct ioatdma_device *device) -{ - int i, src_idx; - struct page *dest; - struct page *xor_srcs[IOAT_NUM_SRC_TEST]; - struct page *xor_val_srcs[IOAT_NUM_SRC_TEST + 1]; - dma_addr_t dma_srcs[IOAT_NUM_SRC_TEST + 1]; - dma_addr_t dest_dma; - struct dma_async_tx_descriptor *tx; - struct dma_chan *dma_chan; - dma_cookie_t cookie; - u8 cmp_byte = 0; - u32 cmp_word; - u32 xor_val_result; - int err = 0; - struct completion cmp; - unsigned long tmo; - struct device *dev = &device->pdev->dev; - struct dma_device *dma = &device->common; - u8 op = 0; - - dev_dbg(dev, "%s\n", __func__); - - if (!dma_has_cap(DMA_XOR, dma->cap_mask)) - return 0; - - for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) { - xor_srcs[src_idx] = alloc_page(GFP_KERNEL); - if (!xor_srcs[src_idx]) { - while (src_idx--) - __free_page(xor_srcs[src_idx]); - return -ENOMEM; - } - } - - dest = alloc_page(GFP_KERNEL); - if (!dest) { - while (src_idx--) - __free_page(xor_srcs[src_idx]); - return -ENOMEM; - } - - /* Fill in src buffers */ - for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) { - u8 *ptr = page_address(xor_srcs[src_idx]); - for (i = 0; i < PAGE_SIZE; i++) - ptr[i] = (1 << src_idx); - } - - for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) - cmp_byte ^= (u8) (1 << src_idx); - - cmp_word = (cmp_byte << 24) | (cmp_byte << 16) | - (cmp_byte << 8) | cmp_byte; - - memset(page_address(dest), 0, PAGE_SIZE); - - dma_chan = container_of(dma->channels.next, struct dma_chan, - device_node); - if (dma->device_alloc_chan_resources(dma_chan) < 1) { - err = -ENODEV; - goto out; - } - - /* test xor */ - op = IOAT_OP_XOR; - - dest_dma = dma_map_page(dev, dest, 0, PAGE_SIZE, 
DMA_FROM_DEVICE); - if (dma_mapping_error(dev, dest_dma)) - goto dma_unmap; - - for (i = 0; i < IOAT_NUM_SRC_TEST; i++) - dma_srcs[i] = DMA_ERROR_CODE; - for (i = 0; i < IOAT_NUM_SRC_TEST; i++) { - dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE, - DMA_TO_DEVICE); - if (dma_mapping_error(dev, dma_srcs[i])) - goto dma_unmap; - } - tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs, - IOAT_NUM_SRC_TEST, PAGE_SIZE, - DMA_PREP_INTERRUPT); - - if (!tx) { - dev_err(dev, "Self-test xor prep failed\n"); - err = -ENODEV; - goto dma_unmap; - } - - async_tx_ack(tx); - init_completion(&cmp); - tx->callback = ioat3_dma_test_callback; - tx->callback_param = &cmp; - cookie = tx->tx_submit(tx); - if (cookie < 0) { - dev_err(dev, "Self-test xor setup failed\n"); - err = -ENODEV; - goto dma_unmap; - } - dma->device_issue_pending(dma_chan); - - tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); - - if (tmo == 0 || - dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) { - dev_err(dev, "Self-test xor timed out\n"); - err = -ENODEV; - goto dma_unmap; - } - - for (i = 0; i < IOAT_NUM_SRC_TEST; i++) - dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE); - - dma_sync_single_for_cpu(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE); - for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) { - u32 *ptr = page_address(dest); - if (ptr[i] != cmp_word) { - dev_err(dev, "Self-test xor failed compare\n"); - err = -ENODEV; - goto free_resources; - } - } - dma_sync_single_for_device(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE); - - dma_unmap_page(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE); - - /* skip validate if the capability is not present */ - if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask)) - goto free_resources; - - op = IOAT_OP_XOR_VAL; - - /* validate the sources with the destintation page */ - for (i = 0; i < IOAT_NUM_SRC_TEST; i++) - xor_val_srcs[i] = xor_srcs[i]; - xor_val_srcs[i] = dest; - - xor_val_result = 1; - - for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) - dma_srcs[i] = DMA_ERROR_CODE; - for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) { - dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE, - DMA_TO_DEVICE); - if (dma_mapping_error(dev, dma_srcs[i])) - goto dma_unmap; - } - tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs, - IOAT_NUM_SRC_TEST + 1, PAGE_SIZE, - &xor_val_result, DMA_PREP_INTERRUPT); - if (!tx) { - dev_err(dev, "Self-test zero prep failed\n"); - err = -ENODEV; - goto dma_unmap; - } - - async_tx_ack(tx); - init_completion(&cmp); - tx->callback = ioat3_dma_test_callback; - tx->callback_param = &cmp; - cookie = tx->tx_submit(tx); - if (cookie < 0) { - dev_err(dev, "Self-test zero setup failed\n"); - err = -ENODEV; - goto dma_unmap; - } - dma->device_issue_pending(dma_chan); - - tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); - - if (tmo == 0 || - dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) { - dev_err(dev, "Self-test validate timed out\n"); - err = -ENODEV; - goto dma_unmap; - } - - for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) - dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE); - - if (xor_val_result != 0) { - dev_err(dev, "Self-test validate failed compare\n"); - err = -ENODEV; - goto free_resources; - } - - memset(page_address(dest), 0, PAGE_SIZE); - - /* test for non-zero parity sum */ - op = IOAT_OP_XOR_VAL; - - xor_val_result = 0; - for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) - dma_srcs[i] = DMA_ERROR_CODE; - for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) { - dma_srcs[i] = dma_map_page(dev, 
xor_val_srcs[i], 0, PAGE_SIZE, - DMA_TO_DEVICE); - if (dma_mapping_error(dev, dma_srcs[i])) - goto dma_unmap; - } - tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs, - IOAT_NUM_SRC_TEST + 1, PAGE_SIZE, - &xor_val_result, DMA_PREP_INTERRUPT); - if (!tx) { - dev_err(dev, "Self-test 2nd zero prep failed\n"); - err = -ENODEV; - goto dma_unmap; - } - - async_tx_ack(tx); - init_completion(&cmp); - tx->callback = ioat3_dma_test_callback; - tx->callback_param = &cmp; - cookie = tx->tx_submit(tx); - if (cookie < 0) { - dev_err(dev, "Self-test 2nd zero setup failed\n"); - err = -ENODEV; - goto dma_unmap; - } - dma->device_issue_pending(dma_chan); - - tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); - - if (tmo == 0 || - dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) { - dev_err(dev, "Self-test 2nd validate timed out\n"); - err = -ENODEV; - goto dma_unmap; - } - - if (xor_val_result != SUM_CHECK_P_RESULT) { - dev_err(dev, "Self-test validate failed compare\n"); - err = -ENODEV; - goto dma_unmap; - } - - for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) - dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE); - - goto free_resources; -dma_unmap: - if (op == IOAT_OP_XOR) { - if (dest_dma != DMA_ERROR_CODE) - dma_unmap_page(dev, dest_dma, PAGE_SIZE, - DMA_FROM_DEVICE); - for (i = 0; i < IOAT_NUM_SRC_TEST; i++) - if (dma_srcs[i] != DMA_ERROR_CODE) - dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, - DMA_TO_DEVICE); - } else if (op == IOAT_OP_XOR_VAL) { - for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) - if (dma_srcs[i] != DMA_ERROR_CODE) - dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, - DMA_TO_DEVICE); - } -free_resources: - dma->device_free_chan_resources(dma_chan); -out: - src_idx = IOAT_NUM_SRC_TEST; - while (src_idx--) - __free_page(xor_srcs[src_idx]); - __free_page(dest); - return err; -} - -static int ioat3_dma_self_test(struct ioatdma_device *device) -{ - int rc = ioat_dma_self_test(device); - - if (rc) - return rc; - - rc = ioat_xor_val_self_test(device); - if (rc) - return rc; - - return 0; -} - -static int ioat3_irq_reinit(struct ioatdma_device *device) -{ - struct pci_dev *pdev = device->pdev; - int irq = pdev->irq, i; - - if (!is_bwd_ioat(pdev)) - return 0; - - switch (device->irq_mode) { - case IOAT_MSIX: - for (i = 0; i < device->common.chancnt; i++) { - struct msix_entry *msix = &device->msix_entries[i]; - struct ioat_chan_common *chan; - - chan = ioat_chan_by_index(device, i); - devm_free_irq(&pdev->dev, msix->vector, chan); - } - - pci_disable_msix(pdev); - break; - case IOAT_MSI: - pci_disable_msi(pdev); - /* fall through */ - case IOAT_INTX: - devm_free_irq(&pdev->dev, irq, device); - break; - default: - return 0; - } - device->irq_mode = IOAT_NOIRQ; - - return ioat_dma_setup_interrupts(device); -} - -static int ioat3_reset_hw(struct ioat_chan_common *chan) -{ - /* throw away whatever the channel was doing and get it - * initialized, with ioat3 specific workarounds - */ - struct ioatdma_device *device = chan->device; - struct pci_dev *pdev = device->pdev; - u32 chanerr; - u16 dev_id; - int err; - - ioat2_quiesce(chan, msecs_to_jiffies(100)); - - chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); - - if (device->version < IOAT_VER_3_3) { - /* clear any pending errors */ - err = pci_read_config_dword(pdev, - IOAT_PCI_CHANERR_INT_OFFSET, &chanerr); - if (err) { - dev_err(&pdev->dev, - "channel error register unreachable\n"); - return err; - } - pci_write_config_dword(pdev, - IOAT_PCI_CHANERR_INT_OFFSET, 
chanerr); - - /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit - * (workaround for spurious config parity error after restart) - */ - pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id); - if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) { - pci_write_config_dword(pdev, - IOAT_PCI_DMAUNCERRSTS_OFFSET, - 0x10); - } - } - - err = ioat2_reset_sync(chan, msecs_to_jiffies(200)); - if (!err) - err = ioat3_irq_reinit(device); - - if (err) - dev_err(&pdev->dev, "Failed to reset: %d\n", err); - - return err; -} - -static void ioat3_intr_quirk(struct ioatdma_device *device) -{ - struct dma_device *dma; - struct dma_chan *c; - struct ioat_chan_common *chan; - u32 errmask; - - dma = &device->common; - - /* - * if we have descriptor write back error status, we mask the - * error interrupts - */ - if (device->cap & IOAT_CAP_DWBES) { - list_for_each_entry(c, &dma->channels, device_node) { - chan = to_chan_common(c); - errmask = readl(chan->reg_base + - IOAT_CHANERR_MASK_OFFSET); - errmask |= IOAT_CHANERR_XOR_P_OR_CRC_ERR | - IOAT_CHANERR_XOR_Q_ERR; - writel(errmask, chan->reg_base + - IOAT_CHANERR_MASK_OFFSET); - } - } -} - -int ioat3_dma_probe(struct ioatdma_device *device, int dca) -{ - struct pci_dev *pdev = device->pdev; - int dca_en = system_has_dca_enabled(pdev); - struct dma_device *dma; - struct dma_chan *c; - struct ioat_chan_common *chan; - bool is_raid_device = false; - int err; - - device->enumerate_channels = ioat2_enumerate_channels; - device->reset_hw = ioat3_reset_hw; - device->self_test = ioat3_dma_self_test; - device->intr_quirk = ioat3_intr_quirk; - dma = &device->common; - dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock; - dma->device_issue_pending = ioat2_issue_pending; - dma->device_alloc_chan_resources = ioat2_alloc_chan_resources; - dma->device_free_chan_resources = ioat2_free_chan_resources; - - dma_cap_set(DMA_INTERRUPT, dma->cap_mask); - dma->device_prep_dma_interrupt = ioat3_prep_interrupt_lock; - - device->cap = readl(device->reg_base + IOAT_DMA_CAP_OFFSET); - - if (is_xeon_cb32(pdev) || is_bwd_noraid(pdev)) - device->cap &= ~(IOAT_CAP_XOR | IOAT_CAP_PQ | IOAT_CAP_RAID16SS); - - /* dca is incompatible with raid operations */ - if (dca_en && (device->cap & (IOAT_CAP_XOR|IOAT_CAP_PQ))) - device->cap &= ~(IOAT_CAP_XOR|IOAT_CAP_PQ); - - if (device->cap & IOAT_CAP_XOR) { - is_raid_device = true; - dma->max_xor = 8; - - dma_cap_set(DMA_XOR, dma->cap_mask); - dma->device_prep_dma_xor = ioat3_prep_xor; - - dma_cap_set(DMA_XOR_VAL, dma->cap_mask); - dma->device_prep_dma_xor_val = ioat3_prep_xor_val; - } - - if (device->cap & IOAT_CAP_PQ) { - is_raid_device = true; - - dma->device_prep_dma_pq = ioat3_prep_pq; - dma->device_prep_dma_pq_val = ioat3_prep_pq_val; - dma_cap_set(DMA_PQ, dma->cap_mask); - dma_cap_set(DMA_PQ_VAL, dma->cap_mask); - - if (device->cap & IOAT_CAP_RAID16SS) { - dma_set_maxpq(dma, 16, 0); - } else { - dma_set_maxpq(dma, 8, 0); - } - - if (!(device->cap & IOAT_CAP_XOR)) { - dma->device_prep_dma_xor = ioat3_prep_pqxor; - dma->device_prep_dma_xor_val = ioat3_prep_pqxor_val; - dma_cap_set(DMA_XOR, dma->cap_mask); - dma_cap_set(DMA_XOR_VAL, dma->cap_mask); - - if (device->cap & IOAT_CAP_RAID16SS) { - dma->max_xor = 16; - } else { - dma->max_xor = 8; - } - } - } - - dma->device_tx_status = ioat3_tx_status; - device->cleanup_fn = ioat3_cleanup_event; - device->timer_fn = ioat3_timer_event; - - /* starting with CB3.3 super extended descriptors are supported */ - if (device->cap & IOAT_CAP_RAID16SS) { - char pool_name[14]; - int i; - - for (i = 0; i < 
MAX_SED_POOLS; i++) { - snprintf(pool_name, 14, "ioat_hw%d_sed", i); - - /* allocate SED DMA pool */ - device->sed_hw_pool[i] = dmam_pool_create(pool_name, - &pdev->dev, - SED_SIZE * (i + 1), 64, 0); - if (!device->sed_hw_pool[i]) - return -ENOMEM; - - } - } - - err = ioat_probe(device); - if (err) - return err; - - list_for_each_entry(c, &dma->channels, device_node) { - chan = to_chan_common(c); - writel(IOAT_DMA_DCA_ANY_CPU, - chan->reg_base + IOAT_DCACTRL_OFFSET); - } - - err = ioat_register(device); - if (err) - return err; - - ioat_kobject_add(device, &ioat2_ktype); - - if (dca) - device->dca = ioat3_dca_init(pdev, device->reg_base); - - return 0; -} diff --git a/kernel/drivers/dma/ioat/hw.h b/kernel/drivers/dma/ioat/hw.h index a3e731edc..690e3b4f8 100644 --- a/kernel/drivers/dma/ioat/hw.h +++ b/kernel/drivers/dma/ioat/hw.h @@ -21,11 +21,6 @@ #define IOAT_MMIO_BAR 0 /* CB device ID's */ -#define IOAT_PCI_DID_5000 0x1A38 -#define IOAT_PCI_DID_CNB 0x360B -#define IOAT_PCI_DID_SCNB 0x65FF -#define IOAT_PCI_DID_SNB 0x402F - #define PCI_DEVICE_ID_INTEL_IOAT_IVB0 0x0e20 #define PCI_DEVICE_ID_INTEL_IOAT_IVB1 0x0e21 #define PCI_DEVICE_ID_INTEL_IOAT_IVB2 0x0e22 @@ -58,6 +53,17 @@ #define PCI_DEVICE_ID_INTEL_IOAT_BDXDE2 0x6f52 #define PCI_DEVICE_ID_INTEL_IOAT_BDXDE3 0x6f53 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX0 0x6f20 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX1 0x6f21 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX2 0x6f22 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX3 0x6f23 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX4 0x6f24 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX5 0x6f25 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX6 0x6f26 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX7 0x6f27 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX8 0x6f2e +#define PCI_DEVICE_ID_INTEL_IOAT_BDX9 0x6f2f + #define IOAT_VER_1_2 0x12 /* Version 1.2 */ #define IOAT_VER_2_0 0x20 /* Version 2.0 */ #define IOAT_VER_3_0 0x30 /* Version 3.0 */ diff --git a/kernel/drivers/dma/ioat/init.c b/kernel/drivers/dma/ioat/init.c new file mode 100644 index 000000000..4ef0c5e07 --- /dev/null +++ b/kernel/drivers/dma/ioat/init.c @@ -0,0 +1,1424 @@ +/* + * Intel I/OAT DMA Linux driver + * Copyright(c) 2004 - 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". 
+ * + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/pci.h> +#include <linux/interrupt.h> +#include <linux/dmaengine.h> +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/workqueue.h> +#include <linux/prefetch.h> +#include <linux/dca.h> +#include <linux/aer.h> +#include "dma.h" +#include "registers.h" +#include "hw.h" + +#include "../dmaengine.h" + +MODULE_VERSION(IOAT_DMA_VERSION); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Intel Corporation"); + +static struct pci_device_id ioat_pci_tbl[] = { + /* I/OAT v3 platforms */ + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) }, + + /* I/OAT v3.2 platforms */ + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF7) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF8) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF9) }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB7) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB8) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB9) }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB7) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB8) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB9) }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW7) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW8) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW9) }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX2) }, 
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX7) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX8) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX9) }, + + /* I/OAT v3.3 platforms */ + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD3) }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE3) }, + + { 0, } +}; +MODULE_DEVICE_TABLE(pci, ioat_pci_tbl); + +static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id); +static void ioat_remove(struct pci_dev *pdev); +static void +ioat_init_channel(struct ioatdma_device *ioat_dma, + struct ioatdma_chan *ioat_chan, int idx); +static void ioat_intr_quirk(struct ioatdma_device *ioat_dma); +static int ioat_enumerate_channels(struct ioatdma_device *ioat_dma); +static int ioat3_dma_self_test(struct ioatdma_device *ioat_dma); + +static int ioat_dca_enabled = 1; +module_param(ioat_dca_enabled, int, 0644); +MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)"); +int ioat_pending_level = 4; +module_param(ioat_pending_level, int, 0644); +MODULE_PARM_DESC(ioat_pending_level, + "high-water mark for pushing ioat descriptors (default: 4)"); +int ioat_ring_alloc_order = 8; +module_param(ioat_ring_alloc_order, int, 0644); +MODULE_PARM_DESC(ioat_ring_alloc_order, + "ioat+: allocate 2^n descriptors per channel (default: 8 max: 16)"); +int ioat_ring_max_alloc_order = IOAT_MAX_ORDER; +module_param(ioat_ring_max_alloc_order, int, 0644); +MODULE_PARM_DESC(ioat_ring_max_alloc_order, + "ioat+: upper limit for ring size (default: 16)"); +static char ioat_interrupt_style[32] = "msix"; +module_param_string(ioat_interrupt_style, ioat_interrupt_style, + sizeof(ioat_interrupt_style), 0644); +MODULE_PARM_DESC(ioat_interrupt_style, + "set ioat interrupt style: msix (default), msi, intx"); + +struct kmem_cache *ioat_cache; +struct kmem_cache *ioat_sed_cache; + +static bool is_jf_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_JSF0: + case PCI_DEVICE_ID_INTEL_IOAT_JSF1: + case PCI_DEVICE_ID_INTEL_IOAT_JSF2: + case PCI_DEVICE_ID_INTEL_IOAT_JSF3: + case PCI_DEVICE_ID_INTEL_IOAT_JSF4: + case PCI_DEVICE_ID_INTEL_IOAT_JSF5: + case PCI_DEVICE_ID_INTEL_IOAT_JSF6: + case PCI_DEVICE_ID_INTEL_IOAT_JSF7: + case PCI_DEVICE_ID_INTEL_IOAT_JSF8: + case PCI_DEVICE_ID_INTEL_IOAT_JSF9: + return true; + default: + return false; + } +} + +static bool is_snb_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_SNB0: + case PCI_DEVICE_ID_INTEL_IOAT_SNB1: + case PCI_DEVICE_ID_INTEL_IOAT_SNB2: + case PCI_DEVICE_ID_INTEL_IOAT_SNB3: + case PCI_DEVICE_ID_INTEL_IOAT_SNB4: + case PCI_DEVICE_ID_INTEL_IOAT_SNB5: + case PCI_DEVICE_ID_INTEL_IOAT_SNB6: + case PCI_DEVICE_ID_INTEL_IOAT_SNB7: + case PCI_DEVICE_ID_INTEL_IOAT_SNB8: + case PCI_DEVICE_ID_INTEL_IOAT_SNB9: + return true; + default: + return false; + } +} + +static bool is_ivb_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case 
PCI_DEVICE_ID_INTEL_IOAT_IVB0: + case PCI_DEVICE_ID_INTEL_IOAT_IVB1: + case PCI_DEVICE_ID_INTEL_IOAT_IVB2: + case PCI_DEVICE_ID_INTEL_IOAT_IVB3: + case PCI_DEVICE_ID_INTEL_IOAT_IVB4: + case PCI_DEVICE_ID_INTEL_IOAT_IVB5: + case PCI_DEVICE_ID_INTEL_IOAT_IVB6: + case PCI_DEVICE_ID_INTEL_IOAT_IVB7: + case PCI_DEVICE_ID_INTEL_IOAT_IVB8: + case PCI_DEVICE_ID_INTEL_IOAT_IVB9: + return true; + default: + return false; + } + +} + +static bool is_hsw_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_HSW0: + case PCI_DEVICE_ID_INTEL_IOAT_HSW1: + case PCI_DEVICE_ID_INTEL_IOAT_HSW2: + case PCI_DEVICE_ID_INTEL_IOAT_HSW3: + case PCI_DEVICE_ID_INTEL_IOAT_HSW4: + case PCI_DEVICE_ID_INTEL_IOAT_HSW5: + case PCI_DEVICE_ID_INTEL_IOAT_HSW6: + case PCI_DEVICE_ID_INTEL_IOAT_HSW7: + case PCI_DEVICE_ID_INTEL_IOAT_HSW8: + case PCI_DEVICE_ID_INTEL_IOAT_HSW9: + return true; + default: + return false; + } + +} + +static bool is_bdx_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_BDX0: + case PCI_DEVICE_ID_INTEL_IOAT_BDX1: + case PCI_DEVICE_ID_INTEL_IOAT_BDX2: + case PCI_DEVICE_ID_INTEL_IOAT_BDX3: + case PCI_DEVICE_ID_INTEL_IOAT_BDX4: + case PCI_DEVICE_ID_INTEL_IOAT_BDX5: + case PCI_DEVICE_ID_INTEL_IOAT_BDX6: + case PCI_DEVICE_ID_INTEL_IOAT_BDX7: + case PCI_DEVICE_ID_INTEL_IOAT_BDX8: + case PCI_DEVICE_ID_INTEL_IOAT_BDX9: + return true; + default: + return false; + } +} + +static bool is_xeon_cb32(struct pci_dev *pdev) +{ + return is_jf_ioat(pdev) || is_snb_ioat(pdev) || is_ivb_ioat(pdev) || + is_hsw_ioat(pdev) || is_bdx_ioat(pdev); +} + +bool is_bwd_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_BWD0: + case PCI_DEVICE_ID_INTEL_IOAT_BWD1: + case PCI_DEVICE_ID_INTEL_IOAT_BWD2: + case PCI_DEVICE_ID_INTEL_IOAT_BWD3: + /* even though not Atom, BDX-DE has same DMA silicon */ + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE0: + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE1: + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE2: + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE3: + return true; + default: + return false; + } +} + +static bool is_bwd_noraid(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_BWD2: + case PCI_DEVICE_ID_INTEL_IOAT_BWD3: + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE0: + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE1: + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE2: + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE3: + return true; + default: + return false; + } + +} + +/* + * Perform a IOAT transaction to verify the HW works. + */ +#define IOAT_TEST_SIZE 2000 + +static void ioat_dma_test_callback(void *dma_async_param) +{ + struct completion *cmp = dma_async_param; + + complete(cmp); +} + +/** + * ioat_dma_self_test - Perform a IOAT transaction to verify the HW works. 
+ * @ioat_dma: dma device to be tested + */ +static int ioat_dma_self_test(struct ioatdma_device *ioat_dma) +{ + int i; + u8 *src; + u8 *dest; + struct dma_device *dma = &ioat_dma->dma_dev; + struct device *dev = &ioat_dma->pdev->dev; + struct dma_chan *dma_chan; + struct dma_async_tx_descriptor *tx; + dma_addr_t dma_dest, dma_src; + dma_cookie_t cookie; + int err = 0; + struct completion cmp; + unsigned long tmo; + unsigned long flags; + + src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); + if (!src) + return -ENOMEM; + dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); + if (!dest) { + kfree(src); + return -ENOMEM; + } + + /* Fill in src buffer */ + for (i = 0; i < IOAT_TEST_SIZE; i++) + src[i] = (u8)i; + + /* Start copy, using first DMA channel */ + dma_chan = container_of(dma->channels.next, struct dma_chan, + device_node); + if (dma->device_alloc_chan_resources(dma_chan) < 1) { + dev_err(dev, "selftest cannot allocate chan resource\n"); + err = -ENODEV; + goto out; + } + + dma_src = dma_map_single(dev, src, IOAT_TEST_SIZE, DMA_TO_DEVICE); + if (dma_mapping_error(dev, dma_src)) { + dev_err(dev, "mapping src buffer failed\n"); + goto free_resources; + } + dma_dest = dma_map_single(dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE); + if (dma_mapping_error(dev, dma_dest)) { + dev_err(dev, "mapping dest buffer failed\n"); + goto unmap_src; + } + flags = DMA_PREP_INTERRUPT; + tx = ioat_dma->dma_dev.device_prep_dma_memcpy(dma_chan, dma_dest, + dma_src, IOAT_TEST_SIZE, + flags); + if (!tx) { + dev_err(dev, "Self-test prep failed, disabling\n"); + err = -ENODEV; + goto unmap_dma; + } + + async_tx_ack(tx); + init_completion(&cmp); + tx->callback = ioat_dma_test_callback; + tx->callback_param = &cmp; + cookie = tx->tx_submit(tx); + if (cookie < 0) { + dev_err(dev, "Self-test setup failed, disabling\n"); + err = -ENODEV; + goto unmap_dma; + } + dma->device_issue_pending(dma_chan); + + tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + + if (tmo == 0 || + dma->device_tx_status(dma_chan, cookie, NULL) + != DMA_COMPLETE) { + dev_err(dev, "Self-test copy timed out, disabling\n"); + err = -ENODEV; + goto unmap_dma; + } + if (memcmp(src, dest, IOAT_TEST_SIZE)) { + dev_err(dev, "Self-test copy failed compare, disabling\n"); + err = -ENODEV; + goto free_resources; + } + +unmap_dma: + dma_unmap_single(dev, dma_dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE); +unmap_src: + dma_unmap_single(dev, dma_src, IOAT_TEST_SIZE, DMA_TO_DEVICE); +free_resources: + dma->device_free_chan_resources(dma_chan); +out: + kfree(src); + kfree(dest); + return err; +} + +/** + * ioat_dma_setup_interrupts - setup interrupt handler + * @ioat_dma: ioat dma device + */ +int ioat_dma_setup_interrupts(struct ioatdma_device *ioat_dma) +{ + struct ioatdma_chan *ioat_chan; + struct pci_dev *pdev = ioat_dma->pdev; + struct device *dev = &pdev->dev; + struct msix_entry *msix; + int i, j, msixcnt; + int err = -EINVAL; + u8 intrctrl = 0; + + if (!strcmp(ioat_interrupt_style, "msix")) + goto msix; + if (!strcmp(ioat_interrupt_style, "msi")) + goto msi; + if (!strcmp(ioat_interrupt_style, "intx")) + goto intx; + dev_err(dev, "invalid ioat_interrupt_style %s\n", ioat_interrupt_style); + goto err_no_irq; + +msix: + /* The number of MSI-X vectors should equal the number of channels */ + msixcnt = ioat_dma->dma_dev.chancnt; + for (i = 0; i < msixcnt; i++) + ioat_dma->msix_entries[i].entry = i; + + err = pci_enable_msix_exact(pdev, ioat_dma->msix_entries, msixcnt); + if (err) + goto msi; + + for (i = 0; i < msixcnt; i++) { + msix = 
&ioat_dma->msix_entries[i]; + ioat_chan = ioat_chan_by_index(ioat_dma, i); + err = devm_request_irq(dev, msix->vector, + ioat_dma_do_interrupt_msix, 0, + "ioat-msix", ioat_chan); + if (err) { + for (j = 0; j < i; j++) { + msix = &ioat_dma->msix_entries[j]; + ioat_chan = ioat_chan_by_index(ioat_dma, j); + devm_free_irq(dev, msix->vector, ioat_chan); + } + goto msi; + } + } + intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL; + ioat_dma->irq_mode = IOAT_MSIX; + goto done; + +msi: + err = pci_enable_msi(pdev); + if (err) + goto intx; + + err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, 0, + "ioat-msi", ioat_dma); + if (err) { + pci_disable_msi(pdev); + goto intx; + } + ioat_dma->irq_mode = IOAT_MSI; + goto done; + +intx: + err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, + IRQF_SHARED, "ioat-intx", ioat_dma); + if (err) + goto err_no_irq; + + ioat_dma->irq_mode = IOAT_INTX; +done: + if (is_bwd_ioat(pdev)) + ioat_intr_quirk(ioat_dma); + intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN; + writeb(intrctrl, ioat_dma->reg_base + IOAT_INTRCTRL_OFFSET); + return 0; + +err_no_irq: + /* Disable all interrupt generation */ + writeb(0, ioat_dma->reg_base + IOAT_INTRCTRL_OFFSET); + ioat_dma->irq_mode = IOAT_NOIRQ; + dev_err(dev, "no usable interrupts\n"); + return err; +} + +static void ioat_disable_interrupts(struct ioatdma_device *ioat_dma) +{ + /* Disable all interrupt generation */ + writeb(0, ioat_dma->reg_base + IOAT_INTRCTRL_OFFSET); +} + +static int ioat_probe(struct ioatdma_device *ioat_dma) +{ + int err = -ENODEV; + struct dma_device *dma = &ioat_dma->dma_dev; + struct pci_dev *pdev = ioat_dma->pdev; + struct device *dev = &pdev->dev; + + /* DMA coherent memory pool for DMA descriptor allocations */ + ioat_dma->dma_pool = pci_pool_create("dma_desc_pool", pdev, + sizeof(struct ioat_dma_descriptor), + 64, 0); + if (!ioat_dma->dma_pool) { + err = -ENOMEM; + goto err_dma_pool; + } + + ioat_dma->completion_pool = pci_pool_create("completion_pool", pdev, + sizeof(u64), + SMP_CACHE_BYTES, + SMP_CACHE_BYTES); + + if (!ioat_dma->completion_pool) { + err = -ENOMEM; + goto err_completion_pool; + } + + ioat_enumerate_channels(ioat_dma); + + dma_cap_set(DMA_MEMCPY, dma->cap_mask); + dma->dev = &pdev->dev; + + if (!dma->chancnt) { + dev_err(dev, "channel enumeration error\n"); + goto err_setup_interrupts; + } + + err = ioat_dma_setup_interrupts(ioat_dma); + if (err) + goto err_setup_interrupts; + + err = ioat3_dma_self_test(ioat_dma); + if (err) + goto err_self_test; + + return 0; + +err_self_test: + ioat_disable_interrupts(ioat_dma); +err_setup_interrupts: + pci_pool_destroy(ioat_dma->completion_pool); +err_completion_pool: + pci_pool_destroy(ioat_dma->dma_pool); +err_dma_pool: + return err; +} + +static int ioat_register(struct ioatdma_device *ioat_dma) +{ + int err = dma_async_device_register(&ioat_dma->dma_dev); + + if (err) { + ioat_disable_interrupts(ioat_dma); + pci_pool_destroy(ioat_dma->completion_pool); + pci_pool_destroy(ioat_dma->dma_pool); + } + + return err; +} + +static void ioat_dma_remove(struct ioatdma_device *ioat_dma) +{ + struct dma_device *dma = &ioat_dma->dma_dev; + + ioat_disable_interrupts(ioat_dma); + + ioat_kobject_del(ioat_dma); + + dma_async_device_unregister(dma); + + pci_pool_destroy(ioat_dma->dma_pool); + pci_pool_destroy(ioat_dma->completion_pool); + + INIT_LIST_HEAD(&dma->channels); +} + +/** + * ioat_enumerate_channels - find and initialize the device's channels + * @ioat_dma: the ioat dma device to be enumerated + */ +static int ioat_enumerate_channels(struct 
ioatdma_device *ioat_dma) +{ + struct ioatdma_chan *ioat_chan; + struct device *dev = &ioat_dma->pdev->dev; + struct dma_device *dma = &ioat_dma->dma_dev; + u8 xfercap_log; + int i; + + INIT_LIST_HEAD(&dma->channels); + dma->chancnt = readb(ioat_dma->reg_base + IOAT_CHANCNT_OFFSET); + dma->chancnt &= 0x1f; /* bits [4:0] valid */ + if (dma->chancnt > ARRAY_SIZE(ioat_dma->idx)) { + dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n", + dma->chancnt, ARRAY_SIZE(ioat_dma->idx)); + dma->chancnt = ARRAY_SIZE(ioat_dma->idx); + } + xfercap_log = readb(ioat_dma->reg_base + IOAT_XFERCAP_OFFSET); + xfercap_log &= 0x1f; /* bits [4:0] valid */ + if (xfercap_log == 0) + return 0; + dev_dbg(dev, "%s: xfercap = %d\n", __func__, 1 << xfercap_log); + + for (i = 0; i < dma->chancnt; i++) { + ioat_chan = devm_kzalloc(dev, sizeof(*ioat_chan), GFP_KERNEL); + if (!ioat_chan) + break; + + ioat_init_channel(ioat_dma, ioat_chan, i); + ioat_chan->xfercap_log = xfercap_log; + spin_lock_init(&ioat_chan->prep_lock); + if (ioat_reset_hw(ioat_chan)) { + i = 0; + break; + } + } + dma->chancnt = i; + return i; +} + +/** + * ioat_free_chan_resources - release all the descriptors + * @chan: the channel to be cleaned + */ +static void ioat_free_chan_resources(struct dma_chan *c) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; + struct ioat_ring_ent *desc; + const int total_descs = 1 << ioat_chan->alloc_order; + int descs; + int i; + + /* Before freeing channel resources first check + * if they have been previously allocated for this channel. + */ + if (!ioat_chan->ring) + return; + + ioat_stop(ioat_chan); + ioat_reset_hw(ioat_chan); + + spin_lock_bh(&ioat_chan->cleanup_lock); + spin_lock_bh(&ioat_chan->prep_lock); + descs = ioat_ring_space(ioat_chan); + dev_dbg(to_dev(ioat_chan), "freeing %d idle descriptors\n", descs); + for (i = 0; i < descs; i++) { + desc = ioat_get_ring_ent(ioat_chan, ioat_chan->head + i); + ioat_free_ring_ent(desc, c); + } + + if (descs < total_descs) + dev_err(to_dev(ioat_chan), "Freeing %d in use descriptors!\n", + total_descs - descs); + + for (i = 0; i < total_descs - descs; i++) { + desc = ioat_get_ring_ent(ioat_chan, ioat_chan->tail + i); + dump_desc_dbg(ioat_chan, desc); + ioat_free_ring_ent(desc, c); + } + + kfree(ioat_chan->ring); + ioat_chan->ring = NULL; + ioat_chan->alloc_order = 0; + pci_pool_free(ioat_dma->completion_pool, ioat_chan->completion, + ioat_chan->completion_dma); + spin_unlock_bh(&ioat_chan->prep_lock); + spin_unlock_bh(&ioat_chan->cleanup_lock); + + ioat_chan->last_completion = 0; + ioat_chan->completion_dma = 0; + ioat_chan->dmacount = 0; +} + +/* ioat_alloc_chan_resources - allocate/initialize ioat descriptor ring + * @chan: channel to be initialized + */ +static int ioat_alloc_chan_resources(struct dma_chan *c) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct ioat_ring_ent **ring; + u64 status; + int order; + int i = 0; + u32 chanerr; + + /* have we already been set up? 
*/ + if (ioat_chan->ring) + return 1 << ioat_chan->alloc_order; + + /* Setup register to interrupt and write completion status on error */ + writew(IOAT_CHANCTRL_RUN, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET); + + /* allocate a completion writeback area */ + /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ + ioat_chan->completion = + pci_pool_alloc(ioat_chan->ioat_dma->completion_pool, + GFP_KERNEL, &ioat_chan->completion_dma); + if (!ioat_chan->completion) + return -ENOMEM; + + memset(ioat_chan->completion, 0, sizeof(*ioat_chan->completion)); + writel(((u64)ioat_chan->completion_dma) & 0x00000000FFFFFFFF, + ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); + writel(((u64)ioat_chan->completion_dma) >> 32, + ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); + + order = ioat_get_alloc_order(); + ring = ioat_alloc_ring(c, order, GFP_KERNEL); + if (!ring) + return -ENOMEM; + + spin_lock_bh(&ioat_chan->cleanup_lock); + spin_lock_bh(&ioat_chan->prep_lock); + ioat_chan->ring = ring; + ioat_chan->head = 0; + ioat_chan->issued = 0; + ioat_chan->tail = 0; + ioat_chan->alloc_order = order; + set_bit(IOAT_RUN, &ioat_chan->state); + spin_unlock_bh(&ioat_chan->prep_lock); + spin_unlock_bh(&ioat_chan->cleanup_lock); + + ioat_start_null_desc(ioat_chan); + + /* check that we got off the ground */ + do { + udelay(1); + status = ioat_chansts(ioat_chan); + } while (i++ < 20 && !is_ioat_active(status) && !is_ioat_idle(status)); + + if (is_ioat_active(status) || is_ioat_idle(status)) + return 1 << ioat_chan->alloc_order; + + chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + + dev_WARN(to_dev(ioat_chan), + "failed to start channel chanerr: %#x\n", chanerr); + ioat_free_chan_resources(c); + return -EFAULT; +} + +/* common channel initialization */ +static void +ioat_init_channel(struct ioatdma_device *ioat_dma, + struct ioatdma_chan *ioat_chan, int idx) +{ + struct dma_device *dma = &ioat_dma->dma_dev; + struct dma_chan *c = &ioat_chan->dma_chan; + unsigned long data = (unsigned long) c; + + ioat_chan->ioat_dma = ioat_dma; + ioat_chan->reg_base = ioat_dma->reg_base + (0x80 * (idx + 1)); + spin_lock_init(&ioat_chan->cleanup_lock); + ioat_chan->dma_chan.device = dma; + dma_cookie_init(&ioat_chan->dma_chan); + list_add_tail(&ioat_chan->dma_chan.device_node, &dma->channels); + ioat_dma->idx[idx] = ioat_chan; + init_timer(&ioat_chan->timer); + ioat_chan->timer.function = ioat_timer_event; + ioat_chan->timer.data = data; + tasklet_init(&ioat_chan->cleanup_task, ioat_cleanup_event, data); +} + +#define IOAT_NUM_SRC_TEST 6 /* must be <= 8 */ +static int ioat_xor_val_self_test(struct ioatdma_device *ioat_dma) +{ + int i, src_idx; + struct page *dest; + struct page *xor_srcs[IOAT_NUM_SRC_TEST]; + struct page *xor_val_srcs[IOAT_NUM_SRC_TEST + 1]; + dma_addr_t dma_srcs[IOAT_NUM_SRC_TEST + 1]; + dma_addr_t dest_dma; + struct dma_async_tx_descriptor *tx; + struct dma_chan *dma_chan; + dma_cookie_t cookie; + u8 cmp_byte = 0; + u32 cmp_word; + u32 xor_val_result; + int err = 0; + struct completion cmp; + unsigned long tmo; + struct device *dev = &ioat_dma->pdev->dev; + struct dma_device *dma = &ioat_dma->dma_dev; + u8 op = 0; + + dev_dbg(dev, "%s\n", __func__); + + if (!dma_has_cap(DMA_XOR, dma->cap_mask)) + return 0; + + for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) { + xor_srcs[src_idx] = alloc_page(GFP_KERNEL); + if (!xor_srcs[src_idx]) { + while (src_idx--) + __free_page(xor_srcs[src_idx]); + return -ENOMEM; + } + } + + dest = alloc_page(GFP_KERNEL); + if (!dest) { + while (src_idx--) 
+ __free_page(xor_srcs[src_idx]);
+ return -ENOMEM;
+ }
+
+ /* Fill in src buffers */
+ for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
+ u8 *ptr = page_address(xor_srcs[src_idx]);
+
+ for (i = 0; i < PAGE_SIZE; i++)
+ ptr[i] = (1 << src_idx);
+ }
+
+ for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++)
+ cmp_byte ^= (u8) (1 << src_idx);
+
+ cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
+ (cmp_byte << 8) | cmp_byte;
+
+ memset(page_address(dest), 0, PAGE_SIZE);
+
+ dma_chan = container_of(dma->channels.next, struct dma_chan,
+ device_node);
+ if (dma->device_alloc_chan_resources(dma_chan) < 1) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ /* test xor */
+ op = IOAT_OP_XOR;
+
+ dest_dma = dma_map_page(dev, dest, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+ if (dma_mapping_error(dev, dest_dma))
+ goto dma_unmap;
+
+ for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
+ dma_srcs[i] = DMA_ERROR_CODE;
+ for (i = 0; i < IOAT_NUM_SRC_TEST; i++) {
+ dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, dma_srcs[i]))
+ goto dma_unmap;
+ }
+ tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
+ IOAT_NUM_SRC_TEST, PAGE_SIZE,
+ DMA_PREP_INTERRUPT);
+
+ if (!tx) {
+ dev_err(dev, "Self-test xor prep failed\n");
+ err = -ENODEV;
+ goto dma_unmap;
+ }
+
+ async_tx_ack(tx);
+ init_completion(&cmp);
+ tx->callback = ioat_dma_test_callback;
+ tx->callback_param = &cmp;
+ cookie = tx->tx_submit(tx);
+ if (cookie < 0) {
+ dev_err(dev, "Self-test xor setup failed\n");
+ err = -ENODEV;
+ goto dma_unmap;
+ }
+ dma->device_issue_pending(dma_chan);
+
+ tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+ if (tmo == 0 ||
+ dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
+ dev_err(dev, "Self-test xor timed out\n");
+ err = -ENODEV;
+ goto dma_unmap;
+ }
+
+ for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
+ dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE);
+
+ dma_sync_single_for_cpu(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+ for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
+ u32 *ptr = page_address(dest);
+
+ if (ptr[i] != cmp_word) {
+ dev_err(dev, "Self-test xor failed compare\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+ }
+ dma_sync_single_for_device(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+
+ dma_unmap_page(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+
+ /* skip validate if the capability is not present */
+ if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
+ goto free_resources;
+
+ op = IOAT_OP_XOR_VAL;
+
+ /* validate the sources with the destination page */
+ for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
+ xor_val_srcs[i] = xor_srcs[i];
+ xor_val_srcs[i] = dest;
+
+ xor_val_result = 1;
+
+ for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
+ dma_srcs[i] = DMA_ERROR_CODE;
+ for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) {
+ dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, dma_srcs[i]))
+ goto dma_unmap;
+ }
+ tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
+ IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
+ &xor_val_result, DMA_PREP_INTERRUPT);
+ if (!tx) {
+ dev_err(dev, "Self-test zero prep failed\n");
+ err = -ENODEV;
+ goto dma_unmap;
+ }
+
+ async_tx_ack(tx);
+ init_completion(&cmp);
+ tx->callback = ioat_dma_test_callback;
+ tx->callback_param = &cmp;
+ cookie = tx->tx_submit(tx);
+ if (cookie < 0) {
+ dev_err(dev, "Self-test zero setup failed\n");
+ err = -ENODEV;
+ goto dma_unmap;
+ }
+ dma->device_issue_pending(dma_chan);
+
+ tmo =
wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + + if (tmo == 0 || + dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) { + dev_err(dev, "Self-test validate timed out\n"); + err = -ENODEV; + goto dma_unmap; + } + + for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) + dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE); + + if (xor_val_result != 0) { + dev_err(dev, "Self-test validate failed compare\n"); + err = -ENODEV; + goto free_resources; + } + + memset(page_address(dest), 0, PAGE_SIZE); + + /* test for non-zero parity sum */ + op = IOAT_OP_XOR_VAL; + + xor_val_result = 0; + for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) + dma_srcs[i] = DMA_ERROR_CODE; + for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) { + dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE, + DMA_TO_DEVICE); + if (dma_mapping_error(dev, dma_srcs[i])) + goto dma_unmap; + } + tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs, + IOAT_NUM_SRC_TEST + 1, PAGE_SIZE, + &xor_val_result, DMA_PREP_INTERRUPT); + if (!tx) { + dev_err(dev, "Self-test 2nd zero prep failed\n"); + err = -ENODEV; + goto dma_unmap; + } + + async_tx_ack(tx); + init_completion(&cmp); + tx->callback = ioat_dma_test_callback; + tx->callback_param = &cmp; + cookie = tx->tx_submit(tx); + if (cookie < 0) { + dev_err(dev, "Self-test 2nd zero setup failed\n"); + err = -ENODEV; + goto dma_unmap; + } + dma->device_issue_pending(dma_chan); + + tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + + if (tmo == 0 || + dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) { + dev_err(dev, "Self-test 2nd validate timed out\n"); + err = -ENODEV; + goto dma_unmap; + } + + if (xor_val_result != SUM_CHECK_P_RESULT) { + dev_err(dev, "Self-test validate failed compare\n"); + err = -ENODEV; + goto dma_unmap; + } + + for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) + dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE); + + goto free_resources; +dma_unmap: + if (op == IOAT_OP_XOR) { + if (dest_dma != DMA_ERROR_CODE) + dma_unmap_page(dev, dest_dma, PAGE_SIZE, + DMA_FROM_DEVICE); + for (i = 0; i < IOAT_NUM_SRC_TEST; i++) + if (dma_srcs[i] != DMA_ERROR_CODE) + dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, + DMA_TO_DEVICE); + } else if (op == IOAT_OP_XOR_VAL) { + for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) + if (dma_srcs[i] != DMA_ERROR_CODE) + dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, + DMA_TO_DEVICE); + } +free_resources: + dma->device_free_chan_resources(dma_chan); +out: + src_idx = IOAT_NUM_SRC_TEST; + while (src_idx--) + __free_page(xor_srcs[src_idx]); + __free_page(dest); + return err; +} + +static int ioat3_dma_self_test(struct ioatdma_device *ioat_dma) +{ + int rc; + + rc = ioat_dma_self_test(ioat_dma); + if (rc) + return rc; + + rc = ioat_xor_val_self_test(ioat_dma); + + return rc; +} + +static void ioat_intr_quirk(struct ioatdma_device *ioat_dma) +{ + struct dma_device *dma; + struct dma_chan *c; + struct ioatdma_chan *ioat_chan; + u32 errmask; + + dma = &ioat_dma->dma_dev; + + /* + * if we have descriptor write back error status, we mask the + * error interrupts + */ + if (ioat_dma->cap & IOAT_CAP_DWBES) { + list_for_each_entry(c, &dma->channels, device_node) { + ioat_chan = to_ioat_chan(c); + errmask = readl(ioat_chan->reg_base + + IOAT_CHANERR_MASK_OFFSET); + errmask |= IOAT_CHANERR_XOR_P_OR_CRC_ERR | + IOAT_CHANERR_XOR_Q_ERR; + writel(errmask, ioat_chan->reg_base + + IOAT_CHANERR_MASK_OFFSET); + } + } +} + +static int ioat3_dma_probe(struct ioatdma_device *ioat_dma, int dca) +{ + struct pci_dev *pdev = 
ioat_dma->pdev; + int dca_en = system_has_dca_enabled(pdev); + struct dma_device *dma; + struct dma_chan *c; + struct ioatdma_chan *ioat_chan; + bool is_raid_device = false; + int err; + + dma = &ioat_dma->dma_dev; + dma->device_prep_dma_memcpy = ioat_dma_prep_memcpy_lock; + dma->device_issue_pending = ioat_issue_pending; + dma->device_alloc_chan_resources = ioat_alloc_chan_resources; + dma->device_free_chan_resources = ioat_free_chan_resources; + + dma_cap_set(DMA_INTERRUPT, dma->cap_mask); + dma->device_prep_dma_interrupt = ioat_prep_interrupt_lock; + + ioat_dma->cap = readl(ioat_dma->reg_base + IOAT_DMA_CAP_OFFSET); + + if (is_xeon_cb32(pdev) || is_bwd_noraid(pdev)) + ioat_dma->cap &= + ~(IOAT_CAP_XOR | IOAT_CAP_PQ | IOAT_CAP_RAID16SS); + + /* dca is incompatible with raid operations */ + if (dca_en && (ioat_dma->cap & (IOAT_CAP_XOR|IOAT_CAP_PQ))) + ioat_dma->cap &= ~(IOAT_CAP_XOR|IOAT_CAP_PQ); + + if (ioat_dma->cap & IOAT_CAP_XOR) { + is_raid_device = true; + dma->max_xor = 8; + + dma_cap_set(DMA_XOR, dma->cap_mask); + dma->device_prep_dma_xor = ioat_prep_xor; + + dma_cap_set(DMA_XOR_VAL, dma->cap_mask); + dma->device_prep_dma_xor_val = ioat_prep_xor_val; + } + + if (ioat_dma->cap & IOAT_CAP_PQ) { + is_raid_device = true; + + dma->device_prep_dma_pq = ioat_prep_pq; + dma->device_prep_dma_pq_val = ioat_prep_pq_val; + dma_cap_set(DMA_PQ, dma->cap_mask); + dma_cap_set(DMA_PQ_VAL, dma->cap_mask); + + if (ioat_dma->cap & IOAT_CAP_RAID16SS) + dma_set_maxpq(dma, 16, 0); + else + dma_set_maxpq(dma, 8, 0); + + if (!(ioat_dma->cap & IOAT_CAP_XOR)) { + dma->device_prep_dma_xor = ioat_prep_pqxor; + dma->device_prep_dma_xor_val = ioat_prep_pqxor_val; + dma_cap_set(DMA_XOR, dma->cap_mask); + dma_cap_set(DMA_XOR_VAL, dma->cap_mask); + + if (ioat_dma->cap & IOAT_CAP_RAID16SS) + dma->max_xor = 16; + else + dma->max_xor = 8; + } + } + + dma->device_tx_status = ioat_tx_status; + + /* starting with CB3.3 super extended descriptors are supported */ + if (ioat_dma->cap & IOAT_CAP_RAID16SS) { + char pool_name[14]; + int i; + + for (i = 0; i < MAX_SED_POOLS; i++) { + snprintf(pool_name, 14, "ioat_hw%d_sed", i); + + /* allocate SED DMA pool */ + ioat_dma->sed_hw_pool[i] = dmam_pool_create(pool_name, + &pdev->dev, + SED_SIZE * (i + 1), 64, 0); + if (!ioat_dma->sed_hw_pool[i]) + return -ENOMEM; + + } + } + + if (!(ioat_dma->cap & (IOAT_CAP_XOR | IOAT_CAP_PQ))) + dma_cap_set(DMA_PRIVATE, dma->cap_mask); + + err = ioat_probe(ioat_dma); + if (err) + return err; + + list_for_each_entry(c, &dma->channels, device_node) { + ioat_chan = to_ioat_chan(c); + writel(IOAT_DMA_DCA_ANY_CPU, + ioat_chan->reg_base + IOAT_DCACTRL_OFFSET); + } + + err = ioat_register(ioat_dma); + if (err) + return err; + + ioat_kobject_add(ioat_dma, &ioat_ktype); + + if (dca) + ioat_dma->dca = ioat_dca_init(pdev, ioat_dma->reg_base); + + return 0; +} + +static void ioat_shutdown(struct pci_dev *pdev) +{ + struct ioatdma_device *ioat_dma = pci_get_drvdata(pdev); + struct ioatdma_chan *ioat_chan; + int i; + + if (!ioat_dma) + return; + + for (i = 0; i < IOAT_MAX_CHANS; i++) { + ioat_chan = ioat_dma->idx[i]; + if (!ioat_chan) + continue; + + spin_lock_bh(&ioat_chan->prep_lock); + set_bit(IOAT_CHAN_DOWN, &ioat_chan->state); + del_timer_sync(&ioat_chan->timer); + spin_unlock_bh(&ioat_chan->prep_lock); + /* this should quiesce then reset */ + ioat_reset_hw(ioat_chan); + } + + ioat_disable_interrupts(ioat_dma); +} + +void ioat_resume(struct ioatdma_device *ioat_dma) +{ + struct ioatdma_chan *ioat_chan; + u32 chanerr; + int i; + + for (i = 0; i < 
IOAT_MAX_CHANS; i++) { + ioat_chan = ioat_dma->idx[i]; + if (!ioat_chan) + continue; + + spin_lock_bh(&ioat_chan->prep_lock); + clear_bit(IOAT_CHAN_DOWN, &ioat_chan->state); + spin_unlock_bh(&ioat_chan->prep_lock); + + chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + + /* no need to reset as shutdown already did that */ + } +} + +#define DRV_NAME "ioatdma" + +static pci_ers_result_t ioat_pcie_error_detected(struct pci_dev *pdev, + enum pci_channel_state error) +{ + dev_dbg(&pdev->dev, "%s: PCIe AER error %d\n", DRV_NAME, error); + + /* quiesce and block I/O */ + ioat_shutdown(pdev); + + return PCI_ERS_RESULT_NEED_RESET; +} + +static pci_ers_result_t ioat_pcie_error_slot_reset(struct pci_dev *pdev) +{ + pci_ers_result_t result = PCI_ERS_RESULT_RECOVERED; + int err; + + dev_dbg(&pdev->dev, "%s post reset handling\n", DRV_NAME); + + if (pci_enable_device_mem(pdev) < 0) { + dev_err(&pdev->dev, + "Failed to enable PCIe device after reset.\n"); + result = PCI_ERS_RESULT_DISCONNECT; + } else { + pci_set_master(pdev); + pci_restore_state(pdev); + pci_save_state(pdev); + pci_wake_from_d3(pdev, false); + } + + err = pci_cleanup_aer_uncorrect_error_status(pdev); + if (err) { + dev_err(&pdev->dev, + "AER uncorrect error status clear failed: %#x\n", err); + } + + return result; +} + +static void ioat_pcie_error_resume(struct pci_dev *pdev) +{ + struct ioatdma_device *ioat_dma = pci_get_drvdata(pdev); + + dev_dbg(&pdev->dev, "%s: AER handling resuming\n", DRV_NAME); + + /* initialize and bring everything back */ + ioat_resume(ioat_dma); +} + +static const struct pci_error_handlers ioat_err_handler = { + .error_detected = ioat_pcie_error_detected, + .slot_reset = ioat_pcie_error_slot_reset, + .resume = ioat_pcie_error_resume, +}; + +static struct pci_driver ioat_pci_driver = { + .name = DRV_NAME, + .id_table = ioat_pci_tbl, + .probe = ioat_pci_probe, + .remove = ioat_remove, + .shutdown = ioat_shutdown, + .err_handler = &ioat_err_handler, +}; + +static struct ioatdma_device * +alloc_ioatdma(struct pci_dev *pdev, void __iomem *iobase) +{ + struct device *dev = &pdev->dev; + struct ioatdma_device *d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL); + + if (!d) + return NULL; + d->pdev = pdev; + d->reg_base = iobase; + return d; +} + +static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + void __iomem * const *iomap; + struct device *dev = &pdev->dev; + struct ioatdma_device *device; + int err; + + err = pcim_enable_device(pdev); + if (err) + return err; + + err = pcim_iomap_regions(pdev, 1 << IOAT_MMIO_BAR, DRV_NAME); + if (err) + return err; + iomap = pcim_iomap_table(pdev); + if (!iomap) + return -ENOMEM; + + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + if (err) + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) + return err; + + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + if (err) + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) + return err; + + device = alloc_ioatdma(pdev, iomap[IOAT_MMIO_BAR]); + if (!device) + return -ENOMEM; + pci_set_master(pdev); + pci_set_drvdata(pdev, device); + + device->version = readb(device->reg_base + IOAT_VER_OFFSET); + if (device->version >= IOAT_VER_3_0) { + err = ioat3_dma_probe(device, ioat_dca_enabled); + + if (device->version >= IOAT_VER_3_3) + pci_enable_pcie_error_reporting(pdev); + } else + return -ENODEV; + + if (err) { + dev_err(dev, "Intel(R) I/OAT DMA Engine init failed\n"); + 
pci_disable_pcie_error_reporting(pdev); + return -ENODEV; + } + + return 0; +} + +static void ioat_remove(struct pci_dev *pdev) +{ + struct ioatdma_device *device = pci_get_drvdata(pdev); + + if (!device) + return; + + dev_err(&pdev->dev, "Removing dma and dca services\n"); + if (device->dca) { + unregister_dca_provider(device->dca, &pdev->dev); + free_dca_provider(device->dca); + device->dca = NULL; + } + + pci_disable_pcie_error_reporting(pdev); + ioat_dma_remove(device); +} + +static int __init ioat_init_module(void) +{ + int err = -ENOMEM; + + pr_info("%s: Intel(R) QuickData Technology Driver %s\n", + DRV_NAME, IOAT_DMA_VERSION); + + ioat_cache = kmem_cache_create("ioat", sizeof(struct ioat_ring_ent), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!ioat_cache) + return -ENOMEM; + + ioat_sed_cache = KMEM_CACHE(ioat_sed_ent, 0); + if (!ioat_sed_cache) + goto err_ioat_cache; + + err = pci_register_driver(&ioat_pci_driver); + if (err) + goto err_ioat3_cache; + + return 0; + + err_ioat3_cache: + kmem_cache_destroy(ioat_sed_cache); + + err_ioat_cache: + kmem_cache_destroy(ioat_cache); + + return err; +} +module_init(ioat_init_module); + +static void __exit ioat_exit_module(void) +{ + pci_unregister_driver(&ioat_pci_driver); + kmem_cache_destroy(ioat_cache); +} +module_exit(ioat_exit_module); diff --git a/kernel/drivers/dma/ioat/pci.c b/kernel/drivers/dma/ioat/pci.c deleted file mode 100644 index 76f0dc688..000000000 --- a/kernel/drivers/dma/ioat/pci.c +++ /dev/null @@ -1,258 +0,0 @@ -/* - * Intel I/OAT DMA Linux driver - * Copyright(c) 2007 - 2009 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - */ - -/* - * This driver supports an Intel I/OAT DMA engine, which does asynchronous - * copy operations. 
- */ - -#include <linux/init.h> -#include <linux/module.h> -#include <linux/pci.h> -#include <linux/interrupt.h> -#include <linux/dca.h> -#include <linux/slab.h> -#include "dma.h" -#include "dma_v2.h" -#include "registers.h" -#include "hw.h" - -MODULE_VERSION(IOAT_DMA_VERSION); -MODULE_LICENSE("Dual BSD/GPL"); -MODULE_AUTHOR("Intel Corporation"); - -static struct pci_device_id ioat_pci_tbl[] = { - /* I/OAT v1 platforms */ - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) }, - { PCI_VDEVICE(UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) }, - - /* I/OAT v2 platforms */ - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) }, - - /* I/OAT v3 platforms */ - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) }, - - /* I/OAT v3.2 platforms */ - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF0) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF1) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF2) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF3) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF4) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF5) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF6) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF7) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF8) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF9) }, - - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB0) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB1) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB2) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB3) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB4) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB5) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB6) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB7) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB8) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB9) }, - - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB0) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB1) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB2) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB3) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB4) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB5) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB6) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB7) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB8) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB9) }, - - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW0) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW1) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW2) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW3) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW4) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW5) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW6) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW7) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW8) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW9) }, - - /* I/OAT 
v3.3 platforms */ - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD0) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD1) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD2) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD3) }, - - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE0) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE1) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE2) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE3) }, - - { 0, } -}; -MODULE_DEVICE_TABLE(pci, ioat_pci_tbl); - -static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id); -static void ioat_remove(struct pci_dev *pdev); - -static int ioat_dca_enabled = 1; -module_param(ioat_dca_enabled, int, 0644); -MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)"); - -struct kmem_cache *ioat2_cache; -struct kmem_cache *ioat3_sed_cache; - -#define DRV_NAME "ioatdma" - -static struct pci_driver ioat_pci_driver = { - .name = DRV_NAME, - .id_table = ioat_pci_tbl, - .probe = ioat_pci_probe, - .remove = ioat_remove, -}; - -static struct ioatdma_device * -alloc_ioatdma(struct pci_dev *pdev, void __iomem *iobase) -{ - struct device *dev = &pdev->dev; - struct ioatdma_device *d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL); - - if (!d) - return NULL; - d->pdev = pdev; - d->reg_base = iobase; - return d; -} - -static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) -{ - void __iomem * const *iomap; - struct device *dev = &pdev->dev; - struct ioatdma_device *device; - int err; - - err = pcim_enable_device(pdev); - if (err) - return err; - - err = pcim_iomap_regions(pdev, 1 << IOAT_MMIO_BAR, DRV_NAME); - if (err) - return err; - iomap = pcim_iomap_table(pdev); - if (!iomap) - return -ENOMEM; - - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); - if (err) - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); - if (err) - return err; - - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); - if (err) - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - if (err) - return err; - - device = alloc_ioatdma(pdev, iomap[IOAT_MMIO_BAR]); - if (!device) - return -ENOMEM; - pci_set_master(pdev); - pci_set_drvdata(pdev, device); - - device->version = readb(device->reg_base + IOAT_VER_OFFSET); - if (device->version == IOAT_VER_1_2) - err = ioat1_dma_probe(device, ioat_dca_enabled); - else if (device->version == IOAT_VER_2_0) - err = ioat2_dma_probe(device, ioat_dca_enabled); - else if (device->version >= IOAT_VER_3_0) - err = ioat3_dma_probe(device, ioat_dca_enabled); - else - return -ENODEV; - - if (err) { - dev_err(dev, "Intel(R) I/OAT DMA Engine init failed\n"); - return -ENODEV; - } - - return 0; -} - -static void ioat_remove(struct pci_dev *pdev) -{ - struct ioatdma_device *device = pci_get_drvdata(pdev); - - if (!device) - return; - - dev_err(&pdev->dev, "Removing dma and dca services\n"); - if (device->dca) { - unregister_dca_provider(device->dca, &pdev->dev); - free_dca_provider(device->dca); - device->dca = NULL; - } - ioat_dma_remove(device); -} - -static int __init ioat_init_module(void) -{ - int err = -ENOMEM; - - pr_info("%s: Intel(R) QuickData Technology Driver %s\n", - DRV_NAME, IOAT_DMA_VERSION); - - ioat2_cache = kmem_cache_create("ioat2", sizeof(struct ioat_ring_ent), - 0, SLAB_HWCACHE_ALIGN, NULL); - if (!ioat2_cache) - return -ENOMEM; - - ioat3_sed_cache = KMEM_CACHE(ioat_sed_ent, 0); - if (!ioat3_sed_cache) - goto err_ioat2_cache; - - err = pci_register_driver(&ioat_pci_driver); - if (err) - goto 
err_ioat3_cache; - - return 0; - - err_ioat3_cache: - kmem_cache_destroy(ioat3_sed_cache); - - err_ioat2_cache: - kmem_cache_destroy(ioat2_cache); - - return err; -} -module_init(ioat_init_module); - -static void __exit ioat_exit_module(void) -{ - pci_unregister_driver(&ioat_pci_driver); - kmem_cache_destroy(ioat2_cache); -} -module_exit(ioat_exit_module); diff --git a/kernel/drivers/dma/ioat/prep.c b/kernel/drivers/dma/ioat/prep.c new file mode 100644 index 000000000..6bb4a13a8 --- /dev/null +++ b/kernel/drivers/dma/ioat/prep.c @@ -0,0 +1,749 @@ +/* + * Intel I/OAT DMA Linux driver + * Copyright(c) 2004 - 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/gfp.h> +#include <linux/dmaengine.h> +#include <linux/dma-mapping.h> +#include <linux/prefetch.h> +#include "../dmaengine.h" +#include "registers.h" +#include "hw.h" +#include "dma.h" + +#define MAX_SCF 1024 + +/* provide a lookup table for setting the source address in the base or + * extended descriptor of an xor or pq descriptor + */ +static const u8 xor_idx_to_desc = 0xe0; +static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 }; +static const u8 pq_idx_to_desc = 0xf8; +static const u8 pq16_idx_to_desc[] = { 0, 0, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2 }; +static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 }; +static const u8 pq16_idx_to_field[] = { 1, 4, 1, 2, 3, 4, 5, 6, 7, + 0, 1, 2, 3, 4, 5, 6 }; + +static void xor_set_src(struct ioat_raw_descriptor *descs[2], + dma_addr_t addr, u32 offset, int idx) +{ + struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1]; + + raw->field[xor_idx_to_field[idx]] = addr + offset; +} + +static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx) +{ + struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1]; + + return raw->field[pq_idx_to_field[idx]]; +} + +static dma_addr_t pq16_get_src(struct ioat_raw_descriptor *desc[3], int idx) +{ + struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]]; + + return raw->field[pq16_idx_to_field[idx]]; +} + +static void pq_set_src(struct ioat_raw_descriptor *descs[2], + dma_addr_t addr, u32 offset, u8 coef, int idx) +{ + struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0]; + struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1]; + + raw->field[pq_idx_to_field[idx]] = addr + offset; + pq->coef[idx] = coef; +} + +static void pq16_set_src(struct ioat_raw_descriptor *desc[3], + dma_addr_t addr, u32 offset, u8 coef, unsigned idx) +{ + struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *)desc[0]; + struct ioat_pq16a_descriptor *pq16 = + (struct ioat_pq16a_descriptor *)desc[1]; + struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]]; + + raw->field[pq16_idx_to_field[idx]] = addr + offset; + + if (idx < 8) + pq->coef[idx] = coef; + else + pq16->coef[idx - 8] = coef; +} + +static struct ioat_sed_ent * +ioat3_alloc_sed(struct ioatdma_device 
*ioat_dma, unsigned int hw_pool) +{ + struct ioat_sed_ent *sed; + gfp_t flags = __GFP_ZERO | GFP_ATOMIC; + + sed = kmem_cache_alloc(ioat_sed_cache, flags); + if (!sed) + return NULL; + + sed->hw_pool = hw_pool; + sed->hw = dma_pool_alloc(ioat_dma->sed_hw_pool[hw_pool], + flags, &sed->dma); + if (!sed->hw) { + kmem_cache_free(ioat_sed_cache, sed); + return NULL; + } + + return sed; +} + +struct dma_async_tx_descriptor * +ioat_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, + dma_addr_t dma_src, size_t len, unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct ioat_dma_descriptor *hw; + struct ioat_ring_ent *desc; + dma_addr_t dst = dma_dest; + dma_addr_t src = dma_src; + size_t total_len = len; + int num_descs, idx, i; + + if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state)) + return NULL; + + num_descs = ioat_xferlen_to_descs(ioat_chan, len); + if (likely(num_descs) && + ioat_check_space_lock(ioat_chan, num_descs) == 0) + idx = ioat_chan->head; + else + return NULL; + i = 0; + do { + size_t copy = min_t(size_t, len, 1 << ioat_chan->xfercap_log); + + desc = ioat_get_ring_ent(ioat_chan, idx + i); + hw = desc->hw; + + hw->size = copy; + hw->ctl = 0; + hw->src_addr = src; + hw->dst_addr = dst; + + len -= copy; + dst += copy; + src += copy; + dump_desc_dbg(ioat_chan, desc); + } while (++i < num_descs); + + desc->txd.flags = flags; + desc->len = total_len; + hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + hw->ctl_f.compl_write = 1; + dump_desc_dbg(ioat_chan, desc); + /* we leave the channel locked to ensure in order submission */ + + return &desc->txd; +} + + +static struct dma_async_tx_descriptor * +__ioat_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result, + dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt, + size_t len, unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct ioat_ring_ent *compl_desc; + struct ioat_ring_ent *desc; + struct ioat_ring_ent *ext; + size_t total_len = len; + struct ioat_xor_descriptor *xor; + struct ioat_xor_ext_descriptor *xor_ex = NULL; + struct ioat_dma_descriptor *hw; + int num_descs, with_ext, idx, i; + u32 offset = 0; + u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR; + + BUG_ON(src_cnt < 2); + + num_descs = ioat_xferlen_to_descs(ioat_chan, len); + /* we need 2x the number of descriptors to cover greater than 5 + * sources + */ + if (src_cnt > 5) { + with_ext = 1; + num_descs *= 2; + } else + with_ext = 0; + + /* completion writes from the raid engine may pass completion + * writes from the legacy engine, so we need one extra null + * (legacy) descriptor to ensure all completion writes arrive in + * order. 
+ */ + if (likely(num_descs) && + ioat_check_space_lock(ioat_chan, num_descs+1) == 0) + idx = ioat_chan->head; + else + return NULL; + i = 0; + do { + struct ioat_raw_descriptor *descs[2]; + size_t xfer_size = min_t(size_t, + len, 1 << ioat_chan->xfercap_log); + int s; + + desc = ioat_get_ring_ent(ioat_chan, idx + i); + xor = desc->xor; + + /* save a branch by unconditionally retrieving the + * extended descriptor xor_set_src() knows to not write + * to it in the single descriptor case + */ + ext = ioat_get_ring_ent(ioat_chan, idx + i + 1); + xor_ex = ext->xor_ex; + + descs[0] = (struct ioat_raw_descriptor *) xor; + descs[1] = (struct ioat_raw_descriptor *) xor_ex; + for (s = 0; s < src_cnt; s++) + xor_set_src(descs, src[s], offset, s); + xor->size = xfer_size; + xor->dst_addr = dest + offset; + xor->ctl = 0; + xor->ctl_f.op = op; + xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt); + + len -= xfer_size; + offset += xfer_size; + dump_desc_dbg(ioat_chan, desc); + } while ((i += 1 + with_ext) < num_descs); + + /* last xor descriptor carries the unmap parameters and fence bit */ + desc->txd.flags = flags; + desc->len = total_len; + if (result) + desc->result = result; + xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + + /* completion descriptor carries interrupt bit */ + compl_desc = ioat_get_ring_ent(ioat_chan, idx + i); + compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT; + hw = compl_desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + hw->ctl_f.compl_write = 1; + hw->size = NULL_DESC_BUFFER_SIZE; + dump_desc_dbg(ioat_chan, compl_desc); + + /* we leave the channel locked to ensure in order submission */ + return &compl_desc->txd; +} + +struct dma_async_tx_descriptor * +ioat_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, + unsigned int src_cnt, size_t len, unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(chan); + + if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state)) + return NULL; + + return __ioat_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags); +} + +struct dma_async_tx_descriptor * +ioat_prep_xor_val(struct dma_chan *chan, dma_addr_t *src, + unsigned int src_cnt, size_t len, + enum sum_check_flags *result, unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(chan); + + if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state)) + return NULL; + + /* the cleanup routine only sets bits on validate failure, it + * does not clear bits on validate success... so clear it here + */ + *result = 0; + + return __ioat_prep_xor_lock(chan, result, src[0], &src[1], + src_cnt - 1, len, flags); +} + +static void +dump_pq_desc_dbg(struct ioatdma_chan *ioat_chan, struct ioat_ring_ent *desc, + struct ioat_ring_ent *ext) +{ + struct device *dev = to_dev(ioat_chan); + struct ioat_pq_descriptor *pq = desc->pq; + struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL; + struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex }; + int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt); + int i; + + dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" + " sz: %#10.8x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'" + " src_cnt: %d)\n", + desc_id(desc), (unsigned long long) desc->txd.phys, + (unsigned long long) (pq_ex ? pq_ex->next : pq->next), + desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, + pq->ctl_f.int_en, pq->ctl_f.compl_write, + pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? 
"" : "q", + pq->ctl_f.src_cnt); + for (i = 0; i < src_cnt; i++) + dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i, + (unsigned long long) pq_get_src(descs, i), pq->coef[i]); + dev_dbg(dev, "\tP: %#llx\n", pq->p_addr); + dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr); + dev_dbg(dev, "\tNEXT: %#llx\n", pq->next); +} + +static void dump_pq16_desc_dbg(struct ioatdma_chan *ioat_chan, + struct ioat_ring_ent *desc) +{ + struct device *dev = to_dev(ioat_chan); + struct ioat_pq_descriptor *pq = desc->pq; + struct ioat_raw_descriptor *descs[] = { (void *)pq, + (void *)pq, + (void *)pq }; + int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt); + int i; + + if (desc->sed) { + descs[1] = (void *)desc->sed->hw; + descs[2] = (void *)desc->sed->hw + 64; + } + + dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" + " sz: %#x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'" + " src_cnt: %d)\n", + desc_id(desc), (unsigned long long) desc->txd.phys, + (unsigned long long) pq->next, + desc->txd.flags, pq->size, pq->ctl, + pq->ctl_f.op, pq->ctl_f.int_en, + pq->ctl_f.compl_write, + pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q", + pq->ctl_f.src_cnt); + for (i = 0; i < src_cnt; i++) { + dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i, + (unsigned long long) pq16_get_src(descs, i), + pq->coef[i]); + } + dev_dbg(dev, "\tP: %#llx\n", pq->p_addr); + dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr); +} + +static struct dma_async_tx_descriptor * +__ioat_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, + const dma_addr_t *dst, const dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, + size_t len, unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; + struct ioat_ring_ent *compl_desc; + struct ioat_ring_ent *desc; + struct ioat_ring_ent *ext; + size_t total_len = len; + struct ioat_pq_descriptor *pq; + struct ioat_pq_ext_descriptor *pq_ex = NULL; + struct ioat_dma_descriptor *hw; + u32 offset = 0; + u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ; + int i, s, idx, with_ext, num_descs; + int cb32 = (ioat_dma->version < IOAT_VER_3_3) ? 1 : 0; + + dev_dbg(to_dev(ioat_chan), "%s\n", __func__); + /* the engine requires at least two sources (we provide + * at least 1 implied source in the DMA_PREP_CONTINUE case) + */ + BUG_ON(src_cnt + dmaf_continue(flags) < 2); + + num_descs = ioat_xferlen_to_descs(ioat_chan, len); + /* we need 2x the number of descriptors to cover greater than 3 + * sources (we need 1 extra source in the q-only continuation + * case and 3 extra sources in the p+q continuation case. + */ + if (src_cnt + dmaf_p_disabled_continue(flags) > 3 || + (dmaf_continue(flags) && !dmaf_p_disabled_continue(flags))) { + with_ext = 1; + num_descs *= 2; + } else + with_ext = 0; + + /* completion writes from the raid engine may pass completion + * writes from the legacy engine, so we need one extra null + * (legacy) descriptor to ensure all completion writes arrive in + * order. 
+ */ + if (likely(num_descs) && + ioat_check_space_lock(ioat_chan, num_descs + cb32) == 0) + idx = ioat_chan->head; + else + return NULL; + i = 0; + do { + struct ioat_raw_descriptor *descs[2]; + size_t xfer_size = min_t(size_t, len, + 1 << ioat_chan->xfercap_log); + + desc = ioat_get_ring_ent(ioat_chan, idx + i); + pq = desc->pq; + + /* save a branch by unconditionally retrieving the + * extended descriptor pq_set_src() knows to not write + * to it in the single descriptor case + */ + ext = ioat_get_ring_ent(ioat_chan, idx + i + with_ext); + pq_ex = ext->pq_ex; + + descs[0] = (struct ioat_raw_descriptor *) pq; + descs[1] = (struct ioat_raw_descriptor *) pq_ex; + + for (s = 0; s < src_cnt; s++) + pq_set_src(descs, src[s], offset, scf[s], s); + + /* see the comment for dma_maxpq in include/linux/dmaengine.h */ + if (dmaf_p_disabled_continue(flags)) + pq_set_src(descs, dst[1], offset, 1, s++); + else if (dmaf_continue(flags)) { + pq_set_src(descs, dst[0], offset, 0, s++); + pq_set_src(descs, dst[1], offset, 1, s++); + pq_set_src(descs, dst[1], offset, 0, s++); + } + pq->size = xfer_size; + pq->p_addr = dst[0] + offset; + pq->q_addr = dst[1] + offset; + pq->ctl = 0; + pq->ctl_f.op = op; + /* we turn on descriptor write back error status */ + if (ioat_dma->cap & IOAT_CAP_DWBES) + pq->ctl_f.wb_en = result ? 1 : 0; + pq->ctl_f.src_cnt = src_cnt_to_hw(s); + pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P); + pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q); + + len -= xfer_size; + offset += xfer_size; + } while ((i += 1 + with_ext) < num_descs); + + /* last pq descriptor carries the unmap parameters and fence bit */ + desc->txd.flags = flags; + desc->len = total_len; + if (result) + desc->result = result; + pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + dump_pq_desc_dbg(ioat_chan, desc, ext); + + if (!cb32) { + pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + pq->ctl_f.compl_write = 1; + compl_desc = desc; + } else { + /* completion descriptor carries interrupt bit */ + compl_desc = ioat_get_ring_ent(ioat_chan, idx + i); + compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT; + hw = compl_desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + hw->ctl_f.compl_write = 1; + hw->size = NULL_DESC_BUFFER_SIZE; + dump_desc_dbg(ioat_chan, compl_desc); + } + + + /* we leave the channel locked to ensure in order submission */ + return &compl_desc->txd; +} + +static struct dma_async_tx_descriptor * +__ioat_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result, + const dma_addr_t *dst, const dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, + size_t len, unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; + struct ioat_ring_ent *desc; + size_t total_len = len; + struct ioat_pq_descriptor *pq; + u32 offset = 0; + u8 op; + int i, s, idx, num_descs; + + /* this function is only called with 9-16 sources */ + op = result ? IOAT_OP_PQ_VAL_16S : IOAT_OP_PQ_16S; + + dev_dbg(to_dev(ioat_chan), "%s\n", __func__); + + num_descs = ioat_xferlen_to_descs(ioat_chan, len); + + /* + * 16 source pq is only available on cb3.3 and has no completion + * write hw bug. 
+ */ + if (num_descs && ioat_check_space_lock(ioat_chan, num_descs) == 0) + idx = ioat_chan->head; + else + return NULL; + + i = 0; + + do { + struct ioat_raw_descriptor *descs[4]; + size_t xfer_size = min_t(size_t, len, + 1 << ioat_chan->xfercap_log); + + desc = ioat_get_ring_ent(ioat_chan, idx + i); + pq = desc->pq; + + descs[0] = (struct ioat_raw_descriptor *) pq; + + desc->sed = ioat3_alloc_sed(ioat_dma, (src_cnt-2) >> 3); + if (!desc->sed) { + dev_err(to_dev(ioat_chan), + "%s: no free sed entries\n", __func__); + return NULL; + } + + pq->sed_addr = desc->sed->dma; + desc->sed->parent = desc; + + descs[1] = (struct ioat_raw_descriptor *)desc->sed->hw; + descs[2] = (void *)descs[1] + 64; + + for (s = 0; s < src_cnt; s++) + pq16_set_src(descs, src[s], offset, scf[s], s); + + /* see the comment for dma_maxpq in include/linux/dmaengine.h */ + if (dmaf_p_disabled_continue(flags)) + pq16_set_src(descs, dst[1], offset, 1, s++); + else if (dmaf_continue(flags)) { + pq16_set_src(descs, dst[0], offset, 0, s++); + pq16_set_src(descs, dst[1], offset, 1, s++); + pq16_set_src(descs, dst[1], offset, 0, s++); + } + + pq->size = xfer_size; + pq->p_addr = dst[0] + offset; + pq->q_addr = dst[1] + offset; + pq->ctl = 0; + pq->ctl_f.op = op; + pq->ctl_f.src_cnt = src16_cnt_to_hw(s); + /* we turn on descriptor write back error status */ + if (ioat_dma->cap & IOAT_CAP_DWBES) + pq->ctl_f.wb_en = result ? 1 : 0; + pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P); + pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q); + + len -= xfer_size; + offset += xfer_size; + } while (++i < num_descs); + + /* last pq descriptor carries the unmap parameters and fence bit */ + desc->txd.flags = flags; + desc->len = total_len; + if (result) + desc->result = result; + pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + + /* with cb3.3 we should be able to do completion w/o a null desc */ + pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + pq->ctl_f.compl_write = 1; + + dump_pq16_desc_dbg(ioat_chan, desc); + + /* we leave the channel locked to ensure in order submission */ + return &desc->txd; +} + +static int src_cnt_flags(unsigned int src_cnt, unsigned long flags) +{ + if (dmaf_p_disabled_continue(flags)) + return src_cnt + 1; + else if (dmaf_continue(flags)) + return src_cnt + 3; + else + return src_cnt; +} + +struct dma_async_tx_descriptor * +ioat_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(chan); + + if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state)) + return NULL; + + /* specify valid address for disabled result */ + if (flags & DMA_PREP_PQ_DISABLE_P) + dst[0] = dst[1]; + if (flags & DMA_PREP_PQ_DISABLE_Q) + dst[1] = dst[0]; + + /* handle the single source multiply case from the raid6 + * recovery path + */ + if ((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1) { + dma_addr_t single_source[2]; + unsigned char single_source_coef[2]; + + BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q); + single_source[0] = src[0]; + single_source[1] = src[0]; + single_source_coef[0] = scf[0]; + single_source_coef[1] = 0; + + return src_cnt_flags(src_cnt, flags) > 8 ? + __ioat_prep_pq16_lock(chan, NULL, dst, single_source, + 2, single_source_coef, len, + flags) : + __ioat_prep_pq_lock(chan, NULL, dst, single_source, 2, + single_source_coef, len, flags); + + } else { + return src_cnt_flags(src_cnt, flags) > 8 ? 
+ __ioat_prep_pq16_lock(chan, NULL, dst, src, src_cnt, + scf, len, flags) : + __ioat_prep_pq_lock(chan, NULL, dst, src, src_cnt, + scf, len, flags); + } +} + +struct dma_async_tx_descriptor * +ioat_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + enum sum_check_flags *pqres, unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(chan); + + if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state)) + return NULL; + + /* specify valid address for disabled result */ + if (flags & DMA_PREP_PQ_DISABLE_P) + pq[0] = pq[1]; + if (flags & DMA_PREP_PQ_DISABLE_Q) + pq[1] = pq[0]; + + /* the cleanup routine only sets bits on validate failure, it + * does not clear bits on validate success... so clear it here + */ + *pqres = 0; + + return src_cnt_flags(src_cnt, flags) > 8 ? + __ioat_prep_pq16_lock(chan, pqres, pq, src, src_cnt, scf, len, + flags) : + __ioat_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len, + flags); +} + +struct dma_async_tx_descriptor * +ioat_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src, + unsigned int src_cnt, size_t len, unsigned long flags) +{ + unsigned char scf[MAX_SCF]; + dma_addr_t pq[2]; + struct ioatdma_chan *ioat_chan = to_ioat_chan(chan); + + if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state)) + return NULL; + + if (src_cnt > MAX_SCF) + return NULL; + + memset(scf, 0, src_cnt); + pq[0] = dst; + flags |= DMA_PREP_PQ_DISABLE_Q; + pq[1] = dst; /* specify valid address for disabled result */ + + return src_cnt_flags(src_cnt, flags) > 8 ? + __ioat_prep_pq16_lock(chan, NULL, pq, src, src_cnt, scf, len, + flags) : + __ioat_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len, + flags); +} + +struct dma_async_tx_descriptor * +ioat_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src, + unsigned int src_cnt, size_t len, + enum sum_check_flags *result, unsigned long flags) +{ + unsigned char scf[MAX_SCF]; + dma_addr_t pq[2]; + struct ioatdma_chan *ioat_chan = to_ioat_chan(chan); + + if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state)) + return NULL; + + if (src_cnt > MAX_SCF) + return NULL; + + /* the cleanup routine only sets bits on validate failure, it + * does not clear bits on validate success... so clear it here + */ + *result = 0; + + memset(scf, 0, src_cnt); + pq[0] = src[0]; + flags |= DMA_PREP_PQ_DISABLE_Q; + pq[1] = pq[0]; /* specify valid address for disabled result */ + + return src_cnt_flags(src_cnt, flags) > 8 ? 
+ __ioat_prep_pq16_lock(chan, result, pq, &src[1], src_cnt - 1, + scf, len, flags) : + __ioat_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, + scf, len, flags); +} + +struct dma_async_tx_descriptor * +ioat_prep_interrupt_lock(struct dma_chan *c, unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct ioat_ring_ent *desc; + struct ioat_dma_descriptor *hw; + + if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state)) + return NULL; + + if (ioat_check_space_lock(ioat_chan, 1) == 0) + desc = ioat_get_ring_ent(ioat_chan, ioat_chan->head); + else + return NULL; + + hw = desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = 1; + hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + hw->ctl_f.compl_write = 1; + hw->size = NULL_DESC_BUFFER_SIZE; + hw->src_addr = 0; + hw->dst_addr = 0; + + desc->txd.flags = flags; + desc->len = 1; + + dump_desc_dbg(ioat_chan, desc); + + /* we leave the channel locked to ensure in order submission */ + return &desc->txd; +} + diff --git a/kernel/drivers/dma/ioat/sysfs.c b/kernel/drivers/dma/ioat/sysfs.c new file mode 100644 index 000000000..cb4a857ee --- /dev/null +++ b/kernel/drivers/dma/ioat/sysfs.c @@ -0,0 +1,135 @@ +/* + * Intel I/OAT DMA Linux driver + * Copyright(c) 2004 - 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/dmaengine.h> +#include <linux/pci.h> +#include "dma.h" +#include "registers.h" +#include "hw.h" + +#include "../dmaengine.h" + +static ssize_t cap_show(struct dma_chan *c, char *page) +{ + struct dma_device *dma = c->device; + + return sprintf(page, "copy%s%s%s%s%s\n", + dma_has_cap(DMA_PQ, dma->cap_mask) ? " pq" : "", + dma_has_cap(DMA_PQ_VAL, dma->cap_mask) ? " pq_val" : "", + dma_has_cap(DMA_XOR, dma->cap_mask) ? " xor" : "", + dma_has_cap(DMA_XOR_VAL, dma->cap_mask) ? " xor_val" : "", + dma_has_cap(DMA_INTERRUPT, dma->cap_mask) ? 
" intr" : ""); + +} +struct ioat_sysfs_entry ioat_cap_attr = __ATTR_RO(cap); + +static ssize_t version_show(struct dma_chan *c, char *page) +{ + struct dma_device *dma = c->device; + struct ioatdma_device *ioat_dma = to_ioatdma_device(dma); + + return sprintf(page, "%d.%d\n", + ioat_dma->version >> 4, ioat_dma->version & 0xf); +} +struct ioat_sysfs_entry ioat_version_attr = __ATTR_RO(version); + +static ssize_t +ioat_attr_show(struct kobject *kobj, struct attribute *attr, char *page) +{ + struct ioat_sysfs_entry *entry; + struct ioatdma_chan *ioat_chan; + + entry = container_of(attr, struct ioat_sysfs_entry, attr); + ioat_chan = container_of(kobj, struct ioatdma_chan, kobj); + + if (!entry->show) + return -EIO; + return entry->show(&ioat_chan->dma_chan, page); +} + +const struct sysfs_ops ioat_sysfs_ops = { + .show = ioat_attr_show, +}; + +void ioat_kobject_add(struct ioatdma_device *ioat_dma, struct kobj_type *type) +{ + struct dma_device *dma = &ioat_dma->dma_dev; + struct dma_chan *c; + + list_for_each_entry(c, &dma->channels, device_node) { + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct kobject *parent = &c->dev->device.kobj; + int err; + + err = kobject_init_and_add(&ioat_chan->kobj, type, + parent, "quickdata"); + if (err) { + dev_warn(to_dev(ioat_chan), + "sysfs init error (%d), continuing...\n", err); + kobject_put(&ioat_chan->kobj); + set_bit(IOAT_KOBJ_INIT_FAIL, &ioat_chan->state); + } + } +} + +void ioat_kobject_del(struct ioatdma_device *ioat_dma) +{ + struct dma_device *dma = &ioat_dma->dma_dev; + struct dma_chan *c; + + list_for_each_entry(c, &dma->channels, device_node) { + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + + if (!test_bit(IOAT_KOBJ_INIT_FAIL, &ioat_chan->state)) { + kobject_del(&ioat_chan->kobj); + kobject_put(&ioat_chan->kobj); + } + } +} + +static ssize_t ring_size_show(struct dma_chan *c, char *page) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + + return sprintf(page, "%d\n", (1 << ioat_chan->alloc_order) & ~1); +} +static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size); + +static ssize_t ring_active_show(struct dma_chan *c, char *page) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + + /* ...taken outside the lock, no need to be precise */ + return sprintf(page, "%d\n", ioat_ring_active(ioat_chan)); +} +static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active); + +static struct attribute *ioat_attrs[] = { + &ring_size_attr.attr, + &ring_active_attr.attr, + &ioat_cap_attr.attr, + &ioat_version_attr.attr, + NULL, +}; + +struct kobj_type ioat_ktype = { + .sysfs_ops = &ioat_sysfs_ops, + .default_attrs = ioat_attrs, +}; diff --git a/kernel/drivers/dma/iop-adma.c b/kernel/drivers/dma/iop-adma.c index 998826854..e4f43125e 100644 --- a/kernel/drivers/dma/iop-adma.c +++ b/kernel/drivers/dma/iop-adma.c @@ -1300,10 +1300,11 @@ static int iop_adma_probe(struct platform_device *pdev) * note: writecombine gives slightly better performance, but * requires that we explicitly flush the writes */ - if ((adev->dma_desc_pool_virt = dma_alloc_writecombine(&pdev->dev, - plat_data->pool_size, - &adev->dma_desc_pool, - GFP_KERNEL)) == NULL) { + adev->dma_desc_pool_virt = dma_alloc_writecombine(&pdev->dev, + plat_data->pool_size, + &adev->dma_desc_pool, + GFP_KERNEL); + if (!adev->dma_desc_pool_virt) { ret = -ENOMEM; goto err_free_adev; } diff --git a/kernel/drivers/dma/ipu/ipu_irq.c b/kernel/drivers/dma/ipu/ipu_irq.c index 2e284a443..2bf37e68a 100644 --- a/kernel/drivers/dma/ipu/ipu_irq.c +++ 
b/kernel/drivers/dma/ipu/ipu_irq.c @@ -265,10 +265,10 @@ int ipu_irq_unmap(unsigned int source) return ret; } -/* Chained IRQ handler for IPU error interrupt */ -static void ipu_irq_err(unsigned int irq, struct irq_desc *desc) +/* Chained IRQ handler for IPU function and error interrupt */ +static void ipu_irq_handler(struct irq_desc *desc) { - struct ipu *ipu = irq_get_handler_data(irq); + struct ipu *ipu = irq_desc_get_handler_data(desc); u32 status; int i, line; @@ -286,43 +286,7 @@ static void ipu_irq_err(unsigned int irq, struct irq_desc *desc) raw_spin_unlock(&bank_lock); while ((line = ffs(status))) { struct ipu_irq_map *map; - - line--; - status &= ~(1UL << line); - - raw_spin_lock(&bank_lock); - map = src2map(32 * i + line); - if (map) - irq = map->irq; - raw_spin_unlock(&bank_lock); - - if (!map) { - pr_err("IPU: Interrupt on unmapped source %u bank %d\n", - line, i); - continue; - } - generic_handle_irq(irq); - } - } -} - -/* Chained IRQ handler for IPU function interrupt */ -static void ipu_irq_fn(unsigned int irq, struct irq_desc *desc) -{ - struct ipu *ipu = irq_desc_get_handler_data(desc); - u32 status; - int i, line; - - for (i = 0; i < IPU_IRQ_NR_FN_BANKS; i++) { - struct ipu_irq_bank *bank = irq_bank + i; - - raw_spin_lock(&bank_lock); - status = ipu_read_reg(ipu, bank->status); - /* Not clearing all interrupts, see above */ - status &= ipu_read_reg(ipu, bank->control); - raw_spin_unlock(&bank_lock); - while ((line = ffs(status))) { - struct ipu_irq_map *map; + unsigned int irq = NO_IRQ; line--; status &= ~(1UL << line); @@ -377,16 +341,12 @@ int __init ipu_irq_attach_irq(struct ipu *ipu, struct platform_device *dev) irq_map[i].irq = irq; irq_map[i].source = -EINVAL; irq_set_handler(irq, handle_level_irq); -#ifdef CONFIG_ARM - set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); -#endif + irq_clear_status_flags(irq, IRQ_NOREQUEST | IRQ_NOPROBE); } - irq_set_handler_data(ipu->irq_fn, ipu); - irq_set_chained_handler(ipu->irq_fn, ipu_irq_fn); + irq_set_chained_handler_and_data(ipu->irq_fn, ipu_irq_handler, ipu); - irq_set_handler_data(ipu->irq_err, ipu); - irq_set_chained_handler(ipu->irq_err, ipu_irq_err); + irq_set_chained_handler_and_data(ipu->irq_err, ipu_irq_handler, ipu); ipu->irq_base = irq_base; @@ -399,16 +359,12 @@ void ipu_irq_detach_irq(struct ipu *ipu, struct platform_device *dev) irq_base = ipu->irq_base; - irq_set_chained_handler(ipu->irq_fn, NULL); - irq_set_handler_data(ipu->irq_fn, NULL); + irq_set_chained_handler_and_data(ipu->irq_fn, NULL, NULL); - irq_set_chained_handler(ipu->irq_err, NULL); - irq_set_handler_data(ipu->irq_err, NULL); + irq_set_chained_handler_and_data(ipu->irq_err, NULL, NULL); for (irq = irq_base; irq < irq_base + CONFIG_MX3_IPU_IRQS; irq++) { -#ifdef CONFIG_ARM - set_irq_flags(irq, 0); -#endif + irq_set_status_flags(irq, IRQ_NOREQUEST); irq_set_chip(irq, NULL); irq_set_chip_data(irq, NULL); } diff --git a/kernel/drivers/dma/k3dma.c b/kernel/drivers/dma/k3dma.c index 647e362f0..1ba2fd738 100644 --- a/kernel/drivers/dma/k3dma.c +++ b/kernel/drivers/dma/k3dma.c @@ -24,7 +24,6 @@ #include "virt-dma.h" #define DRIVER_NAME "k3-dma" -#define DMA_ALIGN 3 #define DMA_MAX_SIZE 0x1ffc #define INT_STAT 0x00 @@ -732,7 +731,7 @@ static int k3_dma_probe(struct platform_device *op) d->slave.device_pause = k3_dma_transfer_pause; d->slave.device_resume = k3_dma_transfer_resume; d->slave.device_terminate_all = k3_dma_terminate_all; - d->slave.copy_align = DMA_ALIGN; + d->slave.copy_align = DMAENGINE_ALIGN_8_BYTES; /* init virtual channel */ d->chans = 
devm_kzalloc(&op->dev, diff --git a/kernel/drivers/dma/lpc18xx-dmamux.c b/kernel/drivers/dma/lpc18xx-dmamux.c new file mode 100644 index 000000000..761f32687 --- /dev/null +++ b/kernel/drivers/dma/lpc18xx-dmamux.c @@ -0,0 +1,183 @@ +/* + * DMA Router driver for LPC18xx/43xx DMA MUX + * + * Copyright (C) 2015 Joachim Eastwood <manabian@gmail.com> + * + * Based on TI DMA Crossbar driver by: + * Copyright (C) 2015 Texas Instruments Incorporated - http://www.ti.com + * Author: Peter Ujfalusi <peter.ujfalusi@ti.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/err.h> +#include <linux/init.h> +#include <linux/mfd/syscon.h> +#include <linux/of_device.h> +#include <linux/of_dma.h> +#include <linux/regmap.h> +#include <linux/spinlock.h> + +/* CREG register offset and macros for mux manipulation */ +#define LPC18XX_CREG_DMAMUX 0x11c +#define LPC18XX_DMAMUX_VAL(v, n) ((v) << (n * 2)) +#define LPC18XX_DMAMUX_MASK(n) (0x3 << (n * 2)) +#define LPC18XX_DMAMUX_MAX_VAL 0x3 + +struct lpc18xx_dmamux { + u32 value; + bool busy; +}; + +struct lpc18xx_dmamux_data { + struct dma_router dmarouter; + struct lpc18xx_dmamux *muxes; + u32 dma_master_requests; + u32 dma_mux_requests; + struct regmap *reg; + spinlock_t lock; +}; + +static void lpc18xx_dmamux_free(struct device *dev, void *route_data) +{ + struct lpc18xx_dmamux_data *dmamux = dev_get_drvdata(dev); + struct lpc18xx_dmamux *mux = route_data; + unsigned long flags; + + spin_lock_irqsave(&dmamux->lock, flags); + mux->busy = false; + spin_unlock_irqrestore(&dmamux->lock, flags); +} + +static void *lpc18xx_dmamux_reserve(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct platform_device *pdev = of_find_device_by_node(ofdma->of_node); + struct lpc18xx_dmamux_data *dmamux = platform_get_drvdata(pdev); + unsigned long flags; + unsigned mux; + + if (dma_spec->args_count != 3) { + dev_err(&pdev->dev, "invalid number of dma mux args\n"); + return ERR_PTR(-EINVAL); + } + + mux = dma_spec->args[0]; + if (mux >= dmamux->dma_master_requests) { + dev_err(&pdev->dev, "invalid mux number: %d\n", + dma_spec->args[0]); + return ERR_PTR(-EINVAL); + } + + if (dma_spec->args[1] > LPC18XX_DMAMUX_MAX_VAL) { + dev_err(&pdev->dev, "invalid dma mux value: %d\n", + dma_spec->args[1]); + return ERR_PTR(-EINVAL); + } + + /* The of_node_put() will be done in the core for the node */ + dma_spec->np = of_parse_phandle(ofdma->of_node, "dma-masters", 0); + if (!dma_spec->np) { + dev_err(&pdev->dev, "can't get dma master\n"); + return ERR_PTR(-EINVAL); + } + + spin_lock_irqsave(&dmamux->lock, flags); + if (dmamux->muxes[mux].busy) { + spin_unlock_irqrestore(&dmamux->lock, flags); + dev_err(&pdev->dev, "dma request %u busy with %u.%u\n", + mux, mux, dmamux->muxes[mux].value); + of_node_put(dma_spec->np); + return ERR_PTR(-EBUSY); + } + + dmamux->muxes[mux].busy = true; + dmamux->muxes[mux].value = dma_spec->args[1]; + + regmap_update_bits(dmamux->reg, LPC18XX_CREG_DMAMUX, + LPC18XX_DMAMUX_MASK(mux), + LPC18XX_DMAMUX_VAL(dmamux->muxes[mux].value, mux)); + spin_unlock_irqrestore(&dmamux->lock, flags); + + dma_spec->args[1] = dma_spec->args[2]; + dma_spec->args_count = 2; + + dev_dbg(&pdev->dev, "mapping dmamux %u.%u to dma request %u\n", mux, + dmamux->muxes[mux].value, mux); + + return &dmamux->muxes[mux]; +} + +static int lpc18xx_dmamux_probe(struct platform_device *pdev) +{ + struct device_node 
*dma_np, *np = pdev->dev.of_node; + struct lpc18xx_dmamux_data *dmamux; + int ret; + + dmamux = devm_kzalloc(&pdev->dev, sizeof(*dmamux), GFP_KERNEL); + if (!dmamux) + return -ENOMEM; + + dmamux->reg = syscon_regmap_lookup_by_compatible("nxp,lpc1850-creg"); + if (IS_ERR(dmamux->reg)) { + dev_err(&pdev->dev, "syscon lookup failed\n"); + return PTR_ERR(dmamux->reg); + } + + ret = of_property_read_u32(np, "dma-requests", + &dmamux->dma_mux_requests); + if (ret) { + dev_err(&pdev->dev, "missing dma-requests property\n"); + return ret; + } + + dma_np = of_parse_phandle(np, "dma-masters", 0); + if (!dma_np) { + dev_err(&pdev->dev, "can't get dma master\n"); + return -ENODEV; + } + + ret = of_property_read_u32(dma_np, "dma-requests", + &dmamux->dma_master_requests); + of_node_put(dma_np); + if (ret) { + dev_err(&pdev->dev, "missing master dma-requests property\n"); + return ret; + } + + dmamux->muxes = devm_kcalloc(&pdev->dev, dmamux->dma_master_requests, + sizeof(struct lpc18xx_dmamux), + GFP_KERNEL); + if (!dmamux->muxes) + return -ENOMEM; + + spin_lock_init(&dmamux->lock); + platform_set_drvdata(pdev, dmamux); + dmamux->dmarouter.dev = &pdev->dev; + dmamux->dmarouter.route_free = lpc18xx_dmamux_free; + + return of_dma_router_register(np, lpc18xx_dmamux_reserve, + &dmamux->dmarouter); +} + +static const struct of_device_id lpc18xx_dmamux_match[] = { + { .compatible = "nxp,lpc1850-dmamux" }, + {}, +}; + +static struct platform_driver lpc18xx_dmamux_driver = { + .probe = lpc18xx_dmamux_probe, + .driver = { + .name = "lpc18xx-dmamux", + .of_match_table = lpc18xx_dmamux_match, + }, +}; + +static int __init lpc18xx_dmamux_init(void) +{ + return platform_driver_register(&lpc18xx_dmamux_driver); +} +arch_initcall(lpc18xx_dmamux_init); diff --git a/kernel/drivers/dma/mic_x100_dma.c b/kernel/drivers/dma/mic_x100_dma.c index 6de2e677b..068e920ec 100644 --- a/kernel/drivers/dma/mic_x100_dma.c +++ b/kernel/drivers/dma/mic_x100_dma.c @@ -22,6 +22,7 @@ #include <linux/module.h> #include <linux/io.h> #include <linux/seq_file.h> +#include <linux/vmalloc.h> #include "mic_x100_dma.h" @@ -192,8 +193,16 @@ static void mic_dma_prog_intr(struct mic_dma_chan *ch) static int mic_dma_do_dma(struct mic_dma_chan *ch, int flags, dma_addr_t src, dma_addr_t dst, size_t len) { - if (-ENOMEM == mic_dma_prog_memcpy_desc(ch, src, dst, len)) + if (len && -ENOMEM == mic_dma_prog_memcpy_desc(ch, src, dst, len)) { return -ENOMEM; + } else { + /* 3 is the maximum number of status descriptors */ + int ret = mic_dma_avail_desc_ring_space(ch, 3); + + if (ret < 0) + return ret; + } + /* Above mic_dma_prog_memcpy_desc() makes sure we have enough space */ if (flags & DMA_PREP_FENCE) { mic_dma_prep_status_desc(&ch->desc_ring[ch->head], 0, @@ -269,6 +278,33 @@ allocate_tx(struct mic_dma_chan *ch) return tx; } +/* Program a status descriptor with dst as address and value to be written */ +static struct dma_async_tx_descriptor * +mic_dma_prep_status_lock(struct dma_chan *ch, dma_addr_t dst, u64 src_val, + unsigned long flags) +{ + struct mic_dma_chan *mic_ch = to_mic_dma_chan(ch); + int result; + + spin_lock(&mic_ch->prep_lock); + result = mic_dma_avail_desc_ring_space(mic_ch, 4); + if (result < 0) + goto error; + mic_dma_prep_status_desc(&mic_ch->desc_ring[mic_ch->head], src_val, dst, + false); + mic_dma_hw_ring_inc_head(mic_ch); + result = mic_dma_do_dma(mic_ch, flags, 0, 0, 0); + if (result < 0) + goto error; + + return allocate_tx(mic_ch); +error: + dev_err(mic_dma_ch_to_device(mic_ch), + "Error enqueueing dma status descriptor, 
error=%d\n", result); + spin_unlock(&mic_ch->prep_lock); + return NULL; +} + /* * Prepare a memcpy descriptor to be added to the ring. * Note that the temporary descriptor adds an extra overhead of copying the @@ -586,6 +622,8 @@ static int mic_dma_register_dma_device(struct mic_dma_device *mic_dma_dev, mic_dma_free_chan_resources; mic_dma_dev->dma_dev.device_tx_status = mic_dma_tx_status; mic_dma_dev->dma_dev.device_prep_dma_memcpy = mic_dma_prep_memcpy_lock; + mic_dma_dev->dma_dev.device_prep_dma_imm_data = + mic_dma_prep_status_lock; mic_dma_dev->dma_dev.device_prep_dma_interrupt = mic_dma_prep_interrupt_lock; mic_dma_dev->dma_dev.device_issue_pending = mic_dma_issue_pending; diff --git a/kernel/drivers/dma/mic_x100_dma.h b/kernel/drivers/dma/mic_x100_dma.h index f663b0bdd..d89982034 100644 --- a/kernel/drivers/dma/mic_x100_dma.h +++ b/kernel/drivers/dma/mic_x100_dma.h @@ -39,7 +39,7 @@ */ #define MIC_DMA_MAX_NUM_CHAN 8 #define MIC_DMA_NUM_CHAN 4 -#define MIC_DMA_ALIGN_SHIFT 6 +#define MIC_DMA_ALIGN_SHIFT DMAENGINE_ALIGN_64_BYTES #define MIC_DMA_ALIGN_BYTES (1 << MIC_DMA_ALIGN_SHIFT) #define MIC_DMA_DESC_RX_SIZE (128 * 1024 - 4) diff --git a/kernel/drivers/dma/mmp_pdma.c b/kernel/drivers/dma/mmp_pdma.c index 462a0229a..e39457f13 100644 --- a/kernel/drivers/dma/mmp_pdma.c +++ b/kernel/drivers/dma/mmp_pdma.c @@ -72,7 +72,6 @@ #define DCMD_WIDTH4 (3 << 14) /* 4 byte width (Word) */ #define DCMD_LENGTH 0x01fff /* length mask (max = 8K - 1) */ -#define PDMA_ALIGNMENT 3 #define PDMA_MAX_DESC_BYTES DCMD_LENGTH struct mmp_pdma_desc_hw { @@ -1071,7 +1070,7 @@ static int mmp_pdma_probe(struct platform_device *op) pdev->device.device_issue_pending = mmp_pdma_issue_pending; pdev->device.device_config = mmp_pdma_config; pdev->device.device_terminate_all = mmp_pdma_terminate_all; - pdev->device.copy_align = PDMA_ALIGNMENT; + pdev->device.copy_align = DMAENGINE_ALIGN_8_BYTES; pdev->device.src_addr_widths = widths; pdev->device.dst_addr_widths = widths; pdev->device.directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM); diff --git a/kernel/drivers/dma/mmp_tdma.c b/kernel/drivers/dma/mmp_tdma.c index 449e785de..3df042260 100644 --- a/kernel/drivers/dma/mmp_tdma.c +++ b/kernel/drivers/dma/mmp_tdma.c @@ -100,7 +100,6 @@ enum mmp_tdma_type { PXA910_SQU, }; -#define TDMA_ALIGNMENT 3 #define TDMA_MAX_XFER_BYTES SZ_64K struct mmp_tdma_chan { @@ -657,7 +656,7 @@ static int mmp_tdma_probe(struct platform_device *pdev) INIT_LIST_HEAD(&tdev->device.channels); if (pdev->dev.of_node) - pool = of_get_named_gen_pool(pdev->dev.of_node, "asram", 0); + pool = of_gen_pool_get(pdev->dev.of_node, "asram", 0); else pool = sram_get_gpool("asram"); if (!pool) { @@ -695,7 +694,7 @@ static int mmp_tdma_probe(struct platform_device *pdev) tdev->device.device_pause = mmp_tdma_pause_chan; tdev->device.device_resume = mmp_tdma_resume_chan; tdev->device.device_terminate_all = mmp_tdma_terminate_all; - tdev->device.copy_align = TDMA_ALIGNMENT; + tdev->device.copy_align = DMAENGINE_ALIGN_8_BYTES; dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)); platform_set_drvdata(pdev, tdev); diff --git a/kernel/drivers/dma/moxart-dma.c b/kernel/drivers/dma/moxart-dma.c index b4634109e..631c4435e 100644 --- a/kernel/drivers/dma/moxart-dma.c +++ b/kernel/drivers/dma/moxart-dma.c @@ -652,6 +652,7 @@ static const struct of_device_id moxart_dma_match[] = { { .compatible = "moxa,moxart-dma" }, { } }; +MODULE_DEVICE_TABLE(of, moxart_dma_match); static struct platform_driver moxart_driver = { .probe = moxart_probe, diff --git a/kernel/drivers/dma/mpc512x_dma.c 
b/kernel/drivers/dma/mpc512x_dma.c index e6281e7aa..aae76fb39 100644 --- a/kernel/drivers/dma/mpc512x_dma.c +++ b/kernel/drivers/dma/mpc512x_dma.c @@ -1073,6 +1073,7 @@ static const struct of_device_id mpc_dma_match[] = { { .compatible = "fsl,mpc8308-dma", }, {}, }; +MODULE_DEVICE_TABLE(of, mpc_dma_match); static struct platform_driver mpc_dma_driver = { .probe = mpc_dma_probe, diff --git a/kernel/drivers/dma/mv_xor.c b/kernel/drivers/dma/mv_xor.c index 50f1b422d..1c2de9a83 100644 --- a/kernel/drivers/dma/mv_xor.c +++ b/kernel/drivers/dma/mv_xor.c @@ -13,23 +13,29 @@ */ #include <linux/init.h> -#include <linux/module.h> #include <linux/slab.h> #include <linux/delay.h> #include <linux/dma-mapping.h> #include <linux/spinlock.h> #include <linux/interrupt.h> +#include <linux/of_device.h> #include <linux/platform_device.h> #include <linux/memory.h> #include <linux/clk.h> #include <linux/of.h> #include <linux/of_irq.h> #include <linux/irqdomain.h> +#include <linux/cpumask.h> #include <linux/platform_data/dma-mv_xor.h> #include "dmaengine.h" #include "mv_xor.h" +enum mv_xor_mode { + XOR_MODE_IN_REG, + XOR_MODE_IN_DESC, +}; + static void mv_xor_issue_pending(struct dma_chan *chan); #define to_mv_xor_chan(chan) \ @@ -56,18 +62,30 @@ static void mv_desc_init(struct mv_xor_desc_slot *desc, hw_desc->byte_count = byte_count; } -static void mv_desc_set_next_desc(struct mv_xor_desc_slot *desc, - u32 next_desc_addr) +static void mv_desc_set_mode(struct mv_xor_desc_slot *desc) { struct mv_xor_desc *hw_desc = desc->hw_desc; - BUG_ON(hw_desc->phy_next_desc); - hw_desc->phy_next_desc = next_desc_addr; + + switch (desc->type) { + case DMA_XOR: + case DMA_INTERRUPT: + hw_desc->desc_command |= XOR_DESC_OPERATION_XOR; + break; + case DMA_MEMCPY: + hw_desc->desc_command |= XOR_DESC_OPERATION_MEMCPY; + break; + default: + BUG(); + return; + } } -static void mv_desc_clear_next_desc(struct mv_xor_desc_slot *desc) +static void mv_desc_set_next_desc(struct mv_xor_desc_slot *desc, + u32 next_desc_addr) { struct mv_xor_desc *hw_desc = desc->hw_desc; - hw_desc->phy_next_desc = 0; + BUG_ON(hw_desc->phy_next_desc); + hw_desc->phy_next_desc = next_desc_addr; } static void mv_desc_set_src_addr(struct mv_xor_desc_slot *desc, @@ -104,7 +122,7 @@ static u32 mv_chan_get_intr_cause(struct mv_xor_chan *chan) return intr_cause; } -static void mv_xor_device_clear_eoc_cause(struct mv_xor_chan *chan) +static void mv_chan_clear_eoc_cause(struct mv_xor_chan *chan) { u32 val; @@ -114,14 +132,14 @@ static void mv_xor_device_clear_eoc_cause(struct mv_xor_chan *chan) writel_relaxed(val, XOR_INTR_CAUSE(chan)); } -static void mv_xor_device_clear_err_status(struct mv_xor_chan *chan) +static void mv_chan_clear_err_status(struct mv_xor_chan *chan) { u32 val = 0xFFFF0000 >> (chan->idx * 16); writel_relaxed(val, XOR_INTR_CAUSE(chan)); } -static void mv_set_mode(struct mv_xor_chan *chan, - enum dma_transaction_type type) +static void mv_chan_set_mode(struct mv_xor_chan *chan, + enum dma_transaction_type type) { u32 op_mode; u32 config = readl_relaxed(XOR_CONFIG(chan)); @@ -154,6 +172,25 @@ static void mv_set_mode(struct mv_xor_chan *chan, chan->current_type = type; } +static void mv_chan_set_mode_to_desc(struct mv_xor_chan *chan) +{ + u32 op_mode; + u32 config = readl_relaxed(XOR_CONFIG(chan)); + + op_mode = XOR_OPERATION_MODE_IN_DESC; + + config &= ~0x7; + config |= op_mode; + +#if defined(__BIG_ENDIAN) + config |= XOR_DESCRIPTOR_SWAP; +#else + config &= ~XOR_DESCRIPTOR_SWAP; +#endif + + writel_relaxed(config, XOR_CONFIG(chan)); +} + static void 
mv_chan_activate(struct mv_xor_chan *chan) { dev_dbg(mv_chan_to_devp(chan), " activate chan.\n"); @@ -171,28 +208,13 @@ static char mv_chan_is_busy(struct mv_xor_chan *chan) return (state == 1) ? 1 : 0; } -/** - * mv_xor_free_slots - flags descriptor slots for reuse - * @slot: Slot to free - * Caller must hold &mv_chan->lock while calling this function - */ -static void mv_xor_free_slots(struct mv_xor_chan *mv_chan, - struct mv_xor_desc_slot *slot) -{ - dev_dbg(mv_chan_to_devp(mv_chan), "%s %d slot %p\n", - __func__, __LINE__, slot); - - slot->slot_used = 0; - -} - /* - * mv_xor_start_new_chain - program the engine to operate on new chain headed by - * sw_desc + * mv_chan_start_new_chain - program the engine to operate on new + * chain headed by sw_desc * Caller must hold &mv_chan->lock while calling this function */ -static void mv_xor_start_new_chain(struct mv_xor_chan *mv_chan, - struct mv_xor_desc_slot *sw_desc) +static void mv_chan_start_new_chain(struct mv_xor_chan *mv_chan, + struct mv_xor_desc_slot *sw_desc) { dev_dbg(mv_chan_to_devp(mv_chan), "%s %d: sw_desc %p\n", __func__, __LINE__, sw_desc); @@ -205,8 +227,9 @@ static void mv_xor_start_new_chain(struct mv_xor_chan *mv_chan, } static dma_cookie_t -mv_xor_run_tx_complete_actions(struct mv_xor_desc_slot *desc, - struct mv_xor_chan *mv_chan, dma_cookie_t cookie) +mv_desc_run_tx_complete_actions(struct mv_xor_desc_slot *desc, + struct mv_xor_chan *mv_chan, + dma_cookie_t cookie) { BUG_ON(desc->async_tx.cookie < 0); @@ -230,44 +253,41 @@ mv_xor_run_tx_complete_actions(struct mv_xor_desc_slot *desc, } static int -mv_xor_clean_completed_slots(struct mv_xor_chan *mv_chan) +mv_chan_clean_completed_slots(struct mv_xor_chan *mv_chan) { struct mv_xor_desc_slot *iter, *_iter; dev_dbg(mv_chan_to_devp(mv_chan), "%s %d\n", __func__, __LINE__); list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots, - completed_node) { + node) { - if (async_tx_test_ack(&iter->async_tx)) { - list_del(&iter->completed_node); - mv_xor_free_slots(mv_chan, iter); - } + if (async_tx_test_ack(&iter->async_tx)) + list_move_tail(&iter->node, &mv_chan->free_slots); } return 0; } static int -mv_xor_clean_slot(struct mv_xor_desc_slot *desc, - struct mv_xor_chan *mv_chan) +mv_desc_clean_slot(struct mv_xor_desc_slot *desc, + struct mv_xor_chan *mv_chan) { dev_dbg(mv_chan_to_devp(mv_chan), "%s %d: desc %p flags %d\n", __func__, __LINE__, desc, desc->async_tx.flags); - list_del(&desc->chain_node); + /* the client is allowed to attach dependent operations * until 'ack' is set */ - if (!async_tx_test_ack(&desc->async_tx)) { + if (!async_tx_test_ack(&desc->async_tx)) /* move this slot to the completed_slots */ - list_add_tail(&desc->completed_node, &mv_chan->completed_slots); - return 0; - } + list_move_tail(&desc->node, &mv_chan->completed_slots); + else + list_move_tail(&desc->node, &mv_chan->free_slots); - mv_xor_free_slots(mv_chan, desc); return 0; } /* This function must be called with the mv_xor_chan spinlock held */ -static void mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan) +static void mv_chan_slot_cleanup(struct mv_xor_chan *mv_chan) { struct mv_xor_desc_slot *iter, *_iter; dma_cookie_t cookie = 0; @@ -278,23 +298,23 @@ static void mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan) dev_dbg(mv_chan_to_devp(mv_chan), "%s %d\n", __func__, __LINE__); dev_dbg(mv_chan_to_devp(mv_chan), "current_desc %x\n", current_desc); - mv_xor_clean_completed_slots(mv_chan); + mv_chan_clean_completed_slots(mv_chan); /* free completed slots from the chain starting with * the oldest 
descriptor */ list_for_each_entry_safe(iter, _iter, &mv_chan->chain, - chain_node) { + node) { /* clean finished descriptors */ hw_desc = iter->hw_desc; if (hw_desc->status & XOR_DESC_SUCCESS) { - cookie = mv_xor_run_tx_complete_actions(iter, mv_chan, - cookie); + cookie = mv_desc_run_tx_complete_actions(iter, mv_chan, + cookie); /* done processing desc, clean slot */ - mv_xor_clean_slot(iter, mv_chan); + mv_desc_clean_slot(iter, mv_chan); /* break if we did cleaned the current */ if (iter->async_tx.phys == current_desc) { @@ -317,18 +337,18 @@ static void mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan) */ iter = list_entry(mv_chan->chain.next, struct mv_xor_desc_slot, - chain_node); - mv_xor_start_new_chain(mv_chan, iter); + node); + mv_chan_start_new_chain(mv_chan, iter); } else { - if (!list_is_last(&iter->chain_node, &mv_chan->chain)) { + if (!list_is_last(&iter->node, &mv_chan->chain)) { /* * descriptors are still waiting after * current, trigger them */ - iter = list_entry(iter->chain_node.next, + iter = list_entry(iter->node.next, struct mv_xor_desc_slot, - chain_node); - mv_xor_start_new_chain(mv_chan, iter); + node); + mv_chan_start_new_chain(mv_chan, iter); } else { /* * some descriptors are still waiting @@ -348,56 +368,35 @@ static void mv_xor_tasklet(unsigned long data) struct mv_xor_chan *chan = (struct mv_xor_chan *) data; spin_lock_bh(&chan->lock); - mv_xor_slot_cleanup(chan); + mv_chan_slot_cleanup(chan); spin_unlock_bh(&chan->lock); } static struct mv_xor_desc_slot * -mv_xor_alloc_slot(struct mv_xor_chan *mv_chan) +mv_chan_alloc_slot(struct mv_xor_chan *mv_chan) { - struct mv_xor_desc_slot *iter, *_iter; - int retry = 0; + struct mv_xor_desc_slot *iter; - /* start search from the last allocated descrtiptor - * if a contiguous allocation can not be found start searching - * from the beginning of the list - */ -retry: - if (retry == 0) - iter = mv_chan->last_used; - else - iter = list_entry(&mv_chan->all_slots, - struct mv_xor_desc_slot, - slot_node); - - list_for_each_entry_safe_continue( - iter, _iter, &mv_chan->all_slots, slot_node) { - - prefetch(_iter); - prefetch(&_iter->async_tx); - if (iter->slot_used) { - /* give up after finding the first busy slot - * on the second pass through the list - */ - if (retry) - break; - continue; - } + spin_lock_bh(&mv_chan->lock); + + if (!list_empty(&mv_chan->free_slots)) { + iter = list_first_entry(&mv_chan->free_slots, + struct mv_xor_desc_slot, + node); + + list_move_tail(&iter->node, &mv_chan->allocated_slots); + + spin_unlock_bh(&mv_chan->lock); /* pre-ack descriptor */ async_tx_ack(&iter->async_tx); - - iter->slot_used = 1; - INIT_LIST_HEAD(&iter->chain_node); iter->async_tx.cookie = -EBUSY; - mv_chan->last_used = iter; - mv_desc_clear_next_desc(iter); return iter; } - if (!retry++) - goto retry; + + spin_unlock_bh(&mv_chan->lock); /* try to free some slots if the allocation fails */ tasklet_schedule(&mv_chan->irq_tasklet); @@ -423,14 +422,14 @@ mv_xor_tx_submit(struct dma_async_tx_descriptor *tx) cookie = dma_cookie_assign(tx); if (list_empty(&mv_chan->chain)) - list_add_tail(&sw_desc->chain_node, &mv_chan->chain); + list_move_tail(&sw_desc->node, &mv_chan->chain); else { new_hw_chain = 0; old_chain_tail = list_entry(mv_chan->chain.prev, struct mv_xor_desc_slot, - chain_node); - list_add_tail(&sw_desc->chain_node, &mv_chan->chain); + node); + list_move_tail(&sw_desc->node, &mv_chan->chain); dev_dbg(mv_chan_to_devp(mv_chan), "Append to last desc %pa\n", &old_chain_tail->async_tx.phys); @@ -451,7 +450,7 @@ 
mv_xor_tx_submit(struct dma_async_tx_descriptor *tx) } if (new_hw_chain) - mv_xor_start_new_chain(mv_chan, sw_desc); + mv_chan_start_new_chain(mv_chan, sw_desc); spin_unlock_bh(&mv_chan->lock); @@ -483,26 +482,20 @@ static int mv_xor_alloc_chan_resources(struct dma_chan *chan) dma_async_tx_descriptor_init(&slot->async_tx, chan); slot->async_tx.tx_submit = mv_xor_tx_submit; - INIT_LIST_HEAD(&slot->chain_node); - INIT_LIST_HEAD(&slot->slot_node); + INIT_LIST_HEAD(&slot->node); dma_desc = mv_chan->dma_desc_pool; slot->async_tx.phys = dma_desc + idx * MV_XOR_SLOT_SIZE; slot->idx = idx++; spin_lock_bh(&mv_chan->lock); mv_chan->slots_allocated = idx; - list_add_tail(&slot->slot_node, &mv_chan->all_slots); + list_add_tail(&slot->node, &mv_chan->free_slots); spin_unlock_bh(&mv_chan->lock); } - if (mv_chan->slots_allocated && !mv_chan->last_used) - mv_chan->last_used = list_entry(mv_chan->all_slots.next, - struct mv_xor_desc_slot, - slot_node); - dev_dbg(mv_chan_to_devp(mv_chan), - "allocated %d descriptor slots last_used: %p\n", - mv_chan->slots_allocated, mv_chan->last_used); + "allocated %d descriptor slots\n", + mv_chan->slots_allocated); return mv_chan->slots_allocated ? : -ENOMEM; } @@ -523,16 +516,17 @@ mv_xor_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, "%s src_cnt: %d len: %u dest %pad flags: %ld\n", __func__, src_cnt, len, &dest, flags); - spin_lock_bh(&mv_chan->lock); - sw_desc = mv_xor_alloc_slot(mv_chan); + sw_desc = mv_chan_alloc_slot(mv_chan); if (sw_desc) { sw_desc->type = DMA_XOR; sw_desc->async_tx.flags = flags; mv_desc_init(sw_desc, dest, len, flags); + if (mv_chan->op_in_desc == XOR_MODE_IN_DESC) + mv_desc_set_mode(sw_desc); while (src_cnt--) mv_desc_set_src_addr(sw_desc, src_cnt, src[src_cnt]); } - spin_unlock_bh(&mv_chan->lock); + dev_dbg(mv_chan_to_devp(mv_chan), "%s sw_desc %p async_tx %p \n", __func__, sw_desc, &sw_desc->async_tx); @@ -576,25 +570,29 @@ static void mv_xor_free_chan_resources(struct dma_chan *chan) spin_lock_bh(&mv_chan->lock); - mv_xor_slot_cleanup(mv_chan); + mv_chan_slot_cleanup(mv_chan); list_for_each_entry_safe(iter, _iter, &mv_chan->chain, - chain_node) { + node) { in_use_descs++; - list_del(&iter->chain_node); + list_move_tail(&iter->node, &mv_chan->free_slots); } list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots, - completed_node) { + node) { + in_use_descs++; + list_move_tail(&iter->node, &mv_chan->free_slots); + } + list_for_each_entry_safe(iter, _iter, &mv_chan->allocated_slots, + node) { in_use_descs++; - list_del(&iter->completed_node); + list_move_tail(&iter->node, &mv_chan->free_slots); } list_for_each_entry_safe_reverse( - iter, _iter, &mv_chan->all_slots, slot_node) { - list_del(&iter->slot_node); + iter, _iter, &mv_chan->free_slots, node) { + list_del(&iter->node); kfree(iter); mv_chan->slots_allocated--; } - mv_chan->last_used = NULL; dev_dbg(mv_chan_to_devp(mv_chan), "%s slots_allocated %d\n", __func__, mv_chan->slots_allocated); @@ -623,13 +621,13 @@ static enum dma_status mv_xor_status(struct dma_chan *chan, return ret; spin_lock_bh(&mv_chan->lock); - mv_xor_slot_cleanup(mv_chan); + mv_chan_slot_cleanup(mv_chan); spin_unlock_bh(&mv_chan->lock); return dma_cookie_status(chan, cookie, txstate); } -static void mv_dump_xor_regs(struct mv_xor_chan *chan) +static void mv_chan_dump_regs(struct mv_xor_chan *chan) { u32 val; @@ -652,8 +650,8 @@ static void mv_dump_xor_regs(struct mv_xor_chan *chan) dev_err(mv_chan_to_devp(chan), "error addr 0x%08x\n", val); } -static void mv_xor_err_interrupt_handler(struct 
mv_xor_chan *chan, - u32 intr_cause) +static void mv_chan_err_interrupt_handler(struct mv_xor_chan *chan, + u32 intr_cause) { if (intr_cause & XOR_INT_ERR_DECODE) { dev_dbg(mv_chan_to_devp(chan), "ignoring address decode error\n"); @@ -663,7 +661,7 @@ static void mv_xor_err_interrupt_handler(struct mv_xor_chan *chan, dev_err(mv_chan_to_devp(chan), "error on chan %d. intr cause 0x%08x\n", chan->idx, intr_cause); - mv_dump_xor_regs(chan); + mv_chan_dump_regs(chan); WARN_ON(1); } @@ -675,11 +673,11 @@ static irqreturn_t mv_xor_interrupt_handler(int irq, void *data) dev_dbg(mv_chan_to_devp(chan), "intr cause %x\n", intr_cause); if (intr_cause & XOR_INTR_ERRORS) - mv_xor_err_interrupt_handler(chan, intr_cause); + mv_chan_err_interrupt_handler(chan, intr_cause); tasklet_schedule(&chan->irq_tasklet); - mv_xor_device_clear_eoc_cause(chan); + mv_chan_clear_eoc_cause(chan); return IRQ_HANDLED; } @@ -698,7 +696,7 @@ static void mv_xor_issue_pending(struct dma_chan *chan) * Perform a transaction to verify the HW works. */ -static int mv_xor_memcpy_self_test(struct mv_xor_chan *mv_chan) +static int mv_chan_memcpy_self_test(struct mv_xor_chan *mv_chan) { int i, ret; void *src, *dest; @@ -807,7 +805,7 @@ out: #define MV_XOR_NUM_SRC_TEST 4 /* must be <= 15 */ static int -mv_xor_xor_self_test(struct mv_xor_chan *mv_chan) +mv_chan_xor_self_test(struct mv_xor_chan *mv_chan) { int i, src_idx, ret; struct page *dest; @@ -971,7 +969,7 @@ static int mv_xor_channel_remove(struct mv_xor_chan *mv_chan) static struct mv_xor_chan * mv_xor_channel_add(struct mv_xor_device *xordev, struct platform_device *pdev, - int idx, dma_cap_mask_t cap_mask, int irq) + int idx, dma_cap_mask_t cap_mask, int irq, int op_in_desc) { int ret = 0; struct mv_xor_chan *mv_chan; @@ -983,6 +981,7 @@ mv_xor_channel_add(struct mv_xor_device *xordev, mv_chan->idx = idx; mv_chan->irq = irq; + mv_chan->op_in_desc = op_in_desc; dma_dev = &mv_chan->dmadev; @@ -1034,7 +1033,7 @@ mv_xor_channel_add(struct mv_xor_device *xordev, mv_chan); /* clear errors before enabling interrupts */ - mv_xor_device_clear_err_status(mv_chan); + mv_chan_clear_err_status(mv_chan); ret = request_irq(mv_chan->irq, mv_xor_interrupt_handler, 0, dev_name(&pdev->dev), mv_chan); @@ -1043,32 +1042,37 @@ mv_xor_channel_add(struct mv_xor_device *xordev, mv_chan_unmask_interrupts(mv_chan); - mv_set_mode(mv_chan, DMA_XOR); + if (mv_chan->op_in_desc == XOR_MODE_IN_DESC) + mv_chan_set_mode_to_desc(mv_chan); + else + mv_chan_set_mode(mv_chan, DMA_XOR); spin_lock_init(&mv_chan->lock); INIT_LIST_HEAD(&mv_chan->chain); INIT_LIST_HEAD(&mv_chan->completed_slots); - INIT_LIST_HEAD(&mv_chan->all_slots); + INIT_LIST_HEAD(&mv_chan->free_slots); + INIT_LIST_HEAD(&mv_chan->allocated_slots); mv_chan->dmachan.device = dma_dev; dma_cookie_init(&mv_chan->dmachan); list_add_tail(&mv_chan->dmachan.device_node, &dma_dev->channels); if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) { - ret = mv_xor_memcpy_self_test(mv_chan); + ret = mv_chan_memcpy_self_test(mv_chan); dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret); if (ret) goto err_free_irq; } if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { - ret = mv_xor_xor_self_test(mv_chan); + ret = mv_chan_xor_self_test(mv_chan); dev_dbg(&pdev->dev, "xor self test returned %d\n", ret); if (ret) goto err_free_irq; } - dev_info(&pdev->dev, "Marvell XOR: ( %s%s%s)\n", + dev_info(&pdev->dev, "Marvell XOR (%s): ( %s%s%s)\n", + mv_chan->op_in_desc ? "Descriptor Mode" : "Registers Mode", dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? 
"xor " : "", dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "", dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : ""); @@ -1117,13 +1121,23 @@ mv_xor_conf_mbus_windows(struct mv_xor_device *xordev, writel(0, base + WINDOW_OVERRIDE_CTRL(1)); } +static const struct of_device_id mv_xor_dt_ids[] = { + { .compatible = "marvell,orion-xor", .data = (void *)XOR_MODE_IN_REG }, + { .compatible = "marvell,armada-380-xor", .data = (void *)XOR_MODE_IN_DESC }, + {}, +}; + +static unsigned int mv_xor_engine_count; + static int mv_xor_probe(struct platform_device *pdev) { const struct mbus_dram_target_info *dram; struct mv_xor_device *xordev; struct mv_xor_platform_data *pdata = dev_get_platdata(&pdev->dev); struct resource *res; + unsigned int max_engines, max_channels; int i, ret; + int op_in_desc; dev_notice(&pdev->dev, "Marvell shared XOR driver\n"); @@ -1165,22 +1179,41 @@ static int mv_xor_probe(struct platform_device *pdev) if (!IS_ERR(xordev->clk)) clk_prepare_enable(xordev->clk); + /* + * We don't want to have more than one channel per CPU in + * order for async_tx to perform well. So we limit the number + * of engines and channels so that we take into account this + * constraint. Note that we also want to use channels from + * separate engines when possible. + */ + max_engines = num_present_cpus(); + max_channels = min_t(unsigned int, + MV_XOR_MAX_CHANNELS, + DIV_ROUND_UP(num_present_cpus(), 2)); + + if (mv_xor_engine_count >= max_engines) + return 0; + if (pdev->dev.of_node) { struct device_node *np; int i = 0; + const struct of_device_id *of_id = + of_match_device(mv_xor_dt_ids, + &pdev->dev); for_each_child_of_node(pdev->dev.of_node, np) { struct mv_xor_chan *chan; dma_cap_mask_t cap_mask; int irq; + op_in_desc = (int)of_id->data; + + if (i >= max_channels) + continue; dma_cap_zero(cap_mask); - if (of_property_read_bool(np, "dmacap,memcpy")) - dma_cap_set(DMA_MEMCPY, cap_mask); - if (of_property_read_bool(np, "dmacap,xor")) - dma_cap_set(DMA_XOR, cap_mask); - if (of_property_read_bool(np, "dmacap,interrupt")) - dma_cap_set(DMA_INTERRUPT, cap_mask); + dma_cap_set(DMA_MEMCPY, cap_mask); + dma_cap_set(DMA_XOR, cap_mask); + dma_cap_set(DMA_INTERRUPT, cap_mask); irq = irq_of_parse_and_map(np, 0); if (!irq) { @@ -1189,7 +1222,7 @@ static int mv_xor_probe(struct platform_device *pdev) } chan = mv_xor_channel_add(xordev, pdev, i, - cap_mask, irq); + cap_mask, irq, op_in_desc); if (IS_ERR(chan)) { ret = PTR_ERR(chan); irq_dispose_mapping(irq); @@ -1200,7 +1233,7 @@ static int mv_xor_probe(struct platform_device *pdev) i++; } } else if (pdata && pdata->channels) { - for (i = 0; i < MV_XOR_MAX_CHANNELS; i++) { + for (i = 0; i < max_channels; i++) { struct mv_xor_channel_data *cd; struct mv_xor_chan *chan; int irq; @@ -1218,7 +1251,8 @@ static int mv_xor_probe(struct platform_device *pdev) } chan = mv_xor_channel_add(xordev, pdev, i, - cd->cap_mask, irq); + cd->cap_mask, irq, + XOR_MODE_IN_REG); if (IS_ERR(chan)) { ret = PTR_ERR(chan); goto err_channel_add; @@ -1246,35 +1280,8 @@ err_channel_add: return ret; } -static int mv_xor_remove(struct platform_device *pdev) -{ - struct mv_xor_device *xordev = platform_get_drvdata(pdev); - int i; - - for (i = 0; i < MV_XOR_MAX_CHANNELS; i++) { - if (xordev->channels[i]) - mv_xor_channel_remove(xordev->channels[i]); - } - - if (!IS_ERR(xordev->clk)) { - clk_disable_unprepare(xordev->clk); - clk_put(xordev->clk); - } - - return 0; -} - -#ifdef CONFIG_OF -static const struct of_device_id mv_xor_dt_ids[] = { - { .compatible = "marvell,orion-xor", }, - 
{}, -}; -MODULE_DEVICE_TABLE(of, mv_xor_dt_ids); -#endif - static struct platform_driver mv_xor_driver = { .probe = mv_xor_probe, - .remove = mv_xor_remove, .driver = { .name = MV_XOR_NAME, .of_match_table = of_match_ptr(mv_xor_dt_ids), @@ -1286,19 +1293,10 @@ static int __init mv_xor_init(void) { return platform_driver_register(&mv_xor_driver); } -module_init(mv_xor_init); - -/* it's currently unsafe to unload this module */ -#if 0 -static void __exit mv_xor_exit(void) -{ - platform_driver_unregister(&mv_xor_driver); - return; -} - -module_exit(mv_xor_exit); -#endif +device_initcall(mv_xor_init); +/* MODULE_AUTHOR("Saeed Bishara <saeed@marvell.com>"); MODULE_DESCRIPTION("DMA engine driver for Marvell's XOR engine"); MODULE_LICENSE("GPL"); +*/ diff --git a/kernel/drivers/dma/mv_xor.h b/kernel/drivers/dma/mv_xor.h index 0e302b3a3..b7455b421 100644 --- a/kernel/drivers/dma/mv_xor.h +++ b/kernel/drivers/dma/mv_xor.h @@ -19,7 +19,7 @@ #include <linux/dmaengine.h> #include <linux/interrupt.h> -#define MV_XOR_POOL_SIZE PAGE_SIZE +#define MV_XOR_POOL_SIZE (MV_XOR_SLOT_SIZE * 3072) #define MV_XOR_SLOT_SIZE 64 #define MV_XOR_THRESHOLD 1 #define MV_XOR_MAX_CHANNELS 2 @@ -30,9 +30,14 @@ /* Values for the XOR_CONFIG register */ #define XOR_OPERATION_MODE_XOR 0 #define XOR_OPERATION_MODE_MEMCPY 2 +#define XOR_OPERATION_MODE_IN_DESC 7 #define XOR_DESCRIPTOR_SWAP BIT(14) #define XOR_DESC_SUCCESS 0x40000000 +#define XOR_DESC_OPERATION_XOR (0 << 24) +#define XOR_DESC_OPERATION_CRC32C (1 << 24) +#define XOR_DESC_OPERATION_MEMCPY (2 << 24) + #define XOR_DESC_DMA_OWNED BIT(31) #define XOR_DESC_EOD_INT_EN BIT(31) @@ -89,13 +94,14 @@ struct mv_xor_device { * @mmr_base: memory mapped register base * @idx: the index of the xor channel * @chain: device chain view of the descriptors + * @free_slots: free slots usable by the channel + * @allocated_slots: slots allocated by the driver * @completed_slots: slots completed by HW but still need to be acked * @device: parent device * @common: common dmaengine channel object members - * @last_used: place holder for allocation to continue from where it left off - * @all_slots: complete domain of slots usable by the channel * @slots_allocated: records the actual size of the descriptor slot pool * @irq_tasklet: bottom half where mv_xor_slot_cleanup runs + * @op_in_desc: new mode of driver, each op is writen to descriptor. 
*/ struct mv_xor_chan { int pending; @@ -106,16 +112,17 @@ struct mv_xor_chan { int irq; enum dma_transaction_type current_type; struct list_head chain; + struct list_head free_slots; + struct list_head allocated_slots; struct list_head completed_slots; dma_addr_t dma_desc_pool; void *dma_desc_pool_virt; size_t pool_size; struct dma_device dmadev; struct dma_chan dmachan; - struct mv_xor_desc_slot *last_used; - struct list_head all_slots; int slots_allocated; struct tasklet_struct irq_tasklet; + int op_in_desc; char dummy_src[MV_XOR_MIN_BYTE_COUNT]; char dummy_dst[MV_XOR_MIN_BYTE_COUNT]; dma_addr_t dummy_src_addr, dummy_dst_addr; @@ -123,9 +130,7 @@ struct mv_xor_chan { /** * struct mv_xor_desc_slot - software descriptor - * @slot_node: node on the mv_xor_chan.all_slots list - * @chain_node: node on the mv_xor_chan.chain list - * @completed_node: node on the mv_xor_chan.completed_slots list + * @node: node on the mv_xor_chan lists * @hw_desc: virtual address of the hardware descriptor chain * @phys: hardware address of the hardware descriptor chain * @slot_used: slot in use or not @@ -134,12 +139,9 @@ struct mv_xor_chan { * @async_tx: support for the async_tx api */ struct mv_xor_desc_slot { - struct list_head slot_node; - struct list_head chain_node; - struct list_head completed_node; + struct list_head node; enum dma_transaction_type type; void *hw_desc; - u16 slot_used; u16 idx; struct dma_async_tx_descriptor async_tx; }; diff --git a/kernel/drivers/dma/mxs-dma.c b/kernel/drivers/dma/mxs-dma.c index 829ec686d..60de35251 100644 --- a/kernel/drivers/dma/mxs-dma.c +++ b/kernel/drivers/dma/mxs-dma.c @@ -170,7 +170,7 @@ static struct mxs_dma_type mxs_dma_types[] = { } }; -static struct platform_device_id mxs_dma_ids[] = { +static const struct platform_device_id mxs_dma_ids[] = { { .name = "imx23-dma-apbh", .driver_data = (kernel_ulong_t) &mxs_dma_types[0], diff --git a/kernel/drivers/dma/nbpfaxi.c b/kernel/drivers/dma/nbpfaxi.c index 88b77c983..2b5a198ac 100644 --- a/kernel/drivers/dma/nbpfaxi.c +++ b/kernel/drivers/dma/nbpfaxi.c @@ -1455,7 +1455,7 @@ static int nbpf_remove(struct platform_device *pdev) return 0; } -static struct platform_device_id nbpf_ids[] = { +static const struct platform_device_id nbpf_ids[] = { {"nbpfaxi64dmac1b4", (kernel_ulong_t)&nbpf_cfg[NBPF1B4]}, {"nbpfaxi64dmac1b8", (kernel_ulong_t)&nbpf_cfg[NBPF1B8]}, {"nbpfaxi64dmac1b16", (kernel_ulong_t)&nbpf_cfg[NBPF1B16]}, diff --git a/kernel/drivers/dma/of-dma.c b/kernel/drivers/dma/of-dma.c index cbd4a8aff..1e1f2986e 100644 --- a/kernel/drivers/dma/of-dma.c +++ b/kernel/drivers/dma/of-dma.c @@ -45,6 +45,50 @@ static struct of_dma *of_dma_find_controller(struct of_phandle_args *dma_spec) } /** + * of_dma_router_xlate - translation function for router devices + * @dma_spec: pointer to DMA specifier as found in the device tree + * @of_dma: pointer to DMA controller data (router information) + * + * The function creates new dma_spec to be passed to the router driver's + * of_dma_route_allocate() function to prepare a dma_spec which will be used + * to request channel from the real DMA controller. 
+ */ +static struct dma_chan *of_dma_router_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct dma_chan *chan; + struct of_dma *ofdma_target; + struct of_phandle_args dma_spec_target; + void *route_data; + + /* translate the request for the real DMA controller */ + memcpy(&dma_spec_target, dma_spec, sizeof(dma_spec_target)); + route_data = ofdma->of_dma_route_allocate(&dma_spec_target, ofdma); + if (IS_ERR(route_data)) + return NULL; + + ofdma_target = of_dma_find_controller(&dma_spec_target); + if (!ofdma_target) + return NULL; + + chan = ofdma_target->of_dma_xlate(&dma_spec_target, ofdma_target); + if (chan) { + chan->router = ofdma->dma_router; + chan->route_data = route_data; + } else { + ofdma->dma_router->route_free(ofdma->dma_router->dev, + route_data); + } + + /* + * Need to put the node back since the ofdma->of_dma_route_allocate + * has taken it for generating the new, translated dma_spec + */ + of_node_put(dma_spec_target.np); + return chan; +} + +/** * of_dma_controller_register - Register a DMA controller to DT DMA helpers * @np: device node of DMA controller * @of_dma_xlate: translation function which converts a phandle @@ -110,6 +154,51 @@ void of_dma_controller_free(struct device_node *np) EXPORT_SYMBOL_GPL(of_dma_controller_free); /** + * of_dma_router_register - Register a DMA router to DT DMA helpers as a + * controller + * @np: device node of DMA router + * @of_dma_route_allocate: setup function for the router which need to + * modify the dma_spec for the DMA controller to + * use and to set up the requested route. + * @dma_router: pointer to dma_router structure to be used when + * the route need to be free up. + * + * Returns 0 on success or appropriate errno value on error. + * + * Allocated memory should be freed with appropriate of_dma_controller_free() + * call. 
+ */ +int of_dma_router_register(struct device_node *np, + void *(*of_dma_route_allocate) + (struct of_phandle_args *, struct of_dma *), + struct dma_router *dma_router) +{ + struct of_dma *ofdma; + + if (!np || !of_dma_route_allocate || !dma_router) { + pr_err("%s: not enough information provided\n", __func__); + return -EINVAL; + } + + ofdma = kzalloc(sizeof(*ofdma), GFP_KERNEL); + if (!ofdma) + return -ENOMEM; + + ofdma->of_node = np; + ofdma->of_dma_xlate = of_dma_router_xlate; + ofdma->of_dma_route_allocate = of_dma_route_allocate; + ofdma->dma_router = dma_router; + + /* Now queue of_dma controller structure in list */ + mutex_lock(&of_dma_lock); + list_add_tail(&ofdma->of_dma_controllers, &of_dma_list); + mutex_unlock(&of_dma_lock); + + return 0; +} +EXPORT_SYMBOL_GPL(of_dma_router_register); + +/** * of_dma_match_channel - Check if a DMA specifier matches name * @np: device node to look for DMA channels * @name: channel name to be matched diff --git a/kernel/drivers/dma/omap-dma.c b/kernel/drivers/dma/omap-dma.c index 167dbaf65..1dfc71c90 100644 --- a/kernel/drivers/dma/omap-dma.c +++ b/kernel/drivers/dma/omap-dma.c @@ -22,6 +22,9 @@ #include "virt-dma.h" +#define OMAP_SDMA_REQUESTS 127 +#define OMAP_SDMA_CHANNELS 32 + struct omap_dmadev { struct dma_device ddev; spinlock_t lock; @@ -31,9 +34,10 @@ struct omap_dmadev { const struct omap_dma_reg *reg_map; struct omap_system_dma_plat_info *plat; bool legacy; + unsigned dma_requests; spinlock_t irq_lock; uint32_t irq_enable_mask; - struct omap_chan *lch_map[32]; + struct omap_chan *lch_map[OMAP_SDMA_CHANNELS]; }; struct omap_chan { @@ -362,7 +366,7 @@ static void omap_dma_start_sg(struct omap_chan *c, struct omap_desc *d, struct omap_sg *sg = d->sg + idx; unsigned cxsa, cxei, cxfi; - if (d->dir == DMA_DEV_TO_MEM) { + if (d->dir == DMA_DEV_TO_MEM || d->dir == DMA_MEM_TO_MEM) { cxsa = CDSA; cxei = CDEI; cxfi = CDFI; @@ -408,7 +412,7 @@ static void omap_dma_start_desc(struct omap_chan *c) if (dma_omap1()) omap_dma_chan_write(c, CCR2, d->ccr >> 16); - if (d->dir == DMA_DEV_TO_MEM) { + if (d->dir == DMA_DEV_TO_MEM || d->dir == DMA_MEM_TO_MEM) { cxsa = CSSA; cxei = CSEI; cxfi = CSFI; @@ -589,6 +593,7 @@ static void omap_dma_free_chan_resources(struct dma_chan *chan) omap_free_dma(c->dma_ch); dev_dbg(od->ddev.dev, "freeing channel for %u\n", c->dma_sig); + c->dma_sig = 0; } static size_t omap_dma_sg_size(struct omap_sg *sg) @@ -930,8 +935,12 @@ static struct dma_async_tx_descriptor *omap_dma_prep_dma_cyclic( else d->ccr |= CCR_SYNC_ELEMENT; - if (dir == DMA_DEV_TO_MEM) + if (dir == DMA_DEV_TO_MEM) { d->ccr |= CCR_TRIGGER_SRC; + d->csdp |= CSDP_DST_PACKED; + } else { + d->csdp |= CSDP_SRC_PACKED; + } d->cicr |= CICR_MISALIGNED_ERR_IE | CICR_TRANS_ERR_IE; @@ -948,6 +957,51 @@ static struct dma_async_tx_descriptor *omap_dma_prep_dma_cyclic( return vchan_tx_prep(&c->vc, &d->vd, flags); } +static struct dma_async_tx_descriptor *omap_dma_prep_dma_memcpy( + struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, + size_t len, unsigned long tx_flags) +{ + struct omap_chan *c = to_omap_dma_chan(chan); + struct omap_desc *d; + uint8_t data_type; + + d = kzalloc(sizeof(*d) + sizeof(d->sg[0]), GFP_ATOMIC); + if (!d) + return NULL; + + data_type = __ffs((src | dest | len)); + if (data_type > CSDP_DATA_TYPE_32) + data_type = CSDP_DATA_TYPE_32; + + d->dir = DMA_MEM_TO_MEM; + d->dev_addr = src; + d->fi = 0; + d->es = data_type; + d->sg[0].en = len / BIT(data_type); + d->sg[0].fn = 1; + d->sg[0].addr = dest; + d->sglen = 1; + d->ccr = c->ccr; + d->ccr |= 
CCR_DST_AMODE_POSTINC | CCR_SRC_AMODE_POSTINC; + + d->cicr = CICR_DROP_IE; + if (tx_flags & DMA_PREP_INTERRUPT) + d->cicr |= CICR_FRAME_IE; + + d->csdp = data_type; + + if (dma_omap1()) { + d->cicr |= CICR_TOUT_IE; + d->csdp |= CSDP_DST_PORT_EMIFF | CSDP_SRC_PORT_EMIFF; + } else { + d->csdp |= CSDP_DST_PACKED | CSDP_SRC_PACKED; + d->cicr |= CICR_MISALIGNED_ERR_IE | CICR_TRANS_ERR_IE; + d->csdp |= CSDP_DST_BURST_64 | CSDP_SRC_BURST_64; + } + + return vchan_tx_prep(&c->vc, &d->vd, tx_flags); +} + static int omap_dma_slave_config(struct dma_chan *chan, struct dma_slave_config *cfg) { struct omap_chan *c = to_omap_dma_chan(chan); @@ -1037,7 +1091,7 @@ static int omap_dma_resume(struct dma_chan *chan) return 0; } -static int omap_dma_chan_init(struct omap_dmadev *od, int dma_sig) +static int omap_dma_chan_init(struct omap_dmadev *od) { struct omap_chan *c; @@ -1046,7 +1100,6 @@ static int omap_dma_chan_init(struct omap_dmadev *od, int dma_sig) return -ENOMEM; c->reg_map = od->reg_map; - c->dma_sig = dma_sig; c->vc.desc_free = omap_dma_desc_free; vchan_init(&c->vc, &od->ddev); INIT_LIST_HEAD(&c->node); @@ -1094,12 +1147,14 @@ static int omap_dma_probe(struct platform_device *pdev) dma_cap_set(DMA_SLAVE, od->ddev.cap_mask); dma_cap_set(DMA_CYCLIC, od->ddev.cap_mask); + dma_cap_set(DMA_MEMCPY, od->ddev.cap_mask); od->ddev.device_alloc_chan_resources = omap_dma_alloc_chan_resources; od->ddev.device_free_chan_resources = omap_dma_free_chan_resources; od->ddev.device_tx_status = omap_dma_tx_status; od->ddev.device_issue_pending = omap_dma_issue_pending; od->ddev.device_prep_slave_sg = omap_dma_prep_slave_sg; od->ddev.device_prep_dma_cyclic = omap_dma_prep_dma_cyclic; + od->ddev.device_prep_dma_memcpy = omap_dma_prep_dma_memcpy; od->ddev.device_config = omap_dma_slave_config; od->ddev.device_pause = omap_dma_pause; od->ddev.device_resume = omap_dma_resume; @@ -1116,8 +1171,17 @@ static int omap_dma_probe(struct platform_device *pdev) tasklet_init(&od->task, omap_dma_sched, (unsigned long)od); - for (i = 0; i < 127; i++) { - rc = omap_dma_chan_init(od, i); + od->dma_requests = OMAP_SDMA_REQUESTS; + if (pdev->dev.of_node && of_property_read_u32(pdev->dev.of_node, + "dma-requests", + &od->dma_requests)) { + dev_info(&pdev->dev, + "Missing dma-requests property, using %u.\n", + OMAP_SDMA_REQUESTS); + } + + for (i = 0; i < OMAP_SDMA_CHANNELS; i++) { + rc = omap_dma_chan_init(od); if (rc) { omap_dma_free(od); return rc; @@ -1208,10 +1272,14 @@ static struct platform_driver omap_dma_driver = { bool omap_dma_filter_fn(struct dma_chan *chan, void *param) { if (chan->device->dev->driver == &omap_dma_driver.driver) { + struct omap_dmadev *od = to_omap_dma_dev(chan->device); struct omap_chan *c = to_omap_dma_chan(chan); unsigned req = *(unsigned *)param; - return req == c->dma_sig; + if (req <= od->dma_requests) { + c->dma_sig = req; + return true; + } } return false; } diff --git a/kernel/drivers/dma/pch_dma.c b/kernel/drivers/dma/pch_dma.c index b859792dd..113605f6f 100644 --- a/kernel/drivers/dma/pch_dma.c +++ b/kernel/drivers/dma/pch_dma.c @@ -11,10 +11,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ #include <linux/dmaengine.h> diff --git a/kernel/drivers/dma/pl330.c b/kernel/drivers/dma/pl330.c index 3dabc52b9..17ee758b4 100644 --- a/kernel/drivers/dma/pl330.c +++ b/kernel/drivers/dma/pl330.c @@ -1198,6 +1198,9 @@ static inline int _loop(unsigned dry_run, u8 buf[], unsigned lcnt0, lcnt1, ljmp0, ljmp1; struct _arg_LPEND lpend; + if (*bursts == 1) + return _bursts(dry_run, buf, pxs, 1); + /* Max iterations possible in DMALP is 256 */ if (*bursts >= 256*256) { lcnt1 = 256; @@ -1424,8 +1427,8 @@ static int pl330_submit_req(struct pl330_thread *thrd, goto xfer_exit; if (ret > pl330->mcbufsz / 2) { - dev_info(pl330->ddma.dev, "%s:%d Trying increasing mcbufsz\n", - __func__, __LINE__); + dev_info(pl330->ddma.dev, "%s:%d Try increasing mcbufsz (%i/%i)\n", + __func__, __LINE__, ret, pl330->mcbufsz / 2); ret = -ENOMEM; goto xfer_exit; } @@ -2584,12 +2587,14 @@ pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst, { struct dma_pl330_desc *desc; struct dma_pl330_chan *pch = to_pchan(chan); - struct pl330_dmac *pl330 = pch->dmac; + struct pl330_dmac *pl330; int burst; if (unlikely(!pch || !len)) return NULL; + pl330 = pch->dmac; + desc = __pl330_prep_dma_memcpy(pch, dst, src, len); if (!desc) return NULL; diff --git a/kernel/drivers/dma/pxa_dma.c b/kernel/drivers/dma/pxa_dma.c new file mode 100644 index 000000000..a59061e42 --- /dev/null +++ b/kernel/drivers/dma/pxa_dma.c @@ -0,0 +1,1491 @@ +/* + * Copyright 2015 Robert Jarzmik <robert.jarzmik@free.fr> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/err.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/types.h> +#include <linux/interrupt.h> +#include <linux/dma-mapping.h> +#include <linux/slab.h> +#include <linux/dmaengine.h> +#include <linux/platform_device.h> +#include <linux/device.h> +#include <linux/platform_data/mmp_dma.h> +#include <linux/dmapool.h> +#include <linux/of_device.h> +#include <linux/of_dma.h> +#include <linux/of.h> +#include <linux/dma/pxa-dma.h> + +#include "dmaengine.h" +#include "virt-dma.h" + +#define DCSR(n) (0x0000 + ((n) << 2)) +#define DALGN(n) 0x00a0 +#define DINT 0x00f0 +#define DDADR(n) (0x0200 + ((n) << 4)) +#define DSADR(n) (0x0204 + ((n) << 4)) +#define DTADR(n) (0x0208 + ((n) << 4)) +#define DCMD(n) (0x020c + ((n) << 4)) + +#define PXA_DCSR_RUN BIT(31) /* Run Bit (read / write) */ +#define PXA_DCSR_NODESC BIT(30) /* No-Descriptor Fetch (read / write) */ +#define PXA_DCSR_STOPIRQEN BIT(29) /* Stop Interrupt Enable (R/W) */ +#define PXA_DCSR_REQPEND BIT(8) /* Request Pending (read-only) */ +#define PXA_DCSR_STOPSTATE BIT(3) /* Stop State (read-only) */ +#define PXA_DCSR_ENDINTR BIT(2) /* End Interrupt (read / write) */ +#define PXA_DCSR_STARTINTR BIT(1) /* Start Interrupt (read / write) */ +#define PXA_DCSR_BUSERR BIT(0) /* Bus Error Interrupt (read / write) */ + +#define PXA_DCSR_EORIRQEN BIT(28) /* End of Receive IRQ Enable (R/W) */ +#define PXA_DCSR_EORJMPEN BIT(27) /* Jump to next descriptor on EOR */ +#define PXA_DCSR_EORSTOPEN BIT(26) /* STOP on an EOR */ +#define PXA_DCSR_SETCMPST BIT(25) /* Set Descriptor Compare Status */ +#define PXA_DCSR_CLRCMPST BIT(24) /* Clear Descriptor Compare Status */ +#define PXA_DCSR_CMPST BIT(10) /* The Descriptor Compare Status */ +#define PXA_DCSR_EORINTR BIT(9) /* The end of Receive */ + +#define DRCMR_MAPVLD BIT(7) /* Map Valid (read / write) */ +#define DRCMR_CHLNUM 0x1f /* 
mask for Channel Number (read / write) */ + +#define DDADR_DESCADDR 0xfffffff0 /* Address of next descriptor (mask) */ +#define DDADR_STOP BIT(0) /* Stop (read / write) */ + +#define PXA_DCMD_INCSRCADDR BIT(31) /* Source Address Increment Setting. */ +#define PXA_DCMD_INCTRGADDR BIT(30) /* Target Address Increment Setting. */ +#define PXA_DCMD_FLOWSRC BIT(29) /* Flow Control by the source. */ +#define PXA_DCMD_FLOWTRG BIT(28) /* Flow Control by the target. */ +#define PXA_DCMD_STARTIRQEN BIT(22) /* Start Interrupt Enable */ +#define PXA_DCMD_ENDIRQEN BIT(21) /* End Interrupt Enable */ +#define PXA_DCMD_ENDIAN BIT(18) /* Device Endian-ness. */ +#define PXA_DCMD_BURST8 (1 << 16) /* 8 byte burst */ +#define PXA_DCMD_BURST16 (2 << 16) /* 16 byte burst */ +#define PXA_DCMD_BURST32 (3 << 16) /* 32 byte burst */ +#define PXA_DCMD_WIDTH1 (1 << 14) /* 1 byte width */ +#define PXA_DCMD_WIDTH2 (2 << 14) /* 2 byte width (HalfWord) */ +#define PXA_DCMD_WIDTH4 (3 << 14) /* 4 byte width (Word) */ +#define PXA_DCMD_LENGTH 0x01fff /* length mask (max = 8K - 1) */ + +#define PDMA_ALIGNMENT 3 +#define PDMA_MAX_DESC_BYTES (PXA_DCMD_LENGTH & ~((1 << PDMA_ALIGNMENT) - 1)) + +struct pxad_desc_hw { + u32 ddadr; /* Points to the next descriptor + flags */ + u32 dsadr; /* DSADR value for the current transfer */ + u32 dtadr; /* DTADR value for the current transfer */ + u32 dcmd; /* DCMD value for the current transfer */ +} __aligned(16); + +struct pxad_desc_sw { + struct virt_dma_desc vd; /* Virtual descriptor */ + int nb_desc; /* Number of hw. descriptors */ + size_t len; /* Number of bytes xfered */ + dma_addr_t first; /* First descriptor's addr */ + + /* At least one descriptor has an src/dst address not multiple of 8 */ + bool misaligned; + bool cyclic; + struct dma_pool *desc_pool; /* Channel's used allocator */ + + struct pxad_desc_hw *hw_desc[]; /* DMA coherent descriptors */ +}; + +struct pxad_phy { + int idx; + void __iomem *base; + struct pxad_chan *vchan; +}; + +struct pxad_chan { + struct virt_dma_chan vc; /* Virtual channel */ + u32 drcmr; /* Requestor of the channel */ + enum pxad_chan_prio prio; /* Required priority of phy */ + /* + * At least one desc_sw in submitted or issued transfers on this channel + * has one address such as: addr % 8 != 0. This implies the DALGN + * setting on the phy. 
+ */ + bool misaligned; + struct dma_slave_config cfg; /* Runtime config */ + + /* protected by vc->lock */ + struct pxad_phy *phy; + struct dma_pool *desc_pool; /* Descriptors pool */ +}; + +struct pxad_device { + struct dma_device slave; + int nr_chans; + void __iomem *base; + struct pxad_phy *phys; + spinlock_t phy_lock; /* Phy association */ +#ifdef CONFIG_DEBUG_FS + struct dentry *dbgfs_root; + struct dentry *dbgfs_state; + struct dentry **dbgfs_chan; +#endif +}; + +#define tx_to_pxad_desc(tx) \ + container_of(tx, struct pxad_desc_sw, async_tx) +#define to_pxad_chan(dchan) \ + container_of(dchan, struct pxad_chan, vc.chan) +#define to_pxad_dev(dmadev) \ + container_of(dmadev, struct pxad_device, slave) +#define to_pxad_sw_desc(_vd) \ + container_of((_vd), struct pxad_desc_sw, vd) + +#define _phy_readl_relaxed(phy, _reg) \ + readl_relaxed((phy)->base + _reg((phy)->idx)) +#define phy_readl_relaxed(phy, _reg) \ + ({ \ + u32 _v; \ + _v = readl_relaxed((phy)->base + _reg((phy)->idx)); \ + dev_vdbg(&phy->vchan->vc.chan.dev->device, \ + "%s(): readl(%s): 0x%08x\n", __func__, #_reg, \ + _v); \ + _v; \ + }) +#define phy_writel(phy, val, _reg) \ + do { \ + writel((val), (phy)->base + _reg((phy)->idx)); \ + dev_vdbg(&phy->vchan->vc.chan.dev->device, \ + "%s(): writel(0x%08x, %s)\n", \ + __func__, (u32)(val), #_reg); \ + } while (0) +#define phy_writel_relaxed(phy, val, _reg) \ + do { \ + writel_relaxed((val), (phy)->base + _reg((phy)->idx)); \ + dev_vdbg(&phy->vchan->vc.chan.dev->device, \ + "%s(): writel_relaxed(0x%08x, %s)\n", \ + __func__, (u32)(val), #_reg); \ + } while (0) + +static unsigned int pxad_drcmr(unsigned int line) +{ + if (line < 64) + return 0x100 + line * 4; + return 0x1000 + line * 4; +} + +/* + * Debug fs + */ +#ifdef CONFIG_DEBUG_FS +#include <linux/debugfs.h> +#include <linux/uaccess.h> +#include <linux/seq_file.h> + +static int dbg_show_requester_chan(struct seq_file *s, void *p) +{ + struct pxad_phy *phy = s->private; + int i; + u32 drcmr; + + seq_printf(s, "DMA channel %d requester :\n", phy->idx); + for (i = 0; i < 70; i++) { + drcmr = readl_relaxed(phy->base + pxad_drcmr(i)); + if ((drcmr & DRCMR_CHLNUM) == phy->idx) + seq_printf(s, "\tRequester %d (MAPVLD=%d)\n", i, + !!(drcmr & DRCMR_MAPVLD)); + } + return 0; +} + +static inline int dbg_burst_from_dcmd(u32 dcmd) +{ + int burst = (dcmd >> 16) & 0x3; + + return burst ? 4 << burst : 0; +} + +static int is_phys_valid(unsigned long addr) +{ + return pfn_valid(__phys_to_pfn(addr)); +} + +#define PXA_DCSR_STR(flag) (dcsr & PXA_DCSR_##flag ? #flag" " : "") +#define PXA_DCMD_STR(flag) (dcmd & PXA_DCMD_##flag ? 
#flag" " : "") + +static int dbg_show_descriptors(struct seq_file *s, void *p) +{ + struct pxad_phy *phy = s->private; + int i, max_show = 20, burst, width; + u32 dcmd; + unsigned long phys_desc, ddadr; + struct pxad_desc_hw *desc; + + phys_desc = ddadr = _phy_readl_relaxed(phy, DDADR); + + seq_printf(s, "DMA channel %d descriptors :\n", phy->idx); + seq_printf(s, "[%03d] First descriptor unknown\n", 0); + for (i = 1; i < max_show && is_phys_valid(phys_desc); i++) { + desc = phys_to_virt(phys_desc); + dcmd = desc->dcmd; + burst = dbg_burst_from_dcmd(dcmd); + width = (1 << ((dcmd >> 14) & 0x3)) >> 1; + + seq_printf(s, "[%03d] Desc at %08lx(virt %p)\n", + i, phys_desc, desc); + seq_printf(s, "\tDDADR = %08x\n", desc->ddadr); + seq_printf(s, "\tDSADR = %08x\n", desc->dsadr); + seq_printf(s, "\tDTADR = %08x\n", desc->dtadr); + seq_printf(s, "\tDCMD = %08x (%s%s%s%s%s%s%sburst=%d width=%d len=%d)\n", + dcmd, + PXA_DCMD_STR(INCSRCADDR), PXA_DCMD_STR(INCTRGADDR), + PXA_DCMD_STR(FLOWSRC), PXA_DCMD_STR(FLOWTRG), + PXA_DCMD_STR(STARTIRQEN), PXA_DCMD_STR(ENDIRQEN), + PXA_DCMD_STR(ENDIAN), burst, width, + dcmd & PXA_DCMD_LENGTH); + phys_desc = desc->ddadr; + } + if (i == max_show) + seq_printf(s, "[%03d] Desc at %08lx ... max display reached\n", + i, phys_desc); + else + seq_printf(s, "[%03d] Desc at %08lx is %s\n", + i, phys_desc, phys_desc == DDADR_STOP ? + "DDADR_STOP" : "invalid"); + + return 0; +} + +static int dbg_show_chan_state(struct seq_file *s, void *p) +{ + struct pxad_phy *phy = s->private; + u32 dcsr, dcmd; + int burst, width; + static const char * const str_prio[] = { + "high", "normal", "low", "invalid" + }; + + dcsr = _phy_readl_relaxed(phy, DCSR); + dcmd = _phy_readl_relaxed(phy, DCMD); + burst = dbg_burst_from_dcmd(dcmd); + width = (1 << ((dcmd >> 14) & 0x3)) >> 1; + + seq_printf(s, "DMA channel %d\n", phy->idx); + seq_printf(s, "\tPriority : %s\n", + str_prio[(phy->idx & 0xf) / 4]); + seq_printf(s, "\tUnaligned transfer bit: %s\n", + _phy_readl_relaxed(phy, DALGN) & BIT(phy->idx) ? 
+ "yes" : "no"); + seq_printf(s, "\tDCSR = %08x (%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", + dcsr, PXA_DCSR_STR(RUN), PXA_DCSR_STR(NODESC), + PXA_DCSR_STR(STOPIRQEN), PXA_DCSR_STR(EORIRQEN), + PXA_DCSR_STR(EORJMPEN), PXA_DCSR_STR(EORSTOPEN), + PXA_DCSR_STR(SETCMPST), PXA_DCSR_STR(CLRCMPST), + PXA_DCSR_STR(CMPST), PXA_DCSR_STR(EORINTR), + PXA_DCSR_STR(REQPEND), PXA_DCSR_STR(STOPSTATE), + PXA_DCSR_STR(ENDINTR), PXA_DCSR_STR(STARTINTR), + PXA_DCSR_STR(BUSERR)); + + seq_printf(s, "\tDCMD = %08x (%s%s%s%s%s%s%sburst=%d width=%d len=%d)\n", + dcmd, + PXA_DCMD_STR(INCSRCADDR), PXA_DCMD_STR(INCTRGADDR), + PXA_DCMD_STR(FLOWSRC), PXA_DCMD_STR(FLOWTRG), + PXA_DCMD_STR(STARTIRQEN), PXA_DCMD_STR(ENDIRQEN), + PXA_DCMD_STR(ENDIAN), burst, width, dcmd & PXA_DCMD_LENGTH); + seq_printf(s, "\tDSADR = %08x\n", _phy_readl_relaxed(phy, DSADR)); + seq_printf(s, "\tDTADR = %08x\n", _phy_readl_relaxed(phy, DTADR)); + seq_printf(s, "\tDDADR = %08x\n", _phy_readl_relaxed(phy, DDADR)); + + return 0; +} + +static int dbg_show_state(struct seq_file *s, void *p) +{ + struct pxad_device *pdev = s->private; + + /* basic device status */ + seq_puts(s, "DMA engine status\n"); + seq_printf(s, "\tChannel number: %d\n", pdev->nr_chans); + + return 0; +} + +#define DBGFS_FUNC_DECL(name) \ +static int dbg_open_##name(struct inode *inode, struct file *file) \ +{ \ + return single_open(file, dbg_show_##name, inode->i_private); \ +} \ +static const struct file_operations dbg_fops_##name = { \ + .owner = THIS_MODULE, \ + .open = dbg_open_##name, \ + .llseek = seq_lseek, \ + .read = seq_read, \ + .release = single_release, \ +} + +DBGFS_FUNC_DECL(state); +DBGFS_FUNC_DECL(chan_state); +DBGFS_FUNC_DECL(descriptors); +DBGFS_FUNC_DECL(requester_chan); + +static struct dentry *pxad_dbg_alloc_chan(struct pxad_device *pdev, + int ch, struct dentry *chandir) +{ + char chan_name[11]; + struct dentry *chan, *chan_state = NULL, *chan_descr = NULL; + struct dentry *chan_reqs = NULL; + void *dt; + + scnprintf(chan_name, sizeof(chan_name), "%d", ch); + chan = debugfs_create_dir(chan_name, chandir); + dt = (void *)&pdev->phys[ch]; + + if (chan) + chan_state = debugfs_create_file("state", 0400, chan, dt, + &dbg_fops_chan_state); + if (chan_state) + chan_descr = debugfs_create_file("descriptors", 0400, chan, dt, + &dbg_fops_descriptors); + if (chan_descr) + chan_reqs = debugfs_create_file("requesters", 0400, chan, dt, + &dbg_fops_requester_chan); + if (!chan_reqs) + goto err_state; + + return chan; + +err_state: + debugfs_remove_recursive(chan); + return NULL; +} + +static void pxad_init_debugfs(struct pxad_device *pdev) +{ + int i; + struct dentry *chandir; + + pdev->dbgfs_root = debugfs_create_dir(dev_name(pdev->slave.dev), NULL); + if (IS_ERR(pdev->dbgfs_root) || !pdev->dbgfs_root) + goto err_root; + + pdev->dbgfs_state = debugfs_create_file("state", 0400, pdev->dbgfs_root, + pdev, &dbg_fops_state); + if (!pdev->dbgfs_state) + goto err_state; + + pdev->dbgfs_chan = + kmalloc_array(pdev->nr_chans, sizeof(*pdev->dbgfs_state), + GFP_KERNEL); + if (!pdev->dbgfs_chan) + goto err_alloc; + + chandir = debugfs_create_dir("channels", pdev->dbgfs_root); + if (!chandir) + goto err_chandir; + + for (i = 0; i < pdev->nr_chans; i++) { + pdev->dbgfs_chan[i] = pxad_dbg_alloc_chan(pdev, i, chandir); + if (!pdev->dbgfs_chan[i]) + goto err_chans; + } + + return; +err_chans: +err_chandir: + kfree(pdev->dbgfs_chan); +err_alloc: +err_state: + debugfs_remove_recursive(pdev->dbgfs_root); +err_root: + pr_err("pxad: debugfs is not available\n"); +} + +static void 
pxad_cleanup_debugfs(struct pxad_device *pdev) +{ + debugfs_remove_recursive(pdev->dbgfs_root); +} +#else +static inline void pxad_init_debugfs(struct pxad_device *pdev) {} +static inline void pxad_cleanup_debugfs(struct pxad_device *pdev) {} +#endif + +/* + * In the transition phase where legacy pxa handling is done at the same time as + * mmp_dma, the DMA physical channel split between the 2 DMA providers is done + * through legacy_reserved. Legacy code reserves DMA channels by settings + * corresponding bits in legacy_reserved. + */ +static u32 legacy_reserved; +static u32 legacy_unavailable; + +static struct pxad_phy *lookup_phy(struct pxad_chan *pchan) +{ + int prio, i; + struct pxad_device *pdev = to_pxad_dev(pchan->vc.chan.device); + struct pxad_phy *phy, *found = NULL; + unsigned long flags; + + /* + * dma channel priorities + * ch 0 - 3, 16 - 19 <--> (0) + * ch 4 - 7, 20 - 23 <--> (1) + * ch 8 - 11, 24 - 27 <--> (2) + * ch 12 - 15, 28 - 31 <--> (3) + */ + + spin_lock_irqsave(&pdev->phy_lock, flags); + for (prio = pchan->prio; prio >= PXAD_PRIO_HIGHEST; prio--) { + for (i = 0; i < pdev->nr_chans; i++) { + if (prio != (i & 0xf) >> 2) + continue; + if ((i < 32) && (legacy_reserved & BIT(i))) + continue; + phy = &pdev->phys[i]; + if (!phy->vchan) { + phy->vchan = pchan; + found = phy; + if (i < 32) + legacy_unavailable |= BIT(i); + goto out_unlock; + } + } + } + +out_unlock: + spin_unlock_irqrestore(&pdev->phy_lock, flags); + dev_dbg(&pchan->vc.chan.dev->device, + "%s(): phy=%p(%d)\n", __func__, found, + found ? found->idx : -1); + + return found; +} + +static void pxad_free_phy(struct pxad_chan *chan) +{ + struct pxad_device *pdev = to_pxad_dev(chan->vc.chan.device); + unsigned long flags; + u32 reg; + int i; + + dev_dbg(&chan->vc.chan.dev->device, + "%s(): freeing\n", __func__); + if (!chan->phy) + return; + + /* clear the channel mapping in DRCMR */ + if (chan->drcmr <= DRCMR_CHLNUM) { + reg = pxad_drcmr(chan->drcmr); + writel_relaxed(0, chan->phy->base + reg); + } + + spin_lock_irqsave(&pdev->phy_lock, flags); + for (i = 0; i < 32; i++) + if (chan->phy == &pdev->phys[i]) + legacy_unavailable &= ~BIT(i); + chan->phy->vchan = NULL; + chan->phy = NULL; + spin_unlock_irqrestore(&pdev->phy_lock, flags); +} + +static bool is_chan_running(struct pxad_chan *chan) +{ + u32 dcsr; + struct pxad_phy *phy = chan->phy; + + if (!phy) + return false; + dcsr = phy_readl_relaxed(phy, DCSR); + return dcsr & PXA_DCSR_RUN; +} + +static bool is_running_chan_misaligned(struct pxad_chan *chan) +{ + u32 dalgn; + + BUG_ON(!chan->phy); + dalgn = phy_readl_relaxed(chan->phy, DALGN); + return dalgn & (BIT(chan->phy->idx)); +} + +static void phy_enable(struct pxad_phy *phy, bool misaligned) +{ + u32 reg, dalgn; + + if (!phy->vchan) + return; + + dev_dbg(&phy->vchan->vc.chan.dev->device, + "%s(); phy=%p(%d) misaligned=%d\n", __func__, + phy, phy->idx, misaligned); + + if (phy->vchan->drcmr <= DRCMR_CHLNUM) { + reg = pxad_drcmr(phy->vchan->drcmr); + writel_relaxed(DRCMR_MAPVLD | phy->idx, phy->base + reg); + } + + dalgn = phy_readl_relaxed(phy, DALGN); + if (misaligned) + dalgn |= BIT(phy->idx); + else + dalgn &= ~BIT(phy->idx); + phy_writel_relaxed(phy, dalgn, DALGN); + + phy_writel(phy, PXA_DCSR_STOPIRQEN | PXA_DCSR_ENDINTR | + PXA_DCSR_BUSERR | PXA_DCSR_RUN, DCSR); +} + +static void phy_disable(struct pxad_phy *phy) +{ + u32 dcsr; + + if (!phy) + return; + + dcsr = phy_readl_relaxed(phy, DCSR); + dev_dbg(&phy->vchan->vc.chan.dev->device, + "%s(): phy=%p(%d)\n", __func__, phy, phy->idx); + phy_writel(phy, dcsr 
& ~PXA_DCSR_RUN & ~PXA_DCSR_STOPIRQEN, DCSR); +} + +static void pxad_launch_chan(struct pxad_chan *chan, + struct pxad_desc_sw *desc) +{ + dev_dbg(&chan->vc.chan.dev->device, + "%s(): desc=%p\n", __func__, desc); + if (!chan->phy) { + chan->phy = lookup_phy(chan); + if (!chan->phy) { + dev_dbg(&chan->vc.chan.dev->device, + "%s(): no free dma channel\n", __func__); + return; + } + } + + /* + * Program the descriptor's address into the DMA controller, + * then start the DMA transaction + */ + phy_writel(chan->phy, desc->first, DDADR); + phy_enable(chan->phy, chan->misaligned); +} + +static void set_updater_desc(struct pxad_desc_sw *sw_desc, + unsigned long flags) +{ + struct pxad_desc_hw *updater = + sw_desc->hw_desc[sw_desc->nb_desc - 1]; + dma_addr_t dma = sw_desc->hw_desc[sw_desc->nb_desc - 2]->ddadr; + + updater->ddadr = DDADR_STOP; + updater->dsadr = dma; + updater->dtadr = dma + 8; + updater->dcmd = PXA_DCMD_WIDTH4 | PXA_DCMD_BURST32 | + (PXA_DCMD_LENGTH & sizeof(u32)); + if (flags & DMA_PREP_INTERRUPT) + updater->dcmd |= PXA_DCMD_ENDIRQEN; + if (sw_desc->cyclic) + sw_desc->hw_desc[sw_desc->nb_desc - 2]->ddadr = sw_desc->first; +} + +static bool is_desc_completed(struct virt_dma_desc *vd) +{ + struct pxad_desc_sw *sw_desc = to_pxad_sw_desc(vd); + struct pxad_desc_hw *updater = + sw_desc->hw_desc[sw_desc->nb_desc - 1]; + + return updater->dtadr != (updater->dsadr + 8); +} + +static void pxad_desc_chain(struct virt_dma_desc *vd1, + struct virt_dma_desc *vd2) +{ + struct pxad_desc_sw *desc1 = to_pxad_sw_desc(vd1); + struct pxad_desc_sw *desc2 = to_pxad_sw_desc(vd2); + dma_addr_t dma_to_chain; + + dma_to_chain = desc2->first; + desc1->hw_desc[desc1->nb_desc - 1]->ddadr = dma_to_chain; +} + +static bool pxad_try_hotchain(struct virt_dma_chan *vc, + struct virt_dma_desc *vd) +{ + struct virt_dma_desc *vd_last_issued = NULL; + struct pxad_chan *chan = to_pxad_chan(&vc->chan); + + /* + * Attempt to hot chain the tx if the phy is still running. This is + * considered successful only if either the channel is still running + * after the chaining, or if the chained transfer is completed after + * having been hot chained. + * A change of alignment is not allowed, and forbids hotchaining. 
+ */ + if (is_chan_running(chan)) { + BUG_ON(list_empty(&vc->desc_issued)); + + if (!is_running_chan_misaligned(chan) && + to_pxad_sw_desc(vd)->misaligned) + return false; + + vd_last_issued = list_entry(vc->desc_issued.prev, + struct virt_dma_desc, node); + pxad_desc_chain(vd_last_issued, vd); + if (is_chan_running(chan) || is_desc_completed(vd_last_issued)) + return true; + } + + return false; +} + +static unsigned int clear_chan_irq(struct pxad_phy *phy) +{ + u32 dcsr; + u32 dint = readl(phy->base + DINT); + + if (!(dint & BIT(phy->idx))) + return PXA_DCSR_RUN; + + /* clear irq */ + dcsr = phy_readl_relaxed(phy, DCSR); + phy_writel(phy, dcsr, DCSR); + if ((dcsr & PXA_DCSR_BUSERR) && (phy->vchan)) + dev_warn(&phy->vchan->vc.chan.dev->device, + "%s(chan=%p): PXA_DCSR_BUSERR\n", + __func__, &phy->vchan); + + return dcsr & ~PXA_DCSR_RUN; +} + +static irqreturn_t pxad_chan_handler(int irq, void *dev_id) +{ + struct pxad_phy *phy = dev_id; + struct pxad_chan *chan = phy->vchan; + struct virt_dma_desc *vd, *tmp; + unsigned int dcsr; + unsigned long flags; + + BUG_ON(!chan); + + dcsr = clear_chan_irq(phy); + if (dcsr & PXA_DCSR_RUN) + return IRQ_NONE; + + spin_lock_irqsave(&chan->vc.lock, flags); + list_for_each_entry_safe(vd, tmp, &chan->vc.desc_issued, node) { + dev_dbg(&chan->vc.chan.dev->device, + "%s(): checking txd %p[%x]: completed=%d\n", + __func__, vd, vd->tx.cookie, is_desc_completed(vd)); + if (to_pxad_sw_desc(vd)->cyclic) { + vchan_cyclic_callback(vd); + break; + } + if (is_desc_completed(vd)) { + list_del(&vd->node); + vchan_cookie_complete(vd); + } else { + break; + } + } + + if (dcsr & PXA_DCSR_STOPSTATE) { + dev_dbg(&chan->vc.chan.dev->device, + "%s(): channel stopped, submitted_empty=%d issued_empty=%d", + __func__, + list_empty(&chan->vc.desc_submitted), + list_empty(&chan->vc.desc_issued)); + phy_writel_relaxed(phy, dcsr & ~PXA_DCSR_STOPIRQEN, DCSR); + + if (list_empty(&chan->vc.desc_issued)) { + chan->misaligned = + !list_empty(&chan->vc.desc_submitted); + } else { + vd = list_first_entry(&chan->vc.desc_issued, + struct virt_dma_desc, node); + pxad_launch_chan(chan, to_pxad_sw_desc(vd)); + } + } + spin_unlock_irqrestore(&chan->vc.lock, flags); + + return IRQ_HANDLED; +} + +static irqreturn_t pxad_int_handler(int irq, void *dev_id) +{ + struct pxad_device *pdev = dev_id; + struct pxad_phy *phy; + u32 dint = readl(pdev->base + DINT); + int i, ret = IRQ_NONE; + + while (dint) { + i = __ffs(dint); + dint &= (dint - 1); + phy = &pdev->phys[i]; + if ((i < 32) && (legacy_reserved & BIT(i))) + continue; + if (pxad_chan_handler(irq, phy) == IRQ_HANDLED) + ret = IRQ_HANDLED; + } + + return ret; +} + +static int pxad_alloc_chan_resources(struct dma_chan *dchan) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + struct pxad_device *pdev = to_pxad_dev(chan->vc.chan.device); + + if (chan->desc_pool) + return 1; + + chan->desc_pool = dma_pool_create(dma_chan_name(dchan), + pdev->slave.dev, + sizeof(struct pxad_desc_hw), + __alignof__(struct pxad_desc_hw), + 0); + if (!chan->desc_pool) { + dev_err(&chan->vc.chan.dev->device, + "%s(): unable to allocate descriptor pool\n", + __func__); + return -ENOMEM; + } + + return 1; +} + +static void pxad_free_chan_resources(struct dma_chan *dchan) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + + vchan_free_chan_resources(&chan->vc); + dma_pool_destroy(chan->desc_pool); + chan->desc_pool = NULL; + +} + +static void pxad_free_desc(struct virt_dma_desc *vd) +{ + int i; + dma_addr_t dma; + struct pxad_desc_sw *sw_desc = to_pxad_sw_desc(vd); + + 
BUG_ON(sw_desc->nb_desc == 0); + for (i = sw_desc->nb_desc - 1; i >= 0; i--) { + if (i > 0) + dma = sw_desc->hw_desc[i - 1]->ddadr; + else + dma = sw_desc->first; + dma_pool_free(sw_desc->desc_pool, + sw_desc->hw_desc[i], dma); + } + sw_desc->nb_desc = 0; + kfree(sw_desc); +} + +static struct pxad_desc_sw * +pxad_alloc_desc(struct pxad_chan *chan, unsigned int nb_hw_desc) +{ + struct pxad_desc_sw *sw_desc; + dma_addr_t dma; + int i; + + sw_desc = kzalloc(sizeof(*sw_desc) + + nb_hw_desc * sizeof(struct pxad_desc_hw *), + GFP_NOWAIT); + if (!sw_desc) + return NULL; + sw_desc->desc_pool = chan->desc_pool; + + for (i = 0; i < nb_hw_desc; i++) { + sw_desc->hw_desc[i] = dma_pool_alloc(sw_desc->desc_pool, + GFP_NOWAIT, &dma); + if (!sw_desc->hw_desc[i]) { + dev_err(&chan->vc.chan.dev->device, + "%s(): Couldn't allocate the %dth hw_desc from dma_pool %p\n", + __func__, i, sw_desc->desc_pool); + goto err; + } + + if (i == 0) + sw_desc->first = dma; + else + sw_desc->hw_desc[i - 1]->ddadr = dma; + sw_desc->nb_desc++; + } + + return sw_desc; +err: + pxad_free_desc(&sw_desc->vd); + return NULL; +} + +static dma_cookie_t pxad_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct virt_dma_chan *vc = to_virt_chan(tx->chan); + struct pxad_chan *chan = to_pxad_chan(&vc->chan); + struct virt_dma_desc *vd_chained = NULL, + *vd = container_of(tx, struct virt_dma_desc, tx); + dma_cookie_t cookie; + unsigned long flags; + + set_updater_desc(to_pxad_sw_desc(vd), tx->flags); + + spin_lock_irqsave(&vc->lock, flags); + cookie = dma_cookie_assign(tx); + + if (list_empty(&vc->desc_submitted) && pxad_try_hotchain(vc, vd)) { + list_move_tail(&vd->node, &vc->desc_issued); + dev_dbg(&chan->vc.chan.dev->device, + "%s(): txd %p[%x]: submitted (hot linked)\n", + __func__, vd, cookie); + goto out; + } + + /* + * Fallback to placing the tx in the submitted queue + */ + if (!list_empty(&vc->desc_submitted)) { + vd_chained = list_entry(vc->desc_submitted.prev, + struct virt_dma_desc, node); + /* + * Only chain the descriptors if no new misalignment is + * introduced. If a new misalignment is chained, let the channel + * stop, and be relaunched in misalign mode from the irq + * handler. + */ + if (chan->misaligned || !to_pxad_sw_desc(vd)->misaligned) + pxad_desc_chain(vd_chained, vd); + else + vd_chained = NULL; + } + dev_dbg(&chan->vc.chan.dev->device, + "%s(): txd %p[%x]: submitted (%s linked)\n", + __func__, vd, cookie, vd_chained ? 
"cold" : "not"); + list_move_tail(&vd->node, &vc->desc_submitted); + chan->misaligned |= to_pxad_sw_desc(vd)->misaligned; + +out: + spin_unlock_irqrestore(&vc->lock, flags); + return cookie; +} + +static void pxad_issue_pending(struct dma_chan *dchan) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + struct virt_dma_desc *vd_first; + unsigned long flags; + + spin_lock_irqsave(&chan->vc.lock, flags); + if (list_empty(&chan->vc.desc_submitted)) + goto out; + + vd_first = list_first_entry(&chan->vc.desc_submitted, + struct virt_dma_desc, node); + dev_dbg(&chan->vc.chan.dev->device, + "%s(): txd %p[%x]", __func__, vd_first, vd_first->tx.cookie); + + vchan_issue_pending(&chan->vc); + if (!pxad_try_hotchain(&chan->vc, vd_first)) + pxad_launch_chan(chan, to_pxad_sw_desc(vd_first)); +out: + spin_unlock_irqrestore(&chan->vc.lock, flags); +} + +static inline struct dma_async_tx_descriptor * +pxad_tx_prep(struct virt_dma_chan *vc, struct virt_dma_desc *vd, + unsigned long tx_flags) +{ + struct dma_async_tx_descriptor *tx; + struct pxad_chan *chan = container_of(vc, struct pxad_chan, vc); + + INIT_LIST_HEAD(&vd->node); + tx = vchan_tx_prep(vc, vd, tx_flags); + tx->tx_submit = pxad_tx_submit; + dev_dbg(&chan->vc.chan.dev->device, + "%s(): vc=%p txd=%p[%x] flags=0x%lx\n", __func__, + vc, vd, vd->tx.cookie, + tx_flags); + + return tx; +} + +static void pxad_get_config(struct pxad_chan *chan, + enum dma_transfer_direction dir, + u32 *dcmd, u32 *dev_src, u32 *dev_dst) +{ + u32 maxburst = 0, dev_addr = 0; + enum dma_slave_buswidth width = DMA_SLAVE_BUSWIDTH_UNDEFINED; + + *dcmd = 0; + if (dir == DMA_DEV_TO_MEM) { + maxburst = chan->cfg.src_maxburst; + width = chan->cfg.src_addr_width; + dev_addr = chan->cfg.src_addr; + *dev_src = dev_addr; + *dcmd |= PXA_DCMD_INCTRGADDR; + if (chan->drcmr <= DRCMR_CHLNUM) + *dcmd |= PXA_DCMD_FLOWSRC; + } + if (dir == DMA_MEM_TO_DEV) { + maxburst = chan->cfg.dst_maxburst; + width = chan->cfg.dst_addr_width; + dev_addr = chan->cfg.dst_addr; + *dev_dst = dev_addr; + *dcmd |= PXA_DCMD_INCSRCADDR; + if (chan->drcmr <= DRCMR_CHLNUM) + *dcmd |= PXA_DCMD_FLOWTRG; + } + if (dir == DMA_MEM_TO_MEM) + *dcmd |= PXA_DCMD_BURST32 | PXA_DCMD_INCTRGADDR | + PXA_DCMD_INCSRCADDR; + + dev_dbg(&chan->vc.chan.dev->device, + "%s(): dev_addr=0x%x maxburst=%d width=%d dir=%d\n", + __func__, dev_addr, maxburst, width, dir); + + if (width == DMA_SLAVE_BUSWIDTH_1_BYTE) + *dcmd |= PXA_DCMD_WIDTH1; + else if (width == DMA_SLAVE_BUSWIDTH_2_BYTES) + *dcmd |= PXA_DCMD_WIDTH2; + else if (width == DMA_SLAVE_BUSWIDTH_4_BYTES) + *dcmd |= PXA_DCMD_WIDTH4; + + if (maxburst == 8) + *dcmd |= PXA_DCMD_BURST8; + else if (maxburst == 16) + *dcmd |= PXA_DCMD_BURST16; + else if (maxburst == 32) + *dcmd |= PXA_DCMD_BURST32; + + /* FIXME: drivers should be ported over to use the filter + * function. Once that's done, the following two lines can + * be removed. 
+ */ + if (chan->cfg.slave_id) + chan->drcmr = chan->cfg.slave_id; +} + +static struct dma_async_tx_descriptor * +pxad_prep_memcpy(struct dma_chan *dchan, + dma_addr_t dma_dst, dma_addr_t dma_src, + size_t len, unsigned long flags) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + struct pxad_desc_sw *sw_desc; + struct pxad_desc_hw *hw_desc; + u32 dcmd; + unsigned int i, nb_desc = 0; + size_t copy; + + if (!dchan || !len) + return NULL; + + dev_dbg(&chan->vc.chan.dev->device, + "%s(): dma_dst=0x%lx dma_src=0x%lx len=%zu flags=%lx\n", + __func__, (unsigned long)dma_dst, (unsigned long)dma_src, + len, flags); + pxad_get_config(chan, DMA_MEM_TO_MEM, &dcmd, NULL, NULL); + + nb_desc = DIV_ROUND_UP(len, PDMA_MAX_DESC_BYTES); + sw_desc = pxad_alloc_desc(chan, nb_desc + 1); + if (!sw_desc) + return NULL; + sw_desc->len = len; + + if (!IS_ALIGNED(dma_src, 1 << PDMA_ALIGNMENT) || + !IS_ALIGNED(dma_dst, 1 << PDMA_ALIGNMENT)) + sw_desc->misaligned = true; + + i = 0; + do { + hw_desc = sw_desc->hw_desc[i++]; + copy = min_t(size_t, len, PDMA_MAX_DESC_BYTES); + hw_desc->dcmd = dcmd | (PXA_DCMD_LENGTH & copy); + hw_desc->dsadr = dma_src; + hw_desc->dtadr = dma_dst; + len -= copy; + dma_src += copy; + dma_dst += copy; + } while (len); + set_updater_desc(sw_desc, flags); + + return pxad_tx_prep(&chan->vc, &sw_desc->vd, flags); +} + +static struct dma_async_tx_descriptor * +pxad_prep_slave_sg(struct dma_chan *dchan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction dir, + unsigned long flags, void *context) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + struct pxad_desc_sw *sw_desc; + size_t len, avail; + struct scatterlist *sg; + dma_addr_t dma; + u32 dcmd, dsadr = 0, dtadr = 0; + unsigned int nb_desc = 0, i, j = 0; + + if ((sgl == NULL) || (sg_len == 0)) + return NULL; + + pxad_get_config(chan, dir, &dcmd, &dsadr, &dtadr); + dev_dbg(&chan->vc.chan.dev->device, + "%s(): dir=%d flags=%lx\n", __func__, dir, flags); + + for_each_sg(sgl, sg, sg_len, i) + nb_desc += DIV_ROUND_UP(sg_dma_len(sg), PDMA_MAX_DESC_BYTES); + sw_desc = pxad_alloc_desc(chan, nb_desc + 1); + if (!sw_desc) + return NULL; + + for_each_sg(sgl, sg, sg_len, i) { + dma = sg_dma_address(sg); + avail = sg_dma_len(sg); + sw_desc->len += avail; + + do { + len = min_t(size_t, avail, PDMA_MAX_DESC_BYTES); + if (dma & 0x7) + sw_desc->misaligned = true; + + sw_desc->hw_desc[j]->dcmd = + dcmd | (PXA_DCMD_LENGTH & len); + sw_desc->hw_desc[j]->dsadr = dsadr ? dsadr : dma; + sw_desc->hw_desc[j++]->dtadr = dtadr ? 
dtadr : dma; + + dma += len; + avail -= len; + } while (avail); + } + set_updater_desc(sw_desc, flags); + + return pxad_tx_prep(&chan->vc, &sw_desc->vd, flags); +} + +static struct dma_async_tx_descriptor * +pxad_prep_dma_cyclic(struct dma_chan *dchan, + dma_addr_t buf_addr, size_t len, size_t period_len, + enum dma_transfer_direction dir, unsigned long flags) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + struct pxad_desc_sw *sw_desc; + struct pxad_desc_hw **phw_desc; + dma_addr_t dma; + u32 dcmd, dsadr = 0, dtadr = 0; + unsigned int nb_desc = 0; + + if (!dchan || !len || !period_len) + return NULL; + if ((dir != DMA_DEV_TO_MEM) && (dir != DMA_MEM_TO_DEV)) { + dev_err(&chan->vc.chan.dev->device, + "Unsupported direction for cyclic DMA\n"); + return NULL; + } + /* the buffer length must be a multiple of period_len */ + if (len % period_len != 0 || period_len > PDMA_MAX_DESC_BYTES || + !IS_ALIGNED(period_len, 1 << PDMA_ALIGNMENT)) + return NULL; + + pxad_get_config(chan, dir, &dcmd, &dsadr, &dtadr); + dcmd |= PXA_DCMD_ENDIRQEN | (PXA_DCMD_LENGTH & period_len); + dev_dbg(&chan->vc.chan.dev->device, + "%s(): buf_addr=0x%lx len=%zu period=%zu dir=%d flags=%lx\n", + __func__, (unsigned long)buf_addr, len, period_len, dir, flags); + + nb_desc = DIV_ROUND_UP(period_len, PDMA_MAX_DESC_BYTES); + nb_desc *= DIV_ROUND_UP(len, period_len); + sw_desc = pxad_alloc_desc(chan, nb_desc + 1); + if (!sw_desc) + return NULL; + sw_desc->cyclic = true; + sw_desc->len = len; + + phw_desc = sw_desc->hw_desc; + dma = buf_addr; + do { + phw_desc[0]->dsadr = dsadr ? dsadr : dma; + phw_desc[0]->dtadr = dtadr ? dtadr : dma; + phw_desc[0]->dcmd = dcmd; + phw_desc++; + dma += period_len; + len -= period_len; + } while (len); + set_updater_desc(sw_desc, flags); + + return pxad_tx_prep(&chan->vc, &sw_desc->vd, flags); +} + +static int pxad_config(struct dma_chan *dchan, + struct dma_slave_config *cfg) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + + if (!dchan) + return -EINVAL; + + chan->cfg = *cfg; + return 0; +} + +static int pxad_terminate_all(struct dma_chan *dchan) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + struct pxad_device *pdev = to_pxad_dev(chan->vc.chan.device); + struct virt_dma_desc *vd = NULL; + unsigned long flags; + struct pxad_phy *phy; + LIST_HEAD(head); + + dev_dbg(&chan->vc.chan.dev->device, + "%s(): vchan %p: terminate all\n", __func__, &chan->vc); + + spin_lock_irqsave(&chan->vc.lock, flags); + vchan_get_all_descriptors(&chan->vc, &head); + + list_for_each_entry(vd, &head, node) { + dev_dbg(&chan->vc.chan.dev->device, + "%s(): cancelling txd %p[%x] (completed=%d)", __func__, + vd, vd->tx.cookie, is_desc_completed(vd)); + } + + phy = chan->phy; + if (phy) { + phy_disable(chan->phy); + pxad_free_phy(chan); + chan->phy = NULL; + spin_lock(&pdev->phy_lock); + phy->vchan = NULL; + spin_unlock(&pdev->phy_lock); + } + spin_unlock_irqrestore(&chan->vc.lock, flags); + vchan_dma_desc_free_list(&chan->vc, &head); + + return 0; +} + +static unsigned int pxad_residue(struct pxad_chan *chan, + dma_cookie_t cookie) +{ + struct virt_dma_desc *vd = NULL; + struct pxad_desc_sw *sw_desc = NULL; + struct pxad_desc_hw *hw_desc = NULL; + u32 curr, start, len, end, residue = 0; + unsigned long flags; + bool passed = false; + int i; + + /* + * If the channel does not have a phy pointer anymore, it has already + * been completed. Therefore, its residue is 0. 
+ */ + if (!chan->phy) + return 0; + + spin_lock_irqsave(&chan->vc.lock, flags); + + vd = vchan_find_desc(&chan->vc, cookie); + if (!vd) + goto out; + + sw_desc = to_pxad_sw_desc(vd); + if (sw_desc->hw_desc[0]->dcmd & PXA_DCMD_INCSRCADDR) + curr = phy_readl_relaxed(chan->phy, DSADR); + else + curr = phy_readl_relaxed(chan->phy, DTADR); + + /* + * curr has to be actually read before checking descriptor + * completion, so that a curr inside a status updater + * descriptor implies the following test returns true, and + * preventing reordering of curr load and the test. + */ + rmb(); + if (is_desc_completed(vd)) + goto out; + + for (i = 0; i < sw_desc->nb_desc - 1; i++) { + hw_desc = sw_desc->hw_desc[i]; + if (sw_desc->hw_desc[0]->dcmd & PXA_DCMD_INCSRCADDR) + start = hw_desc->dsadr; + else + start = hw_desc->dtadr; + len = hw_desc->dcmd & PXA_DCMD_LENGTH; + end = start + len; + + /* + * 'passed' will be latched once we found the descriptor + * which lies inside the boundaries of the curr + * pointer. All descriptors that occur in the list + * _after_ we found that partially handled descriptor + * are still to be processed and are hence added to the + * residual bytes counter. + */ + + if (passed) { + residue += len; + } else if (curr >= start && curr <= end) { + residue += end - curr; + passed = true; + } + } + if (!passed) + residue = sw_desc->len; + +out: + spin_unlock_irqrestore(&chan->vc.lock, flags); + dev_dbg(&chan->vc.chan.dev->device, + "%s(): txd %p[%x] sw_desc=%p: %d\n", + __func__, vd, cookie, sw_desc, residue); + return residue; +} + +static enum dma_status pxad_tx_status(struct dma_chan *dchan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + enum dma_status ret; + + ret = dma_cookie_status(dchan, cookie, txstate); + if (likely(txstate && (ret != DMA_ERROR))) + dma_set_residue(txstate, pxad_residue(chan, cookie)); + + return ret; +} + +static void pxad_free_channels(struct dma_device *dmadev) +{ + struct pxad_chan *c, *cn; + + list_for_each_entry_safe(c, cn, &dmadev->channels, + vc.chan.device_node) { + list_del(&c->vc.chan.device_node); + tasklet_kill(&c->vc.task); + } +} + +static int pxad_remove(struct platform_device *op) +{ + struct pxad_device *pdev = platform_get_drvdata(op); + + pxad_cleanup_debugfs(pdev); + pxad_free_channels(&pdev->slave); + dma_async_device_unregister(&pdev->slave); + return 0; +} + +static int pxad_init_phys(struct platform_device *op, + struct pxad_device *pdev, + unsigned int nb_phy_chans) +{ + int irq0, irq, nr_irq = 0, i, ret; + struct pxad_phy *phy; + + irq0 = platform_get_irq(op, 0); + if (irq0 < 0) + return irq0; + + pdev->phys = devm_kcalloc(&op->dev, nb_phy_chans, + sizeof(pdev->phys[0]), GFP_KERNEL); + if (!pdev->phys) + return -ENOMEM; + + for (i = 0; i < nb_phy_chans; i++) + if (platform_get_irq(op, i) > 0) + nr_irq++; + + for (i = 0; i < nb_phy_chans; i++) { + phy = &pdev->phys[i]; + phy->base = pdev->base; + phy->idx = i; + irq = platform_get_irq(op, i); + if ((nr_irq > 1) && (irq > 0)) + ret = devm_request_irq(&op->dev, irq, + pxad_chan_handler, + IRQF_SHARED, "pxa-dma", phy); + if ((nr_irq == 1) && (i == 0)) + ret = devm_request_irq(&op->dev, irq0, + pxad_int_handler, + IRQF_SHARED, "pxa-dma", pdev); + if (ret) { + dev_err(pdev->slave.dev, + "%s(): can't request irq %d:%d\n", __func__, + irq, ret); + return ret; + } + } + + return 0; +} + +static const struct of_device_id const pxad_dt_ids[] = { + { .compatible = "marvell,pdma-1.0", }, + {} +}; +MODULE_DEVICE_TABLE(of, 
pxad_dt_ids); + +static struct dma_chan *pxad_dma_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct pxad_device *d = ofdma->of_dma_data; + struct dma_chan *chan; + + chan = dma_get_any_slave_channel(&d->slave); + if (!chan) + return NULL; + + to_pxad_chan(chan)->drcmr = dma_spec->args[0]; + to_pxad_chan(chan)->prio = dma_spec->args[1]; + + return chan; +} + +static int pxad_init_dmadev(struct platform_device *op, + struct pxad_device *pdev, + unsigned int nr_phy_chans) +{ + int ret; + unsigned int i; + struct pxad_chan *c; + + pdev->nr_chans = nr_phy_chans; + INIT_LIST_HEAD(&pdev->slave.channels); + pdev->slave.device_alloc_chan_resources = pxad_alloc_chan_resources; + pdev->slave.device_free_chan_resources = pxad_free_chan_resources; + pdev->slave.device_tx_status = pxad_tx_status; + pdev->slave.device_issue_pending = pxad_issue_pending; + pdev->slave.device_config = pxad_config; + pdev->slave.device_terminate_all = pxad_terminate_all; + + if (op->dev.coherent_dma_mask) + dma_set_mask(&op->dev, op->dev.coherent_dma_mask); + else + dma_set_mask(&op->dev, DMA_BIT_MASK(32)); + + ret = pxad_init_phys(op, pdev, nr_phy_chans); + if (ret) + return ret; + + for (i = 0; i < nr_phy_chans; i++) { + c = devm_kzalloc(&op->dev, sizeof(*c), GFP_KERNEL); + if (!c) + return -ENOMEM; + c->vc.desc_free = pxad_free_desc; + vchan_init(&c->vc, &pdev->slave); + } + + return dma_async_device_register(&pdev->slave); +} + +static int pxad_probe(struct platform_device *op) +{ + struct pxad_device *pdev; + const struct of_device_id *of_id; + struct mmp_dma_platdata *pdata = dev_get_platdata(&op->dev); + struct resource *iores; + int ret, dma_channels = 0; + const enum dma_slave_buswidth widths = + DMA_SLAVE_BUSWIDTH_1_BYTE | DMA_SLAVE_BUSWIDTH_2_BYTES | + DMA_SLAVE_BUSWIDTH_4_BYTES; + + pdev = devm_kzalloc(&op->dev, sizeof(*pdev), GFP_KERNEL); + if (!pdev) + return -ENOMEM; + + spin_lock_init(&pdev->phy_lock); + + iores = platform_get_resource(op, IORESOURCE_MEM, 0); + pdev->base = devm_ioremap_resource(&op->dev, iores); + if (IS_ERR(pdev->base)) + return PTR_ERR(pdev->base); + + of_id = of_match_device(pxad_dt_ids, &op->dev); + if (of_id) + of_property_read_u32(op->dev.of_node, "#dma-channels", + &dma_channels); + else if (pdata && pdata->dma_channels) + dma_channels = pdata->dma_channels; + else + dma_channels = 32; /* default 32 channel */ + + dma_cap_set(DMA_SLAVE, pdev->slave.cap_mask); + dma_cap_set(DMA_MEMCPY, pdev->slave.cap_mask); + dma_cap_set(DMA_CYCLIC, pdev->slave.cap_mask); + dma_cap_set(DMA_PRIVATE, pdev->slave.cap_mask); + pdev->slave.device_prep_dma_memcpy = pxad_prep_memcpy; + pdev->slave.device_prep_slave_sg = pxad_prep_slave_sg; + pdev->slave.device_prep_dma_cyclic = pxad_prep_dma_cyclic; + + pdev->slave.copy_align = PDMA_ALIGNMENT; + pdev->slave.src_addr_widths = widths; + pdev->slave.dst_addr_widths = widths; + pdev->slave.directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM); + pdev->slave.residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR; + + pdev->slave.dev = &op->dev; + ret = pxad_init_dmadev(op, pdev, dma_channels); + if (ret) { + dev_err(pdev->slave.dev, "unable to register\n"); + return ret; + } + + if (op->dev.of_node) { + /* Device-tree DMA controller registration */ + ret = of_dma_controller_register(op->dev.of_node, + pxad_dma_xlate, pdev); + if (ret < 0) { + dev_err(pdev->slave.dev, + "of_dma_controller_register failed\n"); + return ret; + } + } + + platform_set_drvdata(op, pdev); + pxad_init_debugfs(pdev); + dev_info(pdev->slave.dev, "initialized 
%d channels\n", dma_channels); + return 0; +} + +static const struct platform_device_id pxad_id_table[] = { + { "pxa-dma", }, + { }, +}; + +static struct platform_driver pxad_driver = { + .driver = { + .name = "pxa-dma", + .of_match_table = pxad_dt_ids, + }, + .id_table = pxad_id_table, + .probe = pxad_probe, + .remove = pxad_remove, +}; + +bool pxad_filter_fn(struct dma_chan *chan, void *param) +{ + struct pxad_chan *c = to_pxad_chan(chan); + struct pxad_param *p = param; + + if (chan->device->dev->driver != &pxad_driver.driver) + return false; + + c->drcmr = p->drcmr; + c->prio = p->prio; + + return true; +} +EXPORT_SYMBOL_GPL(pxad_filter_fn); + +int pxad_toggle_reserved_channel(int legacy_channel) +{ + if (legacy_unavailable & (BIT(legacy_channel))) + return -EBUSY; + legacy_reserved ^= BIT(legacy_channel); + return 0; +} +EXPORT_SYMBOL_GPL(pxad_toggle_reserved_channel); + +module_platform_driver(pxad_driver); + +MODULE_DESCRIPTION("Marvell PXA Peripheral DMA Driver"); +MODULE_AUTHOR("Robert Jarzmik <robert.jarzmik@free.fr>"); +MODULE_LICENSE("GPL v2"); diff --git a/kernel/drivers/dma/s3c24xx-dma.c b/kernel/drivers/dma/s3c24xx-dma.c index 01dcaf21b..17ccdfd28 100644 --- a/kernel/drivers/dma/s3c24xx-dma.c +++ b/kernel/drivers/dma/s3c24xx-dma.c @@ -1168,7 +1168,7 @@ static struct soc_data soc_s3c2443 = { .has_clocks = true, }; -static struct platform_device_id s3c24xx_dma_driver_ids[] = { +static const struct platform_device_id s3c24xx_dma_driver_ids[] = { { .name = "s3c2410-dma", .driver_data = (kernel_ulong_t)&soc_s3c2410, diff --git a/kernel/drivers/dma/sh/Kconfig b/kernel/drivers/dma/sh/Kconfig index 0f371524a..9fda65af8 100644 --- a/kernel/drivers/dma/sh/Kconfig +++ b/kernel/drivers/dma/sh/Kconfig @@ -39,18 +39,6 @@ config SH_DMAE_R8A73A4 endif -config SUDMAC - tristate "Renesas SUDMAC support" - depends on SH_DMAE_BASE - help - Enable support for the Renesas SUDMAC controllers. - -config RCAR_HPB_DMAE - tristate "Renesas R-Car HPB DMAC support" - depends on SH_DMAE_BASE - help - Enable support for the Renesas R-Car series DMA controllers. - config RCAR_DMAC tristate "Renesas R-Car Gen2 DMA Controller" depends on ARCH_SHMOBILE || COMPILE_TEST @@ -59,6 +47,12 @@ config RCAR_DMAC This driver supports the general purpose DMA controller found in the Renesas R-Car second generation SoCs. +config RCAR_HPB_DMAE + tristate "Renesas R-Car HPB DMAC support" + depends on SH_DMAE_BASE + help + Enable support for the Renesas R-Car series DMA controllers. + config RENESAS_USB_DMAC tristate "Renesas USB-DMA Controller" depends on ARCH_SHMOBILE || COMPILE_TEST @@ -67,3 +61,9 @@ config RENESAS_USB_DMAC help This driver supports the USB-DMA controller found in the Renesas SoCs. + +config SUDMAC + tristate "Renesas SUDMAC support" + depends on SH_DMAE_BASE + help + Enable support for the Renesas SUDMAC controllers. 
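The pxa_dma driver above gives clients two ways to obtain a channel: the device-tree path via pxad_dma_xlate(), which reads the request line from args[0] and the channel priority from args[1], and the legacy board-file path via the exported pxad_filter_fn(). The sketch below shows the filter-function path. Only the drcmr/prio fields of struct pxad_param and the behaviour of pxad_filter_fn() are taken from the code above; the header location and the priority value 0 are assumptions made purely for illustration.

#include <linux/dmaengine.h>
#include <linux/dma/pxa-dma.h>	/* assumed location of struct pxad_param / pxad_filter_fn */

static struct dma_chan *claim_pxa_channel(unsigned int drcmr)
{
	dma_cap_mask_t mask;
	struct pxad_param param = {
		.drcmr = drcmr,	/* request line, same value a DT node passes in cell 0 */
		.prio  = 0,	/* channel priority, cell 1 equivalent; 0 used for illustration */
	};

	dma_cap_zero(mask);
	dma_cap_set(DMA_SLAVE, mask);

	/*
	 * pxad_filter_fn() rejects channels that do not belong to this
	 * driver and latches drcmr/prio into the selected pxad_chan,
	 * mirroring what pxad_dma_xlate() does for DT users.
	 */
	return dma_request_channel(mask, pxad_filter_fn, &param);
}

Either way the channel carries the same two values, so subsequent dmaengine_slave_config() and prep calls behave identically whether the channel came from the DT xlate callback or from this filter.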
diff --git a/kernel/drivers/dma/sh/Makefile b/kernel/drivers/dma/sh/Makefile index b8a598066..0133e4658 100644 --- a/kernel/drivers/dma/sh/Makefile +++ b/kernel/drivers/dma/sh/Makefile @@ -13,7 +13,7 @@ shdma-$(CONFIG_SH_DMAE_R8A73A4) += shdma-r8a73a4.o shdma-objs := $(shdma-y) obj-$(CONFIG_SH_DMAE) += shdma.o -obj-$(CONFIG_SUDMAC) += sudmac.o -obj-$(CONFIG_RCAR_HPB_DMAE) += rcar-hpbdma.o obj-$(CONFIG_RCAR_DMAC) += rcar-dmac.o +obj-$(CONFIG_RCAR_HPB_DMAE) += rcar-hpbdma.o obj-$(CONFIG_RENESAS_USB_DMAC) += usb-dmac.o +obj-$(CONFIG_SUDMAC) += sudmac.o diff --git a/kernel/drivers/dma/sh/rcar-dmac.c b/kernel/drivers/dma/sh/rcar-dmac.c index a18d16cc4..7820d07e7 100644 --- a/kernel/drivers/dma/sh/rcar-dmac.c +++ b/kernel/drivers/dma/sh/rcar-dmac.c @@ -183,7 +183,7 @@ struct rcar_dmac { unsigned int n_channels; struct rcar_dmac_chan *channels; - unsigned long modules[256 / BITS_PER_LONG]; + DECLARE_BITMAP(modules, 256); }; #define to_rcar_dmac(d) container_of(d, struct rcar_dmac, engine) @@ -465,6 +465,7 @@ static dma_cookie_t rcar_dmac_tx_submit(struct dma_async_tx_descriptor *tx) static int rcar_dmac_desc_alloc(struct rcar_dmac_chan *chan, gfp_t gfp) { struct rcar_dmac_desc_page *page; + unsigned long flags; LIST_HEAD(list); unsigned int i; @@ -482,10 +483,10 @@ static int rcar_dmac_desc_alloc(struct rcar_dmac_chan *chan, gfp_t gfp) list_add_tail(&desc->node, &list); } - spin_lock_irq(&chan->lock); + spin_lock_irqsave(&chan->lock, flags); list_splice_tail(&list, &chan->desc.free); list_add_tail(&page->node, &chan->desc.pages); - spin_unlock_irq(&chan->lock); + spin_unlock_irqrestore(&chan->lock, flags); return 0; } @@ -516,6 +517,7 @@ static void rcar_dmac_desc_put(struct rcar_dmac_chan *chan, static void rcar_dmac_desc_recycle_acked(struct rcar_dmac_chan *chan) { struct rcar_dmac_desc *desc, *_desc; + unsigned long flags; LIST_HEAD(list); /* @@ -524,9 +526,9 @@ static void rcar_dmac_desc_recycle_acked(struct rcar_dmac_chan *chan) * list_for_each_entry_safe, isn't safe if we release the channel lock * around the rcar_dmac_desc_put() call. */ - spin_lock_irq(&chan->lock); + spin_lock_irqsave(&chan->lock, flags); list_splice_init(&chan->desc.wait, &list); - spin_unlock_irq(&chan->lock); + spin_unlock_irqrestore(&chan->lock, flags); list_for_each_entry_safe(desc, _desc, &list, node) { if (async_tx_test_ack(&desc->async_tx)) { @@ -539,9 +541,9 @@ static void rcar_dmac_desc_recycle_acked(struct rcar_dmac_chan *chan) return; /* Put the remaining descriptors back in the wait list. */ - spin_lock_irq(&chan->lock); + spin_lock_irqsave(&chan->lock, flags); list_splice(&list, &chan->desc.wait); - spin_unlock_irq(&chan->lock); + spin_unlock_irqrestore(&chan->lock, flags); } /* @@ -556,12 +558,13 @@ static void rcar_dmac_desc_recycle_acked(struct rcar_dmac_chan *chan) static struct rcar_dmac_desc *rcar_dmac_desc_get(struct rcar_dmac_chan *chan) { struct rcar_dmac_desc *desc; + unsigned long flags; int ret; /* Recycle acked descriptors before attempting allocation. */ rcar_dmac_desc_recycle_acked(chan); - spin_lock_irq(&chan->lock); + spin_lock_irqsave(&chan->lock, flags); while (list_empty(&chan->desc.free)) { /* @@ -570,17 +573,17 @@ static struct rcar_dmac_desc *rcar_dmac_desc_get(struct rcar_dmac_chan *chan) * allocated descriptors. If the allocation fails return an * error. 
*/ - spin_unlock_irq(&chan->lock); + spin_unlock_irqrestore(&chan->lock, flags); ret = rcar_dmac_desc_alloc(chan, GFP_NOWAIT); if (ret < 0) return NULL; - spin_lock_irq(&chan->lock); + spin_lock_irqsave(&chan->lock, flags); } desc = list_first_entry(&chan->desc.free, struct rcar_dmac_desc, node); list_del(&desc->node); - spin_unlock_irq(&chan->lock); + spin_unlock_irqrestore(&chan->lock, flags); return desc; } @@ -593,6 +596,7 @@ static struct rcar_dmac_desc *rcar_dmac_desc_get(struct rcar_dmac_chan *chan) static int rcar_dmac_xfer_chunk_alloc(struct rcar_dmac_chan *chan, gfp_t gfp) { struct rcar_dmac_desc_page *page; + unsigned long flags; LIST_HEAD(list); unsigned int i; @@ -606,10 +610,10 @@ static int rcar_dmac_xfer_chunk_alloc(struct rcar_dmac_chan *chan, gfp_t gfp) list_add_tail(&chunk->node, &list); } - spin_lock_irq(&chan->lock); + spin_lock_irqsave(&chan->lock, flags); list_splice_tail(&list, &chan->desc.chunks_free); list_add_tail(&page->node, &chan->desc.pages); - spin_unlock_irq(&chan->lock); + spin_unlock_irqrestore(&chan->lock, flags); return 0; } @@ -627,9 +631,10 @@ static struct rcar_dmac_xfer_chunk * rcar_dmac_xfer_chunk_get(struct rcar_dmac_chan *chan) { struct rcar_dmac_xfer_chunk *chunk; + unsigned long flags; int ret; - spin_lock_irq(&chan->lock); + spin_lock_irqsave(&chan->lock, flags); while (list_empty(&chan->desc.chunks_free)) { /* @@ -638,18 +643,18 @@ rcar_dmac_xfer_chunk_get(struct rcar_dmac_chan *chan) * allocated descriptors. If the allocation fails return an * error. */ - spin_unlock_irq(&chan->lock); + spin_unlock_irqrestore(&chan->lock, flags); ret = rcar_dmac_xfer_chunk_alloc(chan, GFP_NOWAIT); if (ret < 0) return NULL; - spin_lock_irq(&chan->lock); + spin_lock_irqsave(&chan->lock, flags); } chunk = list_first_entry(&chan->desc.chunks_free, struct rcar_dmac_xfer_chunk, node); list_del(&chunk->node); - spin_unlock_irq(&chan->lock); + spin_unlock_irqrestore(&chan->lock, flags); return chunk; } diff --git a/kernel/drivers/dma/sh/shdma-r8a73a4.c b/kernel/drivers/dma/sh/shdma-r8a73a4.c index 4fb99970a..96ea3828c 100644 --- a/kernel/drivers/dma/sh/shdma-r8a73a4.c +++ b/kernel/drivers/dma/sh/shdma-r8a73a4.c @@ -11,7 +11,7 @@ #include "shdma-arm.h" -const unsigned int dma_ts_shift[] = SH_DMAE_TS_SHIFT; +static const unsigned int dma_ts_shift[] = SH_DMAE_TS_SHIFT; static const struct sh_dmae_slave_config dma_slaves[] = { { diff --git a/kernel/drivers/dma/sh/usb-dmac.c b/kernel/drivers/dma/sh/usb-dmac.c index ebd8a5f39..f1bcc2a16 100644 --- a/kernel/drivers/dma/sh/usb-dmac.c +++ b/kernel/drivers/dma/sh/usb-dmac.c @@ -679,8 +679,11 @@ static int usb_dmac_runtime_suspend(struct device *dev) struct usb_dmac *dmac = dev_get_drvdata(dev); int i; - for (i = 0; i < dmac->n_channels; ++i) + for (i = 0; i < dmac->n_channels; ++i) { + if (!dmac->channels[i].iomem) + break; usb_dmac_chan_halt(&dmac->channels[i]); + } return 0; } @@ -799,11 +802,10 @@ static int usb_dmac_probe(struct platform_device *pdev) ret = pm_runtime_get_sync(&pdev->dev); if (ret < 0) { dev_err(&pdev->dev, "runtime PM get sync failed (%d)\n", ret); - return ret; + goto error_pm; } ret = usb_dmac_init(dmac); - pm_runtime_put(&pdev->dev); if (ret) { dev_err(&pdev->dev, "failed to reset device\n"); @@ -851,10 +853,13 @@ static int usb_dmac_probe(struct platform_device *pdev) if (ret < 0) goto error; + pm_runtime_put(&pdev->dev); return 0; error: of_dma_controller_free(pdev->dev.of_node); + pm_runtime_put(&pdev->dev); +error_pm: pm_runtime_disable(&pdev->dev); return ret; } diff --git 
a/kernel/drivers/dma/sirf-dma.c b/kernel/drivers/dma/sirf-dma.c index a1afda43b..22ea2419e 100644 --- a/kernel/drivers/dma/sirf-dma.c +++ b/kernel/drivers/dma/sirf-dma.c @@ -23,8 +23,13 @@ #include "dmaengine.h" +#define SIRFSOC_DMA_VER_A7V1 1 +#define SIRFSOC_DMA_VER_A7V2 2 +#define SIRFSOC_DMA_VER_A6 4 + #define SIRFSOC_DMA_DESCRIPTORS 16 #define SIRFSOC_DMA_CHANNELS 16 +#define SIRFSOC_DMA_TABLE_NUM 256 #define SIRFSOC_DMA_CH_ADDR 0x00 #define SIRFSOC_DMA_CH_XLEN 0x04 @@ -35,15 +40,44 @@ #define SIRFSOC_DMA_CH_VALID 0x140 #define SIRFSOC_DMA_CH_INT 0x144 #define SIRFSOC_DMA_INT_EN 0x148 -#define SIRFSOC_DMA_INT_EN_CLR 0x14C +#define SIRFSOC_DMA_INT_EN_CLR 0x14C #define SIRFSOC_DMA_CH_LOOP_CTRL 0x150 -#define SIRFSOC_DMA_CH_LOOP_CTRL_CLR 0x15C +#define SIRFSOC_DMA_CH_LOOP_CTRL_CLR 0x154 +#define SIRFSOC_DMA_WIDTH_ATLAS7 0x10 +#define SIRFSOC_DMA_VALID_ATLAS7 0x14 +#define SIRFSOC_DMA_INT_ATLAS7 0x18 +#define SIRFSOC_DMA_INT_EN_ATLAS7 0x1c +#define SIRFSOC_DMA_LOOP_CTRL_ATLAS7 0x20 +#define SIRFSOC_DMA_CUR_DATA_ADDR 0x34 +#define SIRFSOC_DMA_MUL_ATLAS7 0x38 +#define SIRFSOC_DMA_CH_LOOP_CTRL_ATLAS7 0x158 +#define SIRFSOC_DMA_CH_LOOP_CTRL_CLR_ATLAS7 0x15C +#define SIRFSOC_DMA_IOBG_SCMD_EN 0x800 +#define SIRFSOC_DMA_EARLY_RESP_SET 0x818 +#define SIRFSOC_DMA_EARLY_RESP_CLR 0x81C #define SIRFSOC_DMA_MODE_CTRL_BIT 4 #define SIRFSOC_DMA_DIR_CTRL_BIT 5 +#define SIRFSOC_DMA_MODE_CTRL_BIT_ATLAS7 2 +#define SIRFSOC_DMA_CHAIN_CTRL_BIT_ATLAS7 3 +#define SIRFSOC_DMA_DIR_CTRL_BIT_ATLAS7 4 +#define SIRFSOC_DMA_TAB_NUM_ATLAS7 7 +#define SIRFSOC_DMA_CHAIN_INT_BIT_ATLAS7 5 +#define SIRFSOC_DMA_CHAIN_FLAG_SHIFT_ATLAS7 25 +#define SIRFSOC_DMA_CHAIN_ADDR_SHIFT 32 + +#define SIRFSOC_DMA_INT_FINI_INT_ATLAS7 BIT(0) +#define SIRFSOC_DMA_INT_CNT_INT_ATLAS7 BIT(1) +#define SIRFSOC_DMA_INT_PAU_INT_ATLAS7 BIT(2) +#define SIRFSOC_DMA_INT_LOOP_INT_ATLAS7 BIT(3) +#define SIRFSOC_DMA_INT_INV_INT_ATLAS7 BIT(4) +#define SIRFSOC_DMA_INT_END_INT_ATLAS7 BIT(5) +#define SIRFSOC_DMA_INT_ALL_ATLAS7 0x3F /* xlen and dma_width register is in 4 bytes boundary */ #define SIRFSOC_DMA_WORD_LEN 4 +#define SIRFSOC_DMA_XLEN_MAX_V1 0x800 +#define SIRFSOC_DMA_XLEN_MAX_V2 0x1000 struct sirfsoc_dma_desc { struct dma_async_tx_descriptor desc; @@ -56,7 +90,9 @@ struct sirfsoc_dma_desc { int width; /* DMA width */ int dir; bool cyclic; /* is loop DMA? */ + bool chain; /* is chain DMA? 
*/ u32 addr; /* DMA buffer address */ + u64 chain_table[SIRFSOC_DMA_TABLE_NUM]; /* chain tbl */ }; struct sirfsoc_dma_chan { @@ -87,10 +123,25 @@ struct sirfsoc_dma { void __iomem *base; int irq; struct clk *clk; - bool is_marco; + int type; + void (*exec_desc)(struct sirfsoc_dma_desc *sdesc, + int cid, int burst_mode, void __iomem *base); struct sirfsoc_dma_regs regs_save; }; +struct sirfsoc_dmadata { + void (*exec)(struct sirfsoc_dma_desc *sdesc, + int cid, int burst_mode, void __iomem *base); + int type; +}; + +enum sirfsoc_dma_chain_flag { + SIRFSOC_DMA_CHAIN_NORMAL = 0x01, + SIRFSOC_DMA_CHAIN_PAUSE = 0x02, + SIRFSOC_DMA_CHAIN_LOOP = 0x03, + SIRFSOC_DMA_CHAIN_END = 0x04 +}; + #define DRV_NAME "sirfsoc_dma" static int sirfsoc_dma_runtime_suspend(struct device *dev); @@ -109,48 +160,105 @@ static inline struct sirfsoc_dma *dma_chan_to_sirfsoc_dma(struct dma_chan *c) return container_of(schan, struct sirfsoc_dma, channels[c->chan_id]); } +static void sirfsoc_dma_execute_hw_a7v2(struct sirfsoc_dma_desc *sdesc, + int cid, int burst_mode, void __iomem *base) +{ + if (sdesc->chain) { + /* DMA v2 HW chain mode */ + writel_relaxed((sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT_ATLAS7) | + (sdesc->chain << + SIRFSOC_DMA_CHAIN_CTRL_BIT_ATLAS7) | + (0x8 << SIRFSOC_DMA_TAB_NUM_ATLAS7) | 0x3, + base + SIRFSOC_DMA_CH_CTRL); + } else { + /* DMA v2 legacy mode */ + writel_relaxed(sdesc->xlen, base + SIRFSOC_DMA_CH_XLEN); + writel_relaxed(sdesc->ylen, base + SIRFSOC_DMA_CH_YLEN); + writel_relaxed(sdesc->width, base + SIRFSOC_DMA_WIDTH_ATLAS7); + writel_relaxed((sdesc->width*((sdesc->ylen+1)>>1)), + base + SIRFSOC_DMA_MUL_ATLAS7); + writel_relaxed((sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT_ATLAS7) | + (sdesc->chain << + SIRFSOC_DMA_CHAIN_CTRL_BIT_ATLAS7) | + 0x3, base + SIRFSOC_DMA_CH_CTRL); + } + writel_relaxed(sdesc->chain ? 
SIRFSOC_DMA_INT_END_INT_ATLAS7 : + (SIRFSOC_DMA_INT_FINI_INT_ATLAS7 | + SIRFSOC_DMA_INT_LOOP_INT_ATLAS7), + base + SIRFSOC_DMA_INT_EN_ATLAS7); + writel(sdesc->addr, base + SIRFSOC_DMA_CH_ADDR); + if (sdesc->cyclic) + writel(0x10001, base + SIRFSOC_DMA_LOOP_CTRL_ATLAS7); +} + +static void sirfsoc_dma_execute_hw_a7v1(struct sirfsoc_dma_desc *sdesc, + int cid, int burst_mode, void __iomem *base) +{ + writel_relaxed(1, base + SIRFSOC_DMA_IOBG_SCMD_EN); + writel_relaxed((1 << cid), base + SIRFSOC_DMA_EARLY_RESP_SET); + writel_relaxed(sdesc->width, base + SIRFSOC_DMA_WIDTH_0 + cid * 4); + writel_relaxed(cid | (burst_mode << SIRFSOC_DMA_MODE_CTRL_BIT) | + (sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT), + base + cid * 0x10 + SIRFSOC_DMA_CH_CTRL); + writel_relaxed(sdesc->xlen, base + cid * 0x10 + SIRFSOC_DMA_CH_XLEN); + writel_relaxed(sdesc->ylen, base + cid * 0x10 + SIRFSOC_DMA_CH_YLEN); + writel_relaxed(readl_relaxed(base + SIRFSOC_DMA_INT_EN) | + (1 << cid), base + SIRFSOC_DMA_INT_EN); + writel(sdesc->addr >> 2, base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR); + if (sdesc->cyclic) { + writel((1 << cid) | 1 << (cid + 16) | + readl_relaxed(base + SIRFSOC_DMA_CH_LOOP_CTRL_ATLAS7), + base + SIRFSOC_DMA_CH_LOOP_CTRL_ATLAS7); + } + +} + +static void sirfsoc_dma_execute_hw_a6(struct sirfsoc_dma_desc *sdesc, + int cid, int burst_mode, void __iomem *base) +{ + writel_relaxed(sdesc->width, base + SIRFSOC_DMA_WIDTH_0 + cid * 4); + writel_relaxed(cid | (burst_mode << SIRFSOC_DMA_MODE_CTRL_BIT) | + (sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT), + base + cid * 0x10 + SIRFSOC_DMA_CH_CTRL); + writel_relaxed(sdesc->xlen, base + cid * 0x10 + SIRFSOC_DMA_CH_XLEN); + writel_relaxed(sdesc->ylen, base + cid * 0x10 + SIRFSOC_DMA_CH_YLEN); + writel_relaxed(readl_relaxed(base + SIRFSOC_DMA_INT_EN) | + (1 << cid), base + SIRFSOC_DMA_INT_EN); + writel(sdesc->addr >> 2, base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR); + if (sdesc->cyclic) { + writel((1 << cid) | 1 << (cid + 16) | + readl_relaxed(base + SIRFSOC_DMA_CH_LOOP_CTRL), + base + SIRFSOC_DMA_CH_LOOP_CTRL); + } + +} + /* Execute all queued DMA descriptors */ static void sirfsoc_dma_execute(struct sirfsoc_dma_chan *schan) { struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(&schan->chan); int cid = schan->chan.chan_id; struct sirfsoc_dma_desc *sdesc = NULL; + void __iomem *base; /* * lock has been held by functions calling this, so we don't hold * lock again */ - + base = sdma->base; sdesc = list_first_entry(&schan->queued, struct sirfsoc_dma_desc, - node); + node); /* Move the first queued descriptor to active list */ list_move_tail(&sdesc->node, &schan->active); - /* Start the DMA transfer */ - writel_relaxed(sdesc->width, sdma->base + SIRFSOC_DMA_WIDTH_0 + - cid * 4); - writel_relaxed(cid | (schan->mode << SIRFSOC_DMA_MODE_CTRL_BIT) | - (sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT), - sdma->base + cid * 0x10 + SIRFSOC_DMA_CH_CTRL); - writel_relaxed(sdesc->xlen, sdma->base + cid * 0x10 + - SIRFSOC_DMA_CH_XLEN); - writel_relaxed(sdesc->ylen, sdma->base + cid * 0x10 + - SIRFSOC_DMA_CH_YLEN); - writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_INT_EN) | - (1 << cid), sdma->base + SIRFSOC_DMA_INT_EN); + if (sdma->type == SIRFSOC_DMA_VER_A7V2) + cid = 0; - /* - * writel has an implict memory write barrier to make sure data is - * flushed into memory before starting DMA - */ - writel(sdesc->addr >> 2, sdma->base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR); + /* Start the DMA transfer */ + sdma->exec_desc(sdesc, cid, schan->mode, base); - if (sdesc->cyclic) { - writel((1 << cid) | 1 << (cid + 16) | - 
readl_relaxed(sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL), - sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); + if (sdesc->cyclic) schan->happened_cyclic = schan->completed_cyclic = 0; - } } /* Interrupt handler */ @@ -160,27 +268,65 @@ static irqreturn_t sirfsoc_dma_irq(int irq, void *data) struct sirfsoc_dma_chan *schan; struct sirfsoc_dma_desc *sdesc = NULL; u32 is; + bool chain; int ch; + void __iomem *reg; + + switch (sdma->type) { + case SIRFSOC_DMA_VER_A6: + case SIRFSOC_DMA_VER_A7V1: + is = readl(sdma->base + SIRFSOC_DMA_CH_INT); + reg = sdma->base + SIRFSOC_DMA_CH_INT; + while ((ch = fls(is) - 1) >= 0) { + is &= ~(1 << ch); + writel_relaxed(1 << ch, reg); + schan = &sdma->channels[ch]; + spin_lock(&schan->lock); + sdesc = list_first_entry(&schan->active, + struct sirfsoc_dma_desc, node); + if (!sdesc->cyclic) { + /* Execute queued descriptors */ + list_splice_tail_init(&schan->active, + &schan->completed); + dma_cookie_complete(&sdesc->desc); + if (!list_empty(&schan->queued)) + sirfsoc_dma_execute(schan); + } else + schan->happened_cyclic++; + spin_unlock(&schan->lock); + } + break; - is = readl(sdma->base + SIRFSOC_DMA_CH_INT); - while ((ch = fls(is) - 1) >= 0) { - is &= ~(1 << ch); - writel_relaxed(1 << ch, sdma->base + SIRFSOC_DMA_CH_INT); - schan = &sdma->channels[ch]; + case SIRFSOC_DMA_VER_A7V2: + is = readl(sdma->base + SIRFSOC_DMA_INT_ATLAS7); + reg = sdma->base + SIRFSOC_DMA_INT_ATLAS7; + writel_relaxed(SIRFSOC_DMA_INT_ALL_ATLAS7, reg); + schan = &sdma->channels[0]; spin_lock(&schan->lock); - - sdesc = list_first_entry(&schan->active, struct sirfsoc_dma_desc, - node); + sdesc = list_first_entry(&schan->active, + struct sirfsoc_dma_desc, node); if (!sdesc->cyclic) { - /* Execute queued descriptors */ - list_splice_tail_init(&schan->active, &schan->completed); - if (!list_empty(&schan->queued)) - sirfsoc_dma_execute(schan); - } else + chain = sdesc->chain; + if ((chain && (is & SIRFSOC_DMA_INT_END_INT_ATLAS7)) || + (!chain && + (is & SIRFSOC_DMA_INT_FINI_INT_ATLAS7))) { + /* Execute queued descriptors */ + list_splice_tail_init(&schan->active, + &schan->completed); + dma_cookie_complete(&sdesc->desc); + if (!list_empty(&schan->queued)) + sirfsoc_dma_execute(schan); + } + } else if (sdesc->cyclic && (is & + SIRFSOC_DMA_INT_LOOP_INT_ATLAS7)) schan->happened_cyclic++; spin_unlock(&schan->lock); + break; + + default: + break; } /* Schedule tasklet */ @@ -227,16 +373,15 @@ static void sirfsoc_dma_process_completed(struct sirfsoc_dma *sdma) schan->chan.completed_cookie = last_cookie; spin_unlock_irqrestore(&schan->lock, flags); } else { - /* for cyclic channel, desc is always in active list */ - sdesc = list_first_entry(&schan->active, struct sirfsoc_dma_desc, - node); - - if (!sdesc || (sdesc && !sdesc->cyclic)) { - /* without active cyclic DMA */ + if (list_empty(&schan->active)) { spin_unlock_irqrestore(&schan->lock, flags); continue; } + /* for cyclic channel, desc is always in active list */ + sdesc = list_first_entry(&schan->active, + struct sirfsoc_dma_desc, node); + /* cyclic DMA */ happened_cyclic = schan->happened_cyclic; spin_unlock_irqrestore(&schan->lock, flags); @@ -307,20 +452,35 @@ static int sirfsoc_dma_terminate_all(struct dma_chan *chan) spin_lock_irqsave(&schan->lock, flags); - if (!sdma->is_marco) { - writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_INT_EN) & - ~(1 << cid), sdma->base + SIRFSOC_DMA_INT_EN); - writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL) - & ~((1 << cid) | 1 << (cid + 16)), - sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); - } else { + 
switch (sdma->type) { + case SIRFSOC_DMA_VER_A7V1: writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_INT_EN_CLR); + writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_CH_INT); writel_relaxed((1 << cid) | 1 << (cid + 16), - sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL_CLR); + sdma->base + + SIRFSOC_DMA_CH_LOOP_CTRL_CLR_ATLAS7); + writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_CH_VALID); + break; + case SIRFSOC_DMA_VER_A7V2: + writel_relaxed(0, sdma->base + SIRFSOC_DMA_INT_EN_ATLAS7); + writel_relaxed(SIRFSOC_DMA_INT_ALL_ATLAS7, + sdma->base + SIRFSOC_DMA_INT_ATLAS7); + writel_relaxed(0, sdma->base + SIRFSOC_DMA_LOOP_CTRL_ATLAS7); + writel_relaxed(0, sdma->base + SIRFSOC_DMA_VALID_ATLAS7); + break; + case SIRFSOC_DMA_VER_A6: + writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_INT_EN) & + ~(1 << cid), sdma->base + SIRFSOC_DMA_INT_EN); + writel_relaxed(readl_relaxed(sdma->base + + SIRFSOC_DMA_CH_LOOP_CTRL) & + ~((1 << cid) | 1 << (cid + 16)), + sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); + writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_CH_VALID); + break; + default: + break; } - writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_CH_VALID); - list_splice_tail_init(&schan->active, &schan->free); list_splice_tail_init(&schan->queued, &schan->free); @@ -338,13 +498,25 @@ static int sirfsoc_dma_pause_chan(struct dma_chan *chan) spin_lock_irqsave(&schan->lock, flags); - if (!sdma->is_marco) - writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL) - & ~((1 << cid) | 1 << (cid + 16)), - sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); - else + switch (sdma->type) { + case SIRFSOC_DMA_VER_A7V1: writel_relaxed((1 << cid) | 1 << (cid + 16), - sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL_CLR); + sdma->base + + SIRFSOC_DMA_CH_LOOP_CTRL_CLR_ATLAS7); + break; + case SIRFSOC_DMA_VER_A7V2: + writel_relaxed(0, sdma->base + SIRFSOC_DMA_LOOP_CTRL_ATLAS7); + break; + case SIRFSOC_DMA_VER_A6: + writel_relaxed(readl_relaxed(sdma->base + + SIRFSOC_DMA_CH_LOOP_CTRL) & + ~((1 << cid) | 1 << (cid + 16)), + sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); + break; + + default: + break; + } spin_unlock_irqrestore(&schan->lock, flags); @@ -359,14 +531,25 @@ static int sirfsoc_dma_resume_chan(struct dma_chan *chan) unsigned long flags; spin_lock_irqsave(&schan->lock, flags); - - if (!sdma->is_marco) - writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL) - | ((1 << cid) | 1 << (cid + 16)), - sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); - else + switch (sdma->type) { + case SIRFSOC_DMA_VER_A7V1: writel_relaxed((1 << cid) | 1 << (cid + 16), - sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); + sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL_ATLAS7); + break; + case SIRFSOC_DMA_VER_A7V2: + writel_relaxed(0x10001, + sdma->base + SIRFSOC_DMA_LOOP_CTRL_ATLAS7); + break; + case SIRFSOC_DMA_VER_A6: + writel_relaxed(readl_relaxed(sdma->base + + SIRFSOC_DMA_CH_LOOP_CTRL) | + ((1 << cid) | 1 << (cid + 16)), + sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); + break; + + default: + break; + } spin_unlock_irqrestore(&schan->lock, flags); @@ -473,14 +656,31 @@ sirfsoc_dma_tx_status(struct dma_chan *chan, dma_cookie_t cookie, spin_lock_irqsave(&schan->lock, flags); - sdesc = list_first_entry(&schan->active, struct sirfsoc_dma_desc, - node); - dma_request_bytes = (sdesc->xlen + 1) * (sdesc->ylen + 1) * - (sdesc->width * SIRFSOC_DMA_WORD_LEN); + if (list_empty(&schan->active)) { + ret = dma_cookie_status(chan, cookie, txstate); + dma_set_residue(txstate, 0); + spin_unlock_irqrestore(&schan->lock, flags); + return ret; + } + sdesc = list_first_entry(&schan->active, struct 
sirfsoc_dma_desc, node); + if (sdesc->cyclic) + dma_request_bytes = (sdesc->xlen + 1) * (sdesc->ylen + 1) * + (sdesc->width * SIRFSOC_DMA_WORD_LEN); + else + dma_request_bytes = sdesc->xlen * SIRFSOC_DMA_WORD_LEN; ret = dma_cookie_status(chan, cookie, txstate); - dma_pos = readl_relaxed(sdma->base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR) - << 2; + + if (sdma->type == SIRFSOC_DMA_VER_A7V2) + cid = 0; + + if (sdma->type == SIRFSOC_DMA_VER_A7V2) { + dma_pos = readl_relaxed(sdma->base + SIRFSOC_DMA_CUR_DATA_ADDR); + } else { + dma_pos = readl_relaxed( + sdma->base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR) << 2; + } + residue = dma_request_bytes - (dma_pos - sdesc->addr); dma_set_residue(txstate, residue); @@ -647,6 +847,7 @@ static int sirfsoc_dma_probe(struct platform_device *op) struct dma_device *dma; struct sirfsoc_dma *sdma; struct sirfsoc_dma_chan *schan; + struct sirfsoc_dmadata *data; struct resource res; ulong regs_start, regs_size; u32 id; @@ -657,9 +858,11 @@ static int sirfsoc_dma_probe(struct platform_device *op) dev_err(dev, "Memory exhausted!\n"); return -ENOMEM; } - - if (of_device_is_compatible(dn, "sirf,marco-dmac")) - sdma->is_marco = true; + data = (struct sirfsoc_dmadata *) + (of_match_device(op->dev.driver->of_match_table, + &op->dev)->data); + sdma->exec_desc = data->exec; + sdma->type = data->type; if (of_property_read_u32(dn, "cell-index", &id)) { dev_err(dev, "Fail to get DMAC index\n"); @@ -816,6 +1019,8 @@ static int sirfsoc_dma_pm_suspend(struct device *dev) struct sirfsoc_dma_chan *schan; int ch; int ret; + int count; + u32 int_offset; /* * if we were runtime-suspended before, resume to enable clock @@ -827,11 +1032,19 @@ static int sirfsoc_dma_pm_suspend(struct device *dev) return ret; } + if (sdma->type == SIRFSOC_DMA_VER_A7V2) { + count = 1; + int_offset = SIRFSOC_DMA_INT_EN_ATLAS7; + } else { + count = SIRFSOC_DMA_CHANNELS; + int_offset = SIRFSOC_DMA_INT_EN; + } + /* * DMA controller will lose all registers while suspending * so we need to save registers for active channels */ - for (ch = 0; ch < SIRFSOC_DMA_CHANNELS; ch++) { + for (ch = 0; ch < count; ch++) { schan = &sdma->channels[ch]; if (list_empty(&schan->active)) continue; @@ -841,7 +1054,7 @@ static int sirfsoc_dma_pm_suspend(struct device *dev) save->ctrl[ch] = readl_relaxed(sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_CTRL); } - save->interrupt_en = readl_relaxed(sdma->base + SIRFSOC_DMA_INT_EN); + save->interrupt_en = readl_relaxed(sdma->base + int_offset); /* Disable clock */ sirfsoc_dma_runtime_suspend(dev); @@ -857,14 +1070,27 @@ static int sirfsoc_dma_pm_resume(struct device *dev) struct sirfsoc_dma_chan *schan; int ch; int ret; + int count; + u32 int_offset; + u32 width_offset; /* Enable clock before accessing register */ ret = sirfsoc_dma_runtime_resume(dev); if (ret < 0) return ret; - writel_relaxed(save->interrupt_en, sdma->base + SIRFSOC_DMA_INT_EN); - for (ch = 0; ch < SIRFSOC_DMA_CHANNELS; ch++) { + if (sdma->type == SIRFSOC_DMA_VER_A7V2) { + count = 1; + int_offset = SIRFSOC_DMA_INT_EN_ATLAS7; + width_offset = SIRFSOC_DMA_WIDTH_ATLAS7; + } else { + count = SIRFSOC_DMA_CHANNELS; + int_offset = SIRFSOC_DMA_INT_EN; + width_offset = SIRFSOC_DMA_WIDTH_0; + } + + writel_relaxed(save->interrupt_en, sdma->base + int_offset); + for (ch = 0; ch < count; ch++) { schan = &sdma->channels[ch]; if (list_empty(&schan->active)) continue; @@ -872,15 +1098,21 @@ static int sirfsoc_dma_pm_resume(struct device *dev) struct sirfsoc_dma_desc, node); writel_relaxed(sdesc->width, - sdma->base + SIRFSOC_DMA_WIDTH_0 + ch * 4); + 
sdma->base + width_offset + ch * 4); writel_relaxed(sdesc->xlen, sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_XLEN); writel_relaxed(sdesc->ylen, sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_YLEN); writel_relaxed(save->ctrl[ch], sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_CTRL); - writel_relaxed(sdesc->addr >> 2, - sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_ADDR); + if (sdma->type == SIRFSOC_DMA_VER_A7V2) { + writel_relaxed(sdesc->addr, + sdma->base + SIRFSOC_DMA_CH_ADDR); + } else { + writel_relaxed(sdesc->addr >> 2, + sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_ADDR); + + } } /* if we were runtime-suspended before, suspend again */ @@ -896,11 +1128,28 @@ static const struct dev_pm_ops sirfsoc_dma_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(sirfsoc_dma_pm_suspend, sirfsoc_dma_pm_resume) }; +struct sirfsoc_dmadata sirfsoc_dmadata_a6 = { + .exec = sirfsoc_dma_execute_hw_a6, + .type = SIRFSOC_DMA_VER_A6, +}; + +struct sirfsoc_dmadata sirfsoc_dmadata_a7v1 = { + .exec = sirfsoc_dma_execute_hw_a7v1, + .type = SIRFSOC_DMA_VER_A7V1, +}; + +struct sirfsoc_dmadata sirfsoc_dmadata_a7v2 = { + .exec = sirfsoc_dma_execute_hw_a7v2, + .type = SIRFSOC_DMA_VER_A7V2, +}; + static const struct of_device_id sirfsoc_dma_match[] = { - { .compatible = "sirf,prima2-dmac", }, - { .compatible = "sirf,marco-dmac", }, + { .compatible = "sirf,prima2-dmac", .data = &sirfsoc_dmadata_a6,}, + { .compatible = "sirf,atlas7-dmac", .data = &sirfsoc_dmadata_a7v1,}, + { .compatible = "sirf,atlas7-dmac-v2", .data = &sirfsoc_dmadata_a7v2,}, {}, }; +MODULE_DEVICE_TABLE(of, sirfsoc_dma_match); static struct platform_driver sirfsoc_dma_driver = { .probe = sirfsoc_dma_probe, @@ -925,7 +1174,7 @@ static void __exit sirfsoc_dma_exit(void) subsys_initcall(sirfsoc_dma_init); module_exit(sirfsoc_dma_exit); -MODULE_AUTHOR("Rongjun Ying <rongjun.ying@csr.com>, " - "Barry Song <baohua.song@csr.com>"); +MODULE_AUTHOR("Rongjun Ying <rongjun.ying@csr.com>"); +MODULE_AUTHOR("Barry Song <baohua.song@csr.com>"); MODULE_DESCRIPTION("SIRFSOC DMA control driver"); MODULE_LICENSE("GPL v2"); diff --git a/kernel/drivers/dma/ste_dma40.c b/kernel/drivers/dma/ste_dma40.c index 3c10f034d..dd3e7ba27 100644 --- a/kernel/drivers/dma/ste_dma40.c +++ b/kernel/drivers/dma/ste_dma40.c @@ -2853,7 +2853,7 @@ static void d40_ops_init(struct d40_base *base, struct dma_device *dev) * This controller can only access address at even * 32bit boundaries, i.e. 2^2 */ - dev->copy_align = 2; + dev->copy_align = DMAENGINE_ALIGN_4_BYTES; } if (dma_has_cap(DMA_SG, dev->cap_mask)) @@ -2907,7 +2907,7 @@ static int __init d40_dmaengine_init(struct d40_base *base, if (err) { d40_err(base->dev, - "Failed to regsiter memcpy only channels\n"); + "Failed to register memcpy only channels\n"); goto failure2; } diff --git a/kernel/drivers/dma/sun4i-dma.c b/kernel/drivers/dma/sun4i-dma.c new file mode 100644 index 000000000..1661d5182 --- /dev/null +++ b/kernel/drivers/dma/sun4i-dma.c @@ -0,0 +1,1288 @@ +/* + * Copyright (C) 2014 Emilio López + * Emilio López <emilio@elopez.com.ar> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#include <linux/bitmap.h> +#include <linux/bitops.h> +#include <linux/clk.h> +#include <linux/dmaengine.h> +#include <linux/dmapool.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/of_dma.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/spinlock.h> + +#include "virt-dma.h" + +/** Common macros to normal and dedicated DMA registers **/ + +#define SUN4I_DMA_CFG_LOADING BIT(31) +#define SUN4I_DMA_CFG_DST_DATA_WIDTH(width) ((width) << 25) +#define SUN4I_DMA_CFG_DST_BURST_LENGTH(len) ((len) << 23) +#define SUN4I_DMA_CFG_DST_ADDR_MODE(mode) ((mode) << 21) +#define SUN4I_DMA_CFG_DST_DRQ_TYPE(type) ((type) << 16) +#define SUN4I_DMA_CFG_SRC_DATA_WIDTH(width) ((width) << 9) +#define SUN4I_DMA_CFG_SRC_BURST_LENGTH(len) ((len) << 7) +#define SUN4I_DMA_CFG_SRC_ADDR_MODE(mode) ((mode) << 5) +#define SUN4I_DMA_CFG_SRC_DRQ_TYPE(type) (type) + +/** Normal DMA register values **/ + +/* Normal DMA source/destination data request type values */ +#define SUN4I_NDMA_DRQ_TYPE_SDRAM 0x16 +#define SUN4I_NDMA_DRQ_TYPE_LIMIT (0x1F + 1) + +/** Normal DMA register layout **/ + +/* Dedicated DMA source/destination address mode values */ +#define SUN4I_NDMA_ADDR_MODE_LINEAR 0 +#define SUN4I_NDMA_ADDR_MODE_IO 1 + +/* Normal DMA configuration register layout */ +#define SUN4I_NDMA_CFG_CONT_MODE BIT(30) +#define SUN4I_NDMA_CFG_WAIT_STATE(n) ((n) << 27) +#define SUN4I_NDMA_CFG_DST_NON_SECURE BIT(22) +#define SUN4I_NDMA_CFG_BYTE_COUNT_MODE_REMAIN BIT(15) +#define SUN4I_NDMA_CFG_SRC_NON_SECURE BIT(6) + +/** Dedicated DMA register values **/ + +/* Dedicated DMA source/destination address mode values */ +#define SUN4I_DDMA_ADDR_MODE_LINEAR 0 +#define SUN4I_DDMA_ADDR_MODE_IO 1 +#define SUN4I_DDMA_ADDR_MODE_HORIZONTAL_PAGE 2 +#define SUN4I_DDMA_ADDR_MODE_VERTICAL_PAGE 3 + +/* Dedicated DMA source/destination data request type values */ +#define SUN4I_DDMA_DRQ_TYPE_SDRAM 0x1 +#define SUN4I_DDMA_DRQ_TYPE_LIMIT (0x1F + 1) + +/** Dedicated DMA register layout **/ + +/* Dedicated DMA configuration register layout */ +#define SUN4I_DDMA_CFG_BUSY BIT(30) +#define SUN4I_DDMA_CFG_CONT_MODE BIT(29) +#define SUN4I_DDMA_CFG_DST_NON_SECURE BIT(28) +#define SUN4I_DDMA_CFG_BYTE_COUNT_MODE_REMAIN BIT(15) +#define SUN4I_DDMA_CFG_SRC_NON_SECURE BIT(12) + +/* Dedicated DMA parameter register layout */ +#define SUN4I_DDMA_PARA_DST_DATA_BLK_SIZE(n) (((n) - 1) << 24) +#define SUN4I_DDMA_PARA_DST_WAIT_CYCLES(n) (((n) - 1) << 16) +#define SUN4I_DDMA_PARA_SRC_DATA_BLK_SIZE(n) (((n) - 1) << 8) +#define SUN4I_DDMA_PARA_SRC_WAIT_CYCLES(n) (((n) - 1) << 0) + +/** DMA register offsets **/ + +/* General register offsets */ +#define SUN4I_DMA_IRQ_ENABLE_REG 0x0 +#define SUN4I_DMA_IRQ_PENDING_STATUS_REG 0x4 + +/* Normal DMA register offsets */ +#define SUN4I_NDMA_CHANNEL_REG_BASE(n) (0x100 + (n) * 0x20) +#define SUN4I_NDMA_CFG_REG 0x0 +#define SUN4I_NDMA_SRC_ADDR_REG 0x4 +#define SUN4I_NDMA_DST_ADDR_REG 0x8 +#define SUN4I_NDMA_BYTE_COUNT_REG 0xC + +/* Dedicated DMA register offsets */ +#define SUN4I_DDMA_CHANNEL_REG_BASE(n) (0x300 + (n) * 0x20) +#define SUN4I_DDMA_CFG_REG 0x0 +#define SUN4I_DDMA_SRC_ADDR_REG 0x4 +#define SUN4I_DDMA_DST_ADDR_REG 0x8 +#define SUN4I_DDMA_BYTE_COUNT_REG 0xC +#define SUN4I_DDMA_PARA_REG 0x18 + +/** DMA Driver **/ + +/* + * Normal DMA has 8 channels, and Dedicated DMA has another 8, so + * that's 16 channels. As for endpoints, there's 29 and 21 + * respectively. 
Given that the Normal DMA endpoints (other than + * SDRAM) can be used as tx/rx, we need 78 vchans in total + */ +#define SUN4I_NDMA_NR_MAX_CHANNELS 8 +#define SUN4I_DDMA_NR_MAX_CHANNELS 8 +#define SUN4I_DMA_NR_MAX_CHANNELS \ + (SUN4I_NDMA_NR_MAX_CHANNELS + SUN4I_DDMA_NR_MAX_CHANNELS) +#define SUN4I_NDMA_NR_MAX_VCHANS (29 * 2 - 1) +#define SUN4I_DDMA_NR_MAX_VCHANS 21 +#define SUN4I_DMA_NR_MAX_VCHANS \ + (SUN4I_NDMA_NR_MAX_VCHANS + SUN4I_DDMA_NR_MAX_VCHANS) + +/* This set of SUN4I_DDMA timing parameters were found experimentally while + * working with the SPI driver and seem to make it behave correctly */ +#define SUN4I_DDMA_MAGIC_SPI_PARAMETERS \ + (SUN4I_DDMA_PARA_DST_DATA_BLK_SIZE(1) | \ + SUN4I_DDMA_PARA_SRC_DATA_BLK_SIZE(1) | \ + SUN4I_DDMA_PARA_DST_WAIT_CYCLES(2) | \ + SUN4I_DDMA_PARA_SRC_WAIT_CYCLES(2)) + +struct sun4i_dma_pchan { + /* Register base of channel */ + void __iomem *base; + /* vchan currently being serviced */ + struct sun4i_dma_vchan *vchan; + /* Is this a dedicated pchan? */ + int is_dedicated; +}; + +struct sun4i_dma_vchan { + struct virt_dma_chan vc; + struct dma_slave_config cfg; + struct sun4i_dma_pchan *pchan; + struct sun4i_dma_promise *processing; + struct sun4i_dma_contract *contract; + u8 endpoint; + int is_dedicated; +}; + +struct sun4i_dma_promise { + u32 cfg; + u32 para; + dma_addr_t src; + dma_addr_t dst; + size_t len; + struct list_head list; +}; + +/* A contract is a set of promises */ +struct sun4i_dma_contract { + struct virt_dma_desc vd; + struct list_head demands; + struct list_head completed_demands; + int is_cyclic; +}; + +struct sun4i_dma_dev { + DECLARE_BITMAP(pchans_used, SUN4I_DMA_NR_MAX_CHANNELS); + struct dma_device slave; + struct sun4i_dma_pchan *pchans; + struct sun4i_dma_vchan *vchans; + void __iomem *base; + struct clk *clk; + int irq; + spinlock_t lock; +}; + +static struct sun4i_dma_dev *to_sun4i_dma_dev(struct dma_device *dev) +{ + return container_of(dev, struct sun4i_dma_dev, slave); +} + +static struct sun4i_dma_vchan *to_sun4i_dma_vchan(struct dma_chan *chan) +{ + return container_of(chan, struct sun4i_dma_vchan, vc.chan); +} + +static struct sun4i_dma_contract *to_sun4i_dma_contract(struct virt_dma_desc *vd) +{ + return container_of(vd, struct sun4i_dma_contract, vd); +} + +static struct device *chan2dev(struct dma_chan *chan) +{ + return &chan->dev->device; +} + +static int convert_burst(u32 maxburst) +{ + if (maxburst > 8) + return -EINVAL; + + /* 1 -> 0, 4 -> 1, 8 -> 2 */ + return (maxburst >> 2); +} + +static int convert_buswidth(enum dma_slave_buswidth addr_width) +{ + if (addr_width > DMA_SLAVE_BUSWIDTH_4_BYTES) + return -EINVAL; + + /* 8 (1 byte) -> 0, 16 (2 bytes) -> 1, 32 (4 bytes) -> 2 */ + return (addr_width >> 1); +} + +static void sun4i_dma_free_chan_resources(struct dma_chan *chan) +{ + struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); + + vchan_free_chan_resources(&vchan->vc); +} + +static struct sun4i_dma_pchan *find_and_use_pchan(struct sun4i_dma_dev *priv, + struct sun4i_dma_vchan *vchan) +{ + struct sun4i_dma_pchan *pchan = NULL, *pchans = priv->pchans; + unsigned long flags; + int i, max; + + /* + * pchans 0-SUN4I_NDMA_NR_MAX_CHANNELS are normal, and + * SUN4I_NDMA_NR_MAX_CHANNELS+ are dedicated ones + */ + if (vchan->is_dedicated) { + i = SUN4I_NDMA_NR_MAX_CHANNELS; + max = SUN4I_DMA_NR_MAX_CHANNELS; + } else { + i = 0; + max = SUN4I_NDMA_NR_MAX_CHANNELS; + } + + spin_lock_irqsave(&priv->lock, flags); + for_each_clear_bit_from(i, &priv->pchans_used, max) { + pchan = &pchans[i]; + pchan->vchan = vchan; 
+ set_bit(i, priv->pchans_used); + break; + } + spin_unlock_irqrestore(&priv->lock, flags); + + return pchan; +} + +static void release_pchan(struct sun4i_dma_dev *priv, + struct sun4i_dma_pchan *pchan) +{ + unsigned long flags; + int nr = pchan - priv->pchans; + + spin_lock_irqsave(&priv->lock, flags); + + pchan->vchan = NULL; + clear_bit(nr, priv->pchans_used); + + spin_unlock_irqrestore(&priv->lock, flags); +} + +static void configure_pchan(struct sun4i_dma_pchan *pchan, + struct sun4i_dma_promise *d) +{ + /* + * Configure addresses and misc parameters depending on type + * SUN4I_DDMA has an extra field with timing parameters + */ + if (pchan->is_dedicated) { + writel_relaxed(d->src, pchan->base + SUN4I_DDMA_SRC_ADDR_REG); + writel_relaxed(d->dst, pchan->base + SUN4I_DDMA_DST_ADDR_REG); + writel_relaxed(d->len, pchan->base + SUN4I_DDMA_BYTE_COUNT_REG); + writel_relaxed(d->para, pchan->base + SUN4I_DDMA_PARA_REG); + writel_relaxed(d->cfg, pchan->base + SUN4I_DDMA_CFG_REG); + } else { + writel_relaxed(d->src, pchan->base + SUN4I_NDMA_SRC_ADDR_REG); + writel_relaxed(d->dst, pchan->base + SUN4I_NDMA_DST_ADDR_REG); + writel_relaxed(d->len, pchan->base + SUN4I_NDMA_BYTE_COUNT_REG); + writel_relaxed(d->cfg, pchan->base + SUN4I_NDMA_CFG_REG); + } +} + +static void set_pchan_interrupt(struct sun4i_dma_dev *priv, + struct sun4i_dma_pchan *pchan, + int half, int end) +{ + u32 reg; + int pchan_number = pchan - priv->pchans; + unsigned long flags; + + spin_lock_irqsave(&priv->lock, flags); + + reg = readl_relaxed(priv->base + SUN4I_DMA_IRQ_ENABLE_REG); + + if (half) + reg |= BIT(pchan_number * 2); + else + reg &= ~BIT(pchan_number * 2); + + if (end) + reg |= BIT(pchan_number * 2 + 1); + else + reg &= ~BIT(pchan_number * 2 + 1); + + writel_relaxed(reg, priv->base + SUN4I_DMA_IRQ_ENABLE_REG); + + spin_unlock_irqrestore(&priv->lock, flags); +} + +/** + * Execute pending operations on a vchan + * + * When given a vchan, this function will try to acquire a suitable + * pchan and, if successful, will configure it to fulfill a promise + * from the next pending contract. + * + * This function must be called with &vchan->vc.lock held. 
+ */ +static int __execute_vchan_pending(struct sun4i_dma_dev *priv, + struct sun4i_dma_vchan *vchan) +{ + struct sun4i_dma_promise *promise = NULL; + struct sun4i_dma_contract *contract = NULL; + struct sun4i_dma_pchan *pchan; + struct virt_dma_desc *vd; + int ret; + + lockdep_assert_held(&vchan->vc.lock); + + /* We need a pchan to do anything, so secure one if available */ + pchan = find_and_use_pchan(priv, vchan); + if (!pchan) + return -EBUSY; + + /* + * Channel endpoints must not be repeated, so if this vchan + * has already submitted some work, we can't do anything else + */ + if (vchan->processing) { + dev_dbg(chan2dev(&vchan->vc.chan), + "processing something to this endpoint already\n"); + ret = -EBUSY; + goto release_pchan; + } + + do { + /* Figure out which contract we're working with today */ + vd = vchan_next_desc(&vchan->vc); + if (!vd) { + dev_dbg(chan2dev(&vchan->vc.chan), + "No pending contract found"); + ret = 0; + goto release_pchan; + } + + contract = to_sun4i_dma_contract(vd); + if (list_empty(&contract->demands)) { + /* The contract has been completed so mark it as such */ + list_del(&contract->vd.node); + vchan_cookie_complete(&contract->vd); + dev_dbg(chan2dev(&vchan->vc.chan), + "Empty contract found and marked complete"); + } + } while (list_empty(&contract->demands)); + + /* Now find out what we need to do */ + promise = list_first_entry(&contract->demands, + struct sun4i_dma_promise, list); + vchan->processing = promise; + + /* ... and make it reality */ + if (promise) { + vchan->contract = contract; + vchan->pchan = pchan; + set_pchan_interrupt(priv, pchan, contract->is_cyclic, 1); + configure_pchan(pchan, promise); + } + + return 0; + +release_pchan: + release_pchan(priv, pchan); + return ret; +} + +static int sanitize_config(struct dma_slave_config *sconfig, + enum dma_transfer_direction direction) +{ + switch (direction) { + case DMA_MEM_TO_DEV: + if ((sconfig->dst_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) || + !sconfig->dst_maxburst) + return -EINVAL; + + if (sconfig->src_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) + sconfig->src_addr_width = sconfig->dst_addr_width; + + if (!sconfig->src_maxburst) + sconfig->src_maxburst = sconfig->dst_maxburst; + + break; + + case DMA_DEV_TO_MEM: + if ((sconfig->src_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) || + !sconfig->src_maxburst) + return -EINVAL; + + if (sconfig->dst_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) + sconfig->dst_addr_width = sconfig->src_addr_width; + + if (!sconfig->dst_maxburst) + sconfig->dst_maxburst = sconfig->src_maxburst; + + break; + default: + return 0; + } + + return 0; +} + +/** + * Generate a promise, to be used in a normal DMA contract. + * + * A NDMA promise contains all the information required to program the + * normal part of the DMA Engine and get data copied. A non-executed + * promise will live in the demands list on a contract. Once it has been + * completed, it will be moved to the completed demands list for later freeing. 
+ * All linked promises will be freed when the corresponding contract is freed + */ +static struct sun4i_dma_promise * +generate_ndma_promise(struct dma_chan *chan, dma_addr_t src, dma_addr_t dest, + size_t len, struct dma_slave_config *sconfig, + enum dma_transfer_direction direction) +{ + struct sun4i_dma_promise *promise; + int ret; + + ret = sanitize_config(sconfig, direction); + if (ret) + return NULL; + + promise = kzalloc(sizeof(*promise), GFP_NOWAIT); + if (!promise) + return NULL; + + promise->src = src; + promise->dst = dest; + promise->len = len; + promise->cfg = SUN4I_DMA_CFG_LOADING | + SUN4I_NDMA_CFG_BYTE_COUNT_MODE_REMAIN; + + dev_dbg(chan2dev(chan), + "src burst %d, dst burst %d, src buswidth %d, dst buswidth %d", + sconfig->src_maxburst, sconfig->dst_maxburst, + sconfig->src_addr_width, sconfig->dst_addr_width); + + /* Source burst */ + ret = convert_burst(sconfig->src_maxburst); + if (IS_ERR_VALUE(ret)) + goto fail; + promise->cfg |= SUN4I_DMA_CFG_SRC_BURST_LENGTH(ret); + + /* Destination burst */ + ret = convert_burst(sconfig->dst_maxburst); + if (IS_ERR_VALUE(ret)) + goto fail; + promise->cfg |= SUN4I_DMA_CFG_DST_BURST_LENGTH(ret); + + /* Source bus width */ + ret = convert_buswidth(sconfig->src_addr_width); + if (IS_ERR_VALUE(ret)) + goto fail; + promise->cfg |= SUN4I_DMA_CFG_SRC_DATA_WIDTH(ret); + + /* Destination bus width */ + ret = convert_buswidth(sconfig->dst_addr_width); + if (IS_ERR_VALUE(ret)) + goto fail; + promise->cfg |= SUN4I_DMA_CFG_DST_DATA_WIDTH(ret); + + return promise; + +fail: + kfree(promise); + return NULL; +} + +/** + * Generate a promise, to be used in a dedicated DMA contract. + * + * A DDMA promise contains all the information required to program the + * Dedicated part of the DMA Engine and get data copied. A non-executed + * promise will live in the demands list on a contract. Once it has been + * completed, it will be moved to the completed demands list for later freeing. + * All linked promises will be freed when the corresponding contract is freed + */ +static struct sun4i_dma_promise * +generate_ddma_promise(struct dma_chan *chan, dma_addr_t src, dma_addr_t dest, + size_t len, struct dma_slave_config *sconfig) +{ + struct sun4i_dma_promise *promise; + int ret; + + promise = kzalloc(sizeof(*promise), GFP_NOWAIT); + if (!promise) + return NULL; + + promise->src = src; + promise->dst = dest; + promise->len = len; + promise->cfg = SUN4I_DMA_CFG_LOADING | + SUN4I_DDMA_CFG_BYTE_COUNT_MODE_REMAIN; + + /* Source burst */ + ret = convert_burst(sconfig->src_maxburst); + if (IS_ERR_VALUE(ret)) + goto fail; + promise->cfg |= SUN4I_DMA_CFG_SRC_BURST_LENGTH(ret); + + /* Destination burst */ + ret = convert_burst(sconfig->dst_maxburst); + if (IS_ERR_VALUE(ret)) + goto fail; + promise->cfg |= SUN4I_DMA_CFG_DST_BURST_LENGTH(ret); + + /* Source bus width */ + ret = convert_buswidth(sconfig->src_addr_width); + if (IS_ERR_VALUE(ret)) + goto fail; + promise->cfg |= SUN4I_DMA_CFG_SRC_DATA_WIDTH(ret); + + /* Destination bus width */ + ret = convert_buswidth(sconfig->dst_addr_width); + if (IS_ERR_VALUE(ret)) + goto fail; + promise->cfg |= SUN4I_DMA_CFG_DST_DATA_WIDTH(ret); + + return promise; + +fail: + kfree(promise); + return NULL; +} + +/** + * Generate a contract + * + * Contracts function as DMA descriptors. As our hardware does not support + * linked lists, we need to implement SG via software. We use a contract + * to hold all the pieces of the request and process them serially one + * after another. Each piece is represented as a promise. 
+ */ +static struct sun4i_dma_contract *generate_dma_contract(void) +{ + struct sun4i_dma_contract *contract; + + contract = kzalloc(sizeof(*contract), GFP_NOWAIT); + if (!contract) + return NULL; + + INIT_LIST_HEAD(&contract->demands); + INIT_LIST_HEAD(&contract->completed_demands); + + return contract; +} + +/** + * Get next promise on a cyclic transfer + * + * Cyclic contracts contain a series of promises which are executed on a + * loop. This function returns the next promise from a cyclic contract, + * so it can be programmed into the hardware. + */ +static struct sun4i_dma_promise * +get_next_cyclic_promise(struct sun4i_dma_contract *contract) +{ + struct sun4i_dma_promise *promise; + + promise = list_first_entry_or_null(&contract->demands, + struct sun4i_dma_promise, list); + if (!promise) { + list_splice_init(&contract->completed_demands, + &contract->demands); + promise = list_first_entry(&contract->demands, + struct sun4i_dma_promise, list); + } + + return promise; +} + +/** + * Free a contract and all its associated promises + */ +static void sun4i_dma_free_contract(struct virt_dma_desc *vd) +{ + struct sun4i_dma_contract *contract = to_sun4i_dma_contract(vd); + struct sun4i_dma_promise *promise, *tmp; + + /* Free all the demands and completed demands */ + list_for_each_entry_safe(promise, tmp, &contract->demands, list) + kfree(promise); + + list_for_each_entry_safe(promise, tmp, &contract->completed_demands, list) + kfree(promise); + + kfree(contract); +} + +static struct dma_async_tx_descriptor * +sun4i_dma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, + dma_addr_t src, size_t len, unsigned long flags) +{ + struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); + struct dma_slave_config *sconfig = &vchan->cfg; + struct sun4i_dma_promise *promise; + struct sun4i_dma_contract *contract; + + contract = generate_dma_contract(); + if (!contract) + return NULL; + + /* + * We can only do the copy to bus aligned addresses, so + * choose the best one so we get decent performance. We also + * maximize the burst size for this same reason. 
+ */ + sconfig->src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + sconfig->dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + sconfig->src_maxburst = 8; + sconfig->dst_maxburst = 8; + + if (vchan->is_dedicated) + promise = generate_ddma_promise(chan, src, dest, len, sconfig); + else + promise = generate_ndma_promise(chan, src, dest, len, sconfig, + DMA_MEM_TO_MEM); + + if (!promise) { + kfree(contract); + return NULL; + } + + /* Configure memcpy mode */ + if (vchan->is_dedicated) { + promise->cfg |= SUN4I_DMA_CFG_SRC_DRQ_TYPE(SUN4I_DDMA_DRQ_TYPE_SDRAM) | + SUN4I_DMA_CFG_DST_DRQ_TYPE(SUN4I_DDMA_DRQ_TYPE_SDRAM); + } else { + promise->cfg |= SUN4I_DMA_CFG_SRC_DRQ_TYPE(SUN4I_NDMA_DRQ_TYPE_SDRAM) | + SUN4I_DMA_CFG_DST_DRQ_TYPE(SUN4I_NDMA_DRQ_TYPE_SDRAM); + } + + /* Fill the contract with our only promise */ + list_add_tail(&promise->list, &contract->demands); + + /* And add it to the vchan */ + return vchan_tx_prep(&vchan->vc, &contract->vd, flags); +} + +static struct dma_async_tx_descriptor * +sun4i_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf, size_t len, + size_t period_len, enum dma_transfer_direction dir, + unsigned long flags) +{ + struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); + struct dma_slave_config *sconfig = &vchan->cfg; + struct sun4i_dma_promise *promise; + struct sun4i_dma_contract *contract; + dma_addr_t src, dest; + u32 endpoints; + int nr_periods, offset, plength, i; + + if (!is_slave_direction(dir)) { + dev_err(chan2dev(chan), "Invalid DMA direction\n"); + return NULL; + } + + if (vchan->is_dedicated) { + /* + * As we are using this just for audio data, we need to use + * normal DMA. There is nothing stopping us from supporting + * dedicated DMA here as well, so if a client comes up and + * requires it, it will be simple to implement it. + */ + dev_err(chan2dev(chan), + "Cyclic transfers are only supported on Normal DMA\n"); + return NULL; + } + + contract = generate_dma_contract(); + if (!contract) + return NULL; + + contract->is_cyclic = 1; + + /* Figure out the endpoints and the address we need */ + if (dir == DMA_MEM_TO_DEV) { + src = buf; + dest = sconfig->dst_addr; + endpoints = SUN4I_DMA_CFG_SRC_DRQ_TYPE(SUN4I_NDMA_DRQ_TYPE_SDRAM) | + SUN4I_DMA_CFG_DST_DRQ_TYPE(vchan->endpoint) | + SUN4I_DMA_CFG_DST_ADDR_MODE(SUN4I_NDMA_ADDR_MODE_IO); + } else { + src = sconfig->src_addr; + dest = buf; + endpoints = SUN4I_DMA_CFG_SRC_DRQ_TYPE(vchan->endpoint) | + SUN4I_DMA_CFG_SRC_ADDR_MODE(SUN4I_NDMA_ADDR_MODE_IO) | + SUN4I_DMA_CFG_DST_DRQ_TYPE(SUN4I_NDMA_DRQ_TYPE_SDRAM); + } + + /* + * We will be using half done interrupts to make two periods + * out of a promise, so we need to program the DMA engine less + * often + */ + + /* + * The engine can interrupt on half-transfer, so we can use + * this feature to program the engine half as often as if we + * didn't use it (keep in mind the hardware doesn't support + * linked lists). + * + * Say you have a set of periods (| marks the start/end, I for + * interrupt, P for programming the engine to do a new + * transfer), the easy but slow way would be to do + * + * |---|---|---|---| (periods / promises) + * P I,P I,P I,P I + * + * Using half transfer interrupts you can do + * + * |-------|-------| (promises as configured on hw) + * |---|---|---|---| (periods) + * P I I,P I I + * + * Which requires half the engine programming for the same + * functionality. 
+ */ + nr_periods = DIV_ROUND_UP(len / period_len, 2); + for (i = 0; i < nr_periods; i++) { + /* Calculate the offset in the buffer and the length needed */ + offset = i * period_len * 2; + plength = min((len - offset), (period_len * 2)); + if (dir == DMA_MEM_TO_DEV) + src = buf + offset; + else + dest = buf + offset; + + /* Make the promise */ + promise = generate_ndma_promise(chan, src, dest, + plength, sconfig, dir); + if (!promise) { + /* TODO: should we free everything? */ + return NULL; + } + promise->cfg |= endpoints; + + /* Then add it to the contract */ + list_add_tail(&promise->list, &contract->demands); + } + + /* And add it to the vchan */ + return vchan_tx_prep(&vchan->vc, &contract->vd, flags); +} + +static struct dma_async_tx_descriptor * +sun4i_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction dir, + unsigned long flags, void *context) +{ + struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); + struct dma_slave_config *sconfig = &vchan->cfg; + struct sun4i_dma_promise *promise; + struct sun4i_dma_contract *contract; + u8 ram_type, io_mode, linear_mode; + struct scatterlist *sg; + dma_addr_t srcaddr, dstaddr; + u32 endpoints, para; + int i; + + if (!sgl) + return NULL; + + if (!is_slave_direction(dir)) { + dev_err(chan2dev(chan), "Invalid DMA direction\n"); + return NULL; + } + + contract = generate_dma_contract(); + if (!contract) + return NULL; + + if (vchan->is_dedicated) { + io_mode = SUN4I_DDMA_ADDR_MODE_IO; + linear_mode = SUN4I_DDMA_ADDR_MODE_LINEAR; + ram_type = SUN4I_DDMA_DRQ_TYPE_SDRAM; + } else { + io_mode = SUN4I_NDMA_ADDR_MODE_IO; + linear_mode = SUN4I_NDMA_ADDR_MODE_LINEAR; + ram_type = SUN4I_NDMA_DRQ_TYPE_SDRAM; + } + + if (dir == DMA_MEM_TO_DEV) + endpoints = SUN4I_DMA_CFG_DST_DRQ_TYPE(vchan->endpoint) | + SUN4I_DMA_CFG_DST_ADDR_MODE(io_mode) | + SUN4I_DMA_CFG_SRC_DRQ_TYPE(ram_type) | + SUN4I_DMA_CFG_SRC_ADDR_MODE(linear_mode); + else + endpoints = SUN4I_DMA_CFG_DST_DRQ_TYPE(ram_type) | + SUN4I_DMA_CFG_DST_ADDR_MODE(linear_mode) | + SUN4I_DMA_CFG_SRC_DRQ_TYPE(vchan->endpoint) | + SUN4I_DMA_CFG_SRC_ADDR_MODE(io_mode); + + for_each_sg(sgl, sg, sg_len, i) { + /* Figure out addresses */ + if (dir == DMA_MEM_TO_DEV) { + srcaddr = sg_dma_address(sg); + dstaddr = sconfig->dst_addr; + } else { + srcaddr = sconfig->src_addr; + dstaddr = sg_dma_address(sg); + } + + /* + * These are the magic DMA engine timings that keep SPI going. + * I haven't seen any interface on DMAEngine to configure + * timings, and so far they seem to work for everything we + * support, so I've kept them here. I don't know if other + * devices need different timings because, as usual, we only + * have the "para" bitfield meanings, but no comment on what + * the values should be when doing a certain operation :| + */ + para = SUN4I_DDMA_MAGIC_SPI_PARAMETERS; + + /* And make a suitable promise */ + if (vchan->is_dedicated) + promise = generate_ddma_promise(chan, srcaddr, dstaddr, + sg_dma_len(sg), + sconfig); + else + promise = generate_ndma_promise(chan, srcaddr, dstaddr, + sg_dma_len(sg), + sconfig, dir); + + if (!promise) + return NULL; /* TODO: should we free everything? 
*/ + + promise->cfg |= endpoints; + promise->para = para; + + /* Then add it to the contract */ + list_add_tail(&promise->list, &contract->demands); + } + + /* + * Once we've got all the promises ready, add the contract + * to the pending list on the vchan + */ + return vchan_tx_prep(&vchan->vc, &contract->vd, flags); +} + +static int sun4i_dma_terminate_all(struct dma_chan *chan) +{ + struct sun4i_dma_dev *priv = to_sun4i_dma_dev(chan->device); + struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); + struct sun4i_dma_pchan *pchan = vchan->pchan; + LIST_HEAD(head); + unsigned long flags; + + spin_lock_irqsave(&vchan->vc.lock, flags); + vchan_get_all_descriptors(&vchan->vc, &head); + spin_unlock_irqrestore(&vchan->vc.lock, flags); + + /* + * Clearing the configuration register will halt the pchan. Interrupts + * may still trigger, so don't forget to disable them. + */ + if (pchan) { + if (pchan->is_dedicated) + writel(0, pchan->base + SUN4I_DDMA_CFG_REG); + else + writel(0, pchan->base + SUN4I_NDMA_CFG_REG); + set_pchan_interrupt(priv, pchan, 0, 0); + release_pchan(priv, pchan); + } + + spin_lock_irqsave(&vchan->vc.lock, flags); + vchan_dma_desc_free_list(&vchan->vc, &head); + /* Clear these so the vchan is usable again */ + vchan->processing = NULL; + vchan->pchan = NULL; + spin_unlock_irqrestore(&vchan->vc.lock, flags); + + return 0; +} + +static int sun4i_dma_config(struct dma_chan *chan, + struct dma_slave_config *config) +{ + struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); + + memcpy(&vchan->cfg, config, sizeof(*config)); + + return 0; +} + +static struct dma_chan *sun4i_dma_of_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct sun4i_dma_dev *priv = ofdma->of_dma_data; + struct sun4i_dma_vchan *vchan; + struct dma_chan *chan; + u8 is_dedicated = dma_spec->args[0]; + u8 endpoint = dma_spec->args[1]; + + /* Check if type is Normal or Dedicated */ + if (is_dedicated != 0 && is_dedicated != 1) + return NULL; + + /* Make sure the endpoint looks sane */ + if ((is_dedicated && endpoint >= SUN4I_DDMA_DRQ_TYPE_LIMIT) || + (!is_dedicated && endpoint >= SUN4I_NDMA_DRQ_TYPE_LIMIT)) + return NULL; + + chan = dma_get_any_slave_channel(&priv->slave); + if (!chan) + return NULL; + + /* Assign the endpoint to the vchan */ + vchan = to_sun4i_dma_vchan(chan); + vchan->is_dedicated = is_dedicated; + vchan->endpoint = endpoint; + + return chan; +} + +static enum dma_status sun4i_dma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *state) +{ + struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); + struct sun4i_dma_pchan *pchan = vchan->pchan; + struct sun4i_dma_contract *contract; + struct sun4i_dma_promise *promise; + struct virt_dma_desc *vd; + unsigned long flags; + enum dma_status ret; + size_t bytes = 0; + + ret = dma_cookie_status(chan, cookie, state); + if (!state || (ret == DMA_COMPLETE)) + return ret; + + spin_lock_irqsave(&vchan->vc.lock, flags); + vd = vchan_find_desc(&vchan->vc, cookie); + if (!vd) + goto exit; + contract = to_sun4i_dma_contract(vd); + + list_for_each_entry(promise, &contract->demands, list) + bytes += promise->len; + + /* + * The hardware is configured to return the remaining byte + * quantity. 
If possible, replace the first listed element's + * full size with the actual remaining amount + */ + promise = list_first_entry_or_null(&contract->demands, + struct sun4i_dma_promise, list); + if (promise && pchan) { + bytes -= promise->len; + if (pchan->is_dedicated) + bytes += readl(pchan->base + SUN4I_DDMA_BYTE_COUNT_REG); + else + bytes += readl(pchan->base + SUN4I_NDMA_BYTE_COUNT_REG); + } + +exit: + + dma_set_residue(state, bytes); + spin_unlock_irqrestore(&vchan->vc.lock, flags); + + return ret; +} + +static void sun4i_dma_issue_pending(struct dma_chan *chan) +{ + struct sun4i_dma_dev *priv = to_sun4i_dma_dev(chan->device); + struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); + unsigned long flags; + + spin_lock_irqsave(&vchan->vc.lock, flags); + + /* + * If there are pending transactions for this vchan, push one of + * them into the engine to get the ball rolling. + */ + if (vchan_issue_pending(&vchan->vc)) + __execute_vchan_pending(priv, vchan); + + spin_unlock_irqrestore(&vchan->vc.lock, flags); +} + +static irqreturn_t sun4i_dma_interrupt(int irq, void *dev_id) +{ + struct sun4i_dma_dev *priv = dev_id; + struct sun4i_dma_pchan *pchans = priv->pchans, *pchan; + struct sun4i_dma_vchan *vchan; + struct sun4i_dma_contract *contract; + struct sun4i_dma_promise *promise; + unsigned long pendirq, irqs, disableirqs; + int bit, i, free_room, allow_mitigation = 1; + + pendirq = readl_relaxed(priv->base + SUN4I_DMA_IRQ_PENDING_STATUS_REG); + +handle_pending: + + disableirqs = 0; + free_room = 0; + + for_each_set_bit(bit, &pendirq, 32) { + pchan = &pchans[bit >> 1]; + vchan = pchan->vchan; + if (!vchan) /* a terminated channel may still interrupt */ + continue; + contract = vchan->contract; + + /* + * Disable the IRQ and free the pchan if it's an end + * interrupt (odd bit) + */ + if (bit & 1) { + spin_lock(&vchan->vc.lock); + + /* + * Move the promise into the completed list now that + * we're done with it + */ + list_del(&vchan->processing->list); + list_add_tail(&vchan->processing->list, + &contract->completed_demands); + + /* + * Cyclic DMA transfers are special: + * - There's always something we can dispatch + * - We need to run the callback + * - Latency is very important, as this is used by audio + * We therefore just cycle through the list and dispatch + * whatever we have here, reusing the pchan. There's + * no need to run the thread after this. + * + * For non-cyclic transfers we need to look around, + * so we can program some more work, or notify the + * client that their transfers have been completed. 
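For reference, the pending-IRQ register packs two bits per physical channel; the helpers below use invented names for illustration and only restate the indexing the loop above relies on:

/*
 * Bit 2n is the half-done interrupt and bit 2n + 1 the end-of-transfer
 * interrupt for physical channel n, hence pchan = &pchans[bit >> 1].
 */
static inline unsigned int sun4i_dma_irq_to_pchan(unsigned int bit)
{
	return bit >> 1;
}

static inline bool sun4i_dma_irq_is_end(unsigned int bit)
{
	return bit & 1;
}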
+ */ + if (contract->is_cyclic) { + promise = get_next_cyclic_promise(contract); + vchan->processing = promise; + configure_pchan(pchan, promise); + vchan_cyclic_callback(&contract->vd); + } else { + vchan->processing = NULL; + vchan->pchan = NULL; + + free_room = 1; + disableirqs |= BIT(bit); + release_pchan(priv, pchan); + } + + spin_unlock(&vchan->vc.lock); + } else { + /* Half done interrupt */ + if (contract->is_cyclic) + vchan_cyclic_callback(&contract->vd); + else + disableirqs |= BIT(bit); + } + } + + /* Disable the IRQs for events we handled */ + spin_lock(&priv->lock); + irqs = readl_relaxed(priv->base + SUN4I_DMA_IRQ_ENABLE_REG); + writel_relaxed(irqs & ~disableirqs, + priv->base + SUN4I_DMA_IRQ_ENABLE_REG); + spin_unlock(&priv->lock); + + /* Writing 1 to the pending field will clear the pending interrupt */ + writel_relaxed(pendirq, priv->base + SUN4I_DMA_IRQ_PENDING_STATUS_REG); + + /* + * If a pchan was freed, we may be able to schedule something else, + * so have a look around + */ + if (free_room) { + for (i = 0; i < SUN4I_DMA_NR_MAX_VCHANS; i++) { + vchan = &priv->vchans[i]; + spin_lock(&vchan->vc.lock); + __execute_vchan_pending(priv, vchan); + spin_unlock(&vchan->vc.lock); + } + } + + /* + * Handle newer interrupts if some showed up, but only do it once + * to avoid a too long a loop + */ + if (allow_mitigation) { + pendirq = readl_relaxed(priv->base + + SUN4I_DMA_IRQ_PENDING_STATUS_REG); + if (pendirq) { + allow_mitigation = 0; + goto handle_pending; + } + } + + return IRQ_HANDLED; +} + +static int sun4i_dma_probe(struct platform_device *pdev) +{ + struct sun4i_dma_dev *priv; + struct resource *res; + int i, j, ret; + + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + priv->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(priv->base)) + return PTR_ERR(priv->base); + + priv->irq = platform_get_irq(pdev, 0); + if (priv->irq < 0) { + dev_err(&pdev->dev, "Cannot claim IRQ\n"); + return priv->irq; + } + + priv->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(priv->clk)) { + dev_err(&pdev->dev, "No clock specified\n"); + return PTR_ERR(priv->clk); + } + + platform_set_drvdata(pdev, priv); + spin_lock_init(&priv->lock); + + dma_cap_zero(priv->slave.cap_mask); + dma_cap_set(DMA_PRIVATE, priv->slave.cap_mask); + dma_cap_set(DMA_MEMCPY, priv->slave.cap_mask); + dma_cap_set(DMA_CYCLIC, priv->slave.cap_mask); + dma_cap_set(DMA_SLAVE, priv->slave.cap_mask); + + INIT_LIST_HEAD(&priv->slave.channels); + priv->slave.device_free_chan_resources = sun4i_dma_free_chan_resources; + priv->slave.device_tx_status = sun4i_dma_tx_status; + priv->slave.device_issue_pending = sun4i_dma_issue_pending; + priv->slave.device_prep_slave_sg = sun4i_dma_prep_slave_sg; + priv->slave.device_prep_dma_memcpy = sun4i_dma_prep_dma_memcpy; + priv->slave.device_prep_dma_cyclic = sun4i_dma_prep_dma_cyclic; + priv->slave.device_config = sun4i_dma_config; + priv->slave.device_terminate_all = sun4i_dma_terminate_all; + priv->slave.copy_align = 2; + priv->slave.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); + priv->slave.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); + priv->slave.directions = BIT(DMA_DEV_TO_MEM) | + BIT(DMA_MEM_TO_DEV); + priv->slave.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + + priv->slave.dev = &pdev->dev; + + priv->pchans = 
devm_kcalloc(&pdev->dev, SUN4I_DMA_NR_MAX_CHANNELS, + sizeof(struct sun4i_dma_pchan), GFP_KERNEL); + priv->vchans = devm_kcalloc(&pdev->dev, SUN4I_DMA_NR_MAX_VCHANS, + sizeof(struct sun4i_dma_vchan), GFP_KERNEL); + if (!priv->vchans || !priv->pchans) + return -ENOMEM; + + /* + * [0..SUN4I_NDMA_NR_MAX_CHANNELS) are normal pchans, and + * [SUN4I_NDMA_NR_MAX_CHANNELS..SUN4I_DMA_NR_MAX_CHANNELS) are + * dedicated ones + */ + for (i = 0; i < SUN4I_NDMA_NR_MAX_CHANNELS; i++) + priv->pchans[i].base = priv->base + + SUN4I_NDMA_CHANNEL_REG_BASE(i); + + for (j = 0; i < SUN4I_DMA_NR_MAX_CHANNELS; i++, j++) { + priv->pchans[i].base = priv->base + + SUN4I_DDMA_CHANNEL_REG_BASE(j); + priv->pchans[i].is_dedicated = 1; + } + + for (i = 0; i < SUN4I_DMA_NR_MAX_VCHANS; i++) { + struct sun4i_dma_vchan *vchan = &priv->vchans[i]; + + spin_lock_init(&vchan->vc.lock); + vchan->vc.desc_free = sun4i_dma_free_contract; + vchan_init(&vchan->vc, &priv->slave); + } + + ret = clk_prepare_enable(priv->clk); + if (ret) { + dev_err(&pdev->dev, "Couldn't enable the clock\n"); + return ret; + } + + /* + * Make sure the IRQs are all disabled and accounted for. The bootloader + * likes to leave these dirty + */ + writel(0, priv->base + SUN4I_DMA_IRQ_ENABLE_REG); + writel(0xFFFFFFFF, priv->base + SUN4I_DMA_IRQ_PENDING_STATUS_REG); + + ret = devm_request_irq(&pdev->dev, priv->irq, sun4i_dma_interrupt, + 0, dev_name(&pdev->dev), priv); + if (ret) { + dev_err(&pdev->dev, "Cannot request IRQ\n"); + goto err_clk_disable; + } + + ret = dma_async_device_register(&priv->slave); + if (ret) { + dev_warn(&pdev->dev, "Failed to register DMA engine device\n"); + goto err_clk_disable; + } + + ret = of_dma_controller_register(pdev->dev.of_node, sun4i_dma_of_xlate, + priv); + if (ret) { + dev_err(&pdev->dev, "of_dma_controller_register failed\n"); + goto err_dma_unregister; + } + + dev_dbg(&pdev->dev, "Successfully probed SUN4I_DMA\n"); + + return 0; + +err_dma_unregister: + dma_async_device_unregister(&priv->slave); +err_clk_disable: + clk_disable_unprepare(priv->clk); + return ret; +} + +static int sun4i_dma_remove(struct platform_device *pdev) +{ + struct sun4i_dma_dev *priv = platform_get_drvdata(pdev); + + /* Disable IRQ so no more work is scheduled */ + disable_irq(priv->irq); + + of_dma_controller_free(pdev->dev.of_node); + dma_async_device_unregister(&priv->slave); + + clk_disable_unprepare(priv->clk); + + return 0; +} + +static const struct of_device_id sun4i_dma_match[] = { + { .compatible = "allwinner,sun4i-a10-dma" }, + { /* sentinel */ }, +}; + +static struct platform_driver sun4i_dma_driver = { + .probe = sun4i_dma_probe, + .remove = sun4i_dma_remove, + .driver = { + .name = "sun4i-dma", + .of_match_table = sun4i_dma_match, + }, +}; + +module_platform_driver(sun4i_dma_driver); + +MODULE_DESCRIPTION("Allwinner A10 Dedicated DMA Controller Driver"); +MODULE_AUTHOR("Emilio López <emilio@elopez.com.ar>"); +MODULE_LICENSE("GPL"); diff --git a/kernel/drivers/dma/sun6i-dma.c b/kernel/drivers/dma/sun6i-dma.c index 11e536586..2db12e493 100644 --- a/kernel/drivers/dma/sun6i-dma.c +++ b/kernel/drivers/dma/sun6i-dma.c @@ -891,11 +891,24 @@ static struct sun6i_dma_config sun8i_a23_dma_cfg = { .nr_max_vchans = 37, }; +/* + * The H3 has 12 physical channels, a maximum DRQ port id of 27, + * and a total of 34 usable source and destination endpoints. 
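For context, a minimal sketch (function name invented, error handling omitted, assuming <linux/of_device.h>) of how a per-SoC config such as the H3 entry below is typically resolved at probe time through the of_device_id .data pointer:

/* Illustrative only: pick the per-SoC limits for the probing device. */
static const struct sun6i_dma_config *
sun6i_dma_example_get_cfg(struct platform_device *pdev)
{
	const struct of_device_id *match =
		of_match_device(sun6i_dma_match, &pdev->dev);

	return match ? match->data : NULL;
}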
+ */ + +static struct sun6i_dma_config sun8i_h3_dma_cfg = { + .nr_max_channels = 12, + .nr_max_requests = 27, + .nr_max_vchans = 34, +}; + static const struct of_device_id sun6i_dma_match[] = { { .compatible = "allwinner,sun6i-a31-dma", .data = &sun6i_a31_dma_cfg }, { .compatible = "allwinner,sun8i-a23-dma", .data = &sun8i_a23_dma_cfg }, + { .compatible = "allwinner,sun8i-h3-dma", .data = &sun8i_h3_dma_cfg }, { /* sentinel */ } }; +MODULE_DEVICE_TABLE(of, sun6i_dma_match); static int sun6i_dma_probe(struct platform_device *pdev) { @@ -957,7 +970,7 @@ static int sun6i_dma_probe(struct platform_device *pdev) sdc->slave.device_issue_pending = sun6i_dma_issue_pending; sdc->slave.device_prep_slave_sg = sun6i_dma_prep_slave_sg; sdc->slave.device_prep_dma_memcpy = sun6i_dma_prep_dma_memcpy; - sdc->slave.copy_align = 4; + sdc->slave.copy_align = DMAENGINE_ALIGN_4_BYTES; sdc->slave.device_config = sun6i_dma_config; sdc->slave.device_pause = sun6i_dma_pause; sdc->slave.device_resume = sun6i_dma_resume; diff --git a/kernel/drivers/dma/tegra20-apb-dma.c b/kernel/drivers/dma/tegra20-apb-dma.c index eaf585e82..c8f79dcaa 100644 --- a/kernel/drivers/dma/tegra20-apb-dma.c +++ b/kernel/drivers/dma/tegra20-apb-dma.c @@ -155,7 +155,6 @@ struct tegra_dma_sg_req { int req_len; bool configured; bool last_sg; - bool half_done; struct list_head node; struct tegra_dma_desc *dma_desc; }; @@ -188,7 +187,7 @@ struct tegra_dma_channel { bool config_init; int id; int irq; - unsigned long chan_base_offset; + void __iomem *chan_addr; spinlock_t lock; bool busy; struct tegra_dma *tdma; @@ -203,8 +202,6 @@ struct tegra_dma_channel { /* ISR handler and tasklet for bottom half of isr handling */ dma_isr_handler isr_handler; struct tasklet_struct tasklet; - dma_async_tx_callback callback; - void *callback_param; /* Channel-slave specific configuration */ unsigned int slave_id; @@ -222,6 +219,13 @@ struct tegra_dma { void __iomem *base_addr; const struct tegra_dma_chip_data *chip_data; + /* + * Counter for managing global pausing of the DMA controller. + * Only applicable for devices that don't support individual + * channel pausing. 
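A short illustration of why the counter matters (hypothetical interleaving, not taken from the patch):

/*
 * Two channels pausing the controller concurrently:
 *
 *   chan A pause   ->  GENERAL = 0 written       count 0 -> 1
 *   chan B pause   ->  already stopped, skip     count 1 -> 2
 *   chan A resume  ->  still paused, skip        count 2 -> 1
 *   chan B resume  ->  GENERAL = ENABLE written  count 1 -> 0
 *
 * Without the counter, chan A's resume would re-enable the controller
 * while chan B still expects transfers to be held off.
 */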
+ */ + u32 global_pause_count; + /* Some register need to be cache before suspend */ u32 reg_gen; @@ -242,12 +246,12 @@ static inline u32 tdma_read(struct tegra_dma *tdma, u32 reg) static inline void tdc_write(struct tegra_dma_channel *tdc, u32 reg, u32 val) { - writel(val, tdc->tdma->base_addr + tdc->chan_base_offset + reg); + writel(val, tdc->chan_addr + reg); } static inline u32 tdc_read(struct tegra_dma_channel *tdc, u32 reg) { - return readl(tdc->tdma->base_addr + tdc->chan_base_offset + reg); + return readl(tdc->chan_addr + reg); } static inline struct tegra_dma_channel *to_tegra_dma_chan(struct dma_chan *dc) @@ -361,16 +365,32 @@ static void tegra_dma_global_pause(struct tegra_dma_channel *tdc, struct tegra_dma *tdma = tdc->tdma; spin_lock(&tdma->global_lock); - tdma_write(tdma, TEGRA_APBDMA_GENERAL, 0); - if (wait_for_burst_complete) - udelay(TEGRA_APBDMA_BURST_COMPLETE_TIME); + + if (tdc->tdma->global_pause_count == 0) { + tdma_write(tdma, TEGRA_APBDMA_GENERAL, 0); + if (wait_for_burst_complete) + udelay(TEGRA_APBDMA_BURST_COMPLETE_TIME); + } + + tdc->tdma->global_pause_count++; + + spin_unlock(&tdma->global_lock); } static void tegra_dma_global_resume(struct tegra_dma_channel *tdc) { struct tegra_dma *tdma = tdc->tdma; - tdma_write(tdma, TEGRA_APBDMA_GENERAL, TEGRA_APBDMA_GENERAL_ENABLE); + spin_lock(&tdma->global_lock); + + if (WARN_ON(tdc->tdma->global_pause_count == 0)) + goto out; + + if (--tdc->tdma->global_pause_count == 0) + tdma_write(tdma, TEGRA_APBDMA_GENERAL, + TEGRA_APBDMA_GENERAL_ENABLE); + +out: spin_unlock(&tdma->global_lock); } @@ -601,7 +621,6 @@ static void handle_once_dma_done(struct tegra_dma_channel *tdc, return; tdc_start_head_req(tdc); - return; } static void handle_cont_sngl_cycle_dma_done(struct tegra_dma_channel *tdc, @@ -628,7 +647,6 @@ static void handle_cont_sngl_cycle_dma_done(struct tegra_dma_channel *tdc, if (!st) dma_desc->dma_status = DMA_ERROR; } - return; } static void tegra_dma_tasklet(unsigned long data) @@ -720,7 +738,6 @@ static void tegra_dma_issue_pending(struct dma_chan *dc) } end: spin_unlock_irqrestore(&tdc->lock, flags); - return; } static int tegra_dma_terminate_all(struct dma_chan *dc) @@ -932,7 +949,6 @@ static struct dma_async_tx_descriptor *tegra_dma_prep_slave_sg( struct tegra_dma_sg_req *sg_req = NULL; u32 burst_size; enum dma_slave_buswidth slave_bw; - int ret; if (!tdc->config_init) { dev_err(tdc2dev(tdc), "dma channel is not configured\n"); @@ -943,9 +959,8 @@ static struct dma_async_tx_descriptor *tegra_dma_prep_slave_sg( return NULL; } - ret = get_transfer_param(tdc, direction, &apb_ptr, &apb_seq, &csr, - &burst_size, &slave_bw); - if (ret < 0) + if (get_transfer_param(tdc, direction, &apb_ptr, &apb_seq, &csr, + &burst_size, &slave_bw) < 0) return NULL; INIT_LIST_HEAD(&req_list); @@ -1048,7 +1063,6 @@ static struct dma_async_tx_descriptor *tegra_dma_prep_dma_cyclic( dma_addr_t mem = buf_addr; u32 burst_size; enum dma_slave_buswidth slave_bw; - int ret; if (!buf_len || !period_len) { dev_err(tdc2dev(tdc), "Invalid buffer/period len\n"); @@ -1087,12 +1101,10 @@ static struct dma_async_tx_descriptor *tegra_dma_prep_dma_cyclic( return NULL; } - ret = get_transfer_param(tdc, direction, &apb_ptr, &apb_seq, &csr, - &burst_size, &slave_bw); - if (ret < 0) + if (get_transfer_param(tdc, direction, &apb_ptr, &apb_seq, &csr, + &burst_size, &slave_bw) < 0) return NULL; - ahb_seq = TEGRA_APBDMA_AHBSEQ_INTR_ENB; ahb_seq |= TEGRA_APBDMA_AHBSEQ_WRAP_NONE << TEGRA_APBDMA_AHBSEQ_WRAP_SHIFT; @@ -1136,7 +1148,6 @@ static struct 
dma_async_tx_descriptor *tegra_dma_prep_dma_cyclic( sg_req->ch_regs.apb_seq = apb_seq; sg_req->ch_regs.ahb_seq = ahb_seq; sg_req->configured = false; - sg_req->half_done = false; sg_req->last_sg = false; sg_req->dma_desc = dma_desc; sg_req->req_len = len; @@ -1377,8 +1388,9 @@ static int tegra_dma_probe(struct platform_device *pdev) for (i = 0; i < cdata->nr_channels; i++) { struct tegra_dma_channel *tdc = &tdma->channels[i]; - tdc->chan_base_offset = TEGRA_APBDMA_CHANNEL_BASE_ADD_OFFSET + - i * cdata->channel_reg_size; + tdc->chan_addr = tdma->base_addr + + TEGRA_APBDMA_CHANNEL_BASE_ADD_OFFSET + + (i * cdata->channel_reg_size); res = platform_get_resource(pdev, IORESOURCE_IRQ, i); if (!res) { @@ -1418,6 +1430,7 @@ static int tegra_dma_probe(struct platform_device *pdev) dma_cap_set(DMA_PRIVATE, tdma->dma_dev.cap_mask); dma_cap_set(DMA_CYCLIC, tdma->dma_dev.cap_mask); + tdma->global_pause_count = 0; tdma->dma_dev.dev = &pdev->dev; tdma->dma_dev.device_alloc_chan_resources = tegra_dma_alloc_chan_resources; diff --git a/kernel/drivers/dma/ti-dma-crossbar.c b/kernel/drivers/dma/ti-dma-crossbar.c new file mode 100644 index 000000000..a415edbe6 --- /dev/null +++ b/kernel/drivers/dma/ti-dma-crossbar.c @@ -0,0 +1,400 @@ +/* + * Copyright (C) 2015 Texas Instruments Incorporated - http://www.ti.com + * Author: Peter Ujfalusi <peter.ujfalusi@ti.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include <linux/slab.h> +#include <linux/err.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/io.h> +#include <linux/idr.h> +#include <linux/of_address.h> +#include <linux/of_device.h> +#include <linux/of_dma.h> + +#define TI_XBAR_DRA7 0 +#define TI_XBAR_AM335X 1 + +static const struct of_device_id ti_dma_xbar_match[] = { + { + .compatible = "ti,dra7-dma-crossbar", + .data = (void *)TI_XBAR_DRA7, + }, + { + .compatible = "ti,am335x-edma-crossbar", + .data = (void *)TI_XBAR_AM335X, + }, + {}, +}; + +/* Crossbar on AM335x/AM437x family */ +#define TI_AM335X_XBAR_LINES 64 + +struct ti_am335x_xbar_data { + void __iomem *iomem; + + struct dma_router dmarouter; + + u32 xbar_events; /* maximum number of events to select in xbar */ + u32 dma_requests; /* number of DMA requests on eDMA */ +}; + +struct ti_am335x_xbar_map { + u16 dma_line; + u16 mux_val; +}; + +static inline void ti_am335x_xbar_write(void __iomem *iomem, int event, u16 val) +{ + writeb_relaxed(val & 0x1f, iomem + event); +} + +static void ti_am335x_xbar_free(struct device *dev, void *route_data) +{ + struct ti_am335x_xbar_data *xbar = dev_get_drvdata(dev); + struct ti_am335x_xbar_map *map = route_data; + + dev_dbg(dev, "Unmapping XBAR event %u on channel %u\n", + map->mux_val, map->dma_line); + + ti_am335x_xbar_write(xbar->iomem, map->dma_line, 0); + kfree(map); +} + +static void *ti_am335x_xbar_route_allocate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct platform_device *pdev = of_find_device_by_node(ofdma->of_node); + struct ti_am335x_xbar_data *xbar = platform_get_drvdata(pdev); + struct ti_am335x_xbar_map *map; + + if (dma_spec->args_count != 3) + return ERR_PTR(-EINVAL); + + if (dma_spec->args[2] >= xbar->xbar_events) { + dev_err(&pdev->dev, "Invalid XBAR event number: %d\n", + dma_spec->args[2]); + return ERR_PTR(-EINVAL); + } + + if (dma_spec->args[0] >= xbar->dma_requests) { + dev_err(&pdev->dev, "Invalid DMA request line number: %d\n", + 
dma_spec->args[0]); + return ERR_PTR(-EINVAL); + } + + /* The of_node_put() will be done in the core for the node */ + dma_spec->np = of_parse_phandle(ofdma->of_node, "dma-masters", 0); + if (!dma_spec->np) { + dev_err(&pdev->dev, "Can't get DMA master\n"); + return ERR_PTR(-EINVAL); + } + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (!map) { + of_node_put(dma_spec->np); + return ERR_PTR(-ENOMEM); + } + + map->dma_line = (u16)dma_spec->args[0]; + map->mux_val = (u16)dma_spec->args[2]; + + dma_spec->args[2] = 0; + dma_spec->args_count = 2; + + dev_dbg(&pdev->dev, "Mapping XBAR event%u to DMA%u\n", + map->mux_val, map->dma_line); + + ti_am335x_xbar_write(xbar->iomem, map->dma_line, map->mux_val); + + return map; +} + +static const struct of_device_id ti_am335x_master_match[] = { + { .compatible = "ti,edma3-tpcc", }, + {}, +}; + +static int ti_am335x_xbar_probe(struct platform_device *pdev) +{ + struct device_node *node = pdev->dev.of_node; + const struct of_device_id *match; + struct device_node *dma_node; + struct ti_am335x_xbar_data *xbar; + struct resource *res; + void __iomem *iomem; + int i, ret; + + if (!node) + return -ENODEV; + + xbar = devm_kzalloc(&pdev->dev, sizeof(*xbar), GFP_KERNEL); + if (!xbar) + return -ENOMEM; + + dma_node = of_parse_phandle(node, "dma-masters", 0); + if (!dma_node) { + dev_err(&pdev->dev, "Can't get DMA master node\n"); + return -ENODEV; + } + + match = of_match_node(ti_am335x_master_match, dma_node); + if (!match) { + dev_err(&pdev->dev, "DMA master is not supported\n"); + return -EINVAL; + } + + if (of_property_read_u32(dma_node, "dma-requests", + &xbar->dma_requests)) { + dev_info(&pdev->dev, + "Missing XBAR output information, using %u.\n", + TI_AM335X_XBAR_LINES); + xbar->dma_requests = TI_AM335X_XBAR_LINES; + } + of_node_put(dma_node); + + if (of_property_read_u32(node, "dma-requests", &xbar->xbar_events)) { + dev_info(&pdev->dev, + "Missing XBAR input information, using %u.\n", + TI_AM335X_XBAR_LINES); + xbar->xbar_events = TI_AM335X_XBAR_LINES; + } + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + iomem = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(iomem)) + return PTR_ERR(iomem); + + xbar->iomem = iomem; + + xbar->dmarouter.dev = &pdev->dev; + xbar->dmarouter.route_free = ti_am335x_xbar_free; + + platform_set_drvdata(pdev, xbar); + + /* Reset the crossbar */ + for (i = 0; i < xbar->dma_requests; i++) + ti_am335x_xbar_write(xbar->iomem, i, 0); + + ret = of_dma_router_register(node, ti_am335x_xbar_route_allocate, + &xbar->dmarouter); + + return ret; +} + +/* Crossbar on DRA7xx family */ +#define TI_DRA7_XBAR_OUTPUTS 127 +#define TI_DRA7_XBAR_INPUTS 256 + +#define TI_XBAR_EDMA_OFFSET 0 +#define TI_XBAR_SDMA_OFFSET 1 + +struct ti_dra7_xbar_data { + void __iomem *iomem; + + struct dma_router dmarouter; + struct idr map_idr; + + u16 safe_val; /* Value to rest the crossbar lines */ + u32 xbar_requests; /* number of DMA requests connected to XBAR */ + u32 dma_requests; /* number of DMA requests forwarded to DMA */ + u32 dma_offset; +}; + +struct ti_dra7_xbar_map { + u16 xbar_in; + int xbar_out; +}; + +static inline void ti_dra7_xbar_write(void __iomem *iomem, int xbar, u16 val) +{ + writew_relaxed(val, iomem + (xbar * 2)); +} + +static void ti_dra7_xbar_free(struct device *dev, void *route_data) +{ + struct ti_dra7_xbar_data *xbar = dev_get_drvdata(dev); + struct ti_dra7_xbar_map *map = route_data; + + dev_dbg(dev, "Unmapping XBAR%u (was routed to %d)\n", + map->xbar_in, map->xbar_out); + + ti_dra7_xbar_write(xbar->iomem, 
map->xbar_out, xbar->safe_val); + idr_remove(&xbar->map_idr, map->xbar_out); + kfree(map); +} + +static void *ti_dra7_xbar_route_allocate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct platform_device *pdev = of_find_device_by_node(ofdma->of_node); + struct ti_dra7_xbar_data *xbar = platform_get_drvdata(pdev); + struct ti_dra7_xbar_map *map; + + if (dma_spec->args[0] >= xbar->xbar_requests) { + dev_err(&pdev->dev, "Invalid XBAR request number: %d\n", + dma_spec->args[0]); + return ERR_PTR(-EINVAL); + } + + /* The of_node_put() will be done in the core for the node */ + dma_spec->np = of_parse_phandle(ofdma->of_node, "dma-masters", 0); + if (!dma_spec->np) { + dev_err(&pdev->dev, "Can't get DMA master\n"); + return ERR_PTR(-EINVAL); + } + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (!map) { + of_node_put(dma_spec->np); + return ERR_PTR(-ENOMEM); + } + + map->xbar_out = idr_alloc(&xbar->map_idr, NULL, 0, xbar->dma_requests, + GFP_KERNEL); + map->xbar_in = (u16)dma_spec->args[0]; + + dma_spec->args[0] = map->xbar_out + xbar->dma_offset; + + dev_dbg(&pdev->dev, "Mapping XBAR%u to DMA%d\n", + map->xbar_in, map->xbar_out); + + ti_dra7_xbar_write(xbar->iomem, map->xbar_out, map->xbar_in); + + return map; +} + +static const struct of_device_id ti_dra7_master_match[] = { + { + .compatible = "ti,omap4430-sdma", + .data = (void *)TI_XBAR_SDMA_OFFSET, + }, + { + .compatible = "ti,edma3", + .data = (void *)TI_XBAR_EDMA_OFFSET, + }, + {}, +}; + +static int ti_dra7_xbar_probe(struct platform_device *pdev) +{ + struct device_node *node = pdev->dev.of_node; + const struct of_device_id *match; + struct device_node *dma_node; + struct ti_dra7_xbar_data *xbar; + struct resource *res; + u32 safe_val; + void __iomem *iomem; + int i, ret; + + if (!node) + return -ENODEV; + + xbar = devm_kzalloc(&pdev->dev, sizeof(*xbar), GFP_KERNEL); + if (!xbar) + return -ENOMEM; + + idr_init(&xbar->map_idr); + + dma_node = of_parse_phandle(node, "dma-masters", 0); + if (!dma_node) { + dev_err(&pdev->dev, "Can't get DMA master node\n"); + return -ENODEV; + } + + match = of_match_node(ti_dra7_master_match, dma_node); + if (!match) { + dev_err(&pdev->dev, "DMA master is not supported\n"); + return -EINVAL; + } + + if (of_property_read_u32(dma_node, "dma-requests", + &xbar->dma_requests)) { + dev_info(&pdev->dev, + "Missing XBAR output information, using %u.\n", + TI_DRA7_XBAR_OUTPUTS); + xbar->dma_requests = TI_DRA7_XBAR_OUTPUTS; + } + of_node_put(dma_node); + + if (of_property_read_u32(node, "dma-requests", &xbar->xbar_requests)) { + dev_info(&pdev->dev, + "Missing XBAR input information, using %u.\n", + TI_DRA7_XBAR_INPUTS); + xbar->xbar_requests = TI_DRA7_XBAR_INPUTS; + } + + if (!of_property_read_u32(node, "ti,dma-safe-map", &safe_val)) + xbar->safe_val = (u16)safe_val; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + iomem = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(iomem)) + return PTR_ERR(iomem); + + xbar->iomem = iomem; + + xbar->dmarouter.dev = &pdev->dev; + xbar->dmarouter.route_free = ti_dra7_xbar_free; + xbar->dma_offset = (u32)match->data; + + platform_set_drvdata(pdev, xbar); + + /* Reset the crossbar */ + for (i = 0; i < xbar->dma_requests; i++) + ti_dra7_xbar_write(xbar->iomem, i, xbar->safe_val); + + ret = of_dma_router_register(node, ti_dra7_xbar_route_allocate, + &xbar->dmarouter); + if (ret) { + /* Restore the defaults for the crossbar */ + for (i = 0; i < xbar->dma_requests; i++) + ti_dra7_xbar_write(xbar->iomem, i, i); + } + + return ret; +} + +static int 
ti_dma_xbar_probe(struct platform_device *pdev) +{ + const struct of_device_id *match; + int ret; + + match = of_match_node(ti_dma_xbar_match, pdev->dev.of_node); + if (unlikely(!match)) + return -EINVAL; + + switch ((u32)match->data) { + case TI_XBAR_DRA7: + ret = ti_dra7_xbar_probe(pdev); + break; + case TI_XBAR_AM335X: + ret = ti_am335x_xbar_probe(pdev); + break; + default: + dev_err(&pdev->dev, "Unsupported crossbar\n"); + ret = -ENODEV; + break; + } + + return ret; +} + +static struct platform_driver ti_dma_xbar_driver = { + .driver = { + .name = "ti-dma-crossbar", + .of_match_table = of_match_ptr(ti_dma_xbar_match), + }, + .probe = ti_dma_xbar_probe, +}; + +int omap_dmaxbar_init(void) +{ + return platform_driver_register(&ti_dma_xbar_driver); +} +arch_initcall(omap_dmaxbar_init); diff --git a/kernel/drivers/dma/timb_dma.c b/kernel/drivers/dma/timb_dma.c index c4c3d93fd..559cd4073 100644 --- a/kernel/drivers/dma/timb_dma.c +++ b/kernel/drivers/dma/timb_dma.c @@ -10,10 +10,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* Supports: diff --git a/kernel/drivers/dma/virt-dma.h b/kernel/drivers/dma/virt-dma.h index 181b95267..2fa47745a 100644 --- a/kernel/drivers/dma/virt-dma.h +++ b/kernel/drivers/dma/virt-dma.h @@ -47,9 +47,9 @@ struct virt_dma_desc *vchan_find_desc(struct virt_dma_chan *, dma_cookie_t); /** * vchan_tx_prep - prepare a descriptor - * vc: virtual channel allocating this descriptor - * vd: virtual descriptor to prepare - * tx_flags: flags argument passed in to prepare function + * @vc: virtual channel allocating this descriptor + * @vd: virtual descriptor to prepare + * @tx_flags: flags argument passed in to prepare function */ static inline struct dma_async_tx_descriptor *vchan_tx_prep(struct virt_dma_chan *vc, struct virt_dma_desc *vd, unsigned long tx_flags) @@ -65,7 +65,7 @@ static inline struct dma_async_tx_descriptor *vchan_tx_prep(struct virt_dma_chan /** * vchan_issue_pending - move submitted descriptors to issued list - * vc: virtual channel to update + * @vc: virtual channel to update * * vc.lock must be held by caller */ @@ -77,7 +77,7 @@ static inline bool vchan_issue_pending(struct virt_dma_chan *vc) /** * vchan_cookie_complete - report completion of a descriptor - * vd: virtual descriptor to update + * @vd: virtual descriptor to update * * vc.lock must be held by caller */ @@ -97,7 +97,7 @@ static inline void vchan_cookie_complete(struct virt_dma_desc *vd) /** * vchan_cyclic_callback - report the completion of a period - * vd: virtual descriptor + * @vd: virtual descriptor */ static inline void vchan_cyclic_callback(struct virt_dma_desc *vd) { @@ -109,7 +109,7 @@ static inline void vchan_cyclic_callback(struct virt_dma_desc *vd) /** * vchan_next_desc - peek at the next descriptor to be processed - * vc: virtual channel to obtain descriptor from + * @vc: virtual channel to obtain descriptor from * * vc.lock must be held by caller */ @@ -123,8 +123,8 @@ static inline struct virt_dma_desc *vchan_next_desc(struct virt_dma_chan *vc) /** * vchan_get_all_descriptors - obtain all submitted and issued descriptors - * vc: virtual channel to get descriptors from - * head: list of descriptors found + * @vc: virtual channel to get descriptors 
from + * @head: list of descriptors found * * vc.lock must be held by caller * diff --git a/kernel/drivers/dma/xgene-dma.c b/kernel/drivers/dma/xgene-dma.c index f52e37502..9cb93c5b6 100755..100644 --- a/kernel/drivers/dma/xgene-dma.c +++ b/kernel/drivers/dma/xgene-dma.c @@ -21,6 +21,7 @@ * NOTE: PM support is currently not available. */ +#include <linux/acpi.h> #include <linux/clk.h> #include <linux/delay.h> #include <linux/dma-mapping.h> @@ -28,6 +29,7 @@ #include <linux/dmapool.h> #include <linux/interrupt.h> #include <linux/io.h> +#include <linux/irq.h> #include <linux/module.h> #include <linux/of_device.h> @@ -58,7 +60,6 @@ #define XGENE_DMA_RING_MEM_RAM_SHUTDOWN 0xD070 #define XGENE_DMA_RING_BLK_MEM_RDY 0xD074 #define XGENE_DMA_RING_BLK_MEM_RDY_VAL 0xFFFFFFFF -#define XGENE_DMA_RING_DESC_CNT(v) (((v) & 0x0001FFFE) >> 1) #define XGENE_DMA_RING_ID_GET(owner, num) (((owner) << 6) | (num)) #define XGENE_DMA_RING_DST_ID(v) ((1 << 10) | (v)) #define XGENE_DMA_RING_CMD_OFFSET 0x2C @@ -111,6 +112,7 @@ #define XGENE_DMA_MEM_RAM_SHUTDOWN 0xD070 #define XGENE_DMA_BLK_MEM_RDY 0xD074 #define XGENE_DMA_BLK_MEM_RDY_VAL 0xFFFFFFFF +#define XGENE_DMA_RING_CMD_SM_OFFSET 0x8000 /* X-Gene SoC EFUSE csr register and bit defination */ #define XGENE_SOC_JTAG1_SHADOW 0x18 @@ -124,32 +126,8 @@ #define XGENE_DMA_DESC_ELERR_POS 46 #define XGENE_DMA_DESC_RTYPE_POS 56 #define XGENE_DMA_DESC_LERR_POS 60 -#define XGENE_DMA_DESC_FLYBY_POS 4 #define XGENE_DMA_DESC_BUFLEN_POS 48 #define XGENE_DMA_DESC_HOENQ_NUM_POS 48 - -#define XGENE_DMA_DESC_NV_SET(m) \ - (((u64 *)(m))[0] |= XGENE_DMA_DESC_NV_BIT) -#define XGENE_DMA_DESC_IN_SET(m) \ - (((u64 *)(m))[0] |= XGENE_DMA_DESC_IN_BIT) -#define XGENE_DMA_DESC_RTYPE_SET(m, v) \ - (((u64 *)(m))[0] |= ((u64)(v) << XGENE_DMA_DESC_RTYPE_POS)) -#define XGENE_DMA_DESC_BUFADDR_SET(m, v) \ - (((u64 *)(m))[0] |= (v)) -#define XGENE_DMA_DESC_BUFLEN_SET(m, v) \ - (((u64 *)(m))[0] |= ((u64)(v) << XGENE_DMA_DESC_BUFLEN_POS)) -#define XGENE_DMA_DESC_C_SET(m) \ - (((u64 *)(m))[1] |= XGENE_DMA_DESC_C_BIT) -#define XGENE_DMA_DESC_FLYBY_SET(m, v) \ - (((u64 *)(m))[2] |= ((v) << XGENE_DMA_DESC_FLYBY_POS)) -#define XGENE_DMA_DESC_MULTI_SET(m, v, i) \ - (((u64 *)(m))[2] |= ((u64)(v) << (((i) + 1) * 8))) -#define XGENE_DMA_DESC_DR_SET(m) \ - (((u64 *)(m))[2] |= XGENE_DMA_DESC_DR_BIT) -#define XGENE_DMA_DESC_DST_ADDR_SET(m, v) \ - (((u64 *)(m))[3] |= (v)) -#define XGENE_DMA_DESC_H0ENQ_NUM_SET(m, v) \ - (((u64 *)(m))[3] |= ((u64)(v) << XGENE_DMA_DESC_HOENQ_NUM_POS)) #define XGENE_DMA_DESC_ELERR_RD(m) \ (((m) >> XGENE_DMA_DESC_ELERR_POS) & 0x3) #define XGENE_DMA_DESC_LERR_RD(m) \ @@ -158,14 +136,7 @@ (((elerr) << 4) | (lerr)) /* X-Gene DMA descriptor empty s/w signature */ -#define XGENE_DMA_DESC_EMPTY_INDEX 0 #define XGENE_DMA_DESC_EMPTY_SIGNATURE ~0ULL -#define XGENE_DMA_DESC_SET_EMPTY(m) \ - (((u64 *)(m))[XGENE_DMA_DESC_EMPTY_INDEX] = \ - XGENE_DMA_DESC_EMPTY_SIGNATURE) -#define XGENE_DMA_DESC_IS_EMPTY(m) \ - (((u64 *)(m))[XGENE_DMA_DESC_EMPTY_INDEX] == \ - XGENE_DMA_DESC_EMPTY_SIGNATURE) /* X-Gene DMA configurable parameters defines */ #define XGENE_DMA_RING_NUM 512 @@ -181,10 +152,9 @@ #define XGENE_DMA_PQ_CHANNEL 1 #define XGENE_DMA_MAX_BYTE_CNT 0x4000 /* 16 KB */ #define XGENE_DMA_MAX_64B_DESC_BYTE_CNT 0x14000 /* 80 KB */ -#define XGENE_DMA_XOR_ALIGNMENT 6 /* 64 Bytes */ #define XGENE_DMA_MAX_XOR_SRC 5 #define XGENE_DMA_16K_BUFFER_LEN_CODE 0x0 -#define XGENE_DMA_INVALID_LEN_CODE 0x7800 +#define XGENE_DMA_INVALID_LEN_CODE 0x7800000000000000ULL /* X-Gene DMA descriptor error codes */ #define 
ERR_DESC_AXI 0x01 @@ -214,10 +184,10 @@ #define ERR_DESC_SRC_INT 0xB /* X-Gene DMA flyby operation code */ -#define FLYBY_2SRC_XOR 0x8 -#define FLYBY_3SRC_XOR 0x9 -#define FLYBY_4SRC_XOR 0xA -#define FLYBY_5SRC_XOR 0xB +#define FLYBY_2SRC_XOR 0x80 +#define FLYBY_3SRC_XOR 0x90 +#define FLYBY_4SRC_XOR 0xA0 +#define FLYBY_5SRC_XOR 0xB0 /* X-Gene DMA SW descriptor flags */ #define XGENE_DMA_FLAG_64B_DESC BIT(0) @@ -238,10 +208,10 @@ dev_err(chan->dev, "%s: " fmt, chan->name, ##arg) struct xgene_dma_desc_hw { - u64 m0; - u64 m1; - u64 m2; - u64 m3; + __le64 m0; + __le64 m1; + __le64 m2; + __le64 m3; }; enum xgene_dma_ring_cfgsize { @@ -388,18 +358,11 @@ static bool is_pq_enabled(struct xgene_dma *pdma) return !(val & XGENE_DMA_PQ_DISABLE_MASK); } -static void xgene_dma_cpu_to_le64(u64 *desc, int count) -{ - int i; - - for (i = 0; i < count; i++) - desc[i] = cpu_to_le64(desc[i]); -} - -static u16 xgene_dma_encode_len(u32 len) +static u64 xgene_dma_encode_len(size_t len) { return (len < XGENE_DMA_MAX_BYTE_CNT) ? - len : XGENE_DMA_16K_BUFFER_LEN_CODE; + ((u64)len << XGENE_DMA_DESC_BUFLEN_POS) : + XGENE_DMA_16K_BUFFER_LEN_CODE; } static u8 xgene_dma_encode_xor_flyby(u32 src_cnt) @@ -416,42 +379,50 @@ static u8 xgene_dma_encode_xor_flyby(u32 src_cnt) return flyby_type[src_cnt]; } -static u32 xgene_dma_ring_desc_cnt(struct xgene_dma_ring *ring) -{ - u32 __iomem *cmd_base = ring->cmd_base; - u32 ring_state = ioread32(&cmd_base[1]); - - return XGENE_DMA_RING_DESC_CNT(ring_state); -} - -static void xgene_dma_set_src_buffer(void *ext8, size_t *len, +static void xgene_dma_set_src_buffer(__le64 *ext8, size_t *len, dma_addr_t *paddr) { size_t nbytes = (*len < XGENE_DMA_MAX_BYTE_CNT) ? *len : XGENE_DMA_MAX_BYTE_CNT; - XGENE_DMA_DESC_BUFADDR_SET(ext8, *paddr); - XGENE_DMA_DESC_BUFLEN_SET(ext8, xgene_dma_encode_len(nbytes)); + *ext8 |= cpu_to_le64(*paddr); + *ext8 |= cpu_to_le64(xgene_dma_encode_len(nbytes)); *len -= nbytes; *paddr += nbytes; } -static void xgene_dma_invalidate_buffer(void *ext8) +static void xgene_dma_invalidate_buffer(__le64 *ext8) { - XGENE_DMA_DESC_BUFLEN_SET(ext8, XGENE_DMA_INVALID_LEN_CODE); + *ext8 |= cpu_to_le64(XGENE_DMA_INVALID_LEN_CODE); } -static void *xgene_dma_lookup_ext8(u64 *desc, int idx) +static __le64 *xgene_dma_lookup_ext8(struct xgene_dma_desc_hw *desc, int idx) { - return (idx % 2) ? 
(desc + idx - 1) : (desc + idx + 1); + switch (idx) { + case 0: + return &desc->m1; + case 1: + return &desc->m0; + case 2: + return &desc->m3; + case 3: + return &desc->m2; + default: + pr_err("Invalid dma descriptor index\n"); + } + + return NULL; } -static void xgene_dma_init_desc(void *desc, u16 dst_ring_num) +static void xgene_dma_init_desc(struct xgene_dma_desc_hw *desc, + u16 dst_ring_num) { - XGENE_DMA_DESC_C_SET(desc); /* Coherent IO */ - XGENE_DMA_DESC_IN_SET(desc); - XGENE_DMA_DESC_H0ENQ_NUM_SET(desc, dst_ring_num); - XGENE_DMA_DESC_RTYPE_SET(desc, XGENE_DMA_RING_OWNER_DMA); + desc->m0 |= cpu_to_le64(XGENE_DMA_DESC_IN_BIT); + desc->m0 |= cpu_to_le64((u64)XGENE_DMA_RING_OWNER_DMA << + XGENE_DMA_DESC_RTYPE_POS); + desc->m1 |= cpu_to_le64(XGENE_DMA_DESC_C_BIT); + desc->m3 |= cpu_to_le64((u64)dst_ring_num << + XGENE_DMA_DESC_HOENQ_NUM_POS); } static void xgene_dma_prep_cpy_desc(struct xgene_dma_chan *chan, @@ -459,7 +430,7 @@ static void xgene_dma_prep_cpy_desc(struct xgene_dma_chan *chan, dma_addr_t dst, dma_addr_t src, size_t len) { - void *desc1, *desc2; + struct xgene_dma_desc_hw *desc1, *desc2; int i; /* Get 1st descriptor */ @@ -467,23 +438,21 @@ static void xgene_dma_prep_cpy_desc(struct xgene_dma_chan *chan, xgene_dma_init_desc(desc1, chan->tx_ring.dst_ring_num); /* Set destination address */ - XGENE_DMA_DESC_DR_SET(desc1); - XGENE_DMA_DESC_DST_ADDR_SET(desc1, dst); + desc1->m2 |= cpu_to_le64(XGENE_DMA_DESC_DR_BIT); + desc1->m3 |= cpu_to_le64(dst); /* Set 1st source address */ - xgene_dma_set_src_buffer(desc1 + 8, &len, &src); + xgene_dma_set_src_buffer(&desc1->m1, &len, &src); - if (len <= 0) { - desc2 = NULL; - goto skip_additional_src; - } + if (!len) + return; /* * We need to split this source buffer, * and need to use 2nd descriptor */ desc2 = &desc_sw->desc2; - XGENE_DMA_DESC_NV_SET(desc1); + desc1->m0 |= cpu_to_le64(XGENE_DMA_DESC_NV_BIT); /* Set 2nd to 5th source address */ for (i = 0; i < 4 && len; i++) @@ -496,12 +465,6 @@ static void xgene_dma_prep_cpy_desc(struct xgene_dma_chan *chan, /* Updated flag that we have prepared 64B descriptor */ desc_sw->flags |= XGENE_DMA_FLAG_64B_DESC; - -skip_additional_src: - /* Hardware stores descriptor in little endian format */ - xgene_dma_cpu_to_le64(desc1, 4); - if (desc2) - xgene_dma_cpu_to_le64(desc2, 4); } static void xgene_dma_prep_xor_desc(struct xgene_dma_chan *chan, @@ -510,7 +473,7 @@ static void xgene_dma_prep_xor_desc(struct xgene_dma_chan *chan, u32 src_cnt, size_t *nbytes, const u8 *scf) { - void *desc1, *desc2; + struct xgene_dma_desc_hw *desc1, *desc2; size_t len = *nbytes; int i; @@ -521,28 +484,24 @@ static void xgene_dma_prep_xor_desc(struct xgene_dma_chan *chan, xgene_dma_init_desc(desc1, chan->tx_ring.dst_ring_num); /* Set destination address */ - XGENE_DMA_DESC_DR_SET(desc1); - XGENE_DMA_DESC_DST_ADDR_SET(desc1, *dst); + desc1->m2 |= cpu_to_le64(XGENE_DMA_DESC_DR_BIT); + desc1->m3 |= cpu_to_le64(*dst); /* We have multiple source addresses, so need to set NV bit*/ - XGENE_DMA_DESC_NV_SET(desc1); + desc1->m0 |= cpu_to_le64(XGENE_DMA_DESC_NV_BIT); /* Set flyby opcode */ - XGENE_DMA_DESC_FLYBY_SET(desc1, xgene_dma_encode_xor_flyby(src_cnt)); + desc1->m2 |= cpu_to_le64(xgene_dma_encode_xor_flyby(src_cnt)); /* Set 1st to 5th source addresses */ for (i = 0; i < src_cnt; i++) { len = *nbytes; - xgene_dma_set_src_buffer((i == 0) ? (desc1 + 8) : + xgene_dma_set_src_buffer((i == 0) ? 
&desc1->m1 : xgene_dma_lookup_ext8(desc2, i - 1), &len, &src[i]); - XGENE_DMA_DESC_MULTI_SET(desc1, scf[i], i); + desc1->m2 |= cpu_to_le64((scf[i] << ((i + 1) * 8))); } - /* Hardware stores descriptor in little endian format */ - xgene_dma_cpu_to_le64(desc1, 4); - xgene_dma_cpu_to_le64(desc2, 4); - /* Update meta data */ *nbytes = len; *dst += XGENE_DMA_MAX_BYTE_CNT; @@ -589,14 +548,12 @@ static struct xgene_dma_desc_sw *xgene_dma_alloc_descriptor( struct xgene_dma_desc_sw *desc; dma_addr_t phys; - desc = dma_pool_alloc(chan->desc_pool, GFP_NOWAIT, &phys); + desc = dma_pool_zalloc(chan->desc_pool, GFP_NOWAIT, &phys); if (!desc) { chan_err(chan, "Failed to allocate LDs\n"); return NULL; } - memset(desc, 0, sizeof(*desc)); - INIT_LIST_HEAD(&desc->tx_list); desc->tx.phys = phys; desc->tx.tx_submit = xgene_dma_tx_submit; @@ -692,15 +649,12 @@ static void xgene_dma_clean_running_descriptor(struct xgene_dma_chan *chan, dma_pool_free(chan->desc_pool, desc, desc->tx.phys); } -static int xgene_chan_xfer_request(struct xgene_dma_ring *ring, - struct xgene_dma_desc_sw *desc_sw) +static void xgene_chan_xfer_request(struct xgene_dma_chan *chan, + struct xgene_dma_desc_sw *desc_sw) { + struct xgene_dma_ring *ring = &chan->tx_ring; struct xgene_dma_desc_hw *desc_hw; - /* Check if can push more descriptor to hw for execution */ - if (xgene_dma_ring_desc_cnt(ring) > (ring->slots - 2)) - return -EBUSY; - /* Get hw descriptor from DMA tx ring */ desc_hw = &ring->desc_hw[ring->head]; @@ -727,23 +681,24 @@ static int xgene_chan_xfer_request(struct xgene_dma_ring *ring, memcpy(desc_hw, &desc_sw->desc2, sizeof(*desc_hw)); } + /* Increment the pending transaction count */ + chan->pending += ((desc_sw->flags & + XGENE_DMA_FLAG_64B_DESC) ? 2 : 1); + /* Notify the hw that we have descriptor ready for execution */ iowrite32((desc_sw->flags & XGENE_DMA_FLAG_64B_DESC) ? 
2 : 1, ring->cmd); - - return 0; } /** * xgene_chan_xfer_ld_pending - push any pending transactions to hw * @chan : X-Gene DMA channel * - * LOCKING: must hold chan->desc_lock + * LOCKING: must hold chan->lock */ static void xgene_chan_xfer_ld_pending(struct xgene_dma_chan *chan) { struct xgene_dma_desc_sw *desc_sw, *_desc_sw; - int ret; /* * If the list of pending descriptors is empty, then we @@ -768,18 +723,13 @@ static void xgene_chan_xfer_ld_pending(struct xgene_dma_chan *chan) if (chan->pending >= chan->max_outstanding) return; - ret = xgene_chan_xfer_request(&chan->tx_ring, desc_sw); - if (ret) - return; + xgene_chan_xfer_request(chan, desc_sw); /* * Delete this element from ld pending queue and append it to * ld running queue */ list_move_tail(&desc_sw->node, &chan->ld_running); - - /* Increment the pending transaction count */ - chan->pending++; } } @@ -797,18 +747,24 @@ static void xgene_dma_cleanup_descriptors(struct xgene_dma_chan *chan) struct xgene_dma_ring *ring = &chan->rx_ring; struct xgene_dma_desc_sw *desc_sw, *_desc_sw; struct xgene_dma_desc_hw *desc_hw; + struct list_head ld_completed; u8 status; + INIT_LIST_HEAD(&ld_completed); + + spin_lock_bh(&chan->lock); + /* Clean already completed and acked descriptors */ xgene_dma_clean_completed_descriptor(chan); - /* Run the callback for each descriptor, in order */ + /* Move all completed descriptors to ld completed queue, in order */ list_for_each_entry_safe(desc_sw, _desc_sw, &chan->ld_running, node) { /* Get subsequent hw descriptor from DMA rx ring */ desc_hw = &ring->desc_hw[ring->head]; /* Check if this descriptor has been completed */ - if (unlikely(XGENE_DMA_DESC_IS_EMPTY(desc_hw))) + if (unlikely(le64_to_cpu(desc_hw->m0) == + XGENE_DMA_DESC_EMPTY_SIGNATURE)) break; if (++ring->head == ring->slots) @@ -842,17 +798,20 @@ static void xgene_dma_cleanup_descriptors(struct xgene_dma_chan *chan) iowrite32(-1, ring->cmd); /* Mark this hw descriptor as processed */ - XGENE_DMA_DESC_SET_EMPTY(desc_hw); - - xgene_dma_run_tx_complete_actions(chan, desc_sw); - - xgene_dma_clean_running_descriptor(chan, desc_sw); + desc_hw->m0 = cpu_to_le64(XGENE_DMA_DESC_EMPTY_SIGNATURE); /* * Decrement the pending transaction count * as we have processed one */ - chan->pending--; + chan->pending -= ((desc_sw->flags & + XGENE_DMA_FLAG_64B_DESC) ? 2 : 1); + + /* + * Delete this node from ld running queue and append it to + * ld completed queue for further processing + */ + list_move_tail(&desc_sw->node, &ld_completed); } /* @@ -861,6 +820,14 @@ static void xgene_dma_cleanup_descriptors(struct xgene_dma_chan *chan) * ahead and free the descriptors below. 
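A minimal sketch (helper name invented) of the ring-slot accounting chan->pending is switched to here, where a 64 B descriptor occupies two slots:

/* Illustrative only: slots consumed by one software descriptor. */
static inline u32 xgene_dma_desc_slots(struct xgene_dma_desc_sw *desc_sw)
{
	return (desc_sw->flags & XGENE_DMA_FLAG_64B_DESC) ? 2 : 1;
}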
*/ xgene_chan_xfer_ld_pending(chan); + + spin_unlock_bh(&chan->lock); + + /* Run the callback for each descriptor, in order */ + list_for_each_entry_safe(desc_sw, _desc_sw, &ld_completed, node) { + xgene_dma_run_tx_complete_actions(chan, desc_sw); + xgene_dma_clean_running_descriptor(chan, desc_sw); + } } static int xgene_dma_alloc_chan_resources(struct dma_chan *dchan) @@ -889,7 +856,7 @@ static int xgene_dma_alloc_chan_resources(struct dma_chan *dchan) * @chan: X-Gene DMA channel * @list: the list to free * - * LOCKING: must hold chan->desc_lock + * LOCKING: must hold chan->lock */ static void xgene_dma_free_desc_list(struct xgene_dma_chan *chan, struct list_head *list) @@ -900,15 +867,6 @@ static void xgene_dma_free_desc_list(struct xgene_dma_chan *chan, xgene_dma_clean_descriptor(chan, desc); } -static void xgene_dma_free_tx_desc_list(struct xgene_dma_chan *chan, - struct list_head *list) -{ - struct xgene_dma_desc_sw *desc, *_desc; - - list_for_each_entry_safe(desc, _desc, list, node) - xgene_dma_clean_descriptor(chan, desc); -} - static void xgene_dma_free_chan_resources(struct dma_chan *dchan) { struct xgene_dma_chan *chan = to_dma_chan(dchan); @@ -918,11 +876,11 @@ static void xgene_dma_free_chan_resources(struct dma_chan *dchan) if (!chan->desc_pool) return; - spin_lock_bh(&chan->lock); - /* Process all running descriptor */ xgene_dma_cleanup_descriptors(chan); + spin_lock_bh(&chan->lock); + /* Clean all link descriptor queues */ xgene_dma_free_desc_list(chan, &chan->ld_pending); xgene_dma_free_desc_list(chan, &chan->ld_running); @@ -935,60 +893,6 @@ static void xgene_dma_free_chan_resources(struct dma_chan *dchan) chan->desc_pool = NULL; } -static struct dma_async_tx_descriptor *xgene_dma_prep_memcpy( - struct dma_chan *dchan, dma_addr_t dst, dma_addr_t src, - size_t len, unsigned long flags) -{ - struct xgene_dma_desc_sw *first = NULL, *new; - struct xgene_dma_chan *chan; - size_t copy; - - if (unlikely(!dchan || !len)) - return NULL; - - chan = to_dma_chan(dchan); - - do { - /* Allocate the link descriptor from DMA pool */ - new = xgene_dma_alloc_descriptor(chan); - if (!new) - goto fail; - - /* Create the largest transaction possible */ - copy = min_t(size_t, len, XGENE_DMA_MAX_64B_DESC_BYTE_CNT); - - /* Prepare DMA descriptor */ - xgene_dma_prep_cpy_desc(chan, new, dst, src, copy); - - if (!first) - first = new; - - new->tx.cookie = 0; - async_tx_ack(&new->tx); - - /* Update metadata */ - len -= copy; - dst += copy; - src += copy; - - /* Insert the link descriptor to the LD ring */ - list_add_tail(&new->node, &first->tx_list); - } while (len); - - new->tx.flags = flags; /* client is in control of this ack */ - new->tx.cookie = -EBUSY; - list_splice(&first->tx_list, &new->tx_list); - - return &new->tx; - -fail: - if (!first) - return NULL; - - xgene_dma_free_tx_desc_list(chan, &first->tx_list); - return NULL; -} - static struct dma_async_tx_descriptor *xgene_dma_prep_sg( struct dma_chan *dchan, struct scatterlist *dst_sg, u32 dst_nents, struct scatterlist *src_sg, @@ -1093,7 +997,7 @@ fail: if (!first) return NULL; - xgene_dma_free_tx_desc_list(chan, &first->tx_list); + xgene_dma_free_desc_list(chan, &first->tx_list); return NULL; } @@ -1141,7 +1045,7 @@ fail: if (!first) return NULL; - xgene_dma_free_tx_desc_list(chan, &first->tx_list); + xgene_dma_free_desc_list(chan, &first->tx_list); return NULL; } @@ -1218,7 +1122,7 @@ fail: if (!first) return NULL; - xgene_dma_free_tx_desc_list(chan, &first->tx_list); + xgene_dma_free_desc_list(chan, &first->tx_list); return NULL; } @@ 
-1242,15 +1146,11 @@ static void xgene_dma_tasklet_cb(unsigned long data) { struct xgene_dma_chan *chan = (struct xgene_dma_chan *)data; - spin_lock_bh(&chan->lock); - /* Run all cleanup for descriptors which have been completed */ xgene_dma_cleanup_descriptors(chan); /* Re-enable DMA channel IRQ */ enable_irq(chan->rx_irq); - - spin_unlock_bh(&chan->lock); } static irqreturn_t xgene_dma_chan_ring_isr(int irq, void *id) @@ -1316,7 +1216,6 @@ static void xgene_dma_setup_ring(struct xgene_dma_ring *ring) { void *ring_cfg = ring->state; u64 addr = ring->desc_paddr; - void *desc; u32 i, val; ring->slots = ring->size / XGENE_DMA_RING_WQ_DESC_SIZE; @@ -1358,8 +1257,10 @@ static void xgene_dma_setup_ring(struct xgene_dma_ring *ring) /* Set empty signature to DMA Rx ring descriptors */ for (i = 0; i < ring->slots; i++) { + struct xgene_dma_desc_hw *desc; + desc = &ring->desc_hw[i]; - XGENE_DMA_DESC_SET_EMPTY(desc); + desc->m0 = cpu_to_le64(XGENE_DMA_DESC_EMPTY_SIGNATURE); } /* Enable DMA Rx ring interrupt */ @@ -1450,15 +1351,18 @@ static int xgene_dma_create_ring_one(struct xgene_dma_chan *chan, struct xgene_dma_ring *ring, enum xgene_dma_ring_cfgsize cfgsize) { + int ret; + /* Setup DMA ring descriptor variables */ ring->pdma = chan->pdma; ring->cfgsize = cfgsize; ring->num = chan->pdma->ring_num++; ring->id = XGENE_DMA_RING_ID_GET(ring->owner, ring->buf_num); - ring->size = xgene_dma_get_ring_size(chan, cfgsize); - if (ring->size <= 0) - return ring->size; + ret = xgene_dma_get_ring_size(chan, cfgsize); + if (ret <= 0) + return ret; + ring->size = ret; /* Allocate memory for DMA ring descriptor */ ring->desc_vaddr = dma_zalloc_coherent(chan->dev, ring->size, @@ -1511,7 +1415,7 @@ static int xgene_dma_create_chan_rings(struct xgene_dma_chan *chan) tx_ring->id, tx_ring->num, tx_ring->desc_vaddr); /* Set the max outstanding request possible to this channel */ - chan->max_outstanding = rx_ring->slots; + chan->max_outstanding = tx_ring->slots; return ret; } @@ -1707,6 +1611,7 @@ static int xgene_dma_request_irqs(struct xgene_dma *pdma) /* Register DMA channel rx irq */ for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++) { chan = &pdma->chan[i]; + irq_set_status_flags(chan->rx_irq, IRQ_DISABLE_UNLAZY); ret = devm_request_irq(chan->dev, chan->rx_irq, xgene_dma_chan_ring_isr, 0, chan->name, chan); @@ -1717,6 +1622,7 @@ static int xgene_dma_request_irqs(struct xgene_dma *pdma) for (j = 0; j < i; j++) { chan = &pdma->chan[i]; + irq_clear_status_flags(chan->rx_irq, IRQ_DISABLE_UNLAZY); devm_free_irq(chan->dev, chan->rx_irq, chan); } @@ -1737,6 +1643,7 @@ static void xgene_dma_free_irqs(struct xgene_dma *pdma) for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++) { chan = &pdma->chan[i]; + irq_clear_status_flags(chan->rx_irq, IRQ_DISABLE_UNLAZY); devm_free_irq(chan->dev, chan->rx_irq, chan); } } @@ -1748,7 +1655,6 @@ static void xgene_dma_set_caps(struct xgene_dma_chan *chan, dma_cap_zero(dma_dev->cap_mask); /* Set DMA device capability */ - dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask); dma_cap_set(DMA_SG, dma_dev->cap_mask); /* Basically here, the X-Gene SoC DMA engine channel 0 supports XOR @@ -1775,19 +1681,18 @@ static void xgene_dma_set_caps(struct xgene_dma_chan *chan, dma_dev->device_free_chan_resources = xgene_dma_free_chan_resources; dma_dev->device_issue_pending = xgene_dma_issue_pending; dma_dev->device_tx_status = xgene_dma_tx_status; - dma_dev->device_prep_dma_memcpy = xgene_dma_prep_memcpy; dma_dev->device_prep_dma_sg = xgene_dma_prep_sg; if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { 
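Both the xor_align/pq_align change below and the sun6i copy_align hunk earlier move from raw shift values to enum dmaengine_alignment; as a reminder (values as defined in include/linux/dmaengine.h), the enum encodes log2 of the required alignment:

/*
 * bytes = 1 << align:
 *   DMAENGINE_ALIGN_4_BYTES  == 2  ->  4-byte alignment
 *   DMAENGINE_ALIGN_64_BYTES == 6  ->  64-byte alignment, the same value
 *                                      the old XGENE_DMA_XOR_ALIGNMENT
 *                                      macro carried
 */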
dma_dev->device_prep_dma_xor = xgene_dma_prep_xor; dma_dev->max_xor = XGENE_DMA_MAX_XOR_SRC; - dma_dev->xor_align = XGENE_DMA_XOR_ALIGNMENT; + dma_dev->xor_align = DMAENGINE_ALIGN_64_BYTES; } if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) { dma_dev->device_prep_dma_pq = xgene_dma_prep_pq; dma_dev->max_pq = XGENE_DMA_MAX_XOR_SRC; - dma_dev->pq_align = XGENE_DMA_XOR_ALIGNMENT; + dma_dev->pq_align = DMAENGINE_ALIGN_64_BYTES; } } @@ -1828,8 +1733,7 @@ static int xgene_dma_async_register(struct xgene_dma *pdma, int id) /* DMA capability info */ dev_info(pdma->dev, - "%s: CAPABILITY ( %s%s%s%s)\n", dma_chan_name(&chan->dma_chan), - dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "MEMCPY " : "", + "%s: CAPABILITY ( %s%s%s)\n", dma_chan_name(&chan->dma_chan), dma_has_cap(DMA_SG, dma_dev->cap_mask) ? "SGCPY " : "", dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "XOR " : "", dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "PQ " : ""); @@ -1928,6 +1832,8 @@ static int xgene_dma_get_resources(struct platform_device *pdev, return -ENOMEM; } + pdma->csr_ring_cmd += XGENE_DMA_RING_CMD_SM_OFFSET; + /* Get efuse csr region */ res = platform_get_resource(pdev, IORESOURCE_MEM, 3); if (!res) { @@ -1982,16 +1888,18 @@ static int xgene_dma_probe(struct platform_device *pdev) return ret; pdma->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(pdma->clk)) { + if (IS_ERR(pdma->clk) && !ACPI_COMPANION(&pdev->dev)) { dev_err(&pdev->dev, "Failed to get clk\n"); return PTR_ERR(pdma->clk); } /* Enable clk before accessing registers */ - ret = clk_prepare_enable(pdma->clk); - if (ret) { - dev_err(&pdev->dev, "Failed to enable clk %d\n", ret); - return ret; + if (!IS_ERR(pdma->clk)) { + ret = clk_prepare_enable(pdma->clk); + if (ret) { + dev_err(&pdev->dev, "Failed to enable clk %d\n", ret); + return ret; + } } /* Remove DMA RAM out of shutdown */ @@ -2036,7 +1944,8 @@ err_request_irq: err_dma_mask: err_clk_enable: - clk_disable_unprepare(pdma->clk); + if (!IS_ERR(pdma->clk)) + clk_disable_unprepare(pdma->clk); return ret; } @@ -2060,11 +1969,20 @@ static int xgene_dma_remove(struct platform_device *pdev) xgene_dma_delete_chan_rings(chan); } - clk_disable_unprepare(pdma->clk); + if (!IS_ERR(pdma->clk)) + clk_disable_unprepare(pdma->clk); return 0; } +#ifdef CONFIG_ACPI +static const struct acpi_device_id xgene_dma_acpi_match_ptr[] = { + {"APMC0D43", 0}, + {}, +}; +MODULE_DEVICE_TABLE(acpi, xgene_dma_acpi_match_ptr); +#endif + static const struct of_device_id xgene_dma_of_match_ptr[] = { {.compatible = "apm,xgene-storm-dma",}, {}, @@ -2077,6 +1995,7 @@ static struct platform_driver xgene_dma_driver = { .driver = { .name = "X-Gene-DMA", .of_match_table = xgene_dma_of_match_ptr, + .acpi_match_table = ACPI_PTR(xgene_dma_acpi_match_ptr), }, }; diff --git a/kernel/drivers/dma/xilinx/xilinx_vdma.c b/kernel/drivers/dma/xilinx/xilinx_vdma.c index d8434d465..6f4b5017c 100644 --- a/kernel/drivers/dma/xilinx/xilinx_vdma.c +++ b/kernel/drivers/dma/xilinx/xilinx_vdma.c @@ -1349,6 +1349,7 @@ static const struct of_device_id xilinx_vdma_of_ids[] = { { .compatible = "xlnx,axi-vdma-1.00.a",}, {} }; +MODULE_DEVICE_TABLE(of, xilinx_vdma_of_ids); static struct platform_driver xilinx_vdma_driver = { .driver = { diff --git a/kernel/drivers/dma/zx296702_dma.c b/kernel/drivers/dma/zx296702_dma.c new file mode 100644 index 000000000..245d759d5 --- /dev/null +++ b/kernel/drivers/dma/zx296702_dma.c @@ -0,0 +1,951 @@ +/* + * Copyright 2015 Linaro. 
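A hedged condensation (function name invented, assuming <linux/acpi.h>, <linux/clk.h> and <linux/err.h>) of the clock handling added to xgene_dma_probe() above: ACPI-enumerated systems expose no clock node, so a missing clk is only treated as fatal when the device came from DT.

/* Illustrative only: enable the clock when present, tolerate its absence
 * on ACPI systems. */
static int xgene_dma_example_enable_clk(struct device *dev, struct clk *clk)
{
	if (IS_ERR(clk))
		return ACPI_COMPANION(dev) ? 0 : PTR_ERR(clk);

	return clk_prepare_enable(clk);
}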
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/sched.h> +#include <linux/device.h> +#include <linux/dmaengine.h> +#include <linux/dma-mapping.h> +#include <linux/dmapool.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/of_device.h> +#include <linux/of.h> +#include <linux/clk.h> +#include <linux/of_dma.h> + +#include "virt-dma.h" + +#define DRIVER_NAME "zx-dma" +#define DMA_ALIGN 4 +#define DMA_MAX_SIZE (0x10000 - PAGE_SIZE) +#define LLI_BLOCK_SIZE (4 * PAGE_SIZE) + +#define REG_ZX_SRC_ADDR 0x00 +#define REG_ZX_DST_ADDR 0x04 +#define REG_ZX_TX_X_COUNT 0x08 +#define REG_ZX_TX_ZY_COUNT 0x0c +#define REG_ZX_SRC_ZY_STEP 0x10 +#define REG_ZX_DST_ZY_STEP 0x14 +#define REG_ZX_LLI_ADDR 0x1c +#define REG_ZX_CTRL 0x20 +#define REG_ZX_TC_IRQ 0x800 +#define REG_ZX_SRC_ERR_IRQ 0x804 +#define REG_ZX_DST_ERR_IRQ 0x808 +#define REG_ZX_CFG_ERR_IRQ 0x80c +#define REG_ZX_TC_IRQ_RAW 0x810 +#define REG_ZX_SRC_ERR_IRQ_RAW 0x814 +#define REG_ZX_DST_ERR_IRQ_RAW 0x818 +#define REG_ZX_CFG_ERR_IRQ_RAW 0x81c +#define REG_ZX_STATUS 0x820 +#define REG_ZX_DMA_GRP_PRIO 0x824 +#define REG_ZX_DMA_ARB 0x828 + +#define ZX_FORCE_CLOSE BIT(31) +#define ZX_DST_BURST_WIDTH(x) (((x) & 0x7) << 13) +#define ZX_MAX_BURST_LEN 16 +#define ZX_SRC_BURST_LEN(x) (((x) & 0xf) << 9) +#define ZX_SRC_BURST_WIDTH(x) (((x) & 0x7) << 6) +#define ZX_IRQ_ENABLE_ALL (3 << 4) +#define ZX_DST_FIFO_MODE BIT(3) +#define ZX_SRC_FIFO_MODE BIT(2) +#define ZX_SOFT_REQ BIT(1) +#define ZX_CH_ENABLE BIT(0) + +#define ZX_DMA_BUSWIDTHS \ + (BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) | \ + BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \ + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_8_BYTES)) + +enum zx_dma_burst_width { + ZX_DMA_WIDTH_8BIT = 0, + ZX_DMA_WIDTH_16BIT = 1, + ZX_DMA_WIDTH_32BIT = 2, + ZX_DMA_WIDTH_64BIT = 3, +}; + +struct zx_desc_hw { + u32 saddr; + u32 daddr; + u32 src_x; + u32 src_zy; + u32 src_zy_step; + u32 dst_zy_step; + u32 reserved1; + u32 lli; + u32 ctr; + u32 reserved[7]; /* pack as hardware registers region size */ +} __aligned(32); + +struct zx_dma_desc_sw { + struct virt_dma_desc vd; + dma_addr_t desc_hw_lli; + size_t desc_num; + size_t size; + struct zx_desc_hw *desc_hw; +}; + +struct zx_dma_phy; + +struct zx_dma_chan { + struct dma_slave_config slave_cfg; + int id; /* Request phy chan id */ + u32 ccfg; + u32 cyclic; + struct virt_dma_chan vc; + struct zx_dma_phy *phy; + struct list_head node; + dma_addr_t dev_addr; + enum dma_status status; +}; + +struct zx_dma_phy { + u32 idx; + void __iomem *base; + struct zx_dma_chan *vchan; + struct zx_dma_desc_sw *ds_run; + struct zx_dma_desc_sw *ds_done; +}; + +struct zx_dma_dev { + struct dma_device slave; + void __iomem *base; + spinlock_t lock; /* lock for ch and phy */ + struct list_head chan_pending; + struct zx_dma_phy *phy; + struct zx_dma_chan *chans; + struct clk *clk; + struct dma_pool *pool; + u32 dma_channels; + u32 dma_requests; + int irq; +}; + +#define to_zx_dma(dmadev) container_of(dmadev, struct zx_dma_dev, slave) + +static struct zx_dma_chan *to_zx_chan(struct dma_chan *chan) +{ + return container_of(chan, struct zx_dma_chan, vc.chan); +} + +static void zx_dma_terminate_chan(struct zx_dma_phy *phy, struct 
zx_dma_dev *d) +{ + u32 val = 0; + + val = readl_relaxed(phy->base + REG_ZX_CTRL); + val &= ~ZX_CH_ENABLE; + val |= ZX_FORCE_CLOSE; + writel_relaxed(val, phy->base + REG_ZX_CTRL); + + val = 0x1 << phy->idx; + writel_relaxed(val, d->base + REG_ZX_TC_IRQ_RAW); + writel_relaxed(val, d->base + REG_ZX_SRC_ERR_IRQ_RAW); + writel_relaxed(val, d->base + REG_ZX_DST_ERR_IRQ_RAW); + writel_relaxed(val, d->base + REG_ZX_CFG_ERR_IRQ_RAW); +} + +static void zx_dma_set_desc(struct zx_dma_phy *phy, struct zx_desc_hw *hw) +{ + writel_relaxed(hw->saddr, phy->base + REG_ZX_SRC_ADDR); + writel_relaxed(hw->daddr, phy->base + REG_ZX_DST_ADDR); + writel_relaxed(hw->src_x, phy->base + REG_ZX_TX_X_COUNT); + writel_relaxed(0, phy->base + REG_ZX_TX_ZY_COUNT); + writel_relaxed(0, phy->base + REG_ZX_SRC_ZY_STEP); + writel_relaxed(0, phy->base + REG_ZX_DST_ZY_STEP); + writel_relaxed(hw->lli, phy->base + REG_ZX_LLI_ADDR); + writel_relaxed(hw->ctr, phy->base + REG_ZX_CTRL); +} + +static u32 zx_dma_get_curr_lli(struct zx_dma_phy *phy) +{ + return readl_relaxed(phy->base + REG_ZX_LLI_ADDR); +} + +static u32 zx_dma_get_chan_stat(struct zx_dma_dev *d) +{ + return readl_relaxed(d->base + REG_ZX_STATUS); +} + +static void zx_dma_init_state(struct zx_dma_dev *d) +{ + /* set same priority */ + writel_relaxed(0x0, d->base + REG_ZX_DMA_ARB); + /* clear all irq */ + writel_relaxed(0xffffffff, d->base + REG_ZX_TC_IRQ_RAW); + writel_relaxed(0xffffffff, d->base + REG_ZX_SRC_ERR_IRQ_RAW); + writel_relaxed(0xffffffff, d->base + REG_ZX_DST_ERR_IRQ_RAW); + writel_relaxed(0xffffffff, d->base + REG_ZX_CFG_ERR_IRQ_RAW); +} + +static int zx_dma_start_txd(struct zx_dma_chan *c) +{ + struct zx_dma_dev *d = to_zx_dma(c->vc.chan.device); + struct virt_dma_desc *vd = vchan_next_desc(&c->vc); + + if (!c->phy) + return -EAGAIN; + + if (BIT(c->phy->idx) & zx_dma_get_chan_stat(d)) + return -EAGAIN; + + if (vd) { + struct zx_dma_desc_sw *ds = + container_of(vd, struct zx_dma_desc_sw, vd); + /* + * fetch and remove request from vc->desc_issued + * so vc->desc_issued only contains desc pending + */ + list_del(&ds->vd.node); + c->phy->ds_run = ds; + c->phy->ds_done = NULL; + /* start dma */ + zx_dma_set_desc(c->phy, ds->desc_hw); + return 0; + } + c->phy->ds_done = NULL; + c->phy->ds_run = NULL; + return -EAGAIN; +} + +static void zx_dma_task(struct zx_dma_dev *d) +{ + struct zx_dma_phy *p; + struct zx_dma_chan *c, *cn; + unsigned pch, pch_alloc = 0; + unsigned long flags; + + /* check new dma request of running channel in vc->desc_issued */ + list_for_each_entry_safe(c, cn, &d->slave.channels, + vc.chan.device_node) { + spin_lock_irqsave(&c->vc.lock, flags); + p = c->phy; + if (p && p->ds_done && zx_dma_start_txd(c)) { + /* No current txd associated with this channel */ + dev_dbg(d->slave.dev, "pchan %u: free\n", p->idx); + /* Mark this channel free */ + c->phy = NULL; + p->vchan = NULL; + } + spin_unlock_irqrestore(&c->vc.lock, flags); + } + + /* check new channel request in d->chan_pending */ + spin_lock_irqsave(&d->lock, flags); + while (!list_empty(&d->chan_pending)) { + c = list_first_entry(&d->chan_pending, + struct zx_dma_chan, node); + p = &d->phy[c->id]; + if (!p->vchan) { + /* remove from d->chan_pending */ + list_del_init(&c->node); + pch_alloc |= 1 << c->id; + /* Mark this channel allocated */ + p->vchan = c; + c->phy = p; + } else { + dev_dbg(d->slave.dev, "pchan %u: busy!\n", c->id); + } + } + spin_unlock_irqrestore(&d->lock, flags); + + for (pch = 0; pch < d->dma_channels; pch++) { + if (pch_alloc & (1 << pch)) { + p = &d->phy[pch]; + c = 
p->vchan; + if (c) { + spin_lock_irqsave(&c->vc.lock, flags); + zx_dma_start_txd(c); + spin_unlock_irqrestore(&c->vc.lock, flags); + } + } + } +} + +static irqreturn_t zx_dma_int_handler(int irq, void *dev_id) +{ + struct zx_dma_dev *d = (struct zx_dma_dev *)dev_id; + struct zx_dma_phy *p; + struct zx_dma_chan *c; + u32 tc = readl_relaxed(d->base + REG_ZX_TC_IRQ); + u32 serr = readl_relaxed(d->base + REG_ZX_SRC_ERR_IRQ); + u32 derr = readl_relaxed(d->base + REG_ZX_DST_ERR_IRQ); + u32 cfg = readl_relaxed(d->base + REG_ZX_CFG_ERR_IRQ); + u32 i, irq_chan = 0, task = 0; + + while (tc) { + i = __ffs(tc); + tc &= ~BIT(i); + p = &d->phy[i]; + c = p->vchan; + if (c) { + unsigned long flags; + + spin_lock_irqsave(&c->vc.lock, flags); + if (c->cyclic) { + vchan_cyclic_callback(&p->ds_run->vd); + } else { + vchan_cookie_complete(&p->ds_run->vd); + p->ds_done = p->ds_run; + task = 1; + } + spin_unlock_irqrestore(&c->vc.lock, flags); + irq_chan |= BIT(i); + } + } + + if (serr || derr || cfg) + dev_warn(d->slave.dev, "DMA ERR src 0x%x, dst 0x%x, cfg 0x%x\n", + serr, derr, cfg); + + writel_relaxed(irq_chan, d->base + REG_ZX_TC_IRQ_RAW); + writel_relaxed(serr, d->base + REG_ZX_SRC_ERR_IRQ_RAW); + writel_relaxed(derr, d->base + REG_ZX_DST_ERR_IRQ_RAW); + writel_relaxed(cfg, d->base + REG_ZX_CFG_ERR_IRQ_RAW); + + if (task) + zx_dma_task(d); + return IRQ_HANDLED; +} + +static void zx_dma_free_chan_resources(struct dma_chan *chan) +{ + struct zx_dma_chan *c = to_zx_chan(chan); + struct zx_dma_dev *d = to_zx_dma(chan->device); + unsigned long flags; + + spin_lock_irqsave(&d->lock, flags); + list_del_init(&c->node); + spin_unlock_irqrestore(&d->lock, flags); + + vchan_free_chan_resources(&c->vc); + c->ccfg = 0; +} + +static enum dma_status zx_dma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *state) +{ + struct zx_dma_chan *c = to_zx_chan(chan); + struct zx_dma_phy *p; + struct virt_dma_desc *vd; + unsigned long flags; + enum dma_status ret; + size_t bytes = 0; + + ret = dma_cookie_status(&c->vc.chan, cookie, state); + if (ret == DMA_COMPLETE || !state) + return ret; + + spin_lock_irqsave(&c->vc.lock, flags); + p = c->phy; + ret = c->status; + + /* + * If the cookie is on our issue queue, then the residue is + * its total size. 
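+	 * Otherwise, if a descriptor is currently running on the physical
+	 * channel, walk the hardware LLI chain from the entry being
+	 * processed and sum the remaining per-entry transfer lengths.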
+	 */
+	vd = vchan_find_desc(&c->vc, cookie);
+	if (vd) {
+		bytes = container_of(vd, struct zx_dma_desc_sw, vd)->size;
+	} else if ((!p) || (!p->ds_run)) {
+		bytes = 0;
+	} else {
+		struct zx_dma_desc_sw *ds = p->ds_run;
+		u32 clli = 0, index = 0;
+
+		bytes = 0;
+		clli = zx_dma_get_curr_lli(p);
+		index = (clli - ds->desc_hw_lli) / sizeof(struct zx_desc_hw);
+		for (; index < ds->desc_num; index++) {
+			bytes += ds->desc_hw[index].src_x;
+			/* end of lli */
+			if (!ds->desc_hw[index].lli)
+				break;
+		}
+	}
+	spin_unlock_irqrestore(&c->vc.lock, flags);
+	dma_set_residue(state, bytes);
+	return ret;
+}
+
+static void zx_dma_issue_pending(struct dma_chan *chan)
+{
+	struct zx_dma_chan *c = to_zx_chan(chan);
+	struct zx_dma_dev *d = to_zx_dma(chan->device);
+	unsigned long flags;
+	int issue = 0;
+
+	spin_lock_irqsave(&c->vc.lock, flags);
+	/* add request to vc->desc_issued */
+	if (vchan_issue_pending(&c->vc)) {
+		spin_lock(&d->lock);
+		if (!c->phy && list_empty(&c->node)) {
+			/* if new channel, add chan_pending */
+			list_add_tail(&c->node, &d->chan_pending);
+			issue = 1;
+			dev_dbg(d->slave.dev, "vchan %p: issued\n", &c->vc);
+		}
+		spin_unlock(&d->lock);
+	} else {
+		dev_dbg(d->slave.dev, "vchan %p: nothing to issue\n", &c->vc);
+	}
+	spin_unlock_irqrestore(&c->vc.lock, flags);
+
+	if (issue)
+		zx_dma_task(d);
+}
+
+static void zx_dma_fill_desc(struct zx_dma_desc_sw *ds, dma_addr_t dst,
+			     dma_addr_t src, size_t len, u32 num, u32 ccfg)
+{
+	if ((num + 1) < ds->desc_num)
+		ds->desc_hw[num].lli = ds->desc_hw_lli + (num + 1) *
+			sizeof(struct zx_desc_hw);
+	ds->desc_hw[num].saddr = src;
+	ds->desc_hw[num].daddr = dst;
+	ds->desc_hw[num].src_x = len;
+	ds->desc_hw[num].ctr = ccfg;
+}
+
+static struct zx_dma_desc_sw *zx_alloc_desc_resource(int num,
+						     struct dma_chan *chan)
+{
+	struct zx_dma_chan *c = to_zx_chan(chan);
+	struct zx_dma_desc_sw *ds;
+	struct zx_dma_dev *d = to_zx_dma(chan->device);
+	int lli_limit = LLI_BLOCK_SIZE / sizeof(struct zx_desc_hw);
+
+	if (num > lli_limit) {
+		dev_dbg(chan->device->dev, "vch %p: sg num %d exceed max %d\n",
+			&c->vc, num, lli_limit);
+		return NULL;
+	}
+
+	ds = kzalloc(sizeof(*ds), GFP_ATOMIC);
+	if (!ds)
+		return NULL;
+
+	ds->desc_hw = dma_pool_alloc(d->pool, GFP_NOWAIT, &ds->desc_hw_lli);
+	if (!ds->desc_hw) {
+		dev_dbg(chan->device->dev, "vch %p: dma alloc fail\n", &c->vc);
+		kfree(ds);
+		return NULL;
+	}
+	memset(ds->desc_hw, 0, sizeof(struct zx_desc_hw) * num);
+	ds->desc_num = num;
+	return ds;
+}
+
+static enum zx_dma_burst_width zx_dma_burst_width(enum dma_slave_buswidth width)
+{
+	switch (width) {
+	case DMA_SLAVE_BUSWIDTH_1_BYTE:
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+	case DMA_SLAVE_BUSWIDTH_8_BYTES:
+		return ffs(width) - 1;
+	default:
+		return ZX_DMA_WIDTH_32BIT;
+	}
+}
+
+static int zx_pre_config(struct zx_dma_chan *c, enum dma_transfer_direction dir)
+{
+	struct dma_slave_config *cfg = &c->slave_cfg;
+	enum zx_dma_burst_width src_width;
+	enum zx_dma_burst_width dst_width;
+	u32 maxburst = 0;
+
+	switch (dir) {
+	case DMA_MEM_TO_MEM:
+		c->ccfg = ZX_CH_ENABLE | ZX_SOFT_REQ
+			| ZX_SRC_BURST_LEN(ZX_MAX_BURST_LEN - 1)
+			| ZX_SRC_BURST_WIDTH(ZX_DMA_WIDTH_32BIT)
+			| ZX_DST_BURST_WIDTH(ZX_DMA_WIDTH_32BIT);
+		break;
+	case DMA_MEM_TO_DEV:
+		c->dev_addr = cfg->dst_addr;
+		/* The dst len is calculated from the src width, len and dst
+		 * width.  We need to make sure the dst len does not exceed
+		 * MAX LEN.  A trailing single transaction that does not fill
+		 * a full burst also requires identical src/dst data widths.
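+		 * That is why the destination bus width is programmed into
+		 * both the source and destination burst-width fields below.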
+ */ + dst_width = zx_dma_burst_width(cfg->dst_addr_width); + maxburst = cfg->dst_maxburst; + maxburst = maxburst < ZX_MAX_BURST_LEN ? + maxburst : ZX_MAX_BURST_LEN; + c->ccfg = ZX_DST_FIFO_MODE | ZX_CH_ENABLE + | ZX_SRC_BURST_LEN(maxburst - 1) + | ZX_SRC_BURST_WIDTH(dst_width) + | ZX_DST_BURST_WIDTH(dst_width); + break; + case DMA_DEV_TO_MEM: + c->dev_addr = cfg->src_addr; + src_width = zx_dma_burst_width(cfg->src_addr_width); + maxburst = cfg->src_maxburst; + maxburst = maxburst < ZX_MAX_BURST_LEN ? + maxburst : ZX_MAX_BURST_LEN; + c->ccfg = ZX_SRC_FIFO_MODE | ZX_CH_ENABLE + | ZX_SRC_BURST_LEN(maxburst - 1) + | ZX_SRC_BURST_WIDTH(src_width) + | ZX_DST_BURST_WIDTH(src_width); + break; + default: + return -EINVAL; + } + return 0; +} + +static struct dma_async_tx_descriptor *zx_dma_prep_memcpy( + struct dma_chan *chan, dma_addr_t dst, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct zx_dma_chan *c = to_zx_chan(chan); + struct zx_dma_desc_sw *ds; + size_t copy = 0; + int num = 0; + + if (!len) + return NULL; + + if (zx_pre_config(c, DMA_MEM_TO_MEM)) + return NULL; + + num = DIV_ROUND_UP(len, DMA_MAX_SIZE); + + ds = zx_alloc_desc_resource(num, chan); + if (!ds) + return NULL; + + ds->size = len; + num = 0; + + do { + copy = min_t(size_t, len, DMA_MAX_SIZE); + zx_dma_fill_desc(ds, dst, src, copy, num++, c->ccfg); + + src += copy; + dst += copy; + len -= copy; + } while (len); + + c->cyclic = 0; + ds->desc_hw[num - 1].lli = 0; /* end of link */ + ds->desc_hw[num - 1].ctr |= ZX_IRQ_ENABLE_ALL; + return vchan_tx_prep(&c->vc, &ds->vd, flags); +} + +static struct dma_async_tx_descriptor *zx_dma_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, unsigned int sglen, + enum dma_transfer_direction dir, unsigned long flags, void *context) +{ + struct zx_dma_chan *c = to_zx_chan(chan); + struct zx_dma_desc_sw *ds; + size_t len, avail, total = 0; + struct scatterlist *sg; + dma_addr_t addr, src = 0, dst = 0; + int num = sglen, i; + + if (!sgl) + return NULL; + + if (zx_pre_config(c, dir)) + return NULL; + + for_each_sg(sgl, sg, sglen, i) { + avail = sg_dma_len(sg); + if (avail > DMA_MAX_SIZE) + num += DIV_ROUND_UP(avail, DMA_MAX_SIZE) - 1; + } + + ds = zx_alloc_desc_resource(num, chan); + if (!ds) + return NULL; + + c->cyclic = 0; + num = 0; + for_each_sg(sgl, sg, sglen, i) { + addr = sg_dma_address(sg); + avail = sg_dma_len(sg); + total += avail; + + do { + len = min_t(size_t, avail, DMA_MAX_SIZE); + + if (dir == DMA_MEM_TO_DEV) { + src = addr; + dst = c->dev_addr; + } else if (dir == DMA_DEV_TO_MEM) { + src = c->dev_addr; + dst = addr; + } + + zx_dma_fill_desc(ds, dst, src, len, num++, c->ccfg); + + addr += len; + avail -= len; + } while (avail); + } + + ds->desc_hw[num - 1].lli = 0; /* end of link */ + ds->desc_hw[num - 1].ctr |= ZX_IRQ_ENABLE_ALL; + ds->size = total; + return vchan_tx_prep(&c->vc, &ds->vd, flags); +} + +static struct dma_async_tx_descriptor *zx_dma_prep_dma_cyclic( + struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len, + size_t period_len, enum dma_transfer_direction dir, + unsigned long flags) +{ + struct zx_dma_chan *c = to_zx_chan(chan); + struct zx_dma_desc_sw *ds; + dma_addr_t src = 0, dst = 0; + int num_periods = buf_len / period_len; + int buf = 0, num = 0; + + if (period_len > DMA_MAX_SIZE) { + dev_err(chan->device->dev, "maximum period size exceeded\n"); + return NULL; + } + + if (zx_pre_config(c, dir)) + return NULL; + + ds = zx_alloc_desc_resource(num_periods, chan); + if (!ds) + return NULL; + c->cyclic = 1; + + while (buf < buf_len) { + 
if (dir == DMA_MEM_TO_DEV) { + src = dma_addr; + dst = c->dev_addr; + } else if (dir == DMA_DEV_TO_MEM) { + src = c->dev_addr; + dst = dma_addr; + } + zx_dma_fill_desc(ds, dst, src, period_len, num++, + c->ccfg | ZX_IRQ_ENABLE_ALL); + dma_addr += period_len; + buf += period_len; + } + + ds->desc_hw[num - 1].lli = ds->desc_hw_lli; + ds->size = buf_len; + return vchan_tx_prep(&c->vc, &ds->vd, flags); +} + +static int zx_dma_config(struct dma_chan *chan, + struct dma_slave_config *cfg) +{ + struct zx_dma_chan *c = to_zx_chan(chan); + + if (!cfg) + return -EINVAL; + + memcpy(&c->slave_cfg, cfg, sizeof(*cfg)); + + return 0; +} + +static int zx_dma_terminate_all(struct dma_chan *chan) +{ + struct zx_dma_chan *c = to_zx_chan(chan); + struct zx_dma_dev *d = to_zx_dma(chan->device); + struct zx_dma_phy *p = c->phy; + unsigned long flags; + LIST_HEAD(head); + + dev_dbg(d->slave.dev, "vchan %p: terminate all\n", &c->vc); + + /* Prevent this channel being scheduled */ + spin_lock(&d->lock); + list_del_init(&c->node); + spin_unlock(&d->lock); + + /* Clear the tx descriptor lists */ + spin_lock_irqsave(&c->vc.lock, flags); + vchan_get_all_descriptors(&c->vc, &head); + if (p) { + /* vchan is assigned to a pchan - stop the channel */ + zx_dma_terminate_chan(p, d); + c->phy = NULL; + p->vchan = NULL; + p->ds_run = NULL; + p->ds_done = NULL; + } + spin_unlock_irqrestore(&c->vc.lock, flags); + vchan_dma_desc_free_list(&c->vc, &head); + + return 0; +} + +static int zx_dma_transfer_pause(struct dma_chan *chan) +{ + struct zx_dma_chan *c = to_zx_chan(chan); + u32 val = 0; + + val = readl_relaxed(c->phy->base + REG_ZX_CTRL); + val &= ~ZX_CH_ENABLE; + writel_relaxed(val, c->phy->base + REG_ZX_CTRL); + + return 0; +} + +static int zx_dma_transfer_resume(struct dma_chan *chan) +{ + struct zx_dma_chan *c = to_zx_chan(chan); + u32 val = 0; + + val = readl_relaxed(c->phy->base + REG_ZX_CTRL); + val |= ZX_CH_ENABLE; + writel_relaxed(val, c->phy->base + REG_ZX_CTRL); + + return 0; +} + +static void zx_dma_free_desc(struct virt_dma_desc *vd) +{ + struct zx_dma_desc_sw *ds = + container_of(vd, struct zx_dma_desc_sw, vd); + struct zx_dma_dev *d = to_zx_dma(vd->tx.chan->device); + + dma_pool_free(d->pool, ds->desc_hw, ds->desc_hw_lli); + kfree(ds); +} + +static const struct of_device_id zx6702_dma_dt_ids[] = { + { .compatible = "zte,zx296702-dma", }, + {} +}; +MODULE_DEVICE_TABLE(of, zx6702_dma_dt_ids); + +static struct dma_chan *zx_of_dma_simple_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct zx_dma_dev *d = ofdma->of_dma_data; + unsigned int request = dma_spec->args[0]; + struct dma_chan *chan; + struct zx_dma_chan *c; + + if (request >= d->dma_requests) + return NULL; + + chan = dma_get_any_slave_channel(&d->slave); + if (!chan) { + dev_err(d->slave.dev, "get channel fail in %s.\n", __func__); + return NULL; + } + c = to_zx_chan(chan); + c->id = request; + dev_info(d->slave.dev, "zx_dma: pchan %u: alloc vchan %p\n", + c->id, &c->vc); + return chan; +} + +static int zx_dma_probe(struct platform_device *op) +{ + struct zx_dma_dev *d; + struct resource *iores; + int i, ret = 0; + + iores = platform_get_resource(op, IORESOURCE_MEM, 0); + if (!iores) + return -EINVAL; + + d = devm_kzalloc(&op->dev, sizeof(*d), GFP_KERNEL); + if (!d) + return -ENOMEM; + + d->base = devm_ioremap_resource(&op->dev, iores); + if (IS_ERR(d->base)) + return PTR_ERR(d->base); + + of_property_read_u32((&op->dev)->of_node, + "dma-channels", &d->dma_channels); + of_property_read_u32((&op->dev)->of_node, + "dma-requests", 
&d->dma_requests);
+	if (!d->dma_requests || !d->dma_channels)
+		return -EINVAL;
+
+	d->clk = devm_clk_get(&op->dev, NULL);
+	if (IS_ERR(d->clk)) {
+		dev_err(&op->dev, "no dma clk\n");
+		return PTR_ERR(d->clk);
+	}
+
+	d->irq = platform_get_irq(op, 0);
+	ret = devm_request_irq(&op->dev, d->irq, zx_dma_int_handler,
+			       0, DRIVER_NAME, d);
+	if (ret)
+		return ret;
+
+	/* A DMA memory pool for LLIs, align on 32-byte boundary */
+	d->pool = dmam_pool_create(DRIVER_NAME, &op->dev,
+			LLI_BLOCK_SIZE, 32, 0);
+	if (!d->pool)
+		return -ENOMEM;
+
+	/* init phy channel */
+	d->phy = devm_kzalloc(&op->dev,
+		d->dma_channels * sizeof(struct zx_dma_phy), GFP_KERNEL);
+	if (!d->phy)
+		return -ENOMEM;
+
+	for (i = 0; i < d->dma_channels; i++) {
+		struct zx_dma_phy *p = &d->phy[i];
+
+		p->idx = i;
+		p->base = d->base + i * 0x40;
+	}
+
+	INIT_LIST_HEAD(&d->slave.channels);
+	dma_cap_set(DMA_SLAVE, d->slave.cap_mask);
+	dma_cap_set(DMA_MEMCPY, d->slave.cap_mask);
+	dma_cap_set(DMA_PRIVATE, d->slave.cap_mask);
+	d->slave.dev = &op->dev;
+	d->slave.device_free_chan_resources = zx_dma_free_chan_resources;
+	d->slave.device_tx_status = zx_dma_tx_status;
+	d->slave.device_prep_dma_memcpy = zx_dma_prep_memcpy;
+	d->slave.device_prep_slave_sg = zx_dma_prep_slave_sg;
+	d->slave.device_prep_dma_cyclic = zx_dma_prep_dma_cyclic;
+	d->slave.device_issue_pending = zx_dma_issue_pending;
+	d->slave.device_config = zx_dma_config;
+	d->slave.device_terminate_all = zx_dma_terminate_all;
+	d->slave.device_pause = zx_dma_transfer_pause;
+	d->slave.device_resume = zx_dma_transfer_resume;
+	d->slave.copy_align = DMA_ALIGN;
+	d->slave.src_addr_widths = ZX_DMA_BUSWIDTHS;
+	d->slave.dst_addr_widths = ZX_DMA_BUSWIDTHS;
+	d->slave.directions = BIT(DMA_MEM_TO_MEM) | BIT(DMA_MEM_TO_DEV)
+			| BIT(DMA_DEV_TO_MEM);
+	d->slave.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
+
+	/* init virtual channel */
+	d->chans = devm_kzalloc(&op->dev,
+		d->dma_requests * sizeof(struct zx_dma_chan), GFP_KERNEL);
+	if (!d->chans)
+		return -ENOMEM;
+
+	for (i = 0; i < d->dma_requests; i++) {
+		struct zx_dma_chan *c = &d->chans[i];
+
+		c->status = DMA_IN_PROGRESS;
+		INIT_LIST_HEAD(&c->node);
+		c->vc.desc_free = zx_dma_free_desc;
+		vchan_init(&c->vc, &d->slave);
+	}
+
+	/* Enable clock before accessing registers */
+	ret = clk_prepare_enable(d->clk);
+	if (ret < 0) {
+		dev_err(&op->dev, "clk_prepare_enable failed: %d\n", ret);
+		goto zx_dma_out;
+	}
+
+	zx_dma_init_state(d);
+
+	spin_lock_init(&d->lock);
+	INIT_LIST_HEAD(&d->chan_pending);
+	platform_set_drvdata(op, d);
+
+	ret = dma_async_device_register(&d->slave);
+	if (ret)
+		goto clk_dis;
+
+	ret = of_dma_controller_register((&op->dev)->of_node,
+					 zx_of_dma_simple_xlate, d);
+	if (ret)
+		goto of_dma_register_fail;
+
+	dev_info(&op->dev, "initialized\n");
+	return 0;
+
+of_dma_register_fail:
+	dma_async_device_unregister(&d->slave);
+clk_dis:
+	clk_disable_unprepare(d->clk);
+zx_dma_out:
+	return ret;
+}
+
+static int zx_dma_remove(struct platform_device *op)
+{
+	struct zx_dma_chan *c, *cn;
+	struct zx_dma_dev *d = platform_get_drvdata(op);
+
+	/* explicitly free the irq */
+	devm_free_irq(&op->dev, d->irq, d);
+
+	dma_async_device_unregister(&d->slave);
+	of_dma_controller_free((&op->dev)->of_node);
+
+	list_for_each_entry_safe(c, cn, &d->slave.channels,
+				 vc.chan.device_node) {
+		list_del(&c->vc.chan.device_node);
+	}
+	clk_disable_unprepare(d->clk);
+	dmam_pool_destroy(d->pool);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int zx_dma_suspend_dev(struct device *dev)
+{
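+	/* Refuse to suspend while any physical channel is still busy. */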
+	struct zx_dma_dev *d = dev_get_drvdata(dev);
+	u32 stat = 0;
+
+	stat = zx_dma_get_chan_stat(d);
+	if (stat) {
+		dev_warn(d->slave.dev,
+			 "chan %d is running, unable to suspend\n", stat);
+		return -1;
+	}
+	clk_disable_unprepare(d->clk);
+	return 0;
+}
+
+static int zx_dma_resume_dev(struct device *dev)
+{
+	struct zx_dma_dev *d = dev_get_drvdata(dev);
+	int ret = 0;
+
+	ret = clk_prepare_enable(d->clk);
+	if (ret < 0) {
+		dev_err(d->slave.dev, "clk_prepare_enable failed: %d\n", ret);
+		return ret;
+	}
+	zx_dma_init_state(d);
+	return 0;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(zx_dma_pmops, zx_dma_suspend_dev, zx_dma_resume_dev);
+
+static struct platform_driver zx_pdma_driver = {
+	.driver = {
+		.name = DRIVER_NAME,
+		.pm = &zx_dma_pmops,
+		.of_match_table = zx6702_dma_dt_ids,
+	},
+	.probe = zx_dma_probe,
+	.remove = zx_dma_remove,
+};
+
+module_platform_driver(zx_pdma_driver);
+
+MODULE_DESCRIPTION("ZTE ZX296702 DMA Driver");
+MODULE_AUTHOR("Jun Nie <jun.nie@linaro.org>");
+MODULE_LICENSE("GPL v2");
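For context, the zx296702 controller added above is consumed through the generic dmaengine slave API rather than any driver-private interface: a client looks up a channel via its device-tree "dmas"/"dma-names" properties (zx_of_dma_simple_xlate() maps the single specifier cell to a request line), programs a dma_slave_config, then prepares, submits and issues descriptors. The following is a minimal, illustrative client sketch under those assumptions; the channel name "tx", the FIFO address and the demo_* names are hypothetical and are not part of the patch above.

/* Illustrative dmaengine client sketch only -- not part of the zx296702 patch. */
#include <linux/completion.h>
#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/dmaengine.h>

static void demo_dma_done(void *param)
{
	/* Runs from the DMA completion path (virt-dma tasklet). */
	complete(param);
}

static int demo_start_tx(struct device *dev, dma_addr_t buf, size_t len)
{
	struct dma_slave_config cfg = {
		.direction	= DMA_MEM_TO_DEV,
		.dst_addr	= 0x09100040,	/* hypothetical device FIFO */
		.dst_addr_width	= DMA_SLAVE_BUSWIDTH_4_BYTES,
		.dst_maxburst	= 8,		/* within ZX_MAX_BURST_LEN */
	};
	struct dma_async_tx_descriptor *desc;
	DECLARE_COMPLETION_ONSTACK(done);
	struct dma_chan *chan;
	dma_cookie_t cookie;
	int ret = -EIO;

	/* "tx" must match an entry in the client's dma-names property. */
	chan = dma_request_slave_channel(dev, "tx");
	if (!chan)
		return -ENODEV;

	if (dmaengine_slave_config(chan, &cfg))
		goto out;

	desc = dmaengine_prep_slave_single(chan, buf, len, DMA_MEM_TO_DEV,
					   DMA_PREP_INTERRUPT);
	if (!desc)
		goto out;

	desc->callback = demo_dma_done;
	desc->callback_param = &done;

	cookie = dmaengine_submit(desc);
	if (dma_submit_error(cookie))
		goto out;

	dma_async_issue_pending(chan);	/* reaches zx_dma_issue_pending() */
	wait_for_completion(&done);
	ret = 0;
out:
	dma_release_channel(chan);
	return ret;
}

Note that in this driver zx_dma_config() only caches the dma_slave_config; it is zx_pre_config(), called when a transfer is prepared, that turns the cached widths and maxburst into the channel control word.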