/* * Applied Micro X-Gene SoC DMA engine Driver * * Copyright (c) 2015, Applied Micro Circuits Corporation * Authors: Rameshwar Prasad Sahu * Loc Ho * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . * * NOTE: PM support is currently not available. */ #include #include #include #include #include #include #include #include #include #include "dmaengine.h" /* X-Gene DMA ring csr registers and bit definations */ #define XGENE_DMA_RING_CONFIG 0x04 #define XGENE_DMA_RING_ENABLE BIT(31) #define XGENE_DMA_RING_ID 0x08 #define XGENE_DMA_RING_ID_SETUP(v) ((v) | BIT(31)) #define XGENE_DMA_RING_ID_BUF 0x0C #define XGENE_DMA_RING_ID_BUF_SETUP(v) (((v) << 9) | BIT(21)) #define XGENE_DMA_RING_THRESLD0_SET1 0x30 #define XGENE_DMA_RING_THRESLD0_SET1_VAL 0X64 #define XGENE_DMA_RING_THRESLD1_SET1 0x34 #define XGENE_DMA_RING_THRESLD1_SET1_VAL 0xC8 #define XGENE_DMA_RING_HYSTERESIS 0x68 #define XGENE_DMA_RING_HYSTERESIS_VAL 0xFFFFFFFF #define XGENE_DMA_RING_STATE 0x6C #define XGENE_DMA_RING_STATE_WR_BASE 0x70 #define XGENE_DMA_RING_NE_INT_MODE 0x017C #define XGENE_DMA_RING_NE_INT_MODE_SET(m, v) \ ((m) = ((m) & ~BIT(31 - (v))) | BIT(31 - (v))) #define XGENE_DMA_RING_NE_INT_MODE_RESET(m, v) \ ((m) &= (~BIT(31 - (v)))) #define XGENE_DMA_RING_CLKEN 0xC208 #define XGENE_DMA_RING_SRST 0xC200 #define XGENE_DMA_RING_MEM_RAM_SHUTDOWN 0xD070 #define XGENE_DMA_RING_BLK_MEM_RDY 0xD074 #define XGENE_DMA_RING_BLK_MEM_RDY_VAL 0xFFFFFFFF #define 
XGENE_DMA_RING_DESC_CNT(v) (((v) & 0x0001FFFE) >> 1) #define XGENE_DMA_RING_ID_GET(owner, num) (((owner) << 6) | (num)) #define XGENE_DMA_RING_DST_ID(v) ((1 << 10) | (v)) #define XGENE_DMA_RING_CMD_OFFSET 0x2C #define XGENE_DMA_RING_CMD_BASE_OFFSET(v) ((v) << 6) #define XGENE_DMA_RING_COHERENT_SET(m) \ (((u32 *)(m))[2] |= BIT(4)) #define XGENE_DMA_RING_ADDRL_SET(m, v) \ (((u32 *)(m))[2] |= (((v) >> 8) << 5)) #define XGENE_DMA_RING_ADDRH_SET(m, v) \ (((u32 *)(m))[3] |= ((v) >> 35)) #define XGENE_DMA_RING_ACCEPTLERR_SET(m) \ (((u32 *)(m))[3] |= BIT(19)) #define XGENE_DMA_RING_SIZE_SET(m, v) \ (((u32 *)(m))[3] |= ((v) << 23)) #define XGENE_DMA_RING_RECOMBBUF_SET(m) \ (((u32 *)(m))[3] |= BIT(27)) #define XGENE_DMA_RING_RECOMTIMEOUTL_SET(m) \ (((u32 *)(m))[3] |= (0x7 << 28)) #define XGENE_DMA_RING_RECOMTIMEOUTH_SET(m) \ (((u32 *)(m))[4] |= 0x3) #define XGENE_DMA_RING_SELTHRSH_SET(m) \ (((u32 *)(m))[4] |= BIT(3)) #define XGENE_DMA_RING_TYPE_SET(m, v) \ (((u32 *)(m))[4] |= ((v) << 19)) /* X-Gene DMA device csr registers and bit definitions */ #define XGENE_DMA_IPBRR 0x0 #define XGENE_DMA_DEV_ID_RD(v) ((v) & 0x00000FFF) #define XGENE_DMA_BUS_ID_RD(v) (((v) >> 12) & 3) #define XGENE_DMA_REV_NO_RD(v) (((v) >> 14) & 3) #define XGENE_DMA_GCR 0x10 #define XGENE_DMA_CH_SETUP(v) \ ((v) = ((v) & ~0x000FFFFF) | 0x000AAFFF) #define XGENE_DMA_ENABLE(v) ((v) |= BIT(31)) #define XGENE_DMA_DISABLE(v) ((v) &= ~BIT(31)) #define XGENE_DMA_RAID6_CONT 0x14 #define XGENE_DMA_RAID6_MULTI_CTRL(v) ((v) << 24) #define XGENE_DMA_INT 0x70 #define XGENE_DMA_INT_MASK 0x74 #define XGENE_DMA_INT_ALL_MASK 0xFFFFFFFF #define XGENE_DMA_INT_ALL_UNMASK 0x0 #define XGENE_DMA_INT_MASK_SHIFT 0x14 #define XGENE_DMA_RING_INT0_MASK 0x90A0 #define XGENE_DMA_RING_INT1_MASK 0x90A8 #define XGENE_DMA_RING_INT2_MASK 0x90B0 #define XGENE_DMA_RING_INT3_MASK 0x90B8 #define XGENE_DMA_RING_INT4_MASK 0x90C0 #define XGENE_DMA_CFG_RING_WQ_ASSOC 0x90E0 #define XGENE_DMA_ASSOC_RING_MNGR1 0xFFFFFFFF #define 
XGENE_DMA_MEM_RAM_SHUTDOWN 0xD070 #define XGENE_DMA_BLK_MEM_RDY 0xD074 #define XGENE_DMA_BLK_MEM_RDY_VAL 0xFFFFFFFF /* X-Gene SoC EFUSE csr register and bit defination */ #define XGENE_SOC_JTAG1_SHADOW 0x18 #define XGENE_DMA_PQ_DISABLE_MASK BIT(13) /* X-Gene DMA Descriptor format */ #define XGENE_DMA_DESC_NV_BIT BIT_ULL(50) #define XGENE_DMA_DESC_IN_BIT BIT_ULL(55) #define XGENE_DMA_DESC_C_BIT BIT_ULL(63) #define XGENE_DMA_DESC_DR_BIT BIT_ULL(61) #define XGENE_DMA_DESC_ELERR_POS 46 #define XGENE_DMA_DESC_RTYPE_POS 56 #define XGENE_DMA_DESC_LERR_POS 60 #define XGENE_DMA_DESC_FLYBY_POS 4 #define XGENE_DMA_DESC_BUFLEN_POS 48 #define XGENE_DMA_DESC_HOENQ_NUM_POS 48 #define XGENE_DMA_DESC_NV_SET(m) \ (((u64 *)(m))[0] |= XGENE_DMA_DESC_NV_BIT) #define XGENE_DMA_DESC_IN_SET(m) \ (((u64 *)(m))[0] |= XGENE_DMA_DESC_IN_BIT) #define XGENE_DMA_DESC_RTYPE_SET(m, v) \ (((u64 *)(m))[0] |= ((u64)(v) << XGENE_DMA_DESC_RTYPE_POS)) #define XGENE_DMA_DESC_BUFADDR_SET(m, v) \ (((u64 *)(m))[0] |= (v)) #define XGENE_DMA_DESC_BUFLEN_SET(m, v) \ (((u64 *)(m))[0] |= ((u64)(v) << XGENE_DMA_DESC_BUFLEN_POS)) #define XGENE_DMA_DESC_C_SET(m) \ (((u64 *)(m))[1] |= XGENE_DMA_DESC_C_BIT) #define XGENE_DMA_DESC_FLYBY_SET(m, v) \ (((u64 *)(m))[2] |= ((v) << XGENE_DMA_DESC_FLYBY_POS)) #define XGENE_DMA_DESC_MULTI_SET(m, v, i) \ (((u64 *)(m))[2] |= ((u64)(v) << (((i) + 1) * 8))) #define XGENE_DMA_DESC_DR_SET(m) \ (((u64 *)(m))[2] |= XGENE_DMA_DESC_DR_BIT) #define XGENE_DMA_DESC_DST_ADDR_SET(m, v) \ (((u64 *)(m))[3] |= (v)) #define XGENE_DMA_DESC_H0ENQ_NUM_SET(m, v) \ (((u64 *)(m))[3] |= ((u64)(v) << XGENE_DMA_DESC_HOENQ_NUM_POS)) #define XGENE_DMA_DESC_ELERR_RD(m) \ (((m) >> XGENE_DMA_DESC_ELERR_POS) & 0x3) #define XGENE_DMA_DESC_LERR_RD(m) \ (((m) >> XGENE_DMA_DESC_LERR_POS) & 0x7) #define XGENE_DMA_DESC_STATUS(elerr, lerr) \ (((elerr) << 4) | (lerr)) /* X-Gene DMA descriptor empty s/w signature */ #define XGENE_DMA_DESC_EMPTY_INDEX 0 #define XGENE_DMA_DESC_EMPTY_SIGNATURE ~0ULL #define 
XGENE_DMA_DESC_SET_EMPTY(m) \ (((u64 *)(m))[XGENE_DMA_DESC_EMPTY_INDEX] = \ XGENE_DMA_DESC_EMPTY_SIGNATURE) #define XGENE_DMA_DESC_IS_EMPTY(m) \ (((u64 *)(m))[XGENE_DMA_DESC_EMPTY_INDEX] == \ XGENE_DMA_DESC_EMPTY_SIGNATURE) /* X-Gene DMA configurable parameters defines */ #define XGENE_DMA_RING_NUM 512 #define XGENE_DMA_BUFNUM 0x0 #define XGENE_DMA_CPU_BUFNUM 0x18 #define XGENE_DMA_RING_OWNER_DMA 0x03 #define XGENE_DMA_RING_OWNER_CPU 0x0F #define XGENE_DMA_RING_TYPE_REGULAR 0x01 #define XGENE_DMA_RING_WQ_DESC_SIZE 32 /* 32 Bytes */ #define XGENE_DMA_RING_NUM_CONFIG 5 #define XGENE_DMA_MAX_CHANNEL 4 #define XGENE_DMA_XOR_CHANNEL 0 #define XGENE_DMA_PQ_CHANNEL 1 #define XGENE_DMA_MAX_BYTE_CNT 0x4000 /* 16 KB */ #define XGENE_DMA_MAX_64B_DESC_BYTE_CNT 0x14000 /* 80 KB */ #define XGENE_DMA_XOR_ALIGNMENT 6 /* 64 Bytes */ #define XGENE_DMA_MAX_XOR_SRC 5 #define XGENE_DMA_16K_BUFFER_LEN_CODE 0x0 #define XGENE_DMA_INVALID_LEN_CODE 0x7800 /* X-Gene DMA descriptor error codes */ #define ERR_DESC_AXI 0x01 #define ERR_BAD_DESC 0x02 #define ERR_READ_DATA_AXI 0x03 #define ERR_WRITE_DATA_AXI 0x04 #define ERR_FBP_TIMEOUT 0x05 #define ERR_ECC 0x06 #define ERR_DIFF_SIZE 0x08 #define ERR_SCT_GAT_LEN 0x09 #define ERR_CRC_ERR 0x11 #define ERR_CHKSUM 0x12 #define ERR_DIF 0x13 /* X-Gene DMA error interrupt codes */ #define ERR_DIF_SIZE_INT 0x0 #define ERR_GS_ERR_INT 0x1 #define ERR_FPB_TIMEO_INT 0x2 #define ERR_WFIFO_OVF_INT 0x3 #define ERR_RFIFO_OVF_INT 0x4 #define ERR_WR_TIMEO_INT 0x5 #define ERR_RD_TIMEO_INT 0x6 #define ERR_WR_ERR_INT 0x7 #define ERR_RD_ERR_INT 0x8 #define ERR_BAD_DESC_INT 0x9 #define ERR_DESC_DST_INT 0xA #define ERR_DESC_SRC_INT 0xB /* X-Gene DMA flyby operation code */ #define FLYBY_2SRC_XOR 0x8 #define FLYBY_3SRC_XOR 0x9 #define FLYBY_4SRC_XOR 0xA #define FLYBY_5SRC_XOR 0xB /* X-Gene DMA SW descriptor flags */ #define XGENE_DMA_FLAG_64B_DESC BIT(0) /* Define to dump X-Gene DMA descriptor */ #define XGENE_DMA_DESC_DUMP(desc, m) \ print_hex_dump(KERN_ERR, (m), \ 
DUMP_PREFIX_ADDRESS, 16, 8, (desc), 32, 0) #define to_dma_desc_sw(tx) \ container_of(tx, struct xgene_dma_desc_sw, tx) #define to_dma_chan(dchan) \ container_of(dchan, struct xgene_dma_chan, dma_chan) #define chan_dbg(chan, fmt, arg...) \ dev_dbg(chan->dev, "%s: " fmt, chan->name, ##arg) #define chan_err(chan, fmt, arg...) \ dev_err(chan->dev, "%s: " fmt, chan->name, ##arg) struct xgene_dma_desc_hw { u64 m0; u64 m1; u64 m2; u64 m3; }; enum xgene_dma_ring_cfgsize { XGENE_DMA_RING_CFG_SIZE_512B, XGENE_DMA_RING_CFG_SIZE_2KB, XGENE_DMA_RING_CFG_SIZE_16KB, XGENE_DMA_RING_CFG_SIZE_64KB, XGENE_DMA_RING_CFG_SIZE_512KB, XGENE_DMA_RING_CFG_SIZE_INVALID }; struct xgene_dma_ring { struct xgene_dma *pdma; u8 buf_num; u16 id; u16 num; u16 head; u16 owner; u16 slots; u16 dst_ring_num; u32 size; void __iomem *cmd; void __iomem *cmd_base; dma_addr_t desc_paddr; u32 state[XGENE_DMA_RING_NUM_CONFIG]; enum xgene_dma_ring_cfgsize cfgsize; union { void *desc_vaddr; struct xgene_dma_desc_hw *desc_hw; }; }; struct xgene_dma_desc_sw { struct xgene_dma_desc_hw desc1; struct xgene_dma_desc_hw desc2; u32 flags; struct list_head node; struct list_head tx_list; struct dma_async_tx_descriptor tx; }; /** * struct xgene_dma_chan - internal representation of an X-Gene DMA channel * @dma_chan: dmaengine channel object member * @pdma: X-Gene DMA device structure reference * @dev: struct device reference for dma mapping api * @id: raw id of this channel * @rx_irq: channel IRQ * @name: name of X-Gene DMA channel * @lock: serializes enqueue/dequeue operations to the descriptor pool * @pending: number of transaction request pushed to DMA controller for * execution, but still waiting for completion, * @max_outstanding: max number of outstanding request we can push to channel * @ld_pending: descriptors which are queued to run, but have not yet been * submitted to the hardware for execution * @ld_running: descriptors which are currently being executing by the hardware * @ld_completed: descriptors which have 
finished execution by the hardware. * These descriptors have already had their cleanup actions run. They * are waiting for the ACK bit to be set by the async tx API. * @desc_pool: descriptor pool for DMA operations * @tasklet: bottom half where all completed descriptors cleans * @tx_ring: transmit ring descriptor that we use to prepare actual * descriptors for further executions * @rx_ring: receive ring descriptor that we use to get completed DMA * descriptors during cleanup time */ struct xgene_dma_chan { struct dma_chan dma_chan; struct xgene_dma *pdma; struct device *dev; int id; int rx_irq; char name[10]; spinlock_t lock; int pending; int max_outstanding; struct list_head ld_pending; struct list_head ld_running; struct list_head ld_completed; struct dma_pool *desc_pool; struct tasklet_struct tasklet; struct xgene_dma_ring tx_ring; struct xgene_dma_ring rx_ring; }; /** * struct xgene_dma - internal representation of an X-Gene DMA device * @err_irq: DMA error irq number * @ring_num: start id number for DMA ring * @csr_dma: base for DMA register access * @csr_ring: base for DMA ring register access * @csr_ring_cmd: base for DMA ring command register access * @csr_efuse: base for efuse register access * @dma_dev: embedded struct dma_device * @chan: reference to X-Gene DMA channels */ struct xgene_dma { struct device *dev; struct clk *clk; int err_irq; int ring_num; void __iomem *csr_dma; void __iomem *csr_ring; void __iomem *csr_ring_cmd; void __iomem *csr_efuse; struct dma_device dma_dev[XGENE_DMA_MAX_CHANNEL]; struct xgene_dma_chan chan[XGENE_DMA_MAX_CHANNEL]; }; static const char * const xgene_dma_desc_err[] = { [ERR_DESC_AXI] = "AXI error when reading src/dst link list", [ERR_BAD_DESC] = "ERR or El_ERR fields not set to zero in desc", [ERR_READ_DATA_AXI] = "AXI error when reading data", [ERR_WRITE_DATA_AXI] = "AXI error when writing data", [ERR_FBP_TIMEOUT] = "Timeout on bufpool fetch", [ERR_ECC] = "ECC double bit error", [ERR_DIFF_SIZE] = "Bufpool too small to 
hold all the DIF result", [ERR_SCT_GAT_LEN] = "Gather and scatter data length not same", [ERR_CRC_ERR] = "CRC error", [ERR_CHKSUM] = "Checksum error", [ERR_DIF] = "DIF error", }; static const char * const xgene_dma_err[] = { [ERR_DIF_SIZE_INT] = "DIF size error", [ERR_GS_ERR_INT] = "Gather scatter not same size error", [ERR_FPB_TIMEO_INT] = "Free pool time out error", [ERR_WFIFO_OVF_INT] = "Write FIFO over flow error", [ERR_RFIFO_OVF_INT] = "Read FIFO over flow error", [ERR_WR_TIMEO_INT] = "Write time out error", [ERR_RD_TIMEO_INT] = "Read time out error", [ERR_WR_ERR_INT] = "HBF bus write error", [ERR_RD_ERR_INT] = "HBF bus read error", [ERR_BAD_DESC_INT] = "Ring descriptor HE0 not set error", [ERR_DESC_DST_INT] = "HFB reading dst link address error", [ERR_DESC_SRC_INT] = "HFB reading src link address error", }; static bool is_pq_enabled(struct xgene_dma *pdma) { u32 val; val = ioread32(pdma->csr_efuse + XGENE_SOC_JTAG1_SHADOW); return !(val & XGENE_DMA_PQ_DISABLE_MASK); } static void xgene_dma_cpu_to_le64(u64 *desc, int count) { int i; for (i = 0; i < count; i++) desc[i] = cpu_to_le64(desc[i]); } static u16 xgene_dma_encode_len(u32 len) { return (len < XGENE_DMA_MAX_BYTE_CNT) ? len : XGENE_DMA_16K_BUFFER_LEN_CODE; } static u8 xgene_dma_encode_xor_flyby(u32 src_cnt) { static u8 flyby_type[] = { FLYBY_2SRC_XOR, /* Dummy */ FLYBY_2SRC_XOR, /* Dummy */ FLYBY_2SRC_XOR, FLYBY_3SRC_XOR, FLYBY_4SRC_XOR, FLYBY_5SRC_XOR }; return flyby_type[src_cnt]; } static u32 xgene_dma_ring_desc_cnt(struct xgene_dma_ring *ring) { u32 __iomem *cmd_base = ring->cmd_base; u32 ring_state = ioread32(&cmd_base[1]); return XGENE_DMA_RING_DESC_CNT(ring_state); } static void xgene_dma_set_src_buffer(void *ext8, size_t *len, dma_addr_t *paddr) { size_t nbytes = (*len < XGENE_DMA_MAX_BYTE_CNT) ? 
*len : XGENE_DMA_MAX_BYTE_CNT; XGENE_DMA_DESC_BUFADDR_SET(ext8, *paddr); XGENE_DMA_DESC_BUFLEN_SET(ext8, xgene_dma_encode_len(nbytes)); *len -= nbytes; *paddr += nbytes; } static void xgene_dma_invalidate_buffer(void *ext8) { XGENE_DMA_DESC_BUFLEN_SET(ext8, XGENE_DMA_INVALID_LEN_CODE); } static void *xgene_dma_lookup_ext8(u64 *desc, int idx) { return (idx % 2) ? (desc + idx - 1) : (desc + idx + 1); } static void xgene_dma_init_desc(void *desc, u16 dst_ring_num) { XGENE_DMA_DESC_C_SET(desc); /* Coherent IO */ XGENE_DMA_DESC_IN_SET(desc); XGENE_DMA_DESC_H0ENQ_NUM_SET(desc, dst_ring_num); XGENE_DMA_DESC_RTYPE_SET(desc, XGENE_DMA_RING_OWNER_DMA); } static void xgene_dma_prep_cpy_desc(struct xgene_dma_chan *chan, struct xgene_dma_desc_sw *desc_sw, dma_addr_t dst, dma_addr_t src, size_t len) { void *desc1, *desc2; int i; /* Get 1st descriptor */ desc1 = &desc_sw->desc1; xgene_dma_init_desc(desc1, chan->tx_ring.dst_ring_num); /* Set destination address */ XGENE_DMA_DESC_DR_SET(desc1); XGENE_DMA_DESC_DST_ADDR_SET(desc1, dst); /* Set 1st source address */ xgene_dma_set_src_buffer(desc1 + 8, &len, &src); if (len <= 0) { desc2 = NULL; goto skip_additional_src; } /* * We need to split this source buffer, * and need to use 2nd descriptor */ desc2 = &desc_sw->desc2; XGENE_DMA_DESC_NV_SET(desc1); /* Set 2nd to 5th source address */ for (i = 0; i < 4 && len; i++) xgene_dma_set_src_buffer(xgene_dma_lookup_ext8(desc2, i), &len, &src); /* Invalidate unused source address field */ for (; i < 4; i++) xgene_dma_invalidate_buffer(xgene_dma_lookup_ext8(desc2, i)); /* Updated flag that we have prepared 64B descriptor */ desc_sw->flags |= XGENE_DMA_FLAG_64B_DESC; skip_additional_src: /* Hardware stores descriptor in little endian format */ xgene_dma_cpu_to_le64(desc1, 4); if (desc2) xgene_dma_cpu_to_le64(desc2, 4); } static void xgene_dma_prep_xor_desc(struct xgene_dma_chan *chan, struct xgene_dma_desc_sw *desc_sw, dma_addr_t *dst, dma_addr_t *src, u32 src_cnt, size_t *nbytes, const u8 
*scf) { void *desc1, *desc2; size_t len = *nbytes; int i; desc1 = &desc_sw->desc1; desc2 = &desc_sw->desc2; /* Initialize DMA descriptor */ xgene_dma_init_desc(desc1, chan->tx_ring.dst_ring_num); /* Set destination address */ XGENE_DMA_DESC_DR_SET(desc1); XGENE_DMA_DESC_DST_ADDR_SET(desc1, *dst); /* We have multiple source addresses, so need to set NV bit*/ XGENE_DMA_DESC_NV_SET(desc1); /* Set flyby opcode */ XGENE_DMA_DESC_FLYBY_SET(desc1, xgene_dma_encode_xor_flyby(src_cnt)); /* Set 1st to 5th source addresses */ for (i = 0; i < src_cnt; i++) { len = *nbytes; xgene_dma_set_src_buffer((i == 0) ? (desc1 + 8) : xgene_dma_lookup_ext8(desc2, i - 1), &len, &src[i]); XGENE_DMA_DESC_MULTI_SET(desc1, scf[i], i); } /* Hardware stores descriptor in little endian format */ xgene_dma_cpu_to_le64(desc1, 4); xgene_dma_cpu_to_le64(desc2, 4); /* Update meta data */ *nbytes = len; *dst += XGENE_DMA_MAX_BYTE_CNT; /* We need always 64B descriptor to perform xor or pq operations */ desc_sw->flags |= XGENE_DMA_FLAG_64B_DESC; } static dma_cookie_t xgene_dma_tx_submit(struct dma_async_tx_descriptor *tx) { struct xgene_dma_desc_sw *desc; struct xgene_dma_chan *chan; dma_cookie_t cookie; if (unlikely(!tx)) return -EINVAL; chan = to_dma_chan(tx->chan); desc = to_dma_desc_sw(tx); spin_lock_bh(&chan->lock); cookie = dma_cookie_assign(tx); /* Add this transaction list onto the tail of the pending queue */ list_splice_tail_init(&desc->tx_list, &chan->ld_pending); spin_unlock_bh(&chan->lock); return cookie; } static void xgene_dma_clean_descriptor(struct xgene_dma_chan *chan, struct xgene_dma_desc_sw *desc) { list_del(&desc->node); chan_dbg(chan, "LD %p free\n", desc); dma_pool_free(chan->desc_pool, desc, desc->tx.phys); } static struct xgene_dma_desc_sw *xgene_dma_alloc_descriptor( struct xgene_dma_chan *chan) { struct xgene_dma_desc_sw *desc; dma_addr_t phys; desc = dma_pool_alloc(chan->desc_pool, GFP_NOWAIT, &phys); if (!desc) { chan_err(chan, "Failed to allocate LDs\n"); return NULL; } 
memset(desc, 0, sizeof(*desc)); INIT_LIST_HEAD(&desc->tx_list); desc->tx.phys = phys; desc->tx.tx_submit = xgene_dma_tx_submit; dma_async_tx_descriptor_init(&desc->tx, &chan->dma_chan); chan_dbg(chan, "LD %p allocated\n", desc); return desc; } /** * xgene_dma_clean_completed_descriptor - free all descriptors which * has been completed and acked * @chan: X-Gene DMA channel * * This function is used on all completed and acked descriptors. */ static void xgene_dma_clean_completed_descriptor(struct xgene_dma_chan *chan) { struct xgene_dma_desc_sw *desc, *_desc; /* Run the callback for each descriptor, in order */ list_for_each_entry_safe(desc, _desc, &chan->ld_completed, node) { if (async_tx_test_ack(&desc->tx)) xgene_dma_clean_descriptor(chan, desc); } } /** * xgene_dma_run_tx_complete_actions - cleanup a single link descriptor * @chan: X-Gene DMA channel * @desc: descriptor to cleanup and free * * This function is used on a descriptor which has been executed by the DMA * controller. It will run any callbacks, submit any dependencies. */ static void xgene_dma_run_tx_complete_actions(struct xgene_dma_chan *chan, struct xgene_dma_desc_sw *desc) { struct dma_async_tx_descriptor *tx = &desc->tx; /* * If this is not the last transaction in the group, * then no need to complete cookie and run any callback as * this is not the tx_descriptor which had been sent to caller * of this DMA request */ if (tx->cookie == 0) return; dma_cookie_complete(tx); /* Run the link descriptor callback function */ if (tx->callback) tx->callback(tx->callback_param); dma_descriptor_unmap(tx); /* Run any dependencies */ dma_run_dependencies(tx); } /** * xgene_dma_clean_running_descriptor - move the completed descriptor from * ld_running to ld_completed * @chan: X-Gene DMA channel * @desc: the descriptor which is completed * * Free the descriptor directly if acked by async_tx api, * else move it to queue ld_completed. 
*/ static void xgene_dma_clean_running_descriptor(struct xgene_dma_chan *chan, struct xgene_dma_desc_sw *desc) { /* Remove from the list of running transactions */ list_del(&desc->node); /* * the client is allowed to attach dependent operations * until 'ack' is set */ if (!async_tx_test_ack(&desc->tx)) { /* * Move this descriptor to the list of descriptors which is * completed, but still awaiting the 'ack' bit to be set. */ list_add_tail(&desc->node, &chan->ld_completed); return; } chan_dbg(chan, "LD %p free\n", desc); dma_pool_free(chan->desc_pool, desc, desc->tx.phys); } static int xgene_chan_xfer_request(struct xgene_dma_ring *ring, struct xgene_dma_desc_sw *desc_sw) { struct xgene_dma_desc_hw *desc_hw; /* Check if can push more descriptor to hw for execution */ if (xgene_dma_ring_desc_cnt(ring) > (ring->slots - 2)) return -EBUSY; /* Get hw descriptor from DMA tx ring */ desc_hw = &ring->desc_hw[ring->head]; /* * Increment the head count to point next * descriptor for next time */ if (++ring->head == ring->slots) ring->head = 0; /* Copy prepared sw descriptor data to hw descriptor */ memcpy(desc_hw, &desc_sw->desc1, sizeof(*desc_hw)); /* * Check if we have prepared 64B descriptor, * in this case we need one more hw descriptor */ if (desc_sw->flags & XGENE_DMA_FLAG_64B_DESC) { desc_hw = &ring->desc_hw[ring->head]; if (++ring->head == ring->slots) ring->head = 0; memcpy(desc_hw, &desc_sw->desc2, sizeof(*desc_hw)); } /* Notify the hw that we have descriptor ready for execution */ iowrite32((desc_sw->flags & XGENE_DMA_FLAG_64B_DESC) ? 
2 : 1, ring->cmd); return 0; } /** * xgene_chan_xfer_ld_pending - push any pending transactions to hw * @chan : X-Gene DMA channel * * LOCKING: must hold chan->desc_lock */ static void xgene_chan_xfer_ld_pending(struct xgene_dma_chan *chan) { struct xgene_dma_desc_sw *desc_sw, *_desc_sw; int ret; /* * If the list of pending descriptors is empty, then we * don't need to do any work at all */ if (list_empty(&chan->ld_pending)) { chan_dbg(chan, "No pending LDs\n"); return; } /* * Move elements from the queue of pending transactions onto the list * of running transactions and push it to hw for further executions */ list_for_each_entry_safe(desc_sw, _desc_sw, &chan->ld_pending, node) { /* * Check if have pushed max number of transactions to hw * as capable, so let's stop here and will push remaining * elements from pening ld queue after completing some * descriptors that we have already pushed */ if (chan->pending >= chan->max_outstanding) return; ret = xgene_chan_xfer_request(&chan->tx_ring, desc_sw); if (ret) return; /* * Delete this element from ld pending queue and append it to * ld running queue */ list_move_tail(&desc_sw->node, &chan->ld_running); /* Increment the pending transaction count */ chan->pending++; } } /** * xgene_dma_cleanup_descriptors - cleanup link descriptors which are completed * and move them to ld_completed to free until flag 'ack' is set * @chan: X-Gene DMA channel * * This function is used on descriptors which have been executed by the DMA * controller. It will run any callbacks, submit any dependencies, then * free these descriptors if flag 'ack' is set. 
*/ static void xgene_dma_cleanup_descriptors(struct xgene_dma_chan *chan) { struct xgene_dma_ring *ring = &chan->rx_ring; struct xgene_dma_desc_sw *desc_sw, *_desc_sw; struct xgene_dma_desc_hw *desc_hw; u8 status; /* Clean already completed and acked descriptors */ xgene_dma_clean_completed_descriptor(chan); /* Run the callback for each descriptor, in order */ list_for_each_entry_safe(desc_sw, _desc_sw, &chan->ld_running, node) { /* Get subsequent hw descriptor from DMA rx ring */ desc_hw = &ring->desc_hw[ring->head]; /* Check if this descriptor has been completed */ if (unlikely(XGENE_DMA_DESC_IS_EMPTY(desc_hw))) break; if (++ring->head == ring->slots) ring->head = 0; /* Check if we have any error with DMA transactions */ status = XGENE_DMA_DESC_STATUS( XGENE_DMA_DESC_ELERR_RD(le64_to_cpu( desc_hw->m0)), XGENE_DMA_DESC_LERR_RD(le64_to_cpu( desc_hw->m0))); if (status) { /* Print the DMA error type */ chan_err(chan, "%s\n", xgene_dma_desc_err[status]); /* * We have DMA transactions error here. Dump DMA Tx * and Rx descriptors for this request */ XGENE_DMA_DESC_DUMP(&desc_sw->desc1, "X-Gene DMA TX DESC1: "); if (desc_sw->flags & XGENE_DMA_FLAG_64B_DESC) XGENE_DMA_DESC_DUMP(&desc_sw->desc2, "X-Gene DMA TX DESC2: "); XGENE_DMA_DESC_DUMP(desc_hw, "X-Gene DMA RX ERR DESC: "); } /* Notify the hw about this completed descriptor */ iowrite32(-1, ring->cmd); /* Mark this hw descriptor as processed */ XGENE_DMA_DESC_SET_EMPTY(desc_hw); xgene_dma_run_tx_complete_actions(chan, desc_sw); xgene_dma_clean_running_descriptor(chan, desc_sw); /* * Decrement the pending transaction count * as we have processed one */ chan->pending--; } /* * Start any pending transactions automatically * In the ideal case, we keep the DMA controller busy while we go * ahead and free the descriptors below. */ xgene_chan_xfer_ld_pending(chan); } static int xgene_dma_alloc_chan_resources(struct dma_chan *dchan) { struct xgene_dma_chan *chan = to_dma_chan(dchan); /* Has this channel already been allocated? 
*/ if (chan->desc_pool) return 1; chan->desc_pool = dma_pool_create(chan->name, chan->dev, sizeof(struct xgene_dma_desc_sw), 0, 0); if (!chan->desc_pool) { chan_err(chan, "Failed to allocate descriptor pool\n"); return -ENOMEM; } chan_dbg(chan, "Allocate descripto pool\n"); return 1; } /** * xgene_dma_free_desc_list - Free all descriptors in a queue * @chan: X-Gene DMA channel * @list: the list to free * * LOCKING: must hold chan->desc_lock */ static void xgene_dma_free_desc_list(struct xgene_dma_chan *chan, struct list_head *list) { struct xgene_dma_desc_sw *desc, *_desc; list_for_each_entry_safe(desc, _desc, list, node) xgene_dma_clean_descriptor(chan, desc); } static void xgene_dma_free_tx_desc_list(struct xgene_dma_chan *chan, struct list_head *list) { struct xgene_dma_desc_sw *desc, *_desc; list_for_each_entry_safe(desc, _desc, list, node) xgene_dma_clean_descriptor(chan, desc); } static void xgene_dma_free_chan_resources(struct dma_chan *dchan) { struct xgene_dma_chan *chan = to_dma_chan(dchan); chan_dbg(chan, "Free all resources\n"); if (!chan->desc_pool) return; spin_lock_bh(&chan->lock); /* Process all running descriptor */ xgene_dma_cleanup_descriptors(chan); /* Clean all link descriptor queues */ xgene_dma_free_desc_list(chan, &chan->ld_pending); xgene_dma_free_desc_list(chan, &chan->ld_running); xgene_dma_free_desc_list(chan, &chan->ld_completed); spin_unlock_bh(&chan->lock); /* Delete this channel DMA pool */ dma_pool_destroy(chan->desc_pool); chan->desc_pool = NULL; } static struct dma_async_tx_descriptor *xgene_dma_prep_memcpy( struct dma_chan *dchan, dma_addr_t dst, dma_addr_t src, size_t len, unsigned long flags) { struct xgene_dma_desc_sw *first = NULL, *new; struct xgene_dma_chan *chan; size_t copy; if (unlikely(!dchan || !len)) return NULL; chan = to_dma_chan(dchan); do { /* Allocate the link descriptor from DMA pool */ new = xgene_dma_alloc_descriptor(chan); if (!new) goto fail; /* Create the largest transaction possible */ copy = min_t(size_t, 
len, XGENE_DMA_MAX_64B_DESC_BYTE_CNT); /* Prepare DMA descriptor */ xgene_dma_prep_cpy_desc(chan, new, dst, src, copy); if (!first) first = new; new->tx.cookie = 0; async_tx_ack(&new->tx); /* Update metadata */ len -= copy; dst += copy; src += copy; /* Insert the link descriptor to the LD ring */ list_add_tail(&new->node, &first->tx_list); } while (len); new->tx.flags = flags; /* client is in control of this ack */ new->tx.cookie = -EBUSY; list_splice(&first->tx_list, &new->tx_list); return &new->tx; fail: if (!first) return NULL; xgene_dma_free_tx_desc_list(chan, &first->tx_list); return NULL; } static struct dma_async_tx_descriptor *xgene_dma_prep_sg( struct dma_chan *dchan, struct scatterlist *dst_sg, u32 dst_nents, struct scatterlist *src_sg, u32 src_nents, unsigned long flags) { struct xgene_dma_desc_sw *first = NULL, *new = NULL; struct xgene_dma_chan *chan; size_t dst_avail, src_avail; dma_addr_t dst, src; size_t len; if (unlikely(!dchan)) return NULL; if (unlikely(!dst_nents || !src_nents)) return NULL; if (unlikely(!dst_sg || !src_sg)) return NULL; chan = to_dma_chan(dchan); /* Get prepared for the loop */ dst_avail = sg_dma_len(dst_sg); src_avail = sg_dma_len(src_sg); dst_nents--; src_nents--; /* Run until we are out of scatterlist entries */ while (true) { /* Create the largest transaction possible */ len = min_t(size_t, src_avail, dst_avail); len = min_t(size_t, len, XGENE_DMA_MAX_64B_DESC_BYTE_C
/*
 * Intel MIC Platform Software Stack (MPSS)
 *
 * Copyright(c) 2014 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * The full GNU General Public License is included in this distribution in
 * the file called "COPYING".
 *
 * Intel MIC X100 DMA Driver.
 *
 * Adapted from IOAT dma driver.
 */
#include <linux/module.h>
#include <linux/io.h>
#include <linux/seq_file.h>

#include "mic_x100_dma.h"

/*
 * Maximum transfer sizes (CARD and HOST variants):
 * 1 MB less MIC_DMA_ALIGN_BYTES, and half of 1 MB (512 KB).
 */
#define MIC_DMA_MAX_XFER_SIZE_CARD	((1024 * 1024) - MIC_DMA_ALIGN_BYTES)
#define MIC_DMA_MAX_XFER_SIZE_HOST	((1024 * 1024) >> 1)

/* Bit position of the descriptor type code inside qw1 */
#define MIC_DMA_DESC_TYPE_SHIFT		60
/* Bit position of the length field inside qw0 of a memcpy descriptor */
#define MIC_DMA_MEMCPY_LEN_SHIFT	46
/* Bit inside qw1 of a status descriptor that requests an interrupt */
#define MIC_DMA_STAT_INTR_SHIFT		59

/* High-water mark for pushing DMA descriptors */
static int mic_dma_pending_level = 4;

/*
 * Descriptor format types written into the type field of a descriptor.
 * A status descriptor is used to write a 64 bit value to a memory location.
 */
enum mic_dma_desc_format_type {
	MIC_DMA_MEMCPY = 1,
	MIC_DMA_STATUS = 2,
};

/* Advance a ring index by one slot, wrapping modulo MIC_DMA_DESC_RX_SIZE. */
static inline u32 mic_dma_hw_ring_inc(u32 val)
{
	u32 next = val + 1;

	return next % MIC_DMA_DESC_RX_SIZE;
}

/* Step a ring index back by one slot; index 0 wraps to the last slot. */
static inline u32 mic_dma_hw_ring_dec(u32 val)
{
	if (val == 0)
		return MIC_DMA_DESC_RX_SIZE - 1;

	return val - 1;
}

static inline void mic_dma_hw_ring_inc_head(struct mic_dma_chan *ch)
{
	ch->head = mic_dma_hw_ring_inc(ch->head);
}

/*
 * Fill @desc as a memcpy descriptor.
 *
 * qw0 carries @src_phys OR-ed with the transfer length, expressed as
 * @size >> MIC_DMA_ALIGN_SHIFT and placed at bit MIC_DMA_MEMCPY_LEN_SHIFT.
 * qw1 carries @dst_phys OR-ed with the MIC_DMA_MEMCPY type code placed at
 * bit MIC_DMA_DESC_TYPE_SHIFT.
 */
static inline void mic_dma_memcpy_desc(struct mic_dma_desc *desc,
	dma_addr_t src_phys, dma_addr_t dst_phys, u64 size)
{
	u64 len_units = size >> MIC_DMA_ALIGN_SHIFT;
	u64 type_bits = (u64)MIC_DMA_MEMCPY << MIC_DMA_DESC_TYPE_SHIFT;

	desc->qw0 = src_phys | (len_units << MIC_DMA_MEMCPY_LEN_SHIFT);
	desc->qw1 = type_bits | dst_phys;
}

/*
 * Fill @desc as a status descriptor that writes @data to @dst_phys.
 *
 * qw0 carries @data. qw1 carries @dst_phys OR-ed with the MIC_DMA_STATUS
 * type code at bit MIC_DMA_DESC_TYPE_SHIFT; when @generate_intr is true the
 * bit at MIC_DMA_STAT_INTR_SHIFT is set as well.
 */
static inline void mic_dma_prep_status_desc(struct mic_dma_desc *desc, u64 data,
	dma_addr_t dst_phys, bool generate_intr)
{
	u64 ctrl = ((u64)MIC_DMA_STATUS << MIC_DMA_DESC_TYPE_SHIFT) | dst_phys;

	if (generate_intr)
		ctrl |= 1ULL << MIC_DMA_STAT_INTR_SHIFT;

	desc->qw0 = data;
	desc->qw1 = ctrl;
}

/*
 * Reap finished descriptors on channel @ch.
 *
 * With ch->cleanup_lock held, walks ch->tx_array from ch->last_tail up to
 * (but not including) the index returned by mic_dma_read_cmp_cnt()
 * (presumably the hardware completion position - confirm against its
 * definition). Each entry with a non-zero cookie has its cookie completed
 * and, if a callback is set, the callback is run and then cleared.
 * ch->last_tail is updated only after the whole walk has finished.
 *
 * Note that client callbacks run while ch->cleanup_lock is held.
 */
static void mic_dma_cleanup(struct mic_dma_chan *ch)
{
	struct dma_async_tx_descriptor *tx;
	u32 tail;
	u32 last_tail;

	spin_lock(&ch->cleanup_lock);
	tail = mic_dma_read_cmp_cnt(ch);
	/*
	 * This is the barrier pair for smp_wmb() in fn.
	 * mic_dma_tx_submit_unlock. It's required so that we read the
	 * updated cookie value from tx->cookie.
	 */
	smp_rmb();
	/* Walk every slot between the previous tail and the new one */
	for (last_tail = ch->last_tail; tail != last_tail;) {
		tx = &ch->tx_array[last_tail];
		/* Slots with a zero cookie are skipped without completion */
		if (tx->cookie) {
			dma_cookie_complete(tx);
			if (tx->callback) {
				tx->callback(tx->callback_param);
				/* Clear it so the callback is not run again */
				tx->callback = NULL;
			}
		}
		last_tail = mic_dma_hw_ring_inc(last_tail);
	}
	/* finish all completion callbacks before incrementing tail */
	smp_mb();
	ch->last_tail = last_tail;
	spin_unlock(&ch->cleanup_lock);
}

/*
 * Count the ring slots lying between @head and @tail, walking forward from
 * @head and wrapping at MIC_DMA_DESC_RX_SIZE. One slot is always subtracted
 * from the result.
 */
static u32 mic_dma_ring_count(u32 head, u32 tail)
{
	u32 gap;

	if (head < tail)
		gap = tail - head;
	else
		gap = tail + (MIC_DMA_DESC_RX_SIZE - head);

	return gap - 1;
}

/*
 * Check that the descriptor ring has room for @required descriptors.
 *
 * The free count is computed from ch->head and ch->last_tail. If it is
 * too small, mic_dma_cleanup() is called once to reap completed
 * descriptors and the count is recomputed.
 *
 * Returns the number of free descriptors on success, or -ENOMEM when fewer
 * than @required descriptors are free even after cleanup.
 */
static int mic_dma_avail_desc_ring_space(struct mic_dma_chan *ch, int required)
{
	struct device *dev = mic_dma_ch_to_device(ch);
	u32 count;

	count = mic_dma_ring_count(ch->head, ch->last_tail);
	if (count < required) {
		/* Try to make room before giving up */
		mic_dma_cleanup(ch);
		count = mic_dma_ring_count(ch->head, ch->last_tail);
	}

	if (count < required) {
		/* Terminate the message so nothing can be appended to it */
		dev_dbg(dev, "Not enough desc space\n");
		/* @required is an int and is therefore printed with %d */
		dev_dbg(dev, "%s %d required=%d, avail=%u\n",
			__func__, __LINE__, required, count);
		return -ENOMEM;
	}

	return count;
}

/* Program memcpy descriptors into the descriptor ring and update s/w head ptr*/
static int mic_dma_prog_memcpy_desc(struct mic_dma_chan *ch, dma_addr_t src,
				    dma_addr_t dst, size_t len)
{
	size_t current_transfer_len;
	size_t max_xfer_size = to_mic_dma_dev(ch)->max_xfer_size;
	/* 3 is added to make sure we have enough space for status desc */
	int num_desc = len / max_xfer_size + 3;
	int ret;

	if (len % max_xfer_size)
		num_desc++;

	ret = mic_dma_avail_desc_ring_s