From e09b41010ba33a20a87472ee821fa407a5b8da36 Mon Sep 17 00:00:00 2001 From: José Pekkarinen Date: Mon, 11 Apr 2016 10:41:07 +0300 Subject: These changes are the raw update to linux-4.4.6-rt14. Kernel sources are taken from kernel.org, and rt patch from the rt wiki download page. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During the rebasing, the following patch collided: Force tick interrupt and get rid of softirq magic(I70131fb85). Collisions have been removed because its logic was found on the source already. Change-Id: I7f57a4081d9deaa0d9ccfc41a6c8daccdee3b769 Signed-off-by: José Pekkarinen --- kernel/drivers/net/ethernet/intel/i40e/i40e_txrx.c | 729 ++++++++++++--------- 1 file changed, 425 insertions(+), 304 deletions(-) (limited to 'kernel/drivers/net/ethernet/intel/i40e/i40e_txrx.c') diff --git a/kernel/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/kernel/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 9d95042d5..635b3ac17 100644 --- a/kernel/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/kernel/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -165,9 +165,6 @@ int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet, tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_SIZE, 0); - /* set the timestamp */ - tx_buf->time_stamp = jiffies; - /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. */ @@ -283,7 +280,8 @@ static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi, if (add) { pf->fd_tcp_rule++; if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) { - dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n"); + if (I40E_DEBUG_FD & pf->hw.debug_mask) + dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n"); pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED; } } else { @@ -291,7 +289,8 @@ static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi, (pf->fd_tcp_rule - 1) : 0; if (pf->fd_tcp_rule == 0) { pf->flags |= I40E_FLAG_FD_ATR_ENABLED; - dev_info(&pf->pdev->dev, "ATR re-enabled due to no sideband TCP/IPv4 rules\n"); + if (I40E_DEBUG_FD & pf->hw.debug_mask) + dev_info(&pf->pdev->dev, "ATR re-enabled due to no sideband TCP/IPv4 rules\n"); } } @@ -465,11 +464,12 @@ static void i40e_fd_handle_status(struct i40e_ring *rx_ring, error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >> I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT; - if (error == (0x1 << I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) { + if (error == BIT(I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) { + pf->fd_inv = le32_to_cpu(rx_desc->wb.qword0.hi_dword.fd_id); if ((rx_desc->wb.qword0.hi_dword.fd_id != 0) || (I40E_DEBUG_FD & pf->hw.debug_mask)) dev_warn(&pdev->dev, "ntuple filter loc = %d, could not be added\n", - rx_desc->wb.qword0.hi_dword.fd_id); + pf->fd_inv); /* Check if the programming error is for ATR. * If so, auto disable ATR and set a state for @@ -501,7 +501,8 @@ static void i40e_fd_handle_status(struct i40e_ring *rx_ring, if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) && !(pf->auto_disable_flags & I40E_FLAG_FD_SB_ENABLED)) { - dev_warn(&pdev->dev, "FD filter space full, new ntuple rules will not be added\n"); + if (I40E_DEBUG_FD & pf->hw.debug_mask) + dev_warn(&pdev->dev, "FD filter space full, new ntuple rules will not be added\n"); pf->auto_disable_flags |= I40E_FLAG_FD_SB_ENABLED; } @@ -509,8 +510,7 @@ static void i40e_fd_handle_status(struct i40e_ring *rx_ring, dev_info(&pdev->dev, "FD filter programming failed due to incorrect filter parameters\n"); } - } else if (error == - (0x1 << I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) { + } else if (error == BIT(I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) { if (I40E_DEBUG_FD & pf->hw.debug_mask) dev_info(&pdev->dev, "ntuple filter fd_id = %d, could not be removed\n", rx_desc->wb.qword0.hi_dword.fd_id); @@ -601,20 +601,6 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring) } } -/** - * i40e_get_head - Retrieve head from head writeback - * @tx_ring: tx ring to fetch head of - * - * Returns value of Tx ring head based on value stored - * in head write-back location - **/ -static inline u32 i40e_get_head(struct i40e_ring *tx_ring) -{ - void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count; - - return le32_to_cpu(*(volatile __le32 *)head); -} - /** * i40e_get_tx_pending - how many tx descriptors not processed * @tx_ring: the ring of descriptors @@ -622,7 +608,7 @@ static inline u32 i40e_get_head(struct i40e_ring *tx_ring) * Since there is no access to the ring head register * in XL710, we need to use our local copies **/ -static u32 i40e_get_tx_pending(struct i40e_ring *ring) +u32 i40e_get_tx_pending(struct i40e_ring *ring) { u32 head, tail; @@ -636,50 +622,6 @@ static u32 i40e_get_tx_pending(struct i40e_ring *ring) return 0; } -/** - * i40e_check_tx_hang - Is there a hang in the Tx queue - * @tx_ring: the ring of descriptors - **/ -static bool i40e_check_tx_hang(struct i40e_ring *tx_ring) -{ - u32 tx_done = tx_ring->stats.packets; - u32 tx_done_old = tx_ring->tx_stats.tx_done_old; - u32 tx_pending = i40e_get_tx_pending(tx_ring); - struct i40e_pf *pf = tx_ring->vsi->back; - bool ret = false; - - clear_check_for_tx_hang(tx_ring); - - /* Check for a hung queue, but be thorough. This verifies - * that a transmit has been completed since the previous - * check AND there is at least one packet pending. The - * ARMED bit is set to indicate a potential hang. The - * bit is cleared if a pause frame is received to remove - * false hang detection due to PFC or 802.3x frames. By - * requiring this to fail twice we avoid races with - * PFC clearing the ARMED bit and conditions where we - * run the check_tx_hang logic with a transmit completion - * pending but without time to complete it yet. - */ - if ((tx_done_old == tx_done) && tx_pending) { - /* make sure it is true for two checks in a row */ - ret = test_and_set_bit(__I40E_HANG_CHECK_ARMED, - &tx_ring->state); - } else if (tx_done_old == tx_done && - (tx_pending < I40E_MIN_DESC_PENDING) && (tx_pending > 0)) { - if (I40E_DEBUG_FLOW & pf->hw.debug_mask) - dev_info(tx_ring->dev, "HW needs some more descs to do a cacheline flush. tx_pending %d, queue %d", - tx_pending, tx_ring->queue_index); - pf->tx_sluggish_count++; - } else { - /* update completed stats and disarm the hang check */ - tx_ring->tx_stats.tx_done_old = tx_done; - clear_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state); - } - - return ret; -} - #define WB_STRIDE 0x3 /** @@ -785,46 +727,21 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) tx_ring->q_vector->tx.total_bytes += total_bytes; tx_ring->q_vector->tx.total_packets += total_packets; - /* check to see if there are any non-cache aligned descriptors - * waiting to be written back, and kick the hardware to force - * them to be written back in case of napi polling - */ - if (budget && - !((i & WB_STRIDE) == WB_STRIDE) && - !test_bit(__I40E_DOWN, &tx_ring->vsi->state) && - (I40E_DESC_UNUSED(tx_ring) != tx_ring->count)) - tx_ring->arm_wb = true; - else - tx_ring->arm_wb = false; - - if (check_for_tx_hang(tx_ring) && i40e_check_tx_hang(tx_ring)) { - /* schedule immediate reset if we believe we hung */ - dev_info(tx_ring->dev, "Detected Tx Unit Hang\n" - " VSI <%d>\n" - " Tx Queue <%d>\n" - " next_to_use <%x>\n" - " next_to_clean <%x>\n", - tx_ring->vsi->seid, - tx_ring->queue_index, - tx_ring->next_to_use, i); - dev_info(tx_ring->dev, "tx_bi[next_to_clean]\n" - " time_stamp <%lx>\n" - " jiffies <%lx>\n", - tx_ring->tx_bi[i].time_stamp, jiffies); - - netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index); - - dev_info(tx_ring->dev, - "tx hang detected on queue %d, reset requested\n", - tx_ring->queue_index); - - /* do not fire the reset immediately, wait for the stack to - * decide we are truly stuck, also prevents every queue from - * simultaneously requesting a reset + if (tx_ring->flags & I40E_TXR_FLAGS_WB_ON_ITR) { + unsigned int j = 0; + + /* check to see if there are < 4 descriptors + * waiting to be written back, then kick the hardware to force + * them to be written back in case we stay in NAPI. + * In this mode on X722 we do not enable Interrupt. */ + j = i40e_get_tx_pending(tx_ring); - /* the adapter is about to reset, no point in enabling polling */ - budget = 1; + if (budget && + ((j / (WB_STRIDE + 1)) == 0) && (j != 0) && + !test_bit(__I40E_DOWN, &tx_ring->vsi->state) && + (I40E_DESC_UNUSED(tx_ring) != tx_ring->count)) + tx_ring->arm_wb = true; } netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev, @@ -856,23 +773,50 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) * @q_vector: the vector on which to force writeback * **/ -static void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) +void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) { - u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK | - I40E_PFINT_DYN_CTLN_ITR_INDX_MASK | /* set noitr */ - I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK | - I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK; - /* allow 00 to be written to the index */ - - wr32(&vsi->back->hw, - I40E_PFINT_DYN_CTLN(q_vector->v_idx + vsi->base_vector - 1), - val); + u16 flags = q_vector->tx.ring[0].flags; + + if (flags & I40E_TXR_FLAGS_WB_ON_ITR) { + u32 val; + + if (q_vector->arm_wb_state) + return; + + val = I40E_PFINT_DYN_CTLN_WB_ON_ITR_MASK; + + wr32(&vsi->back->hw, + I40E_PFINT_DYN_CTLN(q_vector->v_idx + + vsi->base_vector - 1), + val); + q_vector->arm_wb_state = true; + } else if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) { + u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK | + I40E_PFINT_DYN_CTLN_ITR_INDX_MASK | /* set noitr */ + I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK | + I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK; + /* allow 00 to be written to the index */ + + wr32(&vsi->back->hw, + I40E_PFINT_DYN_CTLN(q_vector->v_idx + + vsi->base_vector - 1), val); + } else { + u32 val = I40E_PFINT_DYN_CTL0_INTENA_MASK | + I40E_PFINT_DYN_CTL0_ITR_INDX_MASK | /* set noitr */ + I40E_PFINT_DYN_CTL0_SWINT_TRIG_MASK | + I40E_PFINT_DYN_CTL0_SW_ITR_INDX_ENA_MASK; + /* allow 00 to be written to the index */ + + wr32(&vsi->back->hw, I40E_PFINT_DYN_CTL0, val); + } } /** * i40e_set_new_dynamic_itr - Find new ITR level * @rc: structure containing ring performance data * + * Returns true if ITR changed, false if not + * * Stores a new ITR value based on packets and byte counts during * the last interrupt. The advantage of per interrupt computation * is faster updates and more accurate ITR for the current traffic @@ -881,22 +825,33 @@ static void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) * testing data as well as attempting to minimize response time * while increasing bulk throughput. **/ -static void i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) +static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) { enum i40e_latency_range new_latency_range = rc->latency_range; + struct i40e_q_vector *qv = rc->ring->q_vector; u32 new_itr = rc->itr; int bytes_per_int; + int usecs; if (rc->total_packets == 0 || !rc->itr) - return; + return false; /* simple throttlerate management - * 0-10MB/s lowest (100000 ints/s) + * 0-10MB/s lowest (50000 ints/s) * 10-20MB/s low (20000 ints/s) - * 20-1249MB/s bulk (8000 ints/s) + * 20-1249MB/s bulk (18000 ints/s) + * > 40000 Rx packets per second (8000 ints/s) + * + * The math works out because the divisor is in 10^(-6) which + * turns the bytes/us input value into MB/s values, but + * make sure to use usecs, as the register values written + * are in 2 usec increments in the ITR registers, and make sure + * to use the smoothed values that the countdown timer gives us. */ - bytes_per_int = rc->total_bytes / rc->itr; - switch (rc->itr) { + usecs = (rc->itr << 1) * ITR_COUNTDOWN_START; + bytes_per_int = rc->total_bytes / usecs; + + switch (new_latency_range) { case I40E_LOWEST_LATENCY: if (bytes_per_int > 10) new_latency_range = I40E_LOW_LATENCY; @@ -908,58 +863,52 @@ static void i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) new_latency_range = I40E_LOWEST_LATENCY; break; case I40E_BULK_LATENCY: + case I40E_ULTRA_LATENCY: + default: if (bytes_per_int <= 20) - rc->latency_range = I40E_LOW_LATENCY; + new_latency_range = I40E_LOW_LATENCY; break; } + /* this is to adjust RX more aggressively when streaming small + * packets. The value of 40000 was picked as it is just beyond + * what the hardware can receive per second if in low latency + * mode. + */ +#define RX_ULTRA_PACKET_RATE 40000 + + if ((((rc->total_packets * 1000000) / usecs) > RX_ULTRA_PACKET_RATE) && + (&qv->rx == rc)) + new_latency_range = I40E_ULTRA_LATENCY; + + rc->latency_range = new_latency_range; + switch (new_latency_range) { case I40E_LOWEST_LATENCY: - new_itr = I40E_ITR_100K; + new_itr = I40E_ITR_50K; break; case I40E_LOW_LATENCY: new_itr = I40E_ITR_20K; break; case I40E_BULK_LATENCY: + new_itr = I40E_ITR_18K; + break; + case I40E_ULTRA_LATENCY: new_itr = I40E_ITR_8K; break; default: break; } - if (new_itr != rc->itr) { - /* do an exponential smoothing */ - new_itr = (10 * new_itr * rc->itr) / - ((9 * new_itr) + rc->itr); - rc->itr = new_itr & I40E_MAX_ITR; - } - rc->total_bytes = 0; rc->total_packets = 0; -} -/** - * i40e_update_dynamic_itr - Adjust ITR based on bytes per int - * @q_vector: the vector to adjust - **/ -static void i40e_update_dynamic_itr(struct i40e_q_vector *q_vector) -{ - u16 vector = q_vector->vsi->base_vector + q_vector->v_idx; - struct i40e_hw *hw = &q_vector->vsi->back->hw; - u32 reg_addr; - u16 old_itr; - - reg_addr = I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1); - old_itr = q_vector->rx.itr; - i40e_set_new_dynamic_itr(&q_vector->rx); - if (old_itr != q_vector->rx.itr) - wr32(hw, reg_addr, q_vector->rx.itr); - - reg_addr = I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1); - old_itr = q_vector->tx.itr; - i40e_set_new_dynamic_itr(&q_vector->tx); - if (old_itr != q_vector->tx.itr) - wr32(hw, reg_addr, q_vector->tx.itr); + if (new_itr != rc->itr) { + rc->itr = new_itr; + return true; + } + + return false; } /** @@ -1005,6 +954,8 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring) if (!dev) return -ENOMEM; + /* warn if we are about to overwrite the pointer */ + WARN_ON(tx_ring->tx_bi); bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count; tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL); if (!tx_ring->tx_bi) @@ -1165,6 +1116,8 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring) struct device *dev = rx_ring->dev; int bi_size; + /* warn if we are about to overwrite the pointer */ + WARN_ON(rx_ring->rx_bi); bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count; rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL); if (!rx_ring->rx_bi) @@ -1345,16 +1298,11 @@ static void i40e_receive_skb(struct i40e_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag) { struct i40e_q_vector *q_vector = rx_ring->q_vector; - struct i40e_vsi *vsi = rx_ring->vsi; - u64 flags = vsi->back->flags; if (vlan_tag & VLAN_VID_MASK) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); - if (flags & I40E_FLAG_IN_NETPOLL) - netif_rx(skb); - else - napi_gro_receive(&q_vector->napi, skb); + napi_gro_receive(&q_vector->napi, skb); } /** @@ -1390,7 +1338,7 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, return; /* did the hardware decode the packet and checksum? */ - if (!(rx_status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT))) + if (!(rx_status & BIT(I40E_RX_DESC_STATUS_L3L4P_SHIFT))) return; /* both known and outer_ip must be set for the below code to work */ @@ -1405,25 +1353,25 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, ipv6 = true; if (ipv4 && - (rx_error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) | - (1 << I40E_RX_DESC_ERROR_EIPE_SHIFT)))) + (rx_error & (BIT(I40E_RX_DESC_ERROR_IPE_SHIFT) | + BIT(I40E_RX_DESC_ERROR_EIPE_SHIFT)))) goto checksum_fail; /* likely incorrect csum if alternate IP extension headers found */ if (ipv6 && - rx_status & (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) + rx_status & BIT(I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) /* don't increment checksum err here, non-fatal err */ return; /* there was some L4 error, count error and punt packet to the stack */ - if (rx_error & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT)) + if (rx_error & BIT(I40E_RX_DESC_ERROR_L4E_SHIFT)) goto checksum_fail; /* handle packets that were not able to be checksummed due * to arrival speed, in this case the stack can compute * the csum. */ - if (rx_error & (1 << I40E_RX_DESC_ERROR_PPRS_SHIFT)) + if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT)) return; /* If VXLAN traffic has an outer UDPv4 checksum we need to check @@ -1432,7 +1380,8 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, * so the total length of IPv4 header is IHL*4 bytes * The UDP_0 bit *may* bet set if the *inner* header is UDP */ - if (ipv4_tunnel) { + if (!(vsi->back->flags & I40E_FLAG_OUTER_UDP_CSUM_CAPABLE) && + (ipv4_tunnel)) { skb->transport_header = skb->mac_header + sizeof(struct ethhdr) + (ip_hdr(skb)->ihl * 4); @@ -1520,7 +1469,7 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) unsigned int total_rx_bytes = 0, total_rx_packets = 0; u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo; u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); - const int current_node = numa_node_id(); + const int current_node = numa_mem_id(); struct i40e_vsi *vsi = rx_ring->vsi; u16 i = rx_ring->next_to_clean; union i40e_rx_desc *rx_desc; @@ -1547,7 +1496,7 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >> I40E_RXD_QW1_STATUS_SHIFT; - if (!(rx_status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT))) + if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT))) break; /* This memory barrier is needed to keep us from reading @@ -1588,8 +1537,8 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >> I40E_RXD_QW1_ERROR_SHIFT; - rx_hbo = rx_error & (1 << I40E_RX_DESC_ERROR_HBO_SHIFT); - rx_error &= ~(1 << I40E_RX_DESC_ERROR_HBO_SHIFT); + rx_hbo = rx_error & BIT(I40E_RX_DESC_ERROR_HBO_SHIFT); + rx_error &= ~BIT(I40E_RX_DESC_ERROR_HBO_SHIFT); rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> I40E_RXD_QW1_PTYPE_SHIFT; @@ -1598,6 +1547,7 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) cleaned_count++; if (rx_hbo || rx_sph) { int len; + if (rx_hbo) len = I40E_RX_HDR_SIZE; else @@ -1641,7 +1591,7 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) I40E_RX_INCREMENT(rx_ring, i); if (unlikely( - !(rx_status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT)))) { + !(rx_status & BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)))) { struct i40e_rx_buffer *next_buffer; next_buffer = &rx_ring->rx_bi[i]; @@ -1651,11 +1601,8 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) } /* ERR_MASK will only have valid bits if EOP set */ - if (unlikely(rx_error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) { + if (unlikely(rx_error & BIT(I40E_RX_DESC_ERROR_RXE_SHIFT))) { dev_kfree_skb_any(skb); - /* TODO: shouldn't we increment a counter indicating the - * drop? - */ continue; } @@ -1676,7 +1623,7 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype); - vlan_tag = rx_status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT) + vlan_tag = rx_status & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT) ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1) : 0; #ifdef I40E_FCOE @@ -1688,7 +1635,6 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) skb_mark_napi_id(skb, &rx_ring->q_vector->napi); i40e_receive_skb(rx_ring, skb, vlan_tag); - rx_ring->netdev->last_rx = jiffies; rx_desc->wb.qword1.status_error_len = 0; } while (likely(total_rx_packets < budget)); @@ -1738,7 +1684,7 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget) rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >> I40E_RXD_QW1_STATUS_SHIFT; - if (!(rx_status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT))) + if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT))) break; /* This memory barrier is needed to keep us from reading @@ -1761,7 +1707,7 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget) rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >> I40E_RXD_QW1_ERROR_SHIFT; - rx_error &= ~(1 << I40E_RX_DESC_ERROR_HBO_SHIFT); + rx_error &= ~BIT(I40E_RX_DESC_ERROR_HBO_SHIFT); rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> I40E_RXD_QW1_PTYPE_SHIFT; @@ -1779,17 +1725,14 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget) I40E_RX_INCREMENT(rx_ring, i); if (unlikely( - !(rx_status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT)))) { + !(rx_status & BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)))) { rx_ring->rx_stats.non_eop_descs++; continue; } /* ERR_MASK will only have valid bits if EOP set */ - if (unlikely(rx_error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) { + if (unlikely(rx_error & BIT(I40E_RX_DESC_ERROR_RXE_SHIFT))) { dev_kfree_skb_any(skb); - /* TODO: shouldn't we increment a counter indicating the - * drop? - */ continue; } @@ -1810,7 +1753,7 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget) i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype); - vlan_tag = rx_status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT) + vlan_tag = rx_status & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT) ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1) : 0; #ifdef I40E_FCOE @@ -1821,7 +1764,6 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget) #endif i40e_receive_skb(rx_ring, skb, vlan_tag); - rx_ring->netdev->last_rx = jiffies; rx_desc->wb.qword1.status_error_len = 0; } while (likely(total_rx_packets < budget)); @@ -1835,6 +1777,96 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget) return total_rx_packets; } +static u32 i40e_buildreg_itr(const int type, const u16 itr) +{ + u32 val; + + val = I40E_PFINT_DYN_CTLN_INTENA_MASK | + I40E_PFINT_DYN_CTLN_CLEARPBA_MASK | + (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) | + (itr << I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT); + + return val; +} + +/* a small macro to shorten up some long lines */ +#define INTREG I40E_PFINT_DYN_CTLN + +/** + * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt + * @vsi: the VSI we care about + * @q_vector: q_vector for which itr is being updated and interrupt enabled + * + **/ +static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, + struct i40e_q_vector *q_vector) +{ + struct i40e_hw *hw = &vsi->back->hw; + bool rx = false, tx = false; + u32 rxval, txval; + int vector; + + vector = (q_vector->v_idx + vsi->base_vector); + + /* avoid dynamic calculation if in countdown mode OR if + * all dynamic is disabled + */ + rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0); + + if (q_vector->itr_countdown > 0 || + (!ITR_IS_DYNAMIC(vsi->rx_itr_setting) && + !ITR_IS_DYNAMIC(vsi->tx_itr_setting))) { + goto enable_int; + } + + if (ITR_IS_DYNAMIC(vsi->rx_itr_setting)) { + rx = i40e_set_new_dynamic_itr(&q_vector->rx); + rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr); + } + + if (ITR_IS_DYNAMIC(vsi->tx_itr_setting)) { + tx = i40e_set_new_dynamic_itr(&q_vector->tx); + txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr); + } + + if (rx || tx) { + /* get the higher of the two ITR adjustments and + * use the same value for both ITR registers + * when in adaptive mode (Rx and/or Tx) + */ + u16 itr = max(q_vector->tx.itr, q_vector->rx.itr); + + q_vector->tx.itr = q_vector->rx.itr = itr; + txval = i40e_buildreg_itr(I40E_TX_ITR, itr); + tx = true; + rxval = i40e_buildreg_itr(I40E_RX_ITR, itr); + rx = true; + } + + /* only need to enable the interrupt once, but need + * to possibly update both ITR values + */ + if (rx) { + /* set the INTENA_MSK_MASK so that this first write + * won't actually enable the interrupt, instead just + * updating the ITR (it's bit 31 PF and VF) + */ + rxval |= BIT(31); + /* don't check _DOWN because interrupt isn't being enabled */ + wr32(hw, INTREG(vector - 1), rxval); + } + +enable_int: + if (!test_bit(__I40E_DOWN, &vsi->state)) + wr32(hw, INTREG(vector - 1), txval); + + if (q_vector->itr_countdown) + q_vector->itr_countdown--; + else + q_vector->itr_countdown = ITR_COUNTDOWN_START; + +} + /** * i40e_napi_poll - NAPI polling Rx/Tx cleanup routine * @napi: napi struct with our devices info in it @@ -1853,7 +1885,7 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) bool clean_complete = true; bool arm_wb = false; int budget_per_ring; - int cleaned; + int work_done = 0; if (test_bit(__I40E_DOWN, &vsi->state)) { napi_complete(napi); @@ -1866,58 +1898,62 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) i40e_for_each_ring(ring, q_vector->tx) { clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit); arm_wb |= ring->arm_wb; + ring->arm_wb = false; } + /* Handle case where we are called by netpoll with a budget of 0 */ + if (budget <= 0) + goto tx_only; + /* We attempt to distribute budget to each Rx queue fairly, but don't * allow the budget to go below 1 because that would exit polling early. */ budget_per_ring = max(budget/q_vector->num_ringpairs, 1); i40e_for_each_ring(ring, q_vector->rx) { + int cleaned; + if (ring_is_ps_enabled(ring)) cleaned = i40e_clean_rx_irq_ps(ring, budget_per_ring); else cleaned = i40e_clean_rx_irq_1buf(ring, budget_per_ring); + + work_done += cleaned; /* if we didn't clean as many as budgeted, we must be done */ clean_complete &= (budget_per_ring != cleaned); } /* If work not completed, return budget and polling will return */ if (!clean_complete) { +tx_only: if (arm_wb) i40e_force_wb(vsi, q_vector); return budget; } + if (vsi->back->flags & I40E_TXR_FLAGS_WB_ON_ITR) + q_vector->arm_wb_state = false; + /* Work is done so exit the polling mode and re-enable the interrupt */ - napi_complete(napi); - if (ITR_IS_DYNAMIC(vsi->rx_itr_setting) || - ITR_IS_DYNAMIC(vsi->tx_itr_setting)) - i40e_update_dynamic_itr(q_vector); - - if (!test_bit(__I40E_DOWN, &vsi->state)) { - if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) { - i40e_irq_dynamic_enable(vsi, - q_vector->v_idx + vsi->base_vector); - } else { - struct i40e_hw *hw = &vsi->back->hw; - /* We re-enable the queue 0 cause, but - * don't worry about dynamic_enable - * because we left it on for the other - * possible interrupts during napi - */ - u32 qval = rd32(hw, I40E_QINT_RQCTL(0)); - qval |= I40E_QINT_RQCTL_CAUSE_ENA_MASK; - wr32(hw, I40E_QINT_RQCTL(0), qval); - - qval = rd32(hw, I40E_QINT_TQCTL(0)); - qval |= I40E_QINT_TQCTL_CAUSE_ENA_MASK; - wr32(hw, I40E_QINT_TQCTL(0), qval); - - i40e_irq_dynamic_enable_icr0(vsi->back); - } + napi_complete_done(napi, work_done); + if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) { + i40e_update_enable_itr(vsi, q_vector); + } else { /* Legacy mode */ + struct i40e_hw *hw = &vsi->back->hw; + /* We re-enable the queue 0 cause, but + * don't worry about dynamic_enable + * because we left it on for the other + * possible interrupts during napi + */ + u32 qval = rd32(hw, I40E_QINT_RQCTL(0)) | + I40E_QINT_RQCTL_CAUSE_ENA_MASK; + + wr32(hw, I40E_QINT_RQCTL(0), qval); + qval = rd32(hw, I40E_QINT_TQCTL(0)) | + I40E_QINT_TQCTL_CAUSE_ENA_MASK; + wr32(hw, I40E_QINT_TQCTL(0), qval); + i40e_irq_dynamic_enable_icr0(vsi->back); } - return 0; } @@ -1925,11 +1961,11 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) * i40e_atr - Add a Flow Director ATR filter * @tx_ring: ring to add programming descriptor to * @skb: send buffer - * @flags: send flags + * @tx_flags: send tx flags * @protocol: wire protocol **/ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, - u32 flags, __be16 protocol) + u32 tx_flags, __be16 protocol) { struct i40e_filter_program_desc *fdir_desc; struct i40e_pf *pf = tx_ring->vsi->back; @@ -1954,30 +1990,50 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, if (!tx_ring->atr_sample_rate) return; - /* snag network header to get L4 type and address */ - hdr.network = skb_network_header(skb); + if (!(tx_flags & (I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6))) + return; - /* Currently only IPv4/IPv6 with TCP is supported */ - if (protocol == htons(ETH_P_IP)) { - if (hdr.ipv4->protocol != IPPROTO_TCP) - return; + if (!(tx_flags & I40E_TX_FLAGS_VXLAN_TUNNEL)) { + /* snag network header to get L4 type and address */ + hdr.network = skb_network_header(skb); - /* access ihl as a u8 to avoid unaligned access on ia64 */ - hlen = (hdr.network[0] & 0x0F) << 2; - } else if (protocol == htons(ETH_P_IPV6)) { - if (hdr.ipv6->nexthdr != IPPROTO_TCP) + /* Currently only IPv4/IPv6 with TCP is supported + * access ihl as u8 to avoid unaligned access on ia64 + */ + if (tx_flags & I40E_TX_FLAGS_IPV4) + hlen = (hdr.network[0] & 0x0F) << 2; + else if (protocol == htons(ETH_P_IPV6)) + hlen = sizeof(struct ipv6hdr); + else return; - - hlen = sizeof(struct ipv6hdr); } else { - return; + hdr.network = skb_inner_network_header(skb); + hlen = skb_inner_network_header_len(skb); } + /* Currently only IPv4/IPv6 with TCP is supported + * Note: tx_flags gets modified to reflect inner protocols in + * tx_enable_csum function if encap is enabled. + */ + if ((tx_flags & I40E_TX_FLAGS_IPV4) && + (hdr.ipv4->protocol != IPPROTO_TCP)) + return; + else if ((tx_flags & I40E_TX_FLAGS_IPV6) && + (hdr.ipv6->nexthdr != IPPROTO_TCP)) + return; + th = (struct tcphdr *)(hdr.network + hlen); /* Due to lack of space, no more new filters can be programmed */ if (th->syn && (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED)) return; + if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) { + /* HW ATR eviction will take care of removing filters on FIN + * and RST packets. + */ + if (th->fin || th->rst) + return; + } tx_ring->atr_count++; @@ -2022,9 +2078,19 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT; dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK; - dtype_cmd |= - ((u32)pf->fd_atr_cnt_idx << I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) & - I40E_TXD_FLTR_QW1_CNTINDEX_MASK; + if (!(tx_flags & I40E_TX_FLAGS_VXLAN_TUNNEL)) + dtype_cmd |= + ((u32)I40E_FD_ATR_STAT_IDX(pf->hw.pf_id) << + I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) & + I40E_TXD_FLTR_QW1_CNTINDEX_MASK; + else + dtype_cmd |= + ((u32)I40E_FD_ATR_TUNNEL_STAT_IDX(pf->hw.pf_id) << + I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) & + I40E_TXD_FLTR_QW1_CNTINDEX_MASK; + + if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) + dtype_cmd |= I40E_TXD_FLTR_QW1_ATR_MASK; fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype); fdir_desc->rsvd = cpu_to_le32(0); @@ -2045,13 +2111,13 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, * otherwise returns 0 to indicate the flags has been set properly. **/ #ifdef I40E_FCOE -int i40e_tx_prepare_vlan_flags(struct sk_buff *skb, - struct i40e_ring *tx_ring, - u32 *flags) -#else -static int i40e_tx_prepare_vlan_flags(struct sk_buff *skb, +inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb, struct i40e_ring *tx_ring, u32 *flags) +#else +static inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb, + struct i40e_ring *tx_ring, + u32 *flags) #endif { __be16 protocol = skb->protocol; @@ -2077,6 +2143,7 @@ static int i40e_tx_prepare_vlan_flags(struct sk_buff *skb, /* else if it is a SW VLAN, check the next protocol and store the tag */ } else if (protocol == htons(ETH_P_8021Q)) { struct vlan_hdr *vhdr, _vhdr; + vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr); if (!vhdr) return -EINVAL; @@ -2119,16 +2186,15 @@ out: * i40e_tso - set up the tso context descriptor * @tx_ring: ptr to the ring to send * @skb: ptr to the skb we're sending - * @tx_flags: the collected send information - * @protocol: the send protocol * @hdr_len: ptr to the size of the packet header + * @cd_type_cmd_tso_mss: ptr to u64 object * @cd_tunneling: ptr to context descriptor bits * * Returns 0 if no TSO can happen, 1 if tso is going, or error **/ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, - u32 tx_flags, __be16 protocol, u8 *hdr_len, - u64 *cd_type_cmd_tso_mss, u32 *cd_tunneling) + u8 *hdr_len, u64 *cd_type_cmd_tso_mss, + u32 *cd_tunneling) { u32 cd_cmd, cd_tso_len, cd_mss; struct ipv6hdr *ipv6h; @@ -2181,6 +2247,7 @@ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, * @tx_ring: ptr to the ring to send * @skb: ptr to the skb we're sending * @tx_flags: the collected send information + * @cd_type_cmd_tso_mss: ptr to u64 object * * Returns 0 if no Tx timestamp can happen and 1 if the timestamp will happen **/ @@ -2220,12 +2287,13 @@ static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb, /** * i40e_tx_enable_csum - Enable Tx checksum offloads * @skb: send buffer - * @tx_flags: Tx flags currently set + * @tx_flags: pointer to Tx flags currently set * @td_cmd: Tx descriptor command bits to set * @td_offset: Tx descriptor header offsets to set + * @tx_ring: Tx descriptor ring * @cd_tunneling: ptr to context desc bits **/ -static void i40e_tx_enable_csum(struct sk_buff *skb, u32 tx_flags, +static void i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, u32 *td_cmd, u32 *td_offset, struct i40e_ring *tx_ring, u32 *cd_tunneling) @@ -2235,12 +2303,20 @@ static void i40e_tx_enable_csum(struct sk_buff *skb, u32 tx_flags, struct iphdr *this_ip_hdr; u32 network_hdr_len; u8 l4_hdr = 0; + struct udphdr *oudph; + struct iphdr *oiph; u32 l4_tunnel = 0; if (skb->encapsulation) { switch (ip_hdr(skb)->protocol) { case IPPROTO_UDP: + oudph = udp_hdr(skb); + oiph = ip_hdr(skb); l4_tunnel = I40E_TXD_CTX_UDP_TUNNELING; + *tx_flags |= I40E_TX_FLAGS_VXLAN_TUNNEL; + break; + case IPPROTO_GRE: + l4_tunnel = I40E_TXD_CTX_GRE_TUNNELING; break; default: return; @@ -2250,18 +2326,17 @@ static void i40e_tx_enable_csum(struct sk_buff *skb, u32 tx_flags, this_ipv6_hdr = inner_ipv6_hdr(skb); this_tcp_hdrlen = inner_tcp_hdrlen(skb); - if (tx_flags & I40E_TX_FLAGS_IPV4) { - - if (tx_flags & I40E_TX_FLAGS_TSO) { + if (*tx_flags & I40E_TX_FLAGS_IPV4) { + if (*tx_flags & I40E_TX_FLAGS_TSO) { *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4; ip_hdr(skb)->check = 0; } else { *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM; } - } else if (tx_flags & I40E_TX_FLAGS_IPV6) { + } else if (*tx_flags & I40E_TX_FLAGS_IPV6) { *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6; - if (tx_flags & I40E_TX_FLAGS_TSO) + if (*tx_flags & I40E_TX_FLAGS_TSO) ip_hdr(skb)->check = 0; } @@ -2273,8 +2348,17 @@ static void i40e_tx_enable_csum(struct sk_buff *skb, u32 tx_flags, skb_transport_offset(skb)) >> 1) << I40E_TXD_CTX_QW0_NATLEN_SHIFT; if (this_ip_hdr->version == 6) { - tx_flags &= ~I40E_TX_FLAGS_IPV4; - tx_flags |= I40E_TX_FLAGS_IPV6; + *tx_flags &= ~I40E_TX_FLAGS_IPV4; + *tx_flags |= I40E_TX_FLAGS_IPV6; + } + if ((tx_ring->flags & I40E_TXR_FLAGS_OUTER_UDP_CSUM) && + (l4_tunnel == I40E_TXD_CTX_UDP_TUNNELING) && + (*cd_tunneling & I40E_TXD_CTX_QW0_EXT_IP_MASK)) { + oudph->check = ~csum_tcpudp_magic(oiph->saddr, + oiph->daddr, + (skb->len - skb_transport_offset(skb)), + IPPROTO_UDP, 0); + *cd_tunneling |= I40E_TXD_CTX_QW0_L4T_CS_MASK; } } else { network_hdr_len = skb_network_header_len(skb); @@ -2284,12 +2368,12 @@ static void i40e_tx_enable_csum(struct sk_buff *skb, u32 tx_flags, } /* Enable IP checksum offloads */ - if (tx_flags & I40E_TX_FLAGS_IPV4) { + if (*tx_flags & I40E_TX_FLAGS_IPV4) { l4_hdr = this_ip_hdr->protocol; /* the stack computes the IP header already, the only time we * need the hardware to recompute it is in the case of TSO. */ - if (tx_flags & I40E_TX_FLAGS_TSO) { + if (*tx_flags & I40E_TX_FLAGS_TSO) { *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM; this_ip_hdr->check = 0; } else { @@ -2298,7 +2382,7 @@ static void i40e_tx_enable_csum(struct sk_buff *skb, u32 tx_flags, /* Now set the td_offset for IP header length */ *td_offset = (network_hdr_len >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT; - } else if (tx_flags & I40E_TX_FLAGS_IPV6) { + } else if (*tx_flags & I40E_TX_FLAGS_IPV6) { l4_hdr = this_ipv6_hdr->nexthdr; *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV6; /* Now set the td_offset for IP header length */ @@ -2396,9 +2480,9 @@ static inline int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) * Returns 0 if stop is not needed **/ #ifdef I40E_FCOE -int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) +inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) #else -static int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) +static inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) #endif { if (likely(I40E_DESC_UNUSED(tx_ring) >= size)) @@ -2473,13 +2557,13 @@ linearize_chk_done: * @td_offset: offset for checksum or crc **/ #ifdef I40E_FCOE -void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, - struct i40e_tx_buffer *first, u32 tx_flags, - const u8 hdr_len, u32 td_cmd, u32 td_offset) -#else -static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, +inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, struct i40e_tx_buffer *first, u32 tx_flags, const u8 hdr_len, u32 td_cmd, u32 td_offset) +#else +static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, + struct i40e_tx_buffer *first, u32 tx_flags, + const u8 hdr_len, u32 td_cmd, u32 td_offset) #endif { unsigned int data_len = skb->data_len; @@ -2491,6 +2575,9 @@ static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, u32 td_tag = 0; dma_addr_t dma; u16 gso_segs; + u16 desc_count = 0; + bool tail_bump = true; + bool do_rs = false; if (tx_flags & I40E_TX_FLAGS_HW_VLAN) { td_cmd |= I40E_TX_DESC_CMD_IL2TAG1; @@ -2531,6 +2618,8 @@ static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_desc++; i++; + desc_count++; + if (i == tx_ring->count) { tx_desc = I40E_TX_DESC(tx_ring, 0); i = 0; @@ -2550,6 +2639,8 @@ static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_desc++; i++; + desc_count++; + if (i == tx_ring->count) { tx_desc = I40E_TX_DESC(tx_ring, 0); i = 0; @@ -2564,37 +2655,6 @@ static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_bi = &tx_ring->tx_bi[i]; } - /* Place RS bit on last descriptor of any packet that spans across the - * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline. - */ - if (((i & WB_STRIDE) != WB_STRIDE) && - (first <= &tx_ring->tx_bi[i]) && - (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) { - tx_desc->cmd_type_offset_bsz = - build_ctob(td_cmd, td_offset, size, td_tag) | - cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP << - I40E_TXD_QW1_CMD_SHIFT); - } else { - tx_desc->cmd_type_offset_bsz = - build_ctob(td_cmd, td_offset, size, td_tag) | - cpu_to_le64((u64)I40E_TXD_CMD << - I40E_TXD_QW1_CMD_SHIFT); - } - - netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index), - first->bytecount); - - /* set the timestamp */ - first->time_stamp = jiffies; - - /* Force memory writes to complete before letting h/w - * know there are new descriptors to fetch. (Only - * applicable for weak-ordered memory model archs, - * such as IA-64). - */ - wmb(); - /* set next_to_watch value indicating a packet is present */ first->next_to_watch = tx_desc; @@ -2604,12 +2664,71 @@ static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_ring->next_to_use = i; + netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev, + tx_ring->queue_index), + first->bytecount); i40e_maybe_stop_tx(tx_ring, DESC_NEEDED); + + /* Algorithm to optimize tail and RS bit setting: + * if xmit_more is supported + * if xmit_more is true + * do not update tail and do not mark RS bit. + * if xmit_more is false and last xmit_more was false + * if every packet spanned less than 4 desc + * then set RS bit on 4th packet and update tail + * on every packet + * else + * update tail and set RS bit on every packet. + * if xmit_more is false and last_xmit_more was true + * update tail and set RS bit. + * + * Optimization: wmb to be issued only in case of tail update. + * Also optimize the Descriptor WB path for RS bit with the same + * algorithm. + * + * Note: If there are less than 4 packets + * pending and interrupts were disabled the service task will + * trigger a force WB. + */ + if (skb->xmit_more && + !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev, + tx_ring->queue_index))) { + tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET; + tail_bump = false; + } else if (!skb->xmit_more && + !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev, + tx_ring->queue_index)) && + (!(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) && + (tx_ring->packet_stride < WB_STRIDE) && + (desc_count < WB_STRIDE)) { + tx_ring->packet_stride++; + } else { + tx_ring->packet_stride = 0; + tx_ring->flags &= ~I40E_TXR_FLAGS_LAST_XMIT_MORE_SET; + do_rs = true; + } + if (do_rs) + tx_ring->packet_stride = 0; + + tx_desc->cmd_type_offset_bsz = + build_ctob(td_cmd, td_offset, size, td_tag) | + cpu_to_le64((u64)(do_rs ? I40E_TXD_CMD : + I40E_TX_DESC_CMD_EOP) << + I40E_TXD_QW1_CMD_SHIFT); + /* notify HW of packet */ - if (!skb->xmit_more || - netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index))) + if (!tail_bump) + prefetchw(tx_desc + 1); + + if (tail_bump) { + /* Force memory writes to complete before letting h/w + * know there are new descriptors to fetch. (Only + * applicable for weak-ordered memory model archs, + * such as IA-64). + */ + wmb(); writel(i, tx_ring->tail); + } return; @@ -2640,11 +2759,11 @@ dma_error: * one descriptor. **/ #ifdef I40E_FCOE -int i40e_xmit_descriptor_count(struct sk_buff *skb, - struct i40e_ring *tx_ring) -#else -static int i40e_xmit_descriptor_count(struct sk_buff *skb, +inline int i40e_xmit_descriptor_count(struct sk_buff *skb, struct i40e_ring *tx_ring) +#else +static inline int i40e_xmit_descriptor_count(struct sk_buff *skb, + struct i40e_ring *tx_ring) #endif { unsigned int f; @@ -2687,6 +2806,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, u8 hdr_len = 0; int tsyn; int tso; + if (0 == i40e_xmit_descriptor_count(skb, tx_ring)) return NETDEV_TX_BUSY; @@ -2706,7 +2826,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, else if (protocol == htons(ETH_P_IPV6)) tx_flags |= I40E_TX_FLAGS_IPV6; - tso = i40e_tso(tx_ring, skb, tx_flags, protocol, &hdr_len, + tso = i40e_tso(tx_ring, skb, &hdr_len, &cd_type_cmd_tso_mss, &cd_tunneling); if (tso < 0) @@ -2719,10 +2839,11 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, if (tsyn) tx_flags |= I40E_TX_FLAGS_TSYN; - if (i40e_chk_linearize(skb, tx_flags)) + if (i40e_chk_linearize(skb, tx_flags)) { if (skb_linearize(skb)) goto out_drop; - + tx_ring->tx_stats.tx_linearize++; + } skb_tx_timestamp(skb); /* always enable CRC insertion offload */ @@ -2732,7 +2853,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, if (skb->ip_summed == CHECKSUM_PARTIAL) { tx_flags |= I40E_TX_FLAGS_CSUM; - i40e_tx_enable_csum(skb, tx_flags, &td_cmd, &td_offset, + i40e_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset, tx_ring, &cd_tunneling); } -- cgit 1.2.3-korg