summaryrefslogtreecommitdiffstats
path: root/kernel/drivers/net/ethernet/intel/i40e/i40e_txrx.c
diff options
context:
space:
mode:
authorJosé Pekkarinen <jose.pekkarinen@nokia.com>2016-04-11 10:41:07 +0300
committerJosé Pekkarinen <jose.pekkarinen@nokia.com>2016-04-13 08:17:18 +0300
commite09b41010ba33a20a87472ee821fa407a5b8da36 (patch)
treed10dc367189862e7ca5c592f033dc3726e1df4e3 /kernel/drivers/net/ethernet/intel/i40e/i40e_txrx.c
parentf93b97fd65072de626c074dbe099a1fff05ce060 (diff)
These changes are the raw update to linux-4.4.6-rt14. Kernel sources
are taken from kernel.org, and rt patch from the rt wiki download page. During the rebasing, the following patch collided: Force tick interrupt and get rid of softirq magic(I70131fb85). Collisions have been removed because its logic was found on the source already. Change-Id: I7f57a4081d9deaa0d9ccfc41a6c8daccdee3b769 Signed-off-by: José Pekkarinen <jose.pekkarinen@nokia.com>
Diffstat (limited to 'kernel/drivers/net/ethernet/intel/i40e/i40e_txrx.c')
-rw-r--r--kernel/drivers/net/ethernet/intel/i40e/i40e_txrx.c729
1 files changed, 425 insertions, 304 deletions
diff --git a/kernel/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/kernel/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 9d95042d5..635b3ac17 100644
--- a/kernel/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/kernel/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -165,9 +165,6 @@ int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet,
tx_desc->cmd_type_offset_bsz =
build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_SIZE, 0);
- /* set the timestamp */
- tx_buf->time_stamp = jiffies;
-
/* Force memory writes to complete before letting h/w
* know there are new descriptors to fetch.
*/
@@ -283,7 +280,8 @@ static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
if (add) {
pf->fd_tcp_rule++;
if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) {
- dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n");
+ if (I40E_DEBUG_FD & pf->hw.debug_mask)
+ dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n");
pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
}
} else {
@@ -291,7 +289,8 @@ static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
(pf->fd_tcp_rule - 1) : 0;
if (pf->fd_tcp_rule == 0) {
pf->flags |= I40E_FLAG_FD_ATR_ENABLED;
- dev_info(&pf->pdev->dev, "ATR re-enabled due to no sideband TCP/IPv4 rules\n");
+ if (I40E_DEBUG_FD & pf->hw.debug_mask)
+ dev_info(&pf->pdev->dev, "ATR re-enabled due to no sideband TCP/IPv4 rules\n");
}
}
@@ -465,11 +464,12 @@ static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;
- if (error == (0x1 << I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) {
+ if (error == BIT(I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) {
+ pf->fd_inv = le32_to_cpu(rx_desc->wb.qword0.hi_dword.fd_id);
if ((rx_desc->wb.qword0.hi_dword.fd_id != 0) ||
(I40E_DEBUG_FD & pf->hw.debug_mask))
dev_warn(&pdev->dev, "ntuple filter loc = %d, could not be added\n",
- rx_desc->wb.qword0.hi_dword.fd_id);
+ pf->fd_inv);
/* Check if the programming error is for ATR.
* If so, auto disable ATR and set a state for
@@ -501,7 +501,8 @@ static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
!(pf->auto_disable_flags &
I40E_FLAG_FD_SB_ENABLED)) {
- dev_warn(&pdev->dev, "FD filter space full, new ntuple rules will not be added\n");
+ if (I40E_DEBUG_FD & pf->hw.debug_mask)
+ dev_warn(&pdev->dev, "FD filter space full, new ntuple rules will not be added\n");
pf->auto_disable_flags |=
I40E_FLAG_FD_SB_ENABLED;
}
@@ -509,8 +510,7 @@ static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
dev_info(&pdev->dev,
"FD filter programming failed due to incorrect filter parameters\n");
}
- } else if (error ==
- (0x1 << I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) {
+ } else if (error == BIT(I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) {
if (I40E_DEBUG_FD & pf->hw.debug_mask)
dev_info(&pdev->dev, "ntuple filter fd_id = %d, could not be removed\n",
rx_desc->wb.qword0.hi_dword.fd_id);
@@ -602,27 +602,13 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring)
}
/**
- * i40e_get_head - Retrieve head from head writeback
- * @tx_ring: tx ring to fetch head of
- *
- * Returns value of Tx ring head based on value stored
- * in head write-back location
- **/
-static inline u32 i40e_get_head(struct i40e_ring *tx_ring)
-{
- void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count;
-
- return le32_to_cpu(*(volatile __le32 *)head);
-}
-
-/**
* i40e_get_tx_pending - how many tx descriptors not processed
* @tx_ring: the ring of descriptors
*
* Since there is no access to the ring head register
* in XL710, we need to use our local copies
**/
-static u32 i40e_get_tx_pending(struct i40e_ring *ring)
+u32 i40e_get_tx_pending(struct i40e_ring *ring)
{
u32 head, tail;
@@ -636,50 +622,6 @@ static u32 i40e_get_tx_pending(struct i40e_ring *ring)
return 0;
}
-/**
- * i40e_check_tx_hang - Is there a hang in the Tx queue
- * @tx_ring: the ring of descriptors
- **/
-static bool i40e_check_tx_hang(struct i40e_ring *tx_ring)
-{
- u32 tx_done = tx_ring->stats.packets;
- u32 tx_done_old = tx_ring->tx_stats.tx_done_old;
- u32 tx_pending = i40e_get_tx_pending(tx_ring);
- struct i40e_pf *pf = tx_ring->vsi->back;
- bool ret = false;
-
- clear_check_for_tx_hang(tx_ring);
-
- /* Check for a hung queue, but be thorough. This verifies
- * that a transmit has been completed since the previous
- * check AND there is at least one packet pending. The
- * ARMED bit is set to indicate a potential hang. The
- * bit is cleared if a pause frame is received to remove
- * false hang detection due to PFC or 802.3x frames. By
- * requiring this to fail twice we avoid races with
- * PFC clearing the ARMED bit and conditions where we
- * run the check_tx_hang logic with a transmit completion
- * pending but without time to complete it yet.
- */
- if ((tx_done_old == tx_done) && tx_pending) {
- /* make sure it is true for two checks in a row */
- ret = test_and_set_bit(__I40E_HANG_CHECK_ARMED,
- &tx_ring->state);
- } else if (tx_done_old == tx_done &&
- (tx_pending < I40E_MIN_DESC_PENDING) && (tx_pending > 0)) {
- if (I40E_DEBUG_FLOW & pf->hw.debug_mask)
- dev_info(tx_ring->dev, "HW needs some more descs to do a cacheline flush. tx_pending %d, queue %d",
- tx_pending, tx_ring->queue_index);
- pf->tx_sluggish_count++;
- } else {
- /* update completed stats and disarm the hang check */
- tx_ring->tx_stats.tx_done_old = tx_done;
- clear_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state);
- }
-
- return ret;
-}
-
#define WB_STRIDE 0x3
/**
@@ -785,46 +727,21 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
tx_ring->q_vector->tx.total_bytes += total_bytes;
tx_ring->q_vector->tx.total_packets += total_packets;
- /* check to see if there are any non-cache aligned descriptors
- * waiting to be written back, and kick the hardware to force
- * them to be written back in case of napi polling
- */
- if (budget &&
- !((i & WB_STRIDE) == WB_STRIDE) &&
- !test_bit(__I40E_DOWN, &tx_ring->vsi->state) &&
- (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
- tx_ring->arm_wb = true;
- else
- tx_ring->arm_wb = false;
-
- if (check_for_tx_hang(tx_ring) && i40e_check_tx_hang(tx_ring)) {
- /* schedule immediate reset if we believe we hung */
- dev_info(tx_ring->dev, "Detected Tx Unit Hang\n"
- " VSI <%d>\n"
- " Tx Queue <%d>\n"
- " next_to_use <%x>\n"
- " next_to_clean <%x>\n",
- tx_ring->vsi->seid,
- tx_ring->queue_index,
- tx_ring->next_to_use, i);
- dev_info(tx_ring->dev, "tx_bi[next_to_clean]\n"
- " time_stamp <%lx>\n"
- " jiffies <%lx>\n",
- tx_ring->tx_bi[i].time_stamp, jiffies);
-
- netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
-
- dev_info(tx_ring->dev,
- "tx hang detected on queue %d, reset requested\n",
- tx_ring->queue_index);
-
- /* do not fire the reset immediately, wait for the stack to
- * decide we are truly stuck, also prevents every queue from
- * simultaneously requesting a reset
+ if (tx_ring->flags & I40E_TXR_FLAGS_WB_ON_ITR) {
+ unsigned int j = 0;
+
+ /* check to see if there are < 4 descriptors
+ * waiting to be written back, then kick the hardware to force
+ * them to be written back in case we stay in NAPI.
+ * In this mode on X722 we do not enable Interrupt.
*/
+ j = i40e_get_tx_pending(tx_ring);
- /* the adapter is about to reset, no point in enabling polling */
- budget = 1;
+ if (budget &&
+ ((j / (WB_STRIDE + 1)) == 0) && (j != 0) &&
+ !test_bit(__I40E_DOWN, &tx_ring->vsi->state) &&
+ (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
+ tx_ring->arm_wb = true;
}
netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev,
@@ -856,23 +773,50 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
* @q_vector: the vector on which to force writeback
*
**/
-static void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
+void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
{
- u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
- I40E_PFINT_DYN_CTLN_ITR_INDX_MASK | /* set noitr */
- I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
- I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK;
- /* allow 00 to be written to the index */
-
- wr32(&vsi->back->hw,
- I40E_PFINT_DYN_CTLN(q_vector->v_idx + vsi->base_vector - 1),
- val);
+ u16 flags = q_vector->tx.ring[0].flags;
+
+ if (flags & I40E_TXR_FLAGS_WB_ON_ITR) {
+ u32 val;
+
+ if (q_vector->arm_wb_state)
+ return;
+
+ val = I40E_PFINT_DYN_CTLN_WB_ON_ITR_MASK;
+
+ wr32(&vsi->back->hw,
+ I40E_PFINT_DYN_CTLN(q_vector->v_idx +
+ vsi->base_vector - 1),
+ val);
+ q_vector->arm_wb_state = true;
+ } else if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
+ u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
+ I40E_PFINT_DYN_CTLN_ITR_INDX_MASK | /* set noitr */
+ I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
+ I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK;
+ /* allow 00 to be written to the index */
+
+ wr32(&vsi->back->hw,
+ I40E_PFINT_DYN_CTLN(q_vector->v_idx +
+ vsi->base_vector - 1), val);
+ } else {
+ u32 val = I40E_PFINT_DYN_CTL0_INTENA_MASK |
+ I40E_PFINT_DYN_CTL0_ITR_INDX_MASK | /* set noitr */
+ I40E_PFINT_DYN_CTL0_SWINT_TRIG_MASK |
+ I40E_PFINT_DYN_CTL0_SW_ITR_INDX_ENA_MASK;
+ /* allow 00 to be written to the index */
+
+ wr32(&vsi->back->hw, I40E_PFINT_DYN_CTL0, val);
+ }
}
/**
* i40e_set_new_dynamic_itr - Find new ITR level
* @rc: structure containing ring performance data
*
+ * Returns true if ITR changed, false if not
+ *
* Stores a new ITR value based on packets and byte counts during
* the last interrupt. The advantage of per interrupt computation
* is faster updates and more accurate ITR for the current traffic
@@ -881,22 +825,33 @@ static void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
* testing data as well as attempting to minimize response time
* while increasing bulk throughput.
**/
-static void i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
+static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
{
enum i40e_latency_range new_latency_range = rc->latency_range;
+ struct i40e_q_vector *qv = rc->ring->q_vector;
u32 new_itr = rc->itr;
int bytes_per_int;
+ int usecs;
if (rc->total_packets == 0 || !rc->itr)
- return;
+ return false;
/* simple throttlerate management
- * 0-10MB/s lowest (100000 ints/s)
+ * 0-10MB/s lowest (50000 ints/s)
* 10-20MB/s low (20000 ints/s)
- * 20-1249MB/s bulk (8000 ints/s)
+ * 20-1249MB/s bulk (18000 ints/s)
+ * > 40000 Rx packets per second (8000 ints/s)
+ *
+ * The math works out because the divisor is in 10^(-6) which
+ * turns the bytes/us input value into MB/s values, but
+ * make sure to use usecs, as the register values written
+ * are in 2 usec increments in the ITR registers, and make sure
+ * to use the smoothed values that the countdown timer gives us.
*/
- bytes_per_int = rc->total_bytes / rc->itr;
- switch (rc->itr) {
+ usecs = (rc->itr << 1) * ITR_COUNTDOWN_START;
+ bytes_per_int = rc->total_bytes / usecs;
+
+ switch (new_latency_range) {
case I40E_LOWEST_LATENCY:
if (bytes_per_int > 10)
new_latency_range = I40E_LOW_LATENCY;
@@ -908,58 +863,52 @@ static void i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
new_latency_range = I40E_LOWEST_LATENCY;
break;
case I40E_BULK_LATENCY:
+ case I40E_ULTRA_LATENCY:
+ default:
if (bytes_per_int <= 20)
- rc->latency_range = I40E_LOW_LATENCY;
+ new_latency_range = I40E_LOW_LATENCY;
break;
}
+ /* this is to adjust RX more aggressively when streaming small
+ * packets. The value of 40000 was picked as it is just beyond
+ * what the hardware can receive per second if in low latency
+ * mode.
+ */
+#define RX_ULTRA_PACKET_RATE 40000
+
+ if ((((rc->total_packets * 1000000) / usecs) > RX_ULTRA_PACKET_RATE) &&
+ (&qv->rx == rc))
+ new_latency_range = I40E_ULTRA_LATENCY;
+
+ rc->latency_range = new_latency_range;
+
switch (new_latency_range) {
case I40E_LOWEST_LATENCY:
- new_itr = I40E_ITR_100K;
+ new_itr = I40E_ITR_50K;
break;
case I40E_LOW_LATENCY:
new_itr = I40E_ITR_20K;
break;
case I40E_BULK_LATENCY:
+ new_itr = I40E_ITR_18K;
+ break;
+ case I40E_ULTRA_LATENCY:
new_itr = I40E_ITR_8K;
break;
default:
break;
}
- if (new_itr != rc->itr) {
- /* do an exponential smoothing */
- new_itr = (10 * new_itr * rc->itr) /
- ((9 * new_itr) + rc->itr);
- rc->itr = new_itr & I40E_MAX_ITR;
- }
-
rc->total_bytes = 0;
rc->total_packets = 0;
-}
-/**
- * i40e_update_dynamic_itr - Adjust ITR based on bytes per int
- * @q_vector: the vector to adjust
- **/
-static void i40e_update_dynamic_itr(struct i40e_q_vector *q_vector)
-{
- u16 vector = q_vector->vsi->base_vector + q_vector->v_idx;
- struct i40e_hw *hw = &q_vector->vsi->back->hw;
- u32 reg_addr;
- u16 old_itr;
-
- reg_addr = I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1);
- old_itr = q_vector->rx.itr;
- i40e_set_new_dynamic_itr(&q_vector->rx);
- if (old_itr != q_vector->rx.itr)
- wr32(hw, reg_addr, q_vector->rx.itr);
-
- reg_addr = I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1);
- old_itr = q_vector->tx.itr;
- i40e_set_new_dynamic_itr(&q_vector->tx);
- if (old_itr != q_vector->tx.itr)
- wr32(hw, reg_addr, q_vector->tx.itr);
+ if (new_itr != rc->itr) {
+ rc->itr = new_itr;
+ return true;
+ }
+
+ return false;
}
/**
@@ -1005,6 +954,8 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
if (!dev)
return -ENOMEM;
+ /* warn if we are about to overwrite the pointer */
+ WARN_ON(tx_ring->tx_bi);
bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL);
if (!tx_ring->tx_bi)
@@ -1165,6 +1116,8 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
struct device *dev = rx_ring->dev;
int bi_size;
+ /* warn if we are about to overwrite the pointer */
+ WARN_ON(rx_ring->rx_bi);
bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
if (!rx_ring->rx_bi)
@@ -1345,16 +1298,11 @@ static void i40e_receive_skb(struct i40e_ring *rx_ring,
struct sk_buff *skb, u16 vlan_tag)
{
struct i40e_q_vector *q_vector = rx_ring->q_vector;
- struct i40e_vsi *vsi = rx_ring->vsi;
- u64 flags = vsi->back->flags;
if (vlan_tag & VLAN_VID_MASK)
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
- if (flags & I40E_FLAG_IN_NETPOLL)
- netif_rx(skb);
- else
- napi_gro_receive(&q_vector->napi, skb);
+ napi_gro_receive(&q_vector->napi, skb);
}
/**
@@ -1390,7 +1338,7 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
return;
/* did the hardware decode the packet and checksum? */
- if (!(rx_status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
+ if (!(rx_status & BIT(I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
return;
/* both known and outer_ip must be set for the below code to work */
@@ -1405,25 +1353,25 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
ipv6 = true;
if (ipv4 &&
- (rx_error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
- (1 << I40E_RX_DESC_ERROR_EIPE_SHIFT))))
+ (rx_error & (BIT(I40E_RX_DESC_ERROR_IPE_SHIFT) |
+ BIT(I40E_RX_DESC_ERROR_EIPE_SHIFT))))
goto checksum_fail;
/* likely incorrect csum if alternate IP extension headers found */
if (ipv6 &&
- rx_status & (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT))
+ rx_status & BIT(I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT))
/* don't increment checksum err here, non-fatal err */
return;
/* there was some L4 error, count error and punt packet to the stack */
- if (rx_error & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))
+ if (rx_error & BIT(I40E_RX_DESC_ERROR_L4E_SHIFT))
goto checksum_fail;
/* handle packets that were not able to be checksummed due
* to arrival speed, in this case the stack can compute
* the csum.
*/
- if (rx_error & (1 << I40E_RX_DESC_ERROR_PPRS_SHIFT))
+ if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT))
return;
/* If VXLAN traffic has an outer UDPv4 checksum we need to check
@@ -1432,7 +1380,8 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
* so the total length of IPv4 header is IHL*4 bytes
* The UDP_0 bit *may* bet set if the *inner* header is UDP
*/
- if (ipv4_tunnel) {
+ if (!(vsi->back->flags & I40E_FLAG_OUTER_UDP_CSUM_CAPABLE) &&
+ (ipv4_tunnel)) {
skb->transport_header = skb->mac_header +
sizeof(struct ethhdr) +
(ip_hdr(skb)->ihl * 4);
@@ -1520,7 +1469,7 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
unsigned int total_rx_bytes = 0, total_rx_packets = 0;
u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo;
u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
- const int current_node = numa_node_id();
+ const int current_node = numa_mem_id();
struct i40e_vsi *vsi = rx_ring->vsi;
u16 i = rx_ring->next_to_clean;
union i40e_rx_desc *rx_desc;
@@ -1547,7 +1496,7 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
I40E_RXD_QW1_STATUS_SHIFT;
- if (!(rx_status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)))
+ if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
break;
/* This memory barrier is needed to keep us from reading
@@ -1588,8 +1537,8 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
I40E_RXD_QW1_ERROR_SHIFT;
- rx_hbo = rx_error & (1 << I40E_RX_DESC_ERROR_HBO_SHIFT);
- rx_error &= ~(1 << I40E_RX_DESC_ERROR_HBO_SHIFT);
+ rx_hbo = rx_error & BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
+ rx_error &= ~BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
I40E_RXD_QW1_PTYPE_SHIFT;
@@ -1598,6 +1547,7 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
cleaned_count++;
if (rx_hbo || rx_sph) {
int len;
+
if (rx_hbo)
len = I40E_RX_HDR_SIZE;
else
@@ -1641,7 +1591,7 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
I40E_RX_INCREMENT(rx_ring, i);
if (unlikely(
- !(rx_status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
+ !(rx_status & BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
struct i40e_rx_buffer *next_buffer;
next_buffer = &rx_ring->rx_bi[i];
@@ -1651,11 +1601,8 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
}
/* ERR_MASK will only have valid bits if EOP set */
- if (unlikely(rx_error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
+ if (unlikely(rx_error & BIT(I40E_RX_DESC_ERROR_RXE_SHIFT))) {
dev_kfree_skb_any(skb);
- /* TODO: shouldn't we increment a counter indicating the
- * drop?
- */
continue;
}
@@ -1676,7 +1623,7 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);
- vlan_tag = rx_status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
+ vlan_tag = rx_status & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
: 0;
#ifdef I40E_FCOE
@@ -1688,7 +1635,6 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
skb_mark_napi_id(skb, &rx_ring->q_vector->napi);
i40e_receive_skb(rx_ring, skb, vlan_tag);
- rx_ring->netdev->last_rx = jiffies;
rx_desc->wb.qword1.status_error_len = 0;
} while (likely(total_rx_packets < budget));
@@ -1738,7 +1684,7 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
I40E_RXD_QW1_STATUS_SHIFT;
- if (!(rx_status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)))
+ if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
break;
/* This memory barrier is needed to keep us from reading
@@ -1761,7 +1707,7 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
I40E_RXD_QW1_ERROR_SHIFT;
- rx_error &= ~(1 << I40E_RX_DESC_ERROR_HBO_SHIFT);
+ rx_error &= ~BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
I40E_RXD_QW1_PTYPE_SHIFT;
@@ -1779,17 +1725,14 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
I40E_RX_INCREMENT(rx_ring, i);
if (unlikely(
- !(rx_status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
+ !(rx_status & BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
rx_ring->rx_stats.non_eop_descs++;
continue;
}
/* ERR_MASK will only have valid bits if EOP set */
- if (unlikely(rx_error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
+ if (unlikely(rx_error & BIT(I40E_RX_DESC_ERROR_RXE_SHIFT))) {
dev_kfree_skb_any(skb);
- /* TODO: shouldn't we increment a counter indicating the
- * drop?
- */
continue;
}
@@ -1810,7 +1753,7 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);
- vlan_tag = rx_status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
+ vlan_tag = rx_status & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
: 0;
#ifdef I40E_FCOE
@@ -1821,7 +1764,6 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
#endif
i40e_receive_skb(rx_ring, skb, vlan_tag);
- rx_ring->netdev->last_rx = jiffies;
rx_desc->wb.qword1.status_error_len = 0;
} while (likely(total_rx_packets < budget));
@@ -1835,6 +1777,96 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
return total_rx_packets;
}
+static u32 i40e_buildreg_itr(const int type, const u16 itr)
+{
+ u32 val;
+
+ val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
+ I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
+ (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
+ (itr << I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT);
+
+ return val;
+}
+
+/* a small macro to shorten up some long lines */
+#define INTREG I40E_PFINT_DYN_CTLN
+
+/**
+ * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
+ * @vsi: the VSI we care about
+ * @q_vector: q_vector for which itr is being updated and interrupt enabled
+ *
+ **/
+static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
+ struct i40e_q_vector *q_vector)
+{
+ struct i40e_hw *hw = &vsi->back->hw;
+ bool rx = false, tx = false;
+ u32 rxval, txval;
+ int vector;
+
+ vector = (q_vector->v_idx + vsi->base_vector);
+
+ /* avoid dynamic calculation if in countdown mode OR if
+ * all dynamic is disabled
+ */
+ rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
+
+ if (q_vector->itr_countdown > 0 ||
+ (!ITR_IS_DYNAMIC(vsi->rx_itr_setting) &&
+ !ITR_IS_DYNAMIC(vsi->tx_itr_setting))) {
+ goto enable_int;
+ }
+
+ if (ITR_IS_DYNAMIC(vsi->rx_itr_setting)) {
+ rx = i40e_set_new_dynamic_itr(&q_vector->rx);
+ rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr);
+ }
+
+ if (ITR_IS_DYNAMIC(vsi->tx_itr_setting)) {
+ tx = i40e_set_new_dynamic_itr(&q_vector->tx);
+ txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr);
+ }
+
+ if (rx || tx) {
+ /* get the higher of the two ITR adjustments and
+ * use the same value for both ITR registers
+ * when in adaptive mode (Rx and/or Tx)
+ */
+ u16 itr = max(q_vector->tx.itr, q_vector->rx.itr);
+
+ q_vector->tx.itr = q_vector->rx.itr = itr;
+ txval = i40e_buildreg_itr(I40E_TX_ITR, itr);
+ tx = true;
+ rxval = i40e_buildreg_itr(I40E_RX_ITR, itr);
+ rx = true;
+ }
+
+ /* only need to enable the interrupt once, but need
+ * to possibly update both ITR values
+ */
+ if (rx) {
+ /* set the INTENA_MSK_MASK so that this first write
+ * won't actually enable the interrupt, instead just
+ * updating the ITR (it's bit 31 PF and VF)
+ */
+ rxval |= BIT(31);
+ /* don't check _DOWN because interrupt isn't being enabled */
+ wr32(hw, INTREG(vector - 1), rxval);
+ }
+
+enable_int:
+ if (!test_bit(__I40E_DOWN, &vsi->state))
+ wr32(hw, INTREG(vector - 1), txval);
+
+ if (q_vector->itr_countdown)
+ q_vector->itr_countdown--;
+ else
+ q_vector->itr_countdown = ITR_COUNTDOWN_START;
+
+}
+
/**
* i40e_napi_poll - NAPI polling Rx/Tx cleanup routine
* @napi: napi struct with our devices info in it
@@ -1853,7 +1885,7 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
bool clean_complete = true;
bool arm_wb = false;
int budget_per_ring;
- int cleaned;
+ int work_done = 0;
if (test_bit(__I40E_DOWN, &vsi->state)) {
napi_complete(napi);
@@ -1866,58 +1898,62 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
i40e_for_each_ring(ring, q_vector->tx) {
clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit);
arm_wb |= ring->arm_wb;
+ ring->arm_wb = false;
}
+ /* Handle case where we are called by netpoll with a budget of 0 */
+ if (budget <= 0)
+ goto tx_only;
+
/* We attempt to distribute budget to each Rx queue fairly, but don't
* allow the budget to go below 1 because that would exit polling early.
*/
budget_per_ring = max(budget/q_vector->num_ringpairs, 1);
i40e_for_each_ring(ring, q_vector->rx) {
+ int cleaned;
+
if (ring_is_ps_enabled(ring))
cleaned = i40e_clean_rx_irq_ps(ring, budget_per_ring);
else
cleaned = i40e_clean_rx_irq_1buf(ring, budget_per_ring);
+
+ work_done += cleaned;
/* if we didn't clean as many as budgeted, we must be done */
clean_complete &= (budget_per_ring != cleaned);
}
/* If work not completed, return budget and polling will return */
if (!clean_complete) {
+tx_only:
if (arm_wb)
i40e_force_wb(vsi, q_vector);
return budget;
}
+ if (vsi->back->flags & I40E_TXR_FLAGS_WB_ON_ITR)
+ q_vector->arm_wb_state = false;
+
/* Work is done so exit the polling mode and re-enable the interrupt */
- napi_complete(napi);
- if (ITR_IS_DYNAMIC(vsi->rx_itr_setting) ||
- ITR_IS_DYNAMIC(vsi->tx_itr_setting))
- i40e_update_dynamic_itr(q_vector);
-
- if (!test_bit(__I40E_DOWN, &vsi->state)) {
- if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
- i40e_irq_dynamic_enable(vsi,
- q_vector->v_idx + vsi->base_vector);
- } else {
- struct i40e_hw *hw = &vsi->back->hw;
- /* We re-enable the queue 0 cause, but
- * don't worry about dynamic_enable
- * because we left it on for the other
- * possible interrupts during napi
- */
- u32 qval = rd32(hw, I40E_QINT_RQCTL(0));
- qval |= I40E_QINT_RQCTL_CAUSE_ENA_MASK;
- wr32(hw, I40E_QINT_RQCTL(0), qval);
-
- qval = rd32(hw, I40E_QINT_TQCTL(0));
- qval |= I40E_QINT_TQCTL_CAUSE_ENA_MASK;
- wr32(hw, I40E_QINT_TQCTL(0), qval);
-
- i40e_irq_dynamic_enable_icr0(vsi->back);
- }
+ napi_complete_done(napi, work_done);
+ if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
+ i40e_update_enable_itr(vsi, q_vector);
+ } else { /* Legacy mode */
+ struct i40e_hw *hw = &vsi->back->hw;
+ /* We re-enable the queue 0 cause, but
+ * don't worry about dynamic_enable
+ * because we left it on for the other
+ * possible interrupts during napi
+ */
+ u32 qval = rd32(hw, I40E_QINT_RQCTL(0)) |
+ I40E_QINT_RQCTL_CAUSE_ENA_MASK;
+
+ wr32(hw, I40E_QINT_RQCTL(0), qval);
+ qval = rd32(hw, I40E_QINT_TQCTL(0)) |
+ I40E_QINT_TQCTL_CAUSE_ENA_MASK;
+ wr32(hw, I40E_QINT_TQCTL(0), qval);
+ i40e_irq_dynamic_enable_icr0(vsi->back);
}
-
return 0;
}
@@ -1925,11 +1961,11 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
* i40e_atr - Add a Flow Director ATR filter
* @tx_ring: ring to add programming descriptor to
* @skb: send buffer
- * @flags: send flags
+ * @tx_flags: send tx flags
* @protocol: wire protocol
**/
static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
- u32 flags, __be16 protocol)
+ u32 tx_flags, __be16 protocol)
{
struct i40e_filter_program_desc *fdir_desc;
struct i40e_pf *pf = tx_ring->vsi->back;
@@ -1954,30 +1990,50 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
if (!tx_ring->atr_sample_rate)
return;
- /* snag network header to get L4 type and address */
- hdr.network = skb_network_header(skb);
+ if (!(tx_flags & (I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6)))
+ return;
- /* Currently only IPv4/IPv6 with TCP is supported */
- if (protocol == htons(ETH_P_IP)) {
- if (hdr.ipv4->protocol != IPPROTO_TCP)
- return;
+ if (!(tx_flags & I40E_TX_FLAGS_VXLAN_TUNNEL)) {
+ /* snag network header to get L4 type and address */
+ hdr.network = skb_network_header(skb);
- /* access ihl as a u8 to avoid unaligned access on ia64 */
- hlen = (hdr.network[0] & 0x0F) << 2;
- } else if (protocol == htons(ETH_P_IPV6)) {
- if (hdr.ipv6->nexthdr != IPPROTO_TCP)
+ /* Currently only IPv4/IPv6 with TCP is supported
+ * access ihl as u8 to avoid unaligned access on ia64
+ */
+ if (tx_flags & I40E_TX_FLAGS_IPV4)
+ hlen = (hdr.network[0] & 0x0F) << 2;
+ else if (protocol == htons(ETH_P_IPV6))
+ hlen = sizeof(struct ipv6hdr);
+ else
return;
-
- hlen = sizeof(struct ipv6hdr);
} else {
- return;
+ hdr.network = skb_inner_network_header(skb);
+ hlen = skb_inner_network_header_len(skb);
}
+ /* Currently only IPv4/IPv6 with TCP is supported
+ * Note: tx_flags gets modified to reflect inner protocols in
+ * tx_enable_csum function if encap is enabled.
+ */
+ if ((tx_flags & I40E_TX_FLAGS_IPV4) &&
+ (hdr.ipv4->protocol != IPPROTO_TCP))
+ return;
+ else if ((tx_flags & I40E_TX_FLAGS_IPV6) &&
+ (hdr.ipv6->nexthdr != IPPROTO_TCP))
+ return;
+
th = (struct tcphdr *)(hdr.network + hlen);
/* Due to lack of space, no more new filters can be programmed */
if (th->syn && (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
return;
+ if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) {
+ /* HW ATR eviction will take care of removing filters on FIN
+ * and RST packets.
+ */
+ if (th->fin || th->rst)
+ return;
+ }
tx_ring->atr_count++;
@@ -2022,9 +2078,19 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT;
dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
- dtype_cmd |=
- ((u32)pf->fd_atr_cnt_idx << I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
- I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
+ if (!(tx_flags & I40E_TX_FLAGS_VXLAN_TUNNEL))
+ dtype_cmd |=
+ ((u32)I40E_FD_ATR_STAT_IDX(pf->hw.pf_id) <<
+ I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
+ I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
+ else
+ dtype_cmd |=
+ ((u32)I40E_FD_ATR_TUNNEL_STAT_IDX(pf->hw.pf_id) <<
+ I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
+ I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
+
+ if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)
+ dtype_cmd |= I40E_TXD_FLTR_QW1_ATR_MASK;
fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
fdir_desc->rsvd = cpu_to_le32(0);
@@ -2045,13 +2111,13 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
* otherwise returns 0 to indicate the flags has been set properly.
**/
#ifdef I40E_FCOE
-int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
- struct i40e_ring *tx_ring,
- u32 *flags)
-#else
-static int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
+inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
struct i40e_ring *tx_ring,
u32 *flags)
+#else
+static inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
+ struct i40e_ring *tx_ring,
+ u32 *flags)
#endif
{
__be16 protocol = skb->protocol;
@@ -2077,6 +2143,7 @@ static int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
/* else if it is a SW VLAN, check the next protocol and store the tag */
} else if (protocol == htons(ETH_P_8021Q)) {
struct vlan_hdr *vhdr, _vhdr;
+
vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr);
if (!vhdr)
return -EINVAL;
@@ -2119,16 +2186,15 @@ out:
* i40e_tso - set up the tso context descriptor
* @tx_ring: ptr to the ring to send
* @skb: ptr to the skb we're sending
- * @tx_flags: the collected send information
- * @protocol: the send protocol
* @hdr_len: ptr to the size of the packet header
+ * @cd_type_cmd_tso_mss: ptr to u64 object
* @cd_tunneling: ptr to context descriptor bits
*
* Returns 0 if no TSO can happen, 1 if tso is going, or error
**/
static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb,
- u32 tx_flags, __be16 protocol, u8 *hdr_len,
- u64 *cd_type_cmd_tso_mss, u32 *cd_tunneling)
+ u8 *hdr_len, u64 *cd_type_cmd_tso_mss,
+ u32 *cd_tunneling)
{
u32 cd_cmd, cd_tso_len, cd_mss;
struct ipv6hdr *ipv6h;
@@ -2181,6 +2247,7 @@ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb,
* @tx_ring: ptr to the ring to send
* @skb: ptr to the skb we're sending
* @tx_flags: the collected send information
+ * @cd_type_cmd_tso_mss: ptr to u64 object
*
* Returns 0 if no Tx timestamp can happen and 1 if the timestamp will happen
**/
@@ -2220,12 +2287,13 @@ static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb,
/**
* i40e_tx_enable_csum - Enable Tx checksum offloads
* @skb: send buffer
- * @tx_flags: Tx flags currently set
+ * @tx_flags: pointer to Tx flags currently set
* @td_cmd: Tx descriptor command bits to set
* @td_offset: Tx descriptor header offsets to set
+ * @tx_ring: Tx descriptor ring
* @cd_tunneling: ptr to context desc bits
**/
-static void i40e_tx_enable_csum(struct sk_buff *skb, u32 tx_flags,
+static void i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
u32 *td_cmd, u32 *td_offset,
struct i40e_ring *tx_ring,
u32 *cd_tunneling)
@@ -2235,12 +2303,20 @@ static void i40e_tx_enable_csum(struct sk_buff *skb, u32 tx_flags,
struct iphdr *this_ip_hdr;
u32 network_hdr_len;
u8 l4_hdr = 0;
+ struct udphdr *oudph;
+ struct iphdr *oiph;
u32 l4_tunnel = 0;
if (skb->encapsulation) {
switch (ip_hdr(skb)->protocol) {
case IPPROTO_UDP:
+ oudph = udp_hdr(skb);
+ oiph = ip_hdr(skb);
l4_tunnel = I40E_TXD_CTX_UDP_TUNNELING;
+ *tx_flags |= I40E_TX_FLAGS_VXLAN_TUNNEL;
+ break;
+ case IPPROTO_GRE:
+ l4_tunnel = I40E_TXD_CTX_GRE_TUNNELING;
break;
default:
return;
@@ -2250,18 +2326,17 @@ static void i40e_tx_enable_csum(struct sk_buff *skb, u32 tx_flags,
this_ipv6_hdr = inner_ipv6_hdr(skb);
this_tcp_hdrlen = inner_tcp_hdrlen(skb);
- if (tx_flags & I40E_TX_FLAGS_IPV4) {
-
- if (tx_flags & I40E_TX_FLAGS_TSO) {
+ if (*tx_flags & I40E_TX_FLAGS_IPV4) {
+ if (*tx_flags & I40E_TX_FLAGS_TSO) {
*cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4;
ip_hdr(skb)->check = 0;
} else {
*cd_tunneling |=
I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
}
- } else if (tx_flags & I40E_TX_FLAGS_IPV6) {
+ } else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
*cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6;
- if (tx_flags & I40E_TX_FLAGS_TSO)
+ if (*tx_flags & I40E_TX_FLAGS_TSO)
ip_hdr(skb)->check = 0;
}
@@ -2273,8 +2348,17 @@ static void i40e_tx_enable_csum(struct sk_buff *skb, u32 tx_flags,
skb_transport_offset(skb)) >> 1) <<
I40E_TXD_CTX_QW0_NATLEN_SHIFT;
if (this_ip_hdr->version == 6) {
- tx_flags &= ~I40E_TX_FLAGS_IPV4;
- tx_flags |= I40E_TX_FLAGS_IPV6;
+ *tx_flags &= ~I40E_TX_FLAGS_IPV4;
+ *tx_flags |= I40E_TX_FLAGS_IPV6;
+ }
+ if ((tx_ring->flags & I40E_TXR_FLAGS_OUTER_UDP_CSUM) &&
+ (l4_tunnel == I40E_TXD_CTX_UDP_TUNNELING) &&
+ (*cd_tunneling & I40E_TXD_CTX_QW0_EXT_IP_MASK)) {
+ oudph->check = ~csum_tcpudp_magic(oiph->saddr,
+ oiph->daddr,
+ (skb->len - skb_transport_offset(skb)),
+ IPPROTO_UDP, 0);
+ *cd_tunneling |= I40E_TXD_CTX_QW0_L4T_CS_MASK;
}
} else {
network_hdr_len = skb_network_header_len(skb);
@@ -2284,12 +2368,12 @@ static void i40e_tx_enable_csum(struct sk_buff *skb, u32 tx_flags,
}
/* Enable IP checksum offloads */
- if (tx_flags & I40E_TX_FLAGS_IPV4) {
+ if (*tx_flags & I40E_TX_FLAGS_IPV4) {
l4_hdr = this_ip_hdr->protocol;
/* the stack computes the IP header already, the only time we
* need the hardware to recompute it is in the case of TSO.
*/
- if (tx_flags & I40E_TX_FLAGS_TSO) {
+ if (*tx_flags & I40E_TX_FLAGS_TSO) {
*td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
this_ip_hdr->check = 0;
} else {
@@ -2298,7 +2382,7 @@ static void i40e_tx_enable_csum(struct sk_buff *skb, u32 tx_flags,
/* Now set the td_offset for IP header length */
*td_offset = (network_hdr_len >> 2) <<
I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
- } else if (tx_flags & I40E_TX_FLAGS_IPV6) {
+ } else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
l4_hdr = this_ipv6_hdr->nexthdr;
*td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
/* Now set the td_offset for IP header length */
@@ -2396,9 +2480,9 @@ static inline int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
* Returns 0 if stop is not needed
**/
#ifdef I40E_FCOE
-int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
+inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
#else
-static int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
+static inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
#endif
{
if (likely(I40E_DESC_UNUSED(tx_ring) >= size))
@@ -2473,13 +2557,13 @@ linearize_chk_done:
* @td_offset: offset for checksum or crc
**/
#ifdef I40E_FCOE
-void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
- struct i40e_tx_buffer *first, u32 tx_flags,
- const u8 hdr_len, u32 td_cmd, u32 td_offset)
-#else
-static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
+inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
struct i40e_tx_buffer *first, u32 tx_flags,
const u8 hdr_len, u32 td_cmd, u32 td_offset)
+#else
+static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
+ struct i40e_tx_buffer *first, u32 tx_flags,
+ const u8 hdr_len, u32 td_cmd, u32 td_offset)
#endif
{
unsigned int data_len = skb->data_len;
@@ -2491,6 +2575,9 @@ static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
u32 td_tag = 0;
dma_addr_t dma;
u16 gso_segs;
+ u16 desc_count = 0;
+ bool tail_bump = true;
+ bool do_rs = false;
if (tx_flags & I40E_TX_FLAGS_HW_VLAN) {
td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
@@ -2531,6 +2618,8 @@ static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
tx_desc++;
i++;
+ desc_count++;
+
if (i == tx_ring->count) {
tx_desc = I40E_TX_DESC(tx_ring, 0);
i = 0;
@@ -2550,6 +2639,8 @@ static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
tx_desc++;
i++;
+ desc_count++;
+
if (i == tx_ring->count) {
tx_desc = I40E_TX_DESC(tx_ring, 0);
i = 0;
@@ -2564,37 +2655,6 @@ static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
tx_bi = &tx_ring->tx_bi[i];
}
- /* Place RS bit on last descriptor of any packet that spans across the
- * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline.
- */
- if (((i & WB_STRIDE) != WB_STRIDE) &&
- (first <= &tx_ring->tx_bi[i]) &&
- (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) {
- tx_desc->cmd_type_offset_bsz =
- build_ctob(td_cmd, td_offset, size, td_tag) |
- cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP <<
- I40E_TXD_QW1_CMD_SHIFT);
- } else {
- tx_desc->cmd_type_offset_bsz =
- build_ctob(td_cmd, td_offset, size, td_tag) |
- cpu_to_le64((u64)I40E_TXD_CMD <<
- I40E_TXD_QW1_CMD_SHIFT);
- }
-
- netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
- tx_ring->queue_index),
- first->bytecount);
-
- /* set the timestamp */
- first->time_stamp = jiffies;
-
- /* Force memory writes to complete before letting h/w
- * know there are new descriptors to fetch. (Only
- * applicable for weak-ordered memory model archs,
- * such as IA-64).
- */
- wmb();
-
/* set next_to_watch value indicating a packet is present */
first->next_to_watch = tx_desc;
@@ -2604,12 +2664,71 @@ static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
tx_ring->next_to_use = i;
+ netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
+ tx_ring->queue_index),
+ first->bytecount);
i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);
+
+ /* Algorithm to optimize tail and RS bit setting:
+ * if xmit_more is supported
+ * if xmit_more is true
+ * do not update tail and do not mark RS bit.
+ * if xmit_more is false and last xmit_more was false
+ * if every packet spanned less than 4 desc
+ * then set RS bit on 4th packet and update tail
+ * on every packet
+ * else
+ * update tail and set RS bit on every packet.
+ * if xmit_more is false and last_xmit_more was true
+ * update tail and set RS bit.
+ *
+ * Optimization: wmb to be issued only in case of tail update.
+ * Also optimize the Descriptor WB path for RS bit with the same
+ * algorithm.
+ *
+ * Note: If there are less than 4 packets
+ * pending and interrupts were disabled the service task will
+ * trigger a force WB.
+ */
+ if (skb->xmit_more &&
+ !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
+ tx_ring->queue_index))) {
+ tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
+ tail_bump = false;
+ } else if (!skb->xmit_more &&
+ !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
+ tx_ring->queue_index)) &&
+ (!(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) &&
+ (tx_ring->packet_stride < WB_STRIDE) &&
+ (desc_count < WB_STRIDE)) {
+ tx_ring->packet_stride++;
+ } else {
+ tx_ring->packet_stride = 0;
+ tx_ring->flags &= ~I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
+ do_rs = true;
+ }
+ if (do_rs)
+ tx_ring->packet_stride = 0;
+
+ tx_desc->cmd_type_offset_bsz =
+ build_ctob(td_cmd, td_offset, size, td_tag) |
+ cpu_to_le64((u64)(do_rs ? I40E_TXD_CMD :
+ I40E_TX_DESC_CMD_EOP) <<
+ I40E_TXD_QW1_CMD_SHIFT);
+
/* notify HW of packet */
- if (!skb->xmit_more ||
- netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
- tx_ring->queue_index)))
+ if (!tail_bump)
+ prefetchw(tx_desc + 1);
+
+ if (tail_bump) {
+ /* Force memory writes to complete before letting h/w
+ * know there are new descriptors to fetch. (Only
+ * applicable for weak-ordered memory model archs,
+ * such as IA-64).
+ */
+ wmb();
writel(i, tx_ring->tail);
+ }
return;
@@ -2640,11 +2759,11 @@ dma_error:
* one descriptor.
**/
#ifdef I40E_FCOE
-int i40e_xmit_descriptor_count(struct sk_buff *skb,
- struct i40e_ring *tx_ring)
-#else
-static int i40e_xmit_descriptor_count(struct sk_buff *skb,
+inline int i40e_xmit_descriptor_count(struct sk_buff *skb,
struct i40e_ring *tx_ring)
+#else
+static inline int i40e_xmit_descriptor_count(struct sk_buff *skb,
+ struct i40e_ring *tx_ring)
#endif
{
unsigned int f;
@@ -2687,6 +2806,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
u8 hdr_len = 0;
int tsyn;
int tso;
+
if (0 == i40e_xmit_descriptor_count(skb, tx_ring))
return NETDEV_TX_BUSY;
@@ -2706,7 +2826,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
else if (protocol == htons(ETH_P_IPV6))
tx_flags |= I40E_TX_FLAGS_IPV6;
- tso = i40e_tso(tx_ring, skb, tx_flags, protocol, &hdr_len,
+ tso = i40e_tso(tx_ring, skb, &hdr_len,
&cd_type_cmd_tso_mss, &cd_tunneling);
if (tso < 0)
@@ -2719,10 +2839,11 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
if (tsyn)
tx_flags |= I40E_TX_FLAGS_TSYN;
- if (i40e_chk_linearize(skb, tx_flags))
+ if (i40e_chk_linearize(skb, tx_flags)) {
if (skb_linearize(skb))
goto out_drop;
-
+ tx_ring->tx_stats.tx_linearize++;
+ }
skb_tx_timestamp(skb);
/* always enable CRC insertion offload */
@@ -2732,7 +2853,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
if (skb->ip_summed == CHECKSUM_PARTIAL) {
tx_flags |= I40E_TX_FLAGS_CSUM;
- i40e_tx_enable_csum(skb, tx_flags, &td_cmd, &td_offset,
+ i40e_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset,
tx_ring, &cd_tunneling);
}