summary | refs | log | tree | commit | diff | stats
path: root/kernel/net/ipv4/inet_lro.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/net/ipv4/inet_lro.c')
-rw-r--r-- kernel/net/ipv4/inet_lro.c 374
1 files changed, 374 insertions, 0 deletions
diff --git a/kernel/net/ipv4/inet_lro.c b/kernel/net/ipv4/inet_lro.c
new file mode 100644
index 000000000..f17ea49b2
--- /dev/null
+++ b/kernel/net/ipv4/inet_lro.c
@@ -0,0 +1,374 @@
+/*
+ * linux/net/ipv4/inet_lro.c
+ *
+ * Large Receive Offload (ipv4 / tcp)
+ *
+ * (C) Copyright IBM Corp. 2007
+ *
+ * Authors:
+ * Jan-Bernd Themann <themann@de.ibm.com>
+ * Christoph Raisch <raisch@de.ibm.com>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+
+#include <linux/module.h>
+#include <linux/if_vlan.h>
+#include <linux/inet_lro.h>
+#include <net/checksum.h>
+
+/* Module metadata: license, author and one-line description. */
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jan-Bernd Themann <themann@de.ibm.com>");
+MODULE_DESCRIPTION("Large Receive Offload (ipv4 / tcp)");
+
+/*
+ * Header length helpers. The ihl / doff fields are shifted left by two
+ * to obtain a byte count that can be subtracted from tot_len.
+ * Arguments are parenthesized so any pointer expression may be passed.
+ */
+#define TCP_HDR_LEN(tcph) ((tcph)->doff << 2)
+#define IP_HDR_LEN(iph) ((iph)->ihl << 2)
+#define TCP_PAYLOAD_LENGTH(iph, tcph) \
+	(ntohs((iph)->tot_len) - IP_HDR_LEN(iph) - TCP_HDR_LEN(tcph))
+
+/* Header lengths in the units used by the ihl / doff fields */
+#define IPH_LEN_WO_OPTIONS 5
+#define TCPH_LEN_WO_OPTIONS 5
+#define TCPH_LEN_W_TIMESTAMP 8
+
+#define LRO_MAX_PG_HLEN 64
+
+/*
+ * Increment one counter in lro_mgr->stats. Wrapped in do { } while (0)
+ * so it behaves as a single statement, including in an unbraced if/else.
+ */
+#define LRO_INC_STATS(lro_mgr, attr) do { (lro_mgr)->stats.attr++; } while (0)
+
+/*
+ * Decide whether a TCP/IPv4 segment is suitable for aggregation.
+ *
+ * @iph:      IPv4 header of the segment
+ * @tcph:     TCP header of the segment
+ * @len:      length that iph->tot_len is required to equal
+ * @lro_desc: session the segment would join, or NULL when it would start
+ *            a new one (the timestamp ordering test is then skipped)
+ *
+ * Returns 0 when the segment is acceptable, -1 otherwise.
+ */
+
+static int lro_tcp_ip_check(const struct iphdr *iph, const struct tcphdr *tcph,
+			    int len, const struct net_lro_desc *lro_desc)
+{
+	const __be32 *opt;
+
+	/* padded frames are not aggregated: the lengths must match exactly */
+	if (ntohs(iph->tot_len) != len)
+		return -1;
+
+	/* segments without payload are not aggregated */
+	if (TCP_PAYLOAD_LENGTH(iph, tcph) == 0)
+		return -1;
+
+	/* IP header must not carry options */
+	if (iph->ihl != IPH_LEN_WO_OPTIONS)
+		return -1;
+
+	/* ACK has to be set, every other flag tested here has to be clear */
+	if (!tcph->ack)
+		return -1;
+	if (tcph->cwr || tcph->ece || tcph->urg ||
+	    tcph->rst || tcph->syn || tcph->fin)
+		return -1;
+
+	if (INET_ECN_is_ce(ipv4_get_dsfield(iph)))
+		return -1;
+
+	/* a TCP header without options needs no further inspection */
+	if (tcph->doff == TCPH_LEN_WO_OPTIONS)
+		return 0;
+
+	/* the only other accepted layout is NOP, NOP, timestamp */
+	if (tcph->doff != TCPH_LEN_W_TIMESTAMP)
+		return -1;
+
+	opt = (const __be32 *)(tcph + 1);
+	if (opt[0] != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
+			    | (TCPOPT_TIMESTAMP << 8)
+			    | TCPOLEN_TIMESTAMP))
+		return -1;
+
+	/* timestamp value must not be older than the one the session holds */
+	if (lro_desc && after(ntohl(lro_desc->tcp_rcv_tsval), ntohl(opt[1])))
+		return -1;
+
+	/* timestamp reply should not be zero */
+	if (opt[2] == 0)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Rewrite the IP and TCP headers referenced by @lro_desc so that they
+ * describe the whole aggregate: latest ack, window and timestamp reply,
+ * accumulated total length, and checksums recomputed to match.
+ * Also folds the TCP header sum into lro_desc->data_csum.
+ */
+static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc)
+{
+	struct tcphdr *th = lro_desc->tcph;
+	struct iphdr *ih = lro_desc->iph;
+	__be16 new_tot_len = htons(lro_desc->ip_tot_len);
+	__wsum hdr_sum;
+
+	th->ack_seq = lro_desc->tcp_ack;
+	th->window = lro_desc->tcp_window;
+
+	if (lro_desc->tcp_saw_tstamp) {
+		__be32 *opt = (__be32 *)(th + 1);
+
+		/* third option word holds the timestamp reply */
+		opt[2] = lro_desc->tcp_rcv_tsecr;
+	}
+
+	/* patch the IP checksum while the old length is still in the header */
+	csum_replace2(&ih->check, ih->tot_len, new_tot_len);
+	ih->tot_len = new_tot_len;
+
+	/* the header sum is taken with the checksum field zeroed */
+	th->check = 0;
+	hdr_sum = csum_partial(th, TCP_HDR_LEN(th), 0);
+	lro_desc->data_csum = csum_add(lro_desc->data_csum, hdr_sum);
+	th->check = csum_tcpudp_magic(ih->saddr, ih->daddr,
+				      lro_desc->ip_tot_len - IP_HDR_LEN(ih),
+				      IPPROTO_TCP, lro_desc->data_csum);
+}
+
+/*
+ * Derive a checksum for the payload of one segment.
+ *
+ * Starts from the complemented, unfolded value of tcph->check, then
+ * subtracts the TCP header sum (seeded with that same value) and the
+ * pseudo-header sum for a TCP segment of @len payload bytes.
+ */
+static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len)
+{
+	__wsum full = ~csum_unfold(tcph->check);
+	__wsum hdr = csum_partial(tcph, TCP_HDR_LEN(tcph), full);
+	__wsum pseudo = csum_tcpudp_nofold(iph->saddr, iph->daddr,
+					   len + TCP_HDR_LEN(tcph),
+					   IPPROTO_TCP, 0);
+	__wsum without_hdr = csum_sub(full, hdr);
+
+	return csum_sub(without_hdr, pseudo);
+}
+
+/*
+ * Start a new LRO session in @lro_desc with @skb as the parent.
+ *
+ * Records the header pointers, the next expected sequence number, the
+ * current ack/window, the timestamp option values (when the TCP header
+ * carries the timestamp layout) and the payload checksum of this first
+ * segment, then marks the descriptor active.
+ */
+static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb,
+			  struct iphdr *iph, struct tcphdr *tcph)
+{
+	u32 tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
+	int nr_frags = skb_shinfo(skb)->nr_frags;
+
+	lro_desc->parent = skb;
+	lro_desc->next_frag = &(skb_shinfo(skb)->frags[nr_frags]);
+	lro_desc->iph = iph;
+	lro_desc->tcph = tcph;
+	lro_desc->tcp_next_seq = ntohl(tcph->seq) + tcp_data_len;
+	lro_desc->tcp_ack = tcph->ack_seq;
+	lro_desc->tcp_window = tcph->window;
+
+	lro_desc->pkt_aggr_cnt = 1;
+	lro_desc->ip_tot_len = ntohs(iph->tot_len);
+
+	/* same constant lro_tcp_ip_check() uses, instead of a bare 8 */
+	if (tcph->doff == TCPH_LEN_W_TIMESTAMP) {
+		__be32 *ptr = (__be32 *)(tcph + 1);
+
+		lro_desc->tcp_saw_tstamp = 1;
+		lro_desc->tcp_rcv_tsval = *(ptr + 1);
+		lro_desc->tcp_rcv_tsecr = *(ptr + 2);
+	}
+
+	lro_desc->mss = tcp_data_len;
+	lro_desc->active = 1;
+
+	lro_desc->data_csum = lro_tcp_data_csum(iph, tcph,
+						tcp_data_len);
+}
+
+/* Zero every field of @lro_desc, which also clears its active flag. */
+static inline void lro_clear_desc(struct net_lro_desc *lro_desc)
+{
+	memset(lro_desc, 0, sizeof(*lro_desc));
+}
+
+/*
+ * Account one more segment of @tcp_data_len payload bytes in @lro_desc:
+ * packet count, total IP length, next expected sequence number, latest
+ * ack/window/timestamp reply, payload checksum, parent skb lengths and
+ * the largest payload seen so far (mss).
+ */
+static void lro_add_common(struct net_lro_desc *lro_desc, struct iphdr *iph,
+			   struct tcphdr *tcph, int tcp_data_len)
+{
+	struct sk_buff *head = lro_desc->parent;
+	__wsum seg_csum;
+
+	lro_desc->pkt_aggr_cnt++;
+	lro_desc->ip_tot_len += tcp_data_len;
+	lro_desc->tcp_next_seq += tcp_data_len;
+	lro_desc->tcp_ack = tcph->ack_seq;
+	lro_desc->tcp_window = tcph->window;
+
+	/* don't update tcp_rcv_tsval, would not work with PAWS */
+	if (lro_desc->tcp_saw_tstamp) {
+		const __be32 *opt = (const __be32 *)(tcph + 1);
+
+		lro_desc->tcp_rcv_tsecr = opt[2];
+	}
+
+	/* offset is the parent's length before this segment is added */
+	seg_csum = lro_tcp_data_csum(iph, tcph, tcp_data_len);
+	lro_desc->data_csum = csum_block_add(lro_desc->data_csum, seg_csum,
+					     head->len);
+
+	head->len += tcp_data_len;
+	head->data_len += tcp_data_len;
+
+	if (lro_desc->mss < tcp_data_len)
+		lro_desc->mss = tcp_data_len;
+}
+
+/*
+ * Append @skb to the session in @lro_desc.
+ *
+ * Updates the session bookkeeping, strips everything in front of the
+ * TCP payload from @skb, adds its truesize to the parent and links it
+ * at the tail of the parent's frag_list chain.
+ */
+static void lro_add_packet(struct net_lro_desc *lro_desc, struct sk_buff *skb,
+			   struct iphdr *iph, struct tcphdr *tcph)
+{
+	int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
+	struct sk_buff *head = lro_desc->parent;
+	struct sk_buff *tail;
+
+	lro_add_common(lro_desc, iph, tcph, tcp_data_len);
+
+	/* keep only the payload bytes in the appended skb */
+	skb_pull(skb, skb->len - tcp_data_len);
+	head->truesize += skb->truesize;
+
+	tail = lro_desc->last_skb;
+	if (!tail)
+		skb_shinfo(head)->frag_list = skb;
+	else
+		tail->next = skb;
+
+	lro_desc->last_skb = skb;
+}
+
+
+
+/*
+ * Compare the connection held by @lro_desc with the one @iph / @tcph
+ * belong to. Returns 0 when source and destination address and source
+ * and destination port all match, -1 otherwise.
+ */
+static int lro_check_tcp_conn(struct net_lro_desc *lro_desc,
+			      struct iphdr *iph,
+			      struct tcphdr *tcph)
+{
+	const struct iphdr *cur_iph = lro_desc->iph;
+	const struct tcphdr *cur_tcph = lro_desc->tcph;
+
+	if (cur_iph->saddr == iph->saddr &&
+	    cur_iph->daddr == iph->daddr &&
+	    cur_tcph->source == tcph->source &&
+	    cur_tcph->dest == tcph->dest)
+		return 0;
+
+	return -1;
+}
+
+/*
+ * Pick the descriptor to use for the connection of @iph / @tcph.
+ *
+ * An active descriptor that already tracks the connection wins. If
+ * there is none, the first inactive descriptor in @lro_arr is returned.
+ * If every descriptor is in use by another connection, the no_desc
+ * statistic is incremented and NULL is returned.
+ */
+static struct net_lro_desc *lro_get_desc(struct net_lro_mgr *lro_mgr,
+					 struct net_lro_desc *lro_arr,
+					 struct iphdr *iph,
+					 struct tcphdr *tcph)
+{
+	struct net_lro_desc *first_free = NULL;
+	int nr = lro_mgr->max_desc;
+	int idx;
+
+	for (idx = 0; idx < nr; idx++) {
+		struct net_lro_desc *cur = &lro_arr[idx];
+
+		if (!cur->active) {
+			/* remember only the lowest-indexed free slot */
+			if (!first_free)
+				first_free = cur;
+			continue;
+		}
+
+		if (lro_check_tcp_conn(cur, iph, tcph) == 0)
+			return cur;
+	}
+
+	if (!first_free) {
+		LRO_INC_STATS(lro_mgr, no_desc);
+	}
+
+	return first_free;
+}
+
+/*
+ * Hand the aggregate held in @lro_desc to the stack and reset the
+ * descriptor.
+ *
+ * Headers are rewritten only if more than one packet was merged. The
+ * parent's gso_size is set to the session's mss, and the skb is passed
+ * on with netif_receive_skb() when LRO_F_NAPI is set, netif_rx()
+ * otherwise.
+ */
+static void lro_flush(struct net_lro_mgr *lro_mgr,
+		      struct net_lro_desc *lro_desc)
+{
+	struct sk_buff *aggr = lro_desc->parent;
+
+	if (lro_desc->pkt_aggr_cnt > 1)
+		lro_update_tcp_ip_header(lro_desc);
+
+	skb_shinfo(aggr)->gso_size = lro_desc->mss;
+
+	if (!(lro_mgr->features & LRO_F_NAPI))
+		netif_rx(aggr);
+	else
+		netif_receive_skb(aggr);
+
+	LRO_INC_STATS(lro_mgr, flushed);
+	lro_clear_desc(lro_desc);
+}
+
+/*
+ * Try to absorb @skb into an LRO session.
+ *
+ * @lro_mgr: LRO manager holding the descriptor array and callbacks
+ * @skb:     received packet
+ * @priv:    opaque value forwarded to lro_mgr->get_skb_header()
+ *
+ * Returns 0 when @skb was taken over (it started a session or was
+ * appended to one). Returns 1 when the caller still has to deliver @skb
+ * itself; if the packet belonged to an active session but could not be
+ * appended, that session is flushed first.
+ */
+static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
+			  void *priv)
+{
+	struct net_lro_desc *lro_desc;
+	struct iphdr *iph;
+	struct tcphdr *tcph;
+	u64 flags;
+	int vlan_hdr_len = 0;
+
+	if (!lro_mgr->get_skb_header ||
+	    lro_mgr->get_skb_header(skb, (void *)&iph, (void *)&tcph,
+				    &flags, priv))
+		goto out;
+
+	if (!(flags & LRO_IPV4) || !(flags & LRO_TCP))
+		goto out;
+
+	lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
+	if (!lro_desc)
+		goto out;
+
+	/* an in-band VLAN header is counted in skb->len but not in tot_len */
+	if ((skb->protocol == htons(ETH_P_8021Q)) &&
+	    !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID))
+		vlan_hdr_len = VLAN_HLEN;
+
+	if (!lro_desc->active) { /* start new lro session */
+		if (lro_tcp_ip_check(iph, tcph, skb->len - vlan_hdr_len, NULL))
+			goto out;
+
+		skb->ip_summed = lro_mgr->ip_summed_aggr;
+		lro_init_desc(lro_desc, skb, iph, tcph);
+		LRO_INC_STATS(lro_mgr, aggregated);
+		return 0;
+	}
+
+	/* only the segment that directly follows can be appended */
+	if (lro_desc->tcp_next_seq != ntohl(tcph->seq))
+		goto out2;
+
+	/*
+	 * Use the same VLAN-adjusted length as for the session start;
+	 * with the raw skb->len every follow-up segment carrying an
+	 * in-band VLAN header failed the tot_len comparison.
+	 */
+	if (lro_tcp_ip_check(iph, tcph, skb->len - vlan_hdr_len, lro_desc))
+		goto out2;
+
+	lro_add_packet(lro_desc, skb, iph, tcph);
+	LRO_INC_STATS(lro_mgr, aggregated);
+
+	/* flush once the packet count or the aggregate length hits its cap */
+	if ((lro_desc->pkt_aggr_cnt >= lro_mgr->max_aggr) ||
+	    lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu))
+		lro_flush(lro_mgr, lro_desc);
+
+	return 0;
+
+out2: /* send aggregated SKBs to stack */
+	lro_flush(lro_mgr, lro_desc);
+
+out:
+	return 1;
+}
+
+/*
+ * Entry point for received packets: offer @skb to LRO and, if it is not
+ * taken, deliver it directly with netif_receive_skb() when LRO_F_NAPI
+ * is set or netif_rx() otherwise.
+ */
+void lro_receive_skb(struct net_lro_mgr *lro_mgr,
+		     struct sk_buff *skb,
+		     void *priv)
+{
+	/* zero means LRO took the skb; nothing left to do */
+	if (!__lro_proc_skb(lro_mgr, skb, priv))
+		return;
+
+	if (lro_mgr->features & LRO_F_NAPI)
+		netif_receive_skb(skb);
+	else
+		netif_rx(skb);
+}
+EXPORT_SYMBOL(lro_receive_skb);
+
+/* Flush every active session of @lro_mgr to the stack. */
+void lro_flush_all(struct net_lro_mgr *lro_mgr)
+{
+	int idx;
+
+	for (idx = 0; idx < lro_mgr->max_desc; idx++) {
+		struct net_lro_desc *desc = &lro_mgr->lro_arr[idx];
+
+		if (!desc->active)
+			continue;
+
+		lro_flush(lro_mgr, desc);
+	}
+}
+EXPORT_SYMBOL(lro_flush_all);