summaryrefslogtreecommitdiffstats
path: root/common/VIL/conntrack/rte_ct_synproxy.c
diff options
context:
space:
mode:
Diffstat (limited to 'common/VIL/conntrack/rte_ct_synproxy.c')
-rw-r--r--common/VIL/conntrack/rte_ct_synproxy.c873
1 files changed, 873 insertions, 0 deletions
diff --git a/common/VIL/conntrack/rte_ct_synproxy.c b/common/VIL/conntrack/rte_ct_synproxy.c
new file mode 100644
index 00000000..967596d1
--- /dev/null
+++ b/common/VIL/conntrack/rte_ct_synproxy.c
@@ -0,0 +1,873 @@
+/*
+// Copyright (c) 2017 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <rte_common.h>
+#include <rte_malloc.h>
+#include <rte_ether.h>
+#include <rte_ip.h>
+#include <rte_udp.h>
+#include <rte_icmp.h>
+#include <rte_byteorder.h>
+#include <rte_cycles.h>
+
+#include "rte_ct_tcp.h"
+
+
+/*
+ * OVERVIEW:
+ * This module will behave as a proxy between an initiator (external client)
+ * and listener (internal server).
+ * (1) Proxy receives SYN from initiator, replies with spoofed SYN-ACK message
+ * No packet is sent to the listener at this time.
+ * (2) Proxy receives ACK from the initiator, so the connection request is
+ * considered valid. Proxy sends a spoofed SYN message to the listener.
+ * (3) Proxy receives SYN-ACK message from listener. Proxy replies to listener
+ * with a spoofed ACK message. The connection is considered established.
+ * (4) Traffic is exchanged between initiator and listener. Sequence and
+ * ack numbers translated appropriately by proxy.
+ */
+
+/*
+ * DETAILS, when SynProxy on:
+ * (1) receive initial SYN from client
+ * call CT, all new connections assigned spoofed (random) SEQ number
+ * packet re-purposed as SYN-ACK back to client with spoofed SEQ
+ * -> change ethernet, IP, and TCP headers, put on appropriate output ring
+ * (2) receive ACK packet from client
+ * connection request now considered valid
+ * packet re-purposed as SYN to server, using SEQ from original SYN
+ * -> change TCP header, put on output ring originally targeted
+ * (3) receive SYN-ACK packet from server
+ * connection now ESTABLISHED
+ * compute SEQ difference between spoofed SEQ and real server SEQ
+ * packet re-purposed as ACK to server
+ * -> change ethernet, IP, and TCP headers, put on appropriate output ring
+ * (4) all further packets flow normally, except SEQ and ACK numbers must be
+ * modified by SEQ diff (SEQ in server->client direction, ACK and SACK in
+ * client->server direction)
+ *
+ */
+
+/*
+ * Byte offsets handed to the RTE_MBUF_METADATA_*_PTR() macros in this file
+ * to locate the packet headers inside an mbuf.
+ */
+#define META_DATA_OFFSET 128
+#define ETHERNET_START (META_DATA_OFFSET + RTE_PKTMBUF_HEADROOM)
+#define ETH_HDR_SIZE 14
+#define IP_START (ETHERNET_START + ETH_HDR_SIZE)
+/* not referenced in this file */
+#define PROTOCOL_START (IP_START + 9)
+#define IP_V4_HEADER_SIZE 20
+#define IP_V6_HEADER_SIZE 40
+/* TCP header position when the IP header is a 20 byte IPv4 header */
+#define TCP_START (IP_START + IP_V4_HEADER_SIZE)
+#define TCP_MIN_HDR_SIZE 20
+
+/* values written into the protocol and TTL / hop-limit fields of the
+ * IP headers built by this file
+ */
+#define RTE_TCP_PROTO_ID 6
+#define RTE_SP_DEFAULT_TTL 64
+
+/* not referenced in this file */
+#define RTE_SYNPROXY_MAX_SPOOFED_PKTS 64
+
+/* TCP flag bits as stored in tcp_hdr.tcp_flags */
+#define RTE_TCP_SYN 0x02
+#define RTE_TCP_ACK 0x10
+#define RTE_TCP_SYN_ACK (RTE_TCP_SYN | RTE_TCP_ACK)
+
+/* initial_window value of the default option sets below */
+#define RTE_SP_DEFAULT_WINDOW 29200
+/* when non-zero, the spoofed initial sequence number is forced to 10 */
+#define RTE_CT_DEBUG_SPOOFED_SEQ 0
+/* not referenced in this file */
+#define RTE_DPDK_IS_16_4 0
+
+/* values compared against the high nibble of the first IP header byte */
+#define IP_VERSION_4 4
+#define IP_VERSION_6 6
+
+
+/* default TCP options */
+/* TODO: need to set in config file */
+
+/*
+ * TCP options placed in the spoofed SYN-ACK sent back to the client when
+ * the client's SYN arrived with a 20 byte (IPv4) IP header.
+ *
+ * NOTE(review): initial_window is copied unswapped into tcp_hdr.rx_win by
+ * rte_synproxy_build_tcp_header(), while rte_sp_parse_options() stores
+ * rx_win exactly as read from the packet. Confirm which byte order this
+ * constant is meant to be in.
+ */
+struct rte_synproxy_options default_ipv4_synproxy_options = {
+ .options = RTE_SP_OPTIONS_MSS |
+ RTE_SP_OPTIONS_SACK_PERM |
+ RTE_SP_OPTIONS_WINDOW_SCALE,
+ .mss = 1460,
+ .window_scale = 7,
+ .initial_window = RTE_SP_DEFAULT_WINDOW
+};
+
+
+/*
+ * TCP options placed in the spoofed SYN-ACK sent back to the client when
+ * the client's SYN did not arrive with a 20 byte (IPv4) IP header.
+ * Same option set as the IPv4 defaults, with an MSS of 1440 instead of 1460.
+ */
+struct rte_synproxy_options default_ipv6_synproxy_options = {
+ .options = RTE_SP_OPTIONS_MSS |
+ RTE_SP_OPTIONS_SACK_PERM |
+ RTE_SP_OPTIONS_WINDOW_SCALE,
+ .mss = 1440,
+ .window_scale = 7,
+ .initial_window = RTE_SP_DEFAULT_WINDOW
+};
+
+/*
+ * Debug helper: print the mbuf packet length next to the total length
+ * found in the IPv4 header at IP_START, then dump the first 80 bytes of
+ * the packet to stdout.
+ */
+static __rte_unused void
+rte_ct_synproxy_print_pkt_info(struct rte_mbuf *pkt)
+{
+	const struct ipv4_hdr *ip4 = (const struct ipv4_hdr *)
+		RTE_MBUF_METADATA_UINT32_PTR(pkt, IP_START);
+	uint32_t mbuf_len = rte_pktmbuf_pkt_len(pkt);
+	uint16_t ip_len = rte_bswap16(ip4->total_length);
+
+	printf("\npacket length %u, ip length %u\n", mbuf_len, ip_len);
+	rte_pktmbuf_dump(stdout, pkt, 80);
+}
+
+/*
+ * Patch a 16 bit one's-complement checksum in place after one 32 bit
+ * value covered by it changed from num_before to num_after.
+ *
+ * num_before, num_after: old and new value, Intel (host) order.
+ * chksum: checksum field in network order, e.g. a pointer into the header.
+ */
+static inline void
+rte_sp_incremental_tcp_chksum_update_32(
+	uint32_t num_before,
+	uint32_t num_after,
+	uint16_t *chksum)
+{
+	const uint32_t removed = ~num_before;
+	uint32_t acc = (uint16_t)~rte_bswap16(*chksum);
+
+	/* add complement of the old value and the new value, 16 bits at a time */
+	acc += (removed >> 16) + (removed & 0xffff);
+	acc += (num_after >> 16) + (num_after & 0xffff);
+
+	/* fold the carries back into the low 16 bits */
+	acc = (acc & 0xffff) + (acc >> 16);
+	acc += acc >> 16;
+
+	*chksum = rte_bswap16((uint16_t)~acc);
+}
+
+
+
+/*
+ * Return the value used as spoofed initial sequence number: the current
+ * TSC cycle count truncated to its low 32 bits.
+ */
+static inline uint32_t
+rte_sp_get_random_seq_number(void)
+{
+	uint64_t tsc = rte_get_tsc_cycles();
+
+	return (uint32_t)tsc;
+}
+
+
+/*
+ * Read the IP version from the high nibble of the first header byte.
+ * Returns IP_VERSION_4 or IP_VERSION_6, or -1 for any other value.
+ */
+static int8_t rte_ct_ipversion(void *i_hdr)
+{
+	const uint8_t first_byte = *(const uint8_t *)i_hdr;
+
+	switch (first_byte >> 4) {
+	case IP_VERSION_4:
+		return IP_VERSION_4;
+	case IP_VERSION_6:
+		return IP_VERSION_6;
+	default:
+		return -1;
+	}
+}
+
+/*
+ * Make the mbuf packet length agree with the length announced by the
+ * (already rewritten) IP header plus the ethernet header: trim the mbuf
+ * when it is too long, append to it when it is too short.
+ *
+ * When the IP header size is neither IP_V4_HEADER_SIZE nor
+ * IP_V6_HEADER_SIZE the wanted length stays 0.
+ */
+static inline void
+rte_synproxy_adjust_pkt_length(struct rte_mbuf *pkt)
+{
+	uint16_t wanted_len = 0;
+	int ip_hdr_len = rte_ct_get_IP_hdr_size(pkt);
+	void *ip_hdr = RTE_MBUF_METADATA_UINT32_PTR(pkt, IP_START);
+
+	switch (ip_hdr_len) {
+	case IP_V4_HEADER_SIZE: {
+		struct ipv4_hdr *ip4 = (struct ipv4_hdr *)ip_hdr;
+
+		/* IPv4 total_length already includes the IP header */
+		wanted_len = rte_bswap16(ip4->total_length) + ETH_HDR_SIZE;
+		break;
+	}
+	case IP_V6_HEADER_SIZE: {
+		struct ipv6_hdr *ip6 = (struct ipv6_hdr *)ip_hdr;
+
+		/* IPv6 payload_len excludes the fixed header */
+		wanted_len = rte_bswap16(ip6->payload_len) +
+			IP_V6_HEADER_SIZE + ETH_HDR_SIZE;
+		break;
+	}
+	default:
+		break;
+	}
+
+	uint16_t current_len = rte_pktmbuf_pkt_len(pkt);
+
+	if (wanted_len < current_len)
+		rte_pktmbuf_trim(pkt, current_len - wanted_len);
+	else if (wanted_len > current_len)
+		rte_pktmbuf_append(pkt, wanted_len - current_len);
+}
+
+/*
+ * Fill in a fixed-size (no options) IPv4 header for a TCP packet.
+ *
+ * hdr4: header to overwrite.
+ * src_addr, dst_addr: addresses in Intel (host) order.
+ * tcp_length: number of bytes following the IP header.
+ *
+ * The header checksum is not written here; it is calculated later.
+ */
+static void
+rte_synproxy_build_ipv4_header(
+	struct ipv4_hdr *hdr4,
+	uint32_t src_addr,
+	uint32_t dst_addr,
+	uint16_t tcp_length)
+{
+	/* TODO: consider interface re-work, too many rte_bswapxx */
+	const uint16_t ip_total_len = tcp_length + IP_V4_HEADER_SIZE;
+
+	/* version 4, header length 5 words: options are not supported */
+	hdr4->version_ihl = 0x45;
+	hdr4->type_of_service = 0;
+	hdr4->total_length = rte_bswap16(ip_total_len);
+	hdr4->packet_id = 0;
+	/* Don't fragment bit, constant written in Intel order */
+	hdr4->fragment_offset = 0x0040;
+	hdr4->time_to_live = RTE_SP_DEFAULT_TTL;
+	hdr4->next_proto_id = RTE_TCP_PROTO_ID;
+	hdr4->dst_addr = rte_bswap32(dst_addr);
+	hdr4->src_addr = rte_bswap32(src_addr);
+}
+
+
+/*
+ * Fill in a fixed-size IPv6 header for a TCP packet.
+ *
+ * hdr6: header to overwrite.
+ * src_addr, dst_addr: 16 byte addresses; they may point into hdr6 itself
+ *                     (e.g. to swap the two addresses).
+ * tcp_length: number of bytes following the IPv6 header.
+ */
+static void
+rte_synproxy_build_ipv6_header(
+	struct ipv6_hdr *hdr6,
+	uint8_t *src_addr,
+	uint8_t *dst_addr,
+	uint16_t tcp_length)
+{
+	/* TODO: consider interface re-work, too many rte_bswapxx */
+	uint8_t saved_dst[16];
+	uint8_t saved_src[16];
+
+	/*
+	 * Snapshot both addresses before touching the header, since the
+	 * inputs may alias the fields written below.
+	 */
+	rte_mov16(saved_src, src_addr);
+	rte_mov16(saved_dst, dst_addr);
+
+	hdr6->vtc_flow = 0x60; /* Intel Order */
+	hdr6->payload_len = rte_bswap16(tcp_length);
+	hdr6->proto = RTE_TCP_PROTO_ID;
+	hdr6->hop_limits = RTE_SP_DEFAULT_TTL;
+
+	rte_mov16(hdr6->src_addr, saved_src);
+	rte_mov16(hdr6->dst_addr, saved_dst);
+}
+
+/*
+ * Write the TCP options selected in t_opts->options directly behind the
+ * fixed TCP header and set the header's data offset field accordingly.
+ *
+ * thdr: TCP header inside the packet; the options are written at thdr + 1.
+ * t_opts: option flags and values (mss, window_scale, ts_val,
+ *         ts_echo_reply).
+ *
+ * Options are emitted as 32 bit words: MSS (1 word), then either
+ * timestamp (3 words, carrying SACK-permitted in its first word when both
+ * are selected) or SACK-permitted alone (1 word), then window scale
+ * (1 word). At most 20 bytes are written.
+ *
+ * Returns the size of the TCP header including options, in bytes.
+ */
+
+static uint16_t
+rte_sp_add_tcp_options(struct tcp_hdr *thdr,
+ const struct rte_synproxy_options *t_opts)
+{
+ uint32_t *options_ptr = (uint32_t *)(thdr + 1);
+ uint32_t *saved_ptr = options_ptr;
+ uint8_t options = t_opts->options;
+ uint32_t option_bytes; /* options built in groups of 4 bytes */
+
+ /* kind, length, 16 bit MSS value */
+ if (options & RTE_SP_OPTIONS_MSS) {
+ option_bytes = (RTE_CT_TCPOPT_MSS << 24) |
+ (RTE_CT_TCPOLEN_MSS << 16) | t_opts->mss;
+ *options_ptr++ = rte_bswap32(option_bytes);
+ }
+
+ if (options & RTE_SP_OPTIONS_TIMESTAMP) {
+ /* if both timestamp and sack permitted options,
+ * pack together
+ */
+ if (options & RTE_SP_OPTIONS_SACK_PERM)
+ option_bytes = (RTE_CT_TCPOPT_SACK_PERM << 24) |
+ (RTE_CT_TCPOLEN_SACK_PERM << 16);
+ else
+ option_bytes = (RTE_CT_TCPOPT_NOP << 24) |
+ (RTE_CT_TCPOPT_NOP << 16);
+
+ /* timestamp kind and length fill the low half of the word */
+ option_bytes |= (RTE_CT_TCPOPT_TIMESTAMP << 8) |
+ RTE_CT_TCPOLEN_TIMESTAMP;
+ *options_ptr++ = rte_bswap32(option_bytes);
+ *options_ptr++ = rte_bswap32(t_opts->ts_val);
+ *options_ptr++ = rte_bswap32(t_opts->ts_echo_reply);
+ } else if (options & RTE_SP_OPTIONS_SACK_PERM) {
+ /* SACK-permitted alone, padded to a full word with two NOPs */
+ option_bytes = (RTE_CT_TCPOPT_NOP << 24) |
+ (RTE_CT_TCPOPT_NOP << 16) |
+ (RTE_CT_TCPOPT_SACK_PERM << 8) |
+ RTE_CT_TCPOLEN_SACK_PERM;
+ *options_ptr++ = rte_bswap32(option_bytes);
+ }
+
+ /* one NOP of padding, then kind, length and shift count */
+ if (options & RTE_SP_OPTIONS_WINDOW_SCALE) {
+ option_bytes = (RTE_CT_TCPOPT_NOP << 24) |
+ (RTE_CT_TCPOPT_WINDOW << 16) |
+ (RTE_CT_TCPOLEN_WINDOW << 8) |
+ t_opts->window_scale;
+ *options_ptr++ = rte_bswap32(option_bytes);
+ }
+
+ /* compute the data offset field, which is size of total
+ * TCP header in 32 bit words
+ */
+ /* TODO: diff from options ptr to thdr */
+ uint16_t data_offset_bytes = (uint16_t)RTE_PTR_DIFF(options_ptr,
+ saved_ptr) + sizeof(struct tcp_hdr);
+ /* the word count lives in the upper nibble of data_off */
+ thdr->data_off = (data_offset_bytes >> 2) << 4;
+
+ return data_offset_bytes;
+}
+
+/*
+ * Build a TCP header in place.
+ * Note that t_hdr must already be at the appropriate location in an mbuf.
+ *
+ * src_port, dst_port, seq, ack: values in Intel (host) order.
+ * flags: stored unchanged in tcp_flags.
+ * t_opts: supplies initial_window (copied into rx_win without byte
+ *         swapping) and, when add_options is non-zero, the TCP options
+ *         written behind the header.
+ *
+ * The checksum is not written here; it is calculated later.
+ *
+ * Returns the TCP header size in bytes, including any options added.
+ * TODO: consider interface re-work, too many rte_bswapxx
+ */
+static inline uint16_t
+rte_synproxy_build_tcp_header(
+	__rte_unused struct rte_mbuf *old_pkt,
+	struct tcp_hdr *t_hdr,
+	uint16_t src_port,
+	uint16_t dst_port,
+	uint32_t seq,
+	uint32_t ack,
+	uint8_t flags,
+	const struct rte_synproxy_options *t_opts,
+	uint8_t add_options)
+{
+	t_hdr->src_port = rte_bswap16(src_port);
+	t_hdr->dst_port = rte_bswap16(dst_port);
+	t_hdr->sent_seq = rte_bswap32(seq);
+	t_hdr->recv_ack = rte_bswap32(ack);
+	t_hdr->tcp_flags = flags;
+	t_hdr->rx_win = t_opts->initial_window;
+	t_hdr->tcp_urp = 0;
+
+	if (!add_options) {
+		/* bare header: data offset of 5 words in the upper nibble */
+		t_hdr->data_off = (TCP_MIN_HDR_SIZE >> 2) << 4;
+		return TCP_MIN_HDR_SIZE;
+	}
+
+	/* the options writer also sets data_off */
+	return rte_sp_add_tcp_options(t_hdr, t_opts);
+}
+
+/*
+ * Calculate the TCP checksum and, for IPv4, the IP header checksum.
+ *
+ * The checksum fields are set to zero before the checksum routines are
+ * called. The routines return the checksum in the correct byte order, so
+ * no rte_bswap16 is required.
+ *
+ * Headers whose version nibble is neither 4 nor 6 only get their TCP
+ * checksum field cleared.
+ */
+static void
+rte_synproxy_compute_checksums(void *i_hdr, struct tcp_hdr *t_hdr)
+{
+	/* TODO: look into h/w computation of checksums */
+
+	t_hdr->cksum = 0;
+
+	switch (rte_ct_ipversion(i_hdr)) {
+	case IP_VERSION_4: {
+		struct ipv4_hdr *ip4 = (struct ipv4_hdr *)i_hdr;
+
+		ip4->hdr_checksum = 0;
+		t_hdr->cksum = rte_ipv4_udptcp_cksum(ip4, t_hdr);
+		ip4->hdr_checksum = rte_ipv4_cksum(ip4);
+		break;
+	}
+	case IP_VERSION_6: {
+		struct ipv6_hdr *ip6 = (struct ipv6_hdr *)i_hdr;
+
+		t_hdr->cksum = rte_ipv6_udptcp_cksum(ip6, t_hdr);
+		break;
+	}
+	default:
+		break;
+	}
+}
+
+
+
+/*
+ * Building new packet headers:
+ * For IPv4 and IPv6 headers, no options and no fragmentation are supported.
+ * Header size is fixed.
+ * TCP header will (likely) have options, so header size is not fixed.
+ * TCP header will be built first, and size used in IP packet size calculation.
+ */
+
+/*
+ * Turn the client's SYN (old_pkt) into the spoofed SYN-ACK that is sent
+ * back to the client. The packet is rewritten in place.
+ *
+ * cd: connection data; receives the spoofed initial sequence number in
+ *     synproxy_data.original_spoofed_seq.
+ * old_pkt: mbuf holding the SYN.
+ */
+void
+rte_sp_cvt_to_spoofed_client_synack(struct rte_ct_cnxn_data *cd,
+	struct rte_mbuf *old_pkt)
+{
+	const int ip_hdr_len = rte_ct_get_IP_hdr_size(old_pkt);
+	void *ip_hdr = RTE_MBUF_METADATA_UINT32_PTR(old_pkt, IP_START);
+	struct tcp_hdr *tcp = (struct tcp_hdr *)
+		RTE_MBUF_METADATA_UINT32_PTR(old_pkt, IP_START + ip_hdr_len);
+	const struct rte_synproxy_options *opts =
+		(ip_hdr_len == IP_V4_HEADER_SIZE) ?
+		&default_ipv4_synproxy_options :
+		&default_ipv6_synproxy_options;
+
+	/* pick the spoofed sequence number and remember it */
+	uint32_t spoofed_isn = rte_sp_get_random_seq_number();
+
+	if (RTE_CT_DEBUG_SPOOFED_SEQ)
+		spoofed_isn = 10; /* something simple to aid debugging */
+
+	cd->ct_protocol.synproxy_data.original_spoofed_seq = spoofed_isn;
+
+	/* read what is needed from the SYN before it is overwritten */
+	const uint16_t syn_src_port = rte_bswap16(tcp->src_port);
+	const uint16_t syn_dst_port = rte_bswap16(tcp->dst_port);
+	const uint32_t client_seq = rte_bswap32(tcp->sent_seq);
+
+	/* TCP header with the ports reversed, acknowledging the SYN */
+	uint16_t tcp_hdr_len = rte_synproxy_build_tcp_header(old_pkt, tcp,
+			syn_dst_port, syn_src_port,
+			spoofed_isn, client_seq + 1,
+			RTE_TCP_SYN_ACK, opts, 1);
+
+	/* IP header with source and destination addresses reversed */
+	if (ip_hdr_len == IP_V4_HEADER_SIZE) {
+		struct ipv4_hdr *ip4 = (struct ipv4_hdr *)ip_hdr;
+		const uint32_t old_src = rte_bswap32(ip4->src_addr);
+		const uint32_t old_dst = rte_bswap32(ip4->dst_addr);
+
+		rte_synproxy_build_ipv4_header(ip4, old_dst, old_src,
+				tcp_hdr_len);
+	} else if (ip_hdr_len == IP_V6_HEADER_SIZE) {
+		struct ipv6_hdr *ip6 = (struct ipv6_hdr *)ip_hdr;
+
+		rte_synproxy_build_ipv6_header(ip6,
+				(uint8_t *)ip6->dst_addr,
+				(uint8_t *)ip6->src_addr, tcp_hdr_len);
+	}
+
+	rte_synproxy_adjust_pkt_length(old_pkt);
+	rte_synproxy_compute_checksums(ip_hdr, tcp);
+}
+
+
+/*
+ * Turn the client's ACK (old_pkt) into the spoofed SYN that is sent to
+ * the server. The packet is rewritten in place; ports and addresses keep
+ * their direction.
+ *
+ * cd: connection data; synproxy_data.cnxn_options supplies the TCP
+ *     options and window for the SYN.
+ * old_pkt: mbuf holding the ACK.
+ */
+void
+rte_sp_cvt_to_spoofed_server_syn(struct rte_ct_cnxn_data *cd,
+	struct rte_mbuf *old_pkt)
+{
+	const int ip_hdr_len = rte_ct_get_IP_hdr_size(old_pkt);
+	void *ip_hdr = RTE_MBUF_METADATA_UINT32_PTR(old_pkt, IP_START);
+	struct tcp_hdr *tcp = (struct tcp_hdr *)
+		RTE_MBUF_METADATA_UINT32_PTR(old_pkt, IP_START + ip_hdr_len);
+
+	/* read what is needed from the ACK before it is overwritten */
+	const uint16_t client_port = rte_bswap16(tcp->src_port);
+	const uint16_t server_port = rte_bswap16(tcp->dst_port);
+	const uint32_t ack_seq = rte_bswap32(tcp->sent_seq);
+
+	/* the SYN carries the sequence number preceding the ACK's */
+	uint16_t tcp_hdr_len = rte_synproxy_build_tcp_header(old_pkt, tcp,
+			client_port, server_port,
+			ack_seq - 1, 0,
+			RTE_TCP_SYN,
+			&cd->ct_protocol.synproxy_data.cnxn_options, 1);
+
+	if (ip_hdr_len == IP_V4_HEADER_SIZE) {
+		struct ipv4_hdr *ip4 = (struct ipv4_hdr *)ip_hdr;
+		const uint32_t src = rte_bswap32(ip4->src_addr);
+		const uint32_t dst = rte_bswap32(ip4->dst_addr);
+
+		rte_synproxy_build_ipv4_header(ip4, src, dst, tcp_hdr_len);
+	} else if (ip_hdr_len == IP_V6_HEADER_SIZE) {
+		struct ipv6_hdr *ip6 = (struct ipv6_hdr *)ip_hdr;
+
+		rte_synproxy_build_ipv6_header(ip6,
+				(uint8_t *)ip6->src_addr,
+				(uint8_t *)ip6->dst_addr, tcp_hdr_len);
+	}
+
+	rte_synproxy_adjust_pkt_length(old_pkt);
+	rte_synproxy_compute_checksums(ip_hdr, tcp);
+}
+
+/*
+ * Turn the server's SYN-ACK (old_pkt) into the spoofed ACK that is sent
+ * back to the server. The packet is rewritten in place.
+ *
+ * cd: connection data; synproxy_data.seq_diff is set to the server's real
+ *     sequence number minus synproxy_data.original_spoofed_seq.
+ * old_pkt: mbuf holding the SYN-ACK.
+ */
+void
+rte_sp_cvt_to_spoofed_server_ack(struct rte_ct_cnxn_data *cd,
+	struct rte_mbuf *old_pkt)
+{
+	const int ip_hdr_len = rte_ct_get_IP_hdr_size(old_pkt);
+	void *ip_hdr = RTE_MBUF_METADATA_UINT32_PTR(old_pkt, IP_START);
+	struct tcp_hdr *tcp = (struct tcp_hdr *)
+		RTE_MBUF_METADATA_UINT32_PTR(old_pkt, IP_START + ip_hdr_len);
+
+	/* read what is needed from the SYN-ACK before it is overwritten */
+	const uint32_t server_seq = rte_bswap32(tcp->sent_seq);
+	const uint32_t server_ack = rte_bswap32(tcp->recv_ack);
+	const uint16_t server_port = rte_bswap16(tcp->src_port);
+	const uint16_t client_port = rte_bswap16(tcp->dst_port);
+
+	/* remember how far the real sequence is from the spoofed one */
+	cd->ct_protocol.synproxy_data.seq_diff =
+		server_seq - cd->ct_protocol.synproxy_data.original_spoofed_seq;
+
+	/* bare ACK with the ports reversed, no TCP options */
+	uint16_t tcp_hdr_len = rte_synproxy_build_tcp_header(old_pkt, tcp,
+			client_port, server_port,
+			server_ack, server_seq + 1,
+			RTE_TCP_ACK,
+			&cd->ct_protocol.synproxy_data.cnxn_options, 0);
+
+	/* IP header with source and destination addresses reversed */
+	if (ip_hdr_len == IP_V4_HEADER_SIZE) {
+		struct ipv4_hdr *ip4 = (struct ipv4_hdr *)ip_hdr;
+		const uint32_t old_src = rte_bswap32(ip4->src_addr);
+		const uint32_t old_dst = rte_bswap32(ip4->dst_addr);
+
+		rte_synproxy_build_ipv4_header(ip4, old_dst, old_src,
+				tcp_hdr_len);
+	} else if (ip_hdr_len == IP_V6_HEADER_SIZE) {
+		struct ipv6_hdr *ip6 = (struct ipv6_hdr *)ip_hdr;
+
+		rte_synproxy_build_ipv6_header(ip6,
+				(uint8_t *)ip6->dst_addr,
+				(uint8_t *)ip6->src_addr, tcp_hdr_len);
+	}
+
+	rte_synproxy_adjust_pkt_length(old_pkt);
+	rte_synproxy_compute_checksums(ip_hdr, tcp);
+}
+
+/*
+ * If running synproxy and both halves of the proxied connection have been
+ * established, the seq or ack value of the packet needs to be adjusted.
+ * The value is adjusted by the difference between the spoofed server
+ * initial sequence number and the real server sequence number.
+ * In the client -> server direction, the ack must be increased by the
+ * difference before the window check.
+ * In the server -> client direction, the seq must be decreased by the
+ * difference after the window check.
+ */
+
+
+/*
+ * For a packet that is not travelling in the original direction on an
+ * established proxied connection, subtract synproxy_data.seq_diff from
+ * the TCP sequence number and patch the TCP checksum to match.
+ * Packets in the original direction, and connections not yet marked
+ * established, are left untouched.
+ */
+void
+rte_sp_adjust_server_seq_after_window_check(
+	struct rte_ct_cnxn_data *cd,
+	__rte_unused void *i_hdr,
+	struct tcp_hdr *thdr,
+	enum rte_ct_pkt_direction dir)
+{
+	if (!cd->ct_protocol.synproxy_data.cnxn_established ||
+			dir == RTE_CT_DIR_ORIGINAL)
+		return;
+
+	const uint32_t real_seq = rte_bswap32(thdr->sent_seq);
+	const uint32_t spoofed_seq =
+		real_seq - cd->ct_protocol.synproxy_data.seq_diff;
+
+	thdr->sent_seq = rte_bswap32(spoofed_seq);
+	rte_sp_incremental_tcp_chksum_update_32(real_seq, spoofed_seq,
+			&thdr->cksum);
+}
+
+
+/*
+ * Add diff to every 32 bit number inside the SACK option of a TCP header,
+ * if the header carries one, and patch the TCP checksum for each change.
+ *
+ * thdr: TCP header; its options are walked in place.
+ * diff: value added to each SACK number (host order arithmetic).
+ *
+ * Every option is bounds-checked against the remaining option bytes
+ * before it is touched, so a malformed option length can neither read nor
+ * write beyond the TCP header. A data offset smaller than the fixed
+ * header is treated as "no options". Processing stops after the first
+ * SACK option, at an end-of-list option, or at the first malformed option.
+ */
+static void
+rte_sp_adjust_client_sack_entries(
+	struct tcp_hdr *thdr,
+	uint32_t diff)
+{
+	uint32_t num_before, num_after;
+	uint16_t dataoff_in_bytes = (thdr->data_off & 0xf0) >> 2;
+
+	/* no options, or malformed data offset (would wrap the length) */
+	if (dataoff_in_bytes <= sizeof(struct tcp_hdr))
+		return;
+
+	uint16_t length = dataoff_in_bytes - sizeof(struct tcp_hdr);
+	uint8_t *options_ptr = (uint8_t *)(thdr + 1);
+
+	while (length > 0) {
+		uint8_t opcode = *options_ptr;
+
+		if (opcode == RTE_CT_TCPOPT_EOL)
+			return; /* end of options */
+
+		if (opcode == RTE_CT_TCPOPT_NOP) {
+			/* Ref: RFC 793 section 3.1 */
+			length--;
+			options_ptr++;
+			continue;
+		}
+
+		/*
+		 * Only read the size byte when it lies inside the option
+		 * area; a truncated option is reported as size 0.
+		 */
+		uint8_t opsize = (length >= 2) ? options_ptr[1] : 0;
+
+		if ((opsize < 2) || (opsize > length)) {
+			printf("ERROR!, opsize %i, length %i\n",
+				opsize, length);
+			return;
+		}
+
+		if (opcode == RTE_CT_TCPOPT_SACK) {
+			/*
+			 * SACK (selective ACK) contains a block of 1 to 4
+			 * entries of 8 bytes each. Each entry is a pair of
+			 * 32 bit numbers. This block follows the usual 2
+			 * bytes for opcode and opsize. Thus, the entire SACK
+			 * option must be 10, 18, 26 or 34 bytes long.
+			 */
+			uint8_t sack_blk_size = opsize - 2;
+			uint8_t *entry = options_ptr + 2;
+			/* count of 32 bit elements */
+			int num_acks = sack_blk_size >> 2;
+			int i;
+
+			if (unlikely(sack_blk_size > 32 ||
+					((sack_blk_size & 0x3) != 0))) {
+				printf("Sack block parsing failure\n");
+				return;
+			}
+
+			for (i = 0; i < num_acks; i++) {
+				uint32_t wire_val;
+
+				/* entries need not be 4 byte aligned */
+				memcpy(&wire_val, entry, sizeof(wire_val));
+				num_before = rte_bswap32(wire_val);
+				num_after = num_before + diff;
+				wire_val = rte_bswap32(num_after);
+				memcpy(entry, &wire_val, sizeof(wire_val));
+				entry += sizeof(wire_val);
+
+				rte_sp_incremental_tcp_chksum_update_32(
+						num_before,
+						num_after,
+						&thdr->cksum);
+			}
+
+			return;
+		}
+
+		/* some other option: skip over it */
+		options_ptr += opsize;
+		length -= opsize;
+	}
+}
+
+/*
+ * For a packet travelling in the original direction on an established
+ * proxied connection, add synproxy_data.seq_diff to the TCP ack number
+ * and to any SACK entries, patching the TCP checksum to match.
+ * Packets in the other direction, and connections not yet marked
+ * established, are left untouched.
+ */
+void
+rte_sp_adjust_client_ack_before_window_check(
+	struct rte_ct_cnxn_data *cd,
+	__rte_unused void *i_hdr,
+	struct tcp_hdr *thdr,
+	enum rte_ct_pkt_direction dir)
+{
+	if (!cd->ct_protocol.synproxy_data.cnxn_established ||
+			dir != RTE_CT_DIR_ORIGINAL)
+		return;
+
+	const uint32_t spoofed_ack = rte_bswap32(thdr->recv_ack);
+	const uint32_t real_ack =
+		spoofed_ack + cd->ct_protocol.synproxy_data.seq_diff;
+
+	thdr->recv_ack = rte_bswap32(real_ack);
+	rte_sp_incremental_tcp_chksum_update_32(spoofed_ack, real_ack,
+			&thdr->cksum);
+
+	/*
+	 * TODO: check if sack permitted before calling.
+	 * The SACK routine adjusts the TCP checksum itself.
+	 */
+	rte_sp_adjust_client_sack_entries(thdr,
+			cd->ct_protocol.synproxy_data.seq_diff);
+}
+
+
+
+
+/*
+ * Parse the TCP header options, if any, and save the interesting ones.
+ *
+ * options_ptr: first option byte (directly behind the fixed TCP header).
+ * length: number of option bytes.
+ * t_opts: receives the result. t_opts->options is cleared first and then
+ *         gets one flag per recognised option (MSS, window scale,
+ *         timestamp, SACK permitted); the matching value fields are only
+ *         written for options that are present with the expected length.
+ *
+ * Parsing stops at an end-of-list option or at the first malformed
+ * option. The option length byte is only read when it lies inside the
+ * option area, so a truncated trailing option cannot cause a read past
+ * the end of the options.
+ */
+static void
+rte_sp_parse_tcp_options(
+	uint8_t *options_ptr,
+	uint16_t length,
+	struct rte_synproxy_options *t_opts)
+{
+	int opsize;
+
+	t_opts->options = 0;
+
+	while (length > 0) {
+		uint8_t opcode = *options_ptr++;
+
+		if (opcode == RTE_CT_TCPOPT_EOL)
+			return;
+
+		if (opcode == RTE_CT_TCPOPT_NOP) {
+			length--;
+			continue; /* skip adjustments at loop bottom */
+		}
+
+		/* a truncated option (no size byte left) counts as size 0 */
+		opsize = (length >= 2) ? *options_ptr++ : 0;
+
+		if (unlikely(opsize < 2 || opsize > length)) {
+			/* TODO: Change printf to log */
+			printf("parsing error, opsize: %i, length: %i\n",
+				opsize, length);
+			return;
+		}
+
+		/* options_ptr now points at the option's value bytes */
+		switch (opcode) {
+
+		case RTE_CT_TCPOPT_MSS:
+			if (opsize == RTE_CT_TCPOLEN_MSS) {
+				uint16_t mss_wire;
+
+				/* value need not be 2 byte aligned */
+				memcpy(&mss_wire, options_ptr,
+					sizeof(mss_wire));
+				t_opts->mss = rte_bswap16(mss_wire);
+				t_opts->options |= RTE_SP_OPTIONS_MSS;
+			}
+			break;
+
+		case RTE_CT_TCPOPT_WINDOW:
+			if (opsize == RTE_CT_TCPOLEN_WINDOW) {
+				t_opts->window_scale = RTE_MIN(*options_ptr,
+					RTE_CT_MAX_TCP_WINDOW_SCALE);
+				t_opts->options |= RTE_SP_OPTIONS_WINDOW_SCALE;
+			}
+			break;
+
+		case RTE_CT_TCPOPT_TIMESTAMP:
+			if (opsize == RTE_CT_TCPOLEN_TIMESTAMP) {
+				uint32_t ts_val_wire;
+				uint32_t ts_ecr_wire;
+
+				/* values need not be 4 byte aligned */
+				memcpy(&ts_val_wire, options_ptr,
+					sizeof(ts_val_wire));
+				memcpy(&ts_ecr_wire, options_ptr + 4,
+					sizeof(ts_ecr_wire));
+				t_opts->ts_val = rte_bswap32(ts_val_wire);
+				t_opts->ts_echo_reply =
+					rte_bswap32(ts_ecr_wire);
+				t_opts->options |= RTE_SP_OPTIONS_TIMESTAMP;
+			}
+			break;
+
+		case RTE_CT_TCPOPT_SACK_PERM:
+			if (opsize == RTE_CT_TCPOLEN_SACK_PERM)
+				t_opts->options |= RTE_SP_OPTIONS_SACK_PERM;
+			break;
+
+		default:
+			break;
+		}
+
+		/* kind and size bytes were already consumed above */
+		options_ptr += opsize - 2;
+		length -= opsize;
+	}
+}
+
+/*
+ * Parse the TCP header options of pkt, if any, and save the interesting
+ * ones, together with the advertised window, in the connection's
+ * synproxy_data.cnxn_options.
+ *
+ * pkt: mbuf whose IP header starts at IP_START.
+ * cd: connection data receiving the parsed options.
+ *
+ * The window is stored exactly as found in the header (no byte swap).
+ * A TCP data offset smaller than the fixed header size is treated as
+ * "no options" instead of producing a wrapped-around option length.
+ */
+void
+rte_sp_parse_options(struct rte_mbuf *pkt, struct rte_ct_cnxn_data *cd)
+{
+	/* skip over IPv4 or IPv6 header */
+	int ip_hdr_length = rte_ct_get_IP_hdr_size(pkt);
+	struct tcp_hdr *thdr = (struct tcp_hdr *)
+		RTE_MBUF_METADATA_UINT32_PTR(pkt, IP_START + ip_hdr_length);
+	uint8_t *opt_ptr = RTE_MBUF_METADATA_UINT8_PTR(pkt,
+		(IP_START + ip_hdr_length + sizeof(struct tcp_hdr)));
+
+	struct rte_synproxy_options *t_opts =
+		&cd->ct_protocol.synproxy_data.cnxn_options;
+
+	/* data offset is the upper nibble, counted in 32 bit words */
+	uint16_t tcp_hdr_bytes = (thdr->data_off & 0xf0) >> 2;
+	uint16_t opt_bytes = 0;
+
+	if (tcp_hdr_bytes > sizeof(struct tcp_hdr))
+		opt_bytes = tcp_hdr_bytes - sizeof(struct tcp_hdr);
+
+	rte_sp_parse_tcp_options(opt_ptr, opt_bytes, t_opts);
+	t_opts->initial_window = thdr->rx_win;
+}
+
+
+
+
+/*
+ * Detach and return the tracker's list of buffered packets.
+ * The tracker's list is empty (NULL) afterwards; the return value is the
+ * previous list head, which may be NULL.
+ */
+struct rte_mbuf *
+rte_ct_get_buffered_synproxy_packets(
+	struct rte_ct_cnxn_tracker *ct)
+{
+	struct rte_mbuf *detached = ct->buffered_pkt_list;
+
+	ct->buffered_pkt_list = NULL;
+
+	return detached;
+}
+
+
+
+/* Turn the synproxy option of the tracker on and print the new setting. */
+void rte_ct_enable_synproxy(struct rte_ct_cnxn_tracker *ct)
+{
+	ct->misc_options.synproxy_enabled = 1;
+
+	printf("rte_ct_enable_synproxy = %d\n",
+		ct->misc_options.synproxy_enabled);
+}
+
+/* Turn the synproxy option of the tracker off. Nothing is printed. */
+void rte_ct_disable_synproxy(struct rte_ct_cnxn_tracker *ct)
+{
+	ct->misc_options.synproxy_enabled = 0;
+}
+
+/*
+ * Add pkt to the list of buffered packets of the connection.
+ *
+ * The list is linked through a pointer stored in each packet's metadata
+ * at ct->pointer_offset. Packets are pushed at the front, so the list is
+ * in reverse order of arrival; it is reversed again when the packets are
+ * released.
+ */
+void
+rte_ct_buffer_packet(
+	struct rte_ct_cnxn_tracker *ct,
+	struct rte_ct_cnxn_data *cd,
+	struct rte_mbuf *pkt)
+{
+	struct rte_mbuf **link = (struct rte_mbuf **)
+		RTE_MBUF_METADATA_UINT64_PTR(pkt, ct->pointer_offset);
+	struct rte_mbuf *old_head =
+		cd->ct_protocol.synproxy_data.buffered_pkt_list;
+
+	*link = old_head;
+	cd->ct_protocol.synproxy_data.buffered_pkt_list = pkt;
+}
+
+/*
+ * Move all packets buffered for a connection onto the tracker's list of
+ * buffered packets.
+ *
+ * ct: tracker; its buffered_pkt_list receives the packets. The list may
+ *     be empty (NULL), which is the state left behind by
+ *     rte_ct_get_buffered_synproxy_packets().
+ * cd: connection; its buffered list is emptied.
+ *
+ * The connection list is walked front to back and every packet is pushed
+ * onto the front of the tracker list. This reverses the connection list,
+ * restoring the original order of arrival. Order between different
+ * connections is irrelevant.
+ *
+ * An empty tracker list is handled like any other: the first packet
+ * moved simply gets a NULL next pointer. Returning early in that case
+ * would drop the packets, because the connection list has already been
+ * detached at that point.
+ */
+void
+rte_ct_release_buffered_packets(
+	struct rte_ct_cnxn_tracker *ct,
+	struct rte_ct_cnxn_data *cd)
+{
+	struct rte_mbuf *cnxn_list =
+		cd->ct_protocol.synproxy_data.buffered_pkt_list;
+
+	if (cnxn_list == NULL)
+		return;
+
+	cd->ct_protocol.synproxy_data.buffered_pkt_list = NULL;
+
+	struct rte_mbuf *trkr_list = ct->buffered_pkt_list;
+
+	while (cnxn_list != NULL) {
+		struct rte_mbuf *old_next;
+
+		struct rte_mbuf **next = (struct rte_mbuf **)
+			RTE_MBUF_METADATA_UINT64_PTR(cnxn_list,
+				ct->pointer_offset);
+
+		old_next = *next; /* save next cd packet */
+		*next = trkr_list; /* make this cd packet point to ct list */
+		trkr_list = cnxn_list; /* make the cd packet head of ct list */
+		cnxn_list = old_next; /* advance along cd list */
+	}
+
+	ct->buffered_pkt_list = trkr_list;
+}