summaryrefslogtreecommitdiffstats
path: root/common/VIL/conntrack
diff options
context:
space:
mode:
authorVishwesh M Rudramuni <vishweshmr@gmail.com>2017-04-18 19:41:40 +0530
committerDeepak S <deepak.s@linux.intel.com>2017-04-18 02:59:07 -0700
commit51cd08d9a3f2826088d122e2a5683315c77a2786 (patch)
tree3fac17a8f7bf362f0c77f1003615b2063d900d35 /common/VIL/conntrack
parent03aef84e240c5be8813634735d825420129f1460 (diff)
common: Adding common library for sample vnf
JIRA: SAMPLEVNF-3 This patch adds common libraries required as part of the sample vnf. This includes the following libraries 1. ACL library 2. SIP 3. FTP 4. Connection tracker 5. L2l3 stack - Interface Manager - ARP & ICMPv4 - ND & ICMPv6 and other common libraries needed for ip pipeline framework Change-Id: I117690b6b63fbcd76974cd7274518484e60980ab Signed-off-by: Vishwesh M Rudramuni <vishwesh.m.rudramuni@intel.com> [Push patch to gerrit] Signed-off-by: Deepak S <deepak.s@linux.intel.com>
Diffstat (limited to 'common/VIL/conntrack')
-rw-r--r--common/VIL/conntrack/rte_cnxn_tracking.c1804
-rw-r--r--common/VIL/conntrack/rte_cnxn_tracking.h413
-rw-r--r--common/VIL/conntrack/rte_ct_synproxy.c873
-rw-r--r--common/VIL/conntrack/rte_ct_tcp.c1116
-rw-r--r--common/VIL/conntrack/rte_ct_tcp.h484
-rw-r--r--common/VIL/conntrack/rte_ct_udp.c49
6 files changed, 4739 insertions, 0 deletions
diff --git a/common/VIL/conntrack/rte_cnxn_tracking.c b/common/VIL/conntrack/rte_cnxn_tracking.c
new file mode 100644
index 00000000..461ed422
--- /dev/null
+++ b/common/VIL/conntrack/rte_cnxn_tracking.c
@@ -0,0 +1,1804 @@
+/*
+// Copyright (c) 2017 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include <rte_ether.h>
+#include <rte_prefetch.h>
+#include <rte_cycles.h>
+#include <rte_malloc.h>
+#include <rte_memcpy.h>
+#include <rte_timer.h>
+#include <rte_spinlock.h>
+#include "rte_cnxn_tracking.h"
+#include "rte_ct_tcp.h"
+
+#define CNXN_TRX_DEBUG 0
+#define TESTING_TIMERS 0
+#define RTE_CT_TIMER_EXPIRED_DUMP 0
+
+#define META_DATA_OFFSET 128
+#define ETHERNET_START (META_DATA_OFFSET + RTE_PKTMBUF_HEADROOM)
+#define ETH_HDR_SIZE 14
+#define IP_START (ETHERNET_START + ETH_HDR_SIZE)
+#define PROTOCOL_START (IP_START + 9)
+#define SRC_ADDR_START (IP_START + 12)
+#define TCP_START (IP_START + 20)
+
+/* IPV6 changes */
+#define PROTOCOL_START_IPV6 (IP_START + 6)
+#define SRC_ADDR_START_IPV6 (IP_START + 8)
+#define TCP_START_IPV6 (IP_START + 40)
+
+#define TCP_PROTOCOL 6
+#define UDP_PROTOCOL 17
+#define TCP_FW_IPV4_KEY_SIZE 16
+
+#define TCP_FW_IPV6_KEY_SIZE 40
+
+#define IPv4_HEADER_SIZE 20
+#define IPv6_HEADER_SIZE 40
+
+#define IP_VERSION_4 4
+#define IP_VERSION_6 6
+
+static void
+rte_ct_cnxn_tracker_batch_lookup_basic_type(
+ struct rte_ct_cnxn_tracker *ct,
+ struct rte_mbuf **pkts,
+ uint64_t *pkts_mask,
+ uint64_t no_new_cnxn_mask,
+ uint64_t *reply_pkt_mask,
+ uint64_t *hijack_mask,
+ uint8_t ip_hdr_size_bytes);
+
+/*
+ * Check if the packet is valid for the given connection. "original_direction"
+ * is false if the address order need to be "flipped".See create_cnxn_hashkey().
+ * True otherwise. Return 0 if the packet is valid, or a negative otherwise.
+ */
+
+/* IP/TCP header print for debugging */
+static void
+rte_ct_cnxn_print_pkt(struct rte_mbuf *pkt, uint8_t type)
+{
+ int i;
+ uint8_t *rd = RTE_MBUF_METADATA_UINT8_PTR(pkt, IP_START);
+
+ printf("\n");
+ printf("IP and TCP/UDP headers:\n");
+
+ if (type == IP_VERSION_4) {
+ for (i = 0; i < 40; i++) {
+ printf("%02x ", rd[i]);
+ if ((i & 3) == 3)
+ printf("\n");
+ }
+ printf("\n");
+ }
+
+ if (type == IP_VERSION_6) {
+ for (i = 0; i < 60; i++) {
+ printf("%02x ", rd[i]);
+ if ((i & 3) == 3)
+ printf("\n");
+ }
+ printf("\n");
+ }
+
+}
+
+static void
+rte_cnxn_ip_type(uint8_t *type, struct rte_mbuf *pkt)
+{
+
+ int ip_hdr_size_bytes = rte_ct_get_IP_hdr_size(pkt);
+
+ if (ip_hdr_size_bytes == IPv4_HEADER_SIZE)
+ *type = IP_VERSION_4;
+
+ if (ip_hdr_size_bytes == IPv6_HEADER_SIZE)
+ *type = IP_VERSION_6;
+}
+
+static void
+rte_ct_print_hashkey(uint32_t *key)
+{
+ printf("Key: %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x \\\n",
+ key[0], key[1], key[2], key[3],
+ key[4], key[5], key[6], key[7], key[8], key[9]);
+}
+
+/*
+ * Create a hash key consisting of the source address/port, the destination
+ * address/ports, and the tcp protocol number. The address/port combos are
+ * treated as two 48 bit numbers and sorted. Thus the key is always the
+ * same regardless of the direction of the packet. Remembering if the numbers
+ * were "flipped" from the order in the packet, and comparing that to whether
+ * the original hash key was flipped, tells if this packet is from the same
+ * direction as the original sender or the response direction. Returns 1 (true)
+ * if the key was left in the original direction.
+ */
+uint8_t
+rte_ct_create_cnxn_hashkey(
+ uint32_t *src_addr,
+ uint32_t *dst_addr,
+ uint16_t src_port,
+ uint16_t dst_port,
+ uint8_t proto,
+ uint32_t *key,
+ uint8_t type)
+{
+ uint8_t hash_order_original_direction = 1;
+
+ key[9] = proto;
+
+ if (type == IP_VERSION_4) {
+ uint32_t source = *src_addr;
+ uint32_t dest = *dst_addr;
+
+ key[3] = key[4] = key[5] = key[6] = key[7] = key[8] = 0;
+
+ if ((source < dest)
+ || ((source == dest) && (src_port < dst_port))) {
+ key[0] = source;
+ key[1] = dest;
+ key[2] = (src_port << 16) | dst_port;
+ } else {
+ key[0] = dest;
+ key[1] = source;
+ key[2] = (dst_port << 16) | src_port;
+ hash_order_original_direction = 0;
+ }
+ }
+
+ if (type == IP_VERSION_6) {
+ int ip_cmp = memcmp(src_addr, dst_addr, 16);
+ uint32_t *lo_addr;
+ uint32_t *hi_addr;
+
+ if ((ip_cmp < 0) || ((ip_cmp == 0) && (src_port < dst_port))) {
+ lo_addr = src_addr;
+ hi_addr = dst_addr;
+ key[8] = (src_port << 16) | dst_port;
+ } else {
+ lo_addr = dst_addr;
+ hi_addr = src_addr;
+ key[8] = (dst_port << 16) | src_port;
+ hash_order_original_direction = 0;
+ }
+ key[0] = lo_addr[0];
+ key[1] = lo_addr[1];
+ key[2] = lo_addr[2];
+ key[3] = lo_addr[3];
+ key[4] = hi_addr[0];
+ key[5] = hi_addr[1];
+ key[6] = hi_addr[2];
+ key[7] = hi_addr[3];
+
+ }
+#ifdef ALGDBG
+ rte_ct_print_hashkey(key);
+#endif
+ return hash_order_original_direction;
+}
+
+
+int
+rte_ct_get_IP_hdr_size(struct rte_mbuf *pkt)
+{
+ /* NOTE: Only supporting IP headers with no options at this time, so
+ * header is fixed size
+ */
+ /* TODO: Need to find defined contstants for start of Ether and
+ * IP headers.
+ */
+ uint8_t hdr_chk = RTE_MBUF_METADATA_UINT8(pkt, IP_START);
+
+ hdr_chk = hdr_chk >> 4;
+
+ if (hdr_chk == IP_VERSION_4)
+ return IPv4_HEADER_SIZE;
+
+ else if (hdr_chk == IP_VERSION_6)
+ return IPv6_HEADER_SIZE;
+
+ else /* Not IPv4 header with no options, return negative. */
+ return -1;
+ /*
+ * int ip_hdr_size_bytes = (ihdr->version_ihl & IPV4_HDR_IHL_MASK) *
+ * IPV4_IHL_MULTIPLIER;
+ * return ip_hdr_size_bytes;
+ */
+}
+
+static void
+rte_ct_set_timer_for_new_cnxn(
+ struct rte_ct_cnxn_tracker *ct,
+ struct rte_ct_cnxn_data *cd)
+{
+ cd->state_used_for_timer = RTE_CT_TCP_NONE;
+ rte_ct_set_cnxn_timer_for_tcp(ct, cd, RTE_CT_TCP_SYN_SENT);
+}
+
+/*
+ * The connection data is stored in a hash table which makes use of the bulk
+ * lookup optimization provided in DPDK. All of the packets seen in one call
+ * to rte_ct_cnxn_tracker_batch_lookup are done in one hash table lookup. The
+ * number of packets is the number being processed by the pipeline (default
+ * max 32, absolute max 64). For any TCP or UDP packet that does not have
+ * an existing (pseudo-)connection in the table (i.e. was a miss on the hash
+ * lookup), a new connection must be added.
+ *
+ * It is possible, for UDP, that the first packet for a (pseudo-)connection and
+ * a subsequent packet are in the same batch. This means that when looking for
+ * new connections in a batch the first one must add the connection, the
+ * second and subsequent (in that batch) that are part of the same connection
+ * must use that newly created one, not create another table entry.
+ *
+ * Any newly created entries are "remembered" in linear table, which is search
+ * when processing hash tables misses. All the entries in that table are
+ * "forgotten" at the start of a new batch.
+ *
+ * A linear table may seem slow, but consider:
+ * - out of millions of packets/second, this involves at most 64.
+ * - this affects only UDP. TCP connections are set up using an acknowledgement
+ * protocl, so would not have multiple packets for new connection in
+ * same batch (TODO)
+ * - the number of new connections in a batch would usually be zero, or a low
+ * number like 1
+ * - all the data to search through should still be in cache
+ */
+
+static inline void
+rte_ct_remember_new_connection(
+ struct rte_ct_cnxn_tracker *ct,
+ struct rte_ct_cnxn_data *entry)
+{
+ ct->latest_connection++;
+ ct->new_connections[ct->latest_connection] = entry;
+}
+
+static struct rte_ct_cnxn_data *
+rte_ct_search_new_connections(struct rte_ct_cnxn_tracker *ct, uint32_t *key)
+{
+ int i;
+
+ for (i = 0; i <= ct->latest_connection; i++) {
+ uint32_t *cnxn_key = ct->new_connections[i]->key;
+ int key_cmp = memcmp(cnxn_key, key,
+ sizeof(ct->new_connections[i]->key));
+
+ if (key_cmp == 0)
+ return ct->new_connections[i];
+ }
+ return NULL;
+}
+
+static inline void rte_ct_forget_new_connections(struct rte_ct_cnxn_tracker *ct)
+{
+ ct->latest_connection = -1;
+}
+
+
+
+
+static enum rte_ct_packet_action
+rte_ct_handle_tcp_lookup(
+ struct rte_ct_cnxn_tracker *ct,
+ struct rte_mbuf *packet,
+ uint8_t pkt_num,
+ uint8_t key_is_client_order,
+ uint32_t *key,
+ int hash_table_entry,
+ int no_new_cnxn,
+ uint8_t ip_hdr_size_bytes)
+{
+ struct rte_ct_cnxn_data new_cnxn_data;
+
+ memset(&new_cnxn_data, 0, sizeof(struct rte_ct_cnxn_data));
+ enum rte_ct_packet_action packet_action;
+
+ #ifdef CT_CGNAT
+ int32_t position = hash_table_entry;
+ ct->positions[pkt_num] = position;
+ #endif
+
+ /* rte_ct_cnxn_print_pkt(packet); */
+ if (hash_table_entry >= 0) {
+ /*
+ * connection found for this packet.
+ * Check that this is a valid packet for connection
+ */
+
+ struct rte_ct_cnxn_data *entry =
+ &ct->hash_table_entries[hash_table_entry];
+
+ packet_action = rte_ct_verify_tcp_packet(ct, entry, packet,
+ key_is_client_order, ip_hdr_size_bytes);
+
+ switch (packet_action) {
+
+ case RTE_CT_FORWARD_PACKET:
+ entry->counters.packets_forwarded++;
+ break;
+
+ case RTE_CT_DROP_PACKET:
+ entry->counters.packets_dropped++;
+ return RTE_CT_DROP_PACKET;
+
+ case RTE_CT_REOPEN_CNXN_AND_FORWARD_PACKET:
+ /* Entry already in hash table, just re-initialize */
+
+ /* Don't use syproxy on re-init, since it
+ * is a valid connection
+ */
+
+ if (rte_ct_tcp_new_connection(ct, &new_cnxn_data,
+ packet, 0, ip_hdr_size_bytes) !=
+ RTE_CT_DROP_PACKET) {
+ rte_memcpy(&entry->ct_protocol.tcp_ct_data,
+ &new_cnxn_data.ct_protocol.tcp_ct_data,
+ sizeof(new_cnxn_data.ct_protocol.tcp_ct_data));
+ rte_ct_set_timer_for_new_cnxn(ct, entry);
+ if (ct->counters->sessions_reactivated > 0)
+ ct->counters->sessions_reactivated--;
+ }
+
+ break;
+
+ case RTE_CT_SEND_SERVER_SYN:
+ ct->counters->pkts_forwarded++;
+ /* packet modified, send back to original source */
+ return RTE_CT_SEND_SERVER_SYN;
+
+ case RTE_CT_SEND_SERVER_ACK:
+ ct->counters->pkts_forwarded++;
+ /* packet modified, send back to original source */
+ return RTE_CT_SEND_SERVER_ACK;
+
+ case RTE_CT_HIJACK:
+ ct->counters->pkts_forwarded++;
+ /* packet saved with connection, notify VNF
+ * to hijack it
+ */
+ return RTE_CT_HIJACK;
+
+ case RTE_CT_DESTROY_CNXN_AND_FORWARD_PACKET:
+
+ /*
+ * Forward the packet because it is "legal", but destroy
+ * the connection by removing it from the hash table and
+ * cancelling any timer. There is a remote possibility
+ * (perhaps impossible?) that a later packet in the same
+ * batch is for this connection. Due to the batch
+ * lookup, which has already happened, the later packet
+ * thinks that the connection is valid. This might cause
+ * a timer to be set. Eventually, it would time out so
+ * the only bug case occurs if the hash table also, in
+ * the same batch, allocates this entry for a new
+ * connection before the above packet is received. The
+ * chances of this happening seem impossibly small but
+ * this case should perhaps be investigated further.
+ */
+
+ if (rte_hash_del_key(ct->rhash, entry->key) >= 0) {
+ /*
+ * if rte_hash_del_key >= 0, then the connection
+ * was found in the hash table and removed.
+ * Counters must be updated, and the timer
+ * cancelled. If the result was < 0, then the
+ * connection must have already been deleted,
+ * and it must have been deleted in this batch
+ * of packets processed. Do nothing.
+ */
+
+ ct->counters->sessions_closed++;
+ if (ct->counters->current_active_sessions > 0)
+ ct->counters->current_active_sessions--;
+ rte_ct_cancel_cnxn_timer(entry);
+ }
+ entry->counters.packets_forwarded++;
+ break;
+
+ default:
+ break;
+ }
+ } else {
+ /* try to add new connection */
+ struct rte_ct_cnxn_data *new_hash_entry;
+
+ if (no_new_cnxn) {
+ ct->counters->pkts_drop_invalid_conn++;
+ return RTE_CT_DROP_PACKET;
+ }
+
+ packet_action = rte_ct_tcp_new_connection(ct, &new_cnxn_data,
+ packet, ct->misc_options.synproxy_enabled,
+ ip_hdr_size_bytes);
+
+ if (unlikely(packet_action == RTE_CT_DROP_PACKET)) {
+ ct->counters->pkts_drop_invalid_conn++;
+ return RTE_CT_DROP_PACKET;
+ }
+
+ /* This packet creates a connection . */
+ int32_t position = rte_hash_add_key(ct->rhash, key);
+ if (position < 0) {
+ printf
+ ("Failed to add new connection to hash table %d, pkt_num:%d\n",
+ position, pkt_num);
+ return RTE_CT_DROP_PACKET;
+ }
+ #ifdef CT_CGNAT
+ ct->positions[pkt_num] = position;
+ #endif
+ new_hash_entry = &ct->hash_table_entries[position];
+
+ /* update fields in new_cnxn_data not set by new_connection */
+
+ memcpy(new_cnxn_data.key, key, sizeof(new_cnxn_data.key));
+ new_cnxn_data.key_is_client_order = key_is_client_order;
+ new_cnxn_data.protocol = TCP_PROTOCOL;
+ rte_cnxn_ip_type(&new_cnxn_data.type, packet);
+ rte_memcpy(new_hash_entry, &new_cnxn_data,
+ sizeof(struct rte_ct_cnxn_data));
+ new_hash_entry->counters.packets_forwarded = 1;
+ new_hash_entry->counters.packets_dropped = 0;
+ ct->counters->current_active_sessions++;
+ ct->counters->sessions_activated++;
+
+ if (packet_action == RTE_CT_SEND_CLIENT_SYNACK) {
+ /* this is a synproxied connecton */
+ /* must remember mss, window scaling etc. from client */
+
+ rte_sp_parse_options(packet, new_hash_entry);
+
+ /*
+ * update packet to a SYN/ACK directed to the client,
+ * including default header options
+ */
+
+ rte_sp_cvt_to_spoofed_client_synack(new_hash_entry,
+ packet);
+
+ /*
+ * run updated packet through connection tracking so
+ * cnxn data updated appropriately and timer set for syn
+ * received state, not syn sent.
+ */
+ packet_action = rte_ct_verify_tcp_packet(ct,
+ new_hash_entry, packet,
+ !key_is_client_order,
+ ip_hdr_size_bytes);
+
+ if (unlikely(packet_action != RTE_CT_FORWARD_PACKET)) {
+ /* should never get here */
+ printf("Serious error in synproxy generating ");
+ printf("SYN/ACK\n");
+ return RTE_CT_DROP_PACKET;
+ }
+ ct->counters->pkts_forwarded++;
+ /* spoofed packet good to go */
+ return RTE_CT_SEND_CLIENT_SYNACK;
+ }
+ rte_ct_set_timer_for_new_cnxn(ct, new_hash_entry);
+
+ }
+
+ /* TODO: is it possible that earlier packet in this batch caused new
+ * entry to be added for the connection? Seems unlikely, since it
+ * would require multiple packets from the same side of the connection
+ * one after another immediately, and the TCP connection OPEN requires
+ * acknowledgement before further packets. What about simultaneous
+ * OPEN? Only if both sides are on same input port. Is that possible?
+ */
+ /* if made it here, packet will be forwarded */
+ ct->counters->pkts_forwarded++;
+ return RTE_CT_FORWARD_PACKET;
+}
+
+static uint64_t
+rte_ct_cnxn_tracker_batch_lookup_basic(
+ struct rte_ct_cnxn_tracker *ct,
+ struct rte_mbuf **pkts,
+ uint64_t pkts_mask,
+ uint64_t no_new_cnxn_mask,
+ uint64_t *reply_pkt_mask,
+ uint64_t *hijack_mask)
+{
+ /* bitmap of packets left to process */
+ uint64_t pkts_to_process = pkts_mask;
+ /* bitmap of valid packets to return */
+ uint64_t valid_packets = pkts_mask;
+ uint8_t compacting_map[RTE_HASH_LOOKUP_BULK_MAX];
+ /* for pkt, key in originators direction? */
+ uint8_t key_orig_dir[RTE_HASH_LOOKUP_BULK_MAX];
+ uint32_t packets_for_lookup = 0;
+ int32_t positions[RTE_HASH_LOOKUP_BULK_MAX];
+ uint32_t i;
+ struct rte_ct_cnxn_data new_cnxn_data;
+
+ if (CNXN_TRX_DEBUG > 1) {
+ printf("Enter cnxn tracker %p", ct);
+ printf(" synproxy batch lookup with packet mask %p\n",
+ (void *)pkts_mask);
+ }
+
+ rte_ct_forget_new_connections(ct);
+ *reply_pkt_mask = 0;
+ *hijack_mask = 0;
+
+ /*
+ * Use bulk lookup into hash table for performance reasons. Cannot have
+ * "empty slots" in the bulk lookup,so need to create a compacted table.
+ */
+
+ for (; pkts_to_process;) {
+ uint8_t pos = (uint8_t) __builtin_ctzll(pkts_to_process);
+ /* bitmask representing only this packet */
+ uint64_t pkt_mask = 1LLU << pos;
+ /* remove this packet from remaining list */
+ pkts_to_process &= ~pkt_mask;
+
+ struct rte_mbuf *pkt = pkts[pos];
+
+ int ip_hdr_size_bytes = rte_ct_get_IP_hdr_size(pkt);
+
+ if (unlikely(ip_hdr_size_bytes < 0)) {
+ /* Not IPv4, ignore. */
+ continue;
+ }
+
+ void *ip_hdr = RTE_MBUF_METADATA_UINT32_PTR(pkt, IP_START);
+
+ /* TCP and UDP ports at same offset, just use TCP for
+ * offset calculation
+ */
+ struct tcp_hdr *thdr =
+ (struct tcp_hdr *)RTE_MBUF_METADATA_UINT32_PTR(pkt,
+ (IP_START + ip_hdr_size_bytes));
+ uint16_t src_port = rte_bswap16(thdr->src_port);
+ uint16_t dst_port = rte_bswap16(thdr->dst_port);
+
+ if (ip_hdr_size_bytes == IPv4_HEADER_SIZE) {
+ struct ipv4_hdr *ihdr = (struct ipv4_hdr *)ip_hdr;
+ uint8_t proto = ihdr->next_proto_id;
+
+ if (!(proto == TCP_PROTOCOL || proto == UDP_PROTOCOL)) {
+ /* only tracking TCP and UDP at this time */
+ continue;
+ }
+
+ /*
+ * Load the addresses and ports, and convert from Intel
+ * to network byte order. Strictly speaking, it is not
+ * necessary to do this conversion, as this data is only
+ * used to create a hash key.
+ */
+ uint32_t src_addr = rte_bswap32(ihdr->src_addr);
+ uint32_t dst_addr = rte_bswap32(ihdr->dst_addr);
+
+ if (CNXN_TRX_DEBUG > 2) {
+ if (CNXN_TRX_DEBUG > 4)
+ rte_ct_cnxn_print_pkt(pkt,
+ IP_VERSION_4);
+ }
+ /* need to create compacted table of pointers to pass
+ * to bulk lookup
+ */
+
+ compacting_map[packets_for_lookup] = pos;
+ key_orig_dir[packets_for_lookup] =
+ rte_ct_create_cnxn_hashkey(&src_addr, &dst_addr,
+ src_port, dst_port,
+ proto,
+ &ct->hash_keys
+ [packets_for_lookup][0],
+ IP_VERSION_4);
+ packets_for_lookup++;
+ }
+
+ if (ip_hdr_size_bytes == IPv6_HEADER_SIZE) {
+ struct ipv6_hdr *ihdr = (struct ipv6_hdr *)ip_hdr;
+ uint8_t proto = ihdr->proto;
+
+ if (!(proto == TCP_PROTOCOL || proto == UDP_PROTOCOL)) {
+ /* only tracking TCP and UDP at this time */
+ continue;
+ }
+
+ if (CNXN_TRX_DEBUG > 2) {
+ if (CNXN_TRX_DEBUG > 4)
+ rte_ct_cnxn_print_pkt(pkt,
+ IP_VERSION_6);
+ }
+
+ /* need to create compacted table of pointers to pass
+ * to bulk lookup
+ */
+
+ compacting_map[packets_for_lookup] = pos;
+ key_orig_dir[packets_for_lookup] =
+ rte_ct_create_cnxn_hashkey(
+ (uint32_t *) ihdr->src_addr,
+ (uint32_t *) ihdr->dst_addr,
+ src_port, dst_port,
+ proto,
+ &ct->hash_keys
+ [packets_for_lookup][0],
+ IP_VERSION_6);
+ packets_for_lookup++;
+ }
+
+ }
+
+ if (unlikely(packets_for_lookup == 0))
+ return valid_packets; /* no suitable packet for lookup */
+
+ /* Clear all the data to make sure no stack garbage is in it */
+ memset(&new_cnxn_data, 0, sizeof(struct rte_ct_cnxn_data));
+
+ /* lookup all tcp & udp packets in the connection table */
+
+ int lookup_result =
+ rte_hash_lookup_bulk(ct->rhash, (const void **)&ct->hash_key_ptrs,
+ packets_for_lookup, &positions[0]);
+
+ if (unlikely(lookup_result < 0)) {
+ /* TODO: change a log */
+ printf("Unexpected hash table problem, discarding all packets");
+ return 0; /* unknown error, just discard all packets */
+ }
+#ifdef ALGDBG
+ for (i = 0; i < packets_for_lookup; i++) {
+ if (positions[i] >= 0)
+ printf("@CT positions[i]= %d, compacting_map[i]= %d\n",
+ positions[i], compacting_map[i]);
+ }
+#endif
+ for (i = 0; i < packets_for_lookup; i++) {
+ /* index into hash table entries */
+ int hash_table_entry = positions[i];
+ /* index into packet table of this packet */
+ uint8_t pkt_index = compacting_map[i];
+ /* bitmask representing only this packet */
+ uint64_t pkt_mask = 1LLU << pkt_index;
+ uint8_t key_is_client_order = key_orig_dir[i];
+ uint32_t *key = ct->hash_key_ptrs[pkt_index];
+ uint8_t protocol = *(key + 9);
+ struct rte_mbuf *packet = pkts[pkt_index];
+ int no_new_cnxn = (pkt_mask & no_new_cnxn_mask) != 0;
+
+ /* rte_ct_print_hashkey(key); */
+
+ if (protocol == TCP_PROTOCOL) {
+ enum rte_ct_packet_action tcp_pkt_action;
+
+ int ip_hdr_size_bytes = rte_ct_get_IP_hdr_size(packet);
+ tcp_pkt_action = rte_ct_handle_tcp_lookup(ct, packet,
+ pkt_index, key_is_client_order,
+ key, hash_table_entry, no_new_cnxn,
+ ip_hdr_size_bytes);
+
+ switch (tcp_pkt_action) {
+
+ case RTE_CT_SEND_CLIENT_SYNACK:
+ case RTE_CT_SEND_SERVER_ACK:
+ /* altered packet or copy must be returned
+ * to originator
+ */
+ *reply_pkt_mask |= pkt_mask;
+ /* FALL-THROUGH */
+
+ case RTE_CT_SEND_SERVER_SYN:
+ case RTE_CT_FORWARD_PACKET:
+ break;
+
+ case RTE_CT_HIJACK:
+ *hijack_mask |= pkt_mask;
+ break;
+
+ default:
+ /* bad packet, clear mask to drop */
+ valid_packets ^= pkt_mask;
+ ct->counters->pkts_drop++;
+ break;
+ }
+
+ /* rte_ct_cnxn_print_pkt(pkts[pkt_index]); */
+ } else { /* UDP entry */
+
+ if (hash_table_entry >= 0) {
+ /*
+ * connection found for this packet. Check that
+ * this is a valid packet for connection
+ */
+
+ struct rte_ct_cnxn_data *entry =
+ &ct->hash_table_entries[hash_table_entry];
+
+ if (rte_ct_udp_packet
+ (ct, entry, pkts[pkt_index],
+ key_is_client_order)) {
+ entry->counters.packets_forwarded++;
+ ct->counters->pkts_forwarded++;
+ }
+ } else {
+ /*
+ * connection not found in bulk hash lookup,
+ * but might have been added in this batch
+ */
+
+ struct rte_ct_cnxn_data *recent_entry =
+ rte_ct_search_new_connections(ct, key);
+
+ if (recent_entry != NULL) {
+ if (rte_ct_udp_packet(ct, recent_entry,
+ pkts[pkt_index],
+ key_is_client_order)) {
+ recent_entry->counters.
+ packets_forwarded++;
+ ct->counters->pkts_forwarded++;
+ }
+ } else {
+ /* no existing connection, try to add
+ * new one
+ */
+
+ if (no_new_cnxn) {
+ /* new cnxn not allowed, clear
+ * mask to drop
+ */
+ valid_packets ^= pkt_mask;
+ ct->counters->pkts_drop++;
+ ct->counters->
+ pkts_drop_invalid_conn++;
+ continue;
+ }
+
+ if (rte_ct_udp_new_connection(ct,
+ &new_cnxn_data,
+ pkts[pkt_index])) {
+ /* This packet creates a
+ * connection .
+ */
+ int32_t position =
+ rte_hash_add_key(
+ ct->rhash, key);
+
+ if (position < 0)
+ continue;
+
+ struct rte_ct_cnxn_data
+ *new_hash_entry = &ct->
+ hash_table_entries[position];
+
+ /*
+ *update fields in new_cnxn_data
+ * not set by "new_connection"
+ */
+
+ memcpy(new_cnxn_data.key, key,
+ sizeof(new_cnxn_data.key));
+
+ new_cnxn_data.
+ key_is_client_order
+ = key_is_client_order;
+ new_cnxn_data.protocol =
+ UDP_PROTOCOL;
+ rte_cnxn_ip_type(
+ &new_cnxn_data.type,
+ packet);
+ rte_memcpy(new_hash_entry,
+ &new_cnxn_data,
+ sizeof(struct
+ rte_ct_cnxn_data));
+
+ new_hash_entry->counters.
+ packets_forwarded = 1;
+ ct->counters->pkts_forwarded++;
+ new_hash_entry->counters.
+ packets_dropped = 0;
+ ct->counters->pkts_drop = 0;
+ ct->counters->
+ current_active_sessions++;
+ ct->counters->
+ sessions_activated++;
+
+ new_hash_entry->
+ state_used_for_timer
+ = RTE_CT_UDP_NONE;
+ rte_ct_set_cnxn_timer_for_udp(
+ ct,
+ new_hash_entry,
+ RTE_CT_UDP_UNREPLIED);
+
+ rte_ct_remember_new_connection(
+ ct,
+ new_hash_entry);
+ }
+ }
+
+ }
+
+ } /* UDP */
+ } /* packets_for_lookup */
+
+ if (CNXN_TRX_DEBUG > 1) {
+ printf("Exit cnxn tracker synproxy batch lookup with");
+ printf(" packet mask %p\n", (void *)valid_packets);
+ }
+
+ return valid_packets;
+}
+
+uint64_t
+rte_ct_cnxn_tracker_batch_lookup_with_synproxy(
+ struct rte_ct_cnxn_tracker *ct,
+ struct rte_mbuf **pkts,
+ uint64_t pkts_mask,
+ struct rte_synproxy_helper *sp_helper)
+{
+ return rte_ct_cnxn_tracker_batch_lookup_basic(ct, pkts, pkts_mask, 0,
+ &sp_helper->reply_pkt_mask, &sp_helper->hijack_mask);
+}
+#ifdef CT_CGNAT
+uint64_t cgnapt_ct_process(
+ struct rte_ct_cnxn_tracker *ct,
+ struct rte_mbuf **pkts,
+ uint64_t pkts_mask,
+ struct rte_CT_helper *ct_helper)
+{
+/* to disable SynProxy for CGNAT */
+ rte_ct_disable_synproxy(ct);
+ return rte_ct_cnxn_tracker_batch_lookup_basic(ct, pkts, pkts_mask,
+ ct_helper->no_new_cnxn_mask,
+ &ct_helper->reply_pkt_mask,
+ &ct_helper->hijack_mask);
+}
+#endif/*CT-CGNAT*/
+uint64_t
+rte_ct_cnxn_tracker_batch_lookup(
+ struct rte_ct_cnxn_tracker *ct,
+ struct rte_mbuf **pkts,
+ uint64_t pkts_mask,
+ struct rte_CT_helper *ct_helper)
+{
+
+ return rte_ct_cnxn_tracker_batch_lookup_basic(ct, pkts, pkts_mask,
+ ct_helper->no_new_cnxn_mask,
+ &ct_helper->reply_pkt_mask, &ct_helper->hijack_mask);
+}
+
+
+void rte_ct_cnxn_tracker_batch_lookup_type(
+ struct rte_ct_cnxn_tracker *ct,
+ struct rte_mbuf **pkts,
+ uint64_t *pkts_mask,
+ struct rte_CT_helper *ct_helper,
+ uint8_t ip_hdr_size_bytes)
+{
+
+ rte_ct_cnxn_tracker_batch_lookup_basic_type(ct, pkts, pkts_mask,
+ ct_helper->no_new_cnxn_mask,
+ &ct_helper->reply_pkt_mask, &ct_helper->hijack_mask,
+ ip_hdr_size_bytes);
+}
+
+
+
+uint64_t
+rte_ct_cnxn_tracker_batch_lookup_with_new_cnxn_control(
+ struct rte_ct_cnxn_tracker *ct,
+ struct rte_mbuf **pkts,
+ uint64_t pkts_mask,
+ uint64_t no_new_cnxn_mask)
+{
+ uint64_t dont_care;
+
+ return rte_ct_cnxn_tracker_batch_lookup_basic(ct, pkts, pkts_mask,
+ no_new_cnxn_mask,
+ &dont_care, &dont_care);
+}
+
+
+int
+rte_ct_initialize_default_timeouts(struct rte_ct_cnxn_tracker *new_cnxn_tracker)
+{
+
+ /* timer system init */
+
+ uint64_t hertz = rte_get_tsc_hz();
+
+ new_cnxn_tracker->hertz = hertz;
+ new_cnxn_tracker->timing_cycles_per_timing_step = hertz / 10;
+ new_cnxn_tracker->timing_100ms_steps_previous = 0;
+ new_cnxn_tracker->timing_100ms_steps = 0;
+ new_cnxn_tracker->timing_last_time = rte_get_tsc_cycles();
+
+ /* timeouts in seconds */
+ new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
+ [RTE_CT_TCP_SYN_SENT] = 120 * hertz;
+ new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
+ [RTE_CT_TCP_SYN_RECV] = 60 * hertz;
+ /* 5 * DAYS */
+ new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
+ [RTE_CT_TCP_ESTABLISHED] = 60 * 60 * 24 * 5 * hertz;
+
+ new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
+ [RTE_CT_TCP_FIN_WAIT] = 120 * hertz;
+ new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
+ [RTE_CT_TCP_CLOSE_WAIT] = 60 * hertz;
+ new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
+ [RTE_CT_TCP_LAST_ACK] = 30 * hertz;
+ new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
+ [RTE_CT_TCP_TIME_WAIT] = 120 * hertz;
+ new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
+ [RTE_CT_TCP_CLOSE] = 10 * hertz;
+ new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
+ [RTE_CT_TCP_SYN_SENT_2] = 120 * hertz;
+ new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
+ [RTE_CT_TCP_RETRANS] = 300 * hertz;
+ new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
+ [RTE_CT_TCP_UNACK] = 300 * hertz;
+
+ new_cnxn_tracker->ct_timeout.udptimeout.udp_timeouts
+ [RTE_CT_UDP_UNREPLIED] = 30 * hertz;
+ new_cnxn_tracker->ct_timeout.udptimeout.udp_timeouts
+ [RTE_CT_UDP_REPLIED] = 180 * hertz;
+ /* miscellaneous init */
+ new_cnxn_tracker->misc_options.tcp_max_retrans =
+ RTE_CT_TCP_MAX_RETRANS;
+ new_cnxn_tracker->misc_options.tcp_loose = 0;
+ new_cnxn_tracker->misc_options.tcp_be_liberal = 0;
+#ifdef CT_CGNAT
+ int i;
+ for (i=0; i < RTE_HASH_LOOKUP_BULK_MAX ;i ++ )
+ new_cnxn_tracker->positions[i] = -1;
+#endif
+
+ return 0;
+}
+
+struct rte_CT_counter_block rte_CT_counter_table[MAX_CT_INSTANCES]
+__rte_cache_aligned;
+int rte_CT_hi_counter_block_in_use = -1;
+
+int
+rte_ct_initialize_cnxn_tracker_with_synproxy(
+ struct rte_ct_cnxn_tracker *new_cnxn_tracker,
+ uint32_t max_connection_count,
+ char *name,
+ uint16_t pointer_offset)
+{
+ uint32_t i;
+ uint32_t size;
+ struct rte_CT_counter_block *counter_ptr;
+ /*
+ * TODO: Should number of entries be something like
+ * max_connection_count * 1.1 to allow for unused space
+ * and thus increased performance of hash table, at a cost of memory???
+ */
+
+ new_cnxn_tracker->pointer_offset = pointer_offset;
+
+ memset(new_cnxn_tracker->name, '\0', sizeof(new_cnxn_tracker->name));
+ strncpy(new_cnxn_tracker->name, name, strlen(new_cnxn_tracker->name));
+ //strcpy(new_cnxn_tracker->name, name);
+ /* + (max_connection_count >> 3); */
+ uint32_t number_of_entries = max_connection_count;
+
+ size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_ct_cnxn_data) *
+ number_of_entries);
+ new_cnxn_tracker->hash_table_entries =
+ rte_zmalloc(NULL, size, RTE_CACHE_LINE_SIZE);
+ if (new_cnxn_tracker->hash_table_entries == NULL) {
+ printf(" Not enough memory, or invalid arguments\n");
+ return -1;
+ }
+ new_cnxn_tracker->num_cnxn_entries = number_of_entries;
+
+ /* initialize all timers */
+
+ for (i = 0; i < number_of_entries; i++)
+ rte_timer_init(&new_cnxn_tracker->hash_table_entries[i].timer);
+
+ /* pointers for temp storage used during bulk hash */
+ for (i = 0; i < RTE_HASH_LOOKUP_BULK_MAX; i++)
+ new_cnxn_tracker->hash_key_ptrs[i] =
+ &new_cnxn_tracker->hash_keys[i][0];
+
+ /*
+ * Now allocate a counter block entry.It appears that the initialization
+ * of these threads is serialized on core 0 so no lock is necessary
+ */
+
+ if (rte_CT_hi_counter_block_in_use == MAX_CT_INSTANCES)
+ return -1;
+
+ rte_CT_hi_counter_block_in_use++;
+ counter_ptr = &rte_CT_counter_table[rte_CT_hi_counter_block_in_use];
+
+ new_cnxn_tracker->counters = counter_ptr;
+
+ /* set up hash table parameters, then create hash table */
+ struct rte_hash_parameters rhash_parms = {
+ .name = name,
+ .entries = number_of_entries,
+ .hash_func = NULL, /* use default hash */
+ .key_len = 40,
+ .hash_func_init_val = 0,
+ .socket_id = rte_socket_id(),
+ .extra_flag = 1 /*This is needed for TSX memory*/
+ };
+
+ new_cnxn_tracker->rhash = rte_hash_create(&rhash_parms);
+
+ return 0;
+}
+
+int
+rte_ct_initialize_cnxn_tracker(
+ struct rte_ct_cnxn_tracker *new_cnxn_tracker,
+ uint32_t max_connection_count,
+ char *name)
+{
+ return rte_ct_initialize_cnxn_tracker_with_synproxy(new_cnxn_tracker,
+ max_connection_count, name, 0);
+}
+
+int
+rte_ct_free_cnxn_tracker_resources(struct rte_ct_cnxn_tracker *old_cnxn_tracker)
+{
+ rte_free(old_cnxn_tracker->hash_table_entries);
+ rte_hash_free(old_cnxn_tracker->rhash);
+ return 0;
+}
+
+int
+rte_ct_get_cnxn_tracker_size(void)
+{
+ return sizeof(struct rte_ct_cnxn_tracker);
+}
+
+void
+rte_ct_cnxn_timer_expired(struct rte_timer *rt, void *arg);
+
+static void
+rte_ct_set_cnxn_timer(
+ struct rte_ct_cnxn_tracker *ct,
+ struct rte_ct_cnxn_data *cd,
+ uint64_t ticks_until_timeout)
+{
+ /*
+ * pointer to cnxn_data will be stored in timer system as pointer to
+ * rte_timer for later cast back to cnxn_data during timeout handling
+ */
+
+ struct rte_timer *rt = (struct rte_timer *)cd;
+ #ifdef CT_CGNAT
+ /* execute timeout on timer core */
+ uint32_t core_id = get_timer_core_id();
+ #else
+ /* execute timeout on current core */
+ uint32_t core_id = rte_lcore_id();
+ #endif
+ /* safe to reset since timeouts handled synchronously
+ * by rte_timer_manage
+ */
+ int success = rte_timer_reset(rt, ticks_until_timeout, SINGLE, core_id,
+ rte_ct_cnxn_timer_expired, ct);
+
+ if (success < 0) {
+ /* TODO: Change to log, perhaps something else?
+ * This should not happen
+ */
+ printf("CNXN_TRACKER: Failed to set connection timer.\n");
+ }
+}
+
+/*
+ * For the given connection, set a timeout based on the given state. If the
+* timer is already set, this call will reset the timer with a new value.
+ */
+
+void
+rte_ct_set_cnxn_timer_for_tcp(
+ struct rte_ct_cnxn_tracker *ct,
+ struct rte_ct_cnxn_data *cd,
+ uint8_t tcp_state)
+{
+
+ cd->expected_timeout =
+ (ct->timing_100ms_steps * ct->timing_cycles_per_timing_step) +
+ ct->ct_timeout.tcptimeout.tcp_timeouts[tcp_state];
+
+ if (tcp_state == cd->state_used_for_timer) {
+ /*
+ * Don't reset timer, too expensive. Instead, determine time
+ * elapsed since start of timer. When this timer expires, the
+ * timer will be reset to the elapsed timer. So if in a state
+ * with a 5 minute timer last sees a packet 4 minutes into the
+ * timer, the timer when expires will be reset to 4 minutes.
+ * This means the timer will then expire 5 minutes after
+ * the last packet.
+ */
+ return;
+ }
+
+ if (TESTING_TIMERS)
+ printf("Set Timer for connection %p and state %s\n", cd,
+ rte_ct_tcp_names[tcp_state]);
+
+ rte_ct_set_cnxn_timer(ct, cd,
+ ct->ct_timeout.
+ tcptimeout.tcp_timeouts[tcp_state]);
+ cd->state_used_for_timer = tcp_state;
+}
+
+/*
+ * For the given connection, set a timeout based on the given state.
+ * If the timer is already set,
+ * this call will reset the timer with a new value.
+ */
+
+void
+rte_ct_set_cnxn_timer_for_udp(
+ struct rte_ct_cnxn_tracker *ct,
+ struct rte_ct_cnxn_data *cd,
+ uint8_t udp_state)
+{
+
+ cd->expected_timeout = (ct->timing_cycles_per_timing_step) +
+ ct->ct_timeout.udptimeout.udp_timeouts[udp_state];
+
+ if (udp_state == cd->state_used_for_timer) {
+ /*
+ * Don't reset timer, too expensive. Instead, determine time
+ * elapsed since start of timer. When this timer expires, the
+ * timer will be reset to the elapsed timer. So if in a state
+ * with a 5 minute timer last sees a packet 4 minutes into the
+ * timer, the timer when expires will be reset to 4 minutes.
+ * This means the timer will then
+ * expire 5 minutes after the last packet.
+ */
+ return;
+ }
+
+ if (TESTING_TIMERS)
+ printf("Set Timer for connection %p and state %s\n", cd,
+ rte_ct_udp_names[udp_state]);
+ rte_ct_set_cnxn_timer(ct, cd,
+ ct->ct_timeout.
+ udptimeout.udp_timeouts[udp_state]);
+ cd->state_used_for_timer = udp_state;
+}
+
+/* Cancel the timer associated with the connection.
+ * Safe to call if no timer set.
+ */
+ void
+rte_ct_cancel_cnxn_timer(struct rte_ct_cnxn_data *cd)
+{
+ if (TESTING_TIMERS)
+ printf("Cancel Timer\n");
+
+ rte_timer_stop(&cd->timer);
+}
+
+void
+rte_ct_handle_expired_timers(struct rte_ct_cnxn_tracker *ct)
+{
+ /*
+ * If current time (in 100 ms increments) is different from the
+ * time it was last viewed, then check for and process expired timers.
+ */
+
+ uint64_t new_time = rte_get_tsc_cycles();
+ uint64_t time_diff = new_time - ct->timing_last_time;
+
+ if (time_diff >= ct->timing_cycles_per_timing_step) {
+ ct->timing_last_time = new_time;
+ ct->timing_100ms_steps++;
+ }
+
+ if (ct->timing_100ms_steps != ct->timing_100ms_steps_previous) {
+ rte_timer_manage();
+ ct->timing_100ms_steps_previous = ct->timing_100ms_steps;
+ }
+}
+
+/* timer has expired. Need to delete connection entry */
+
+void
+rte_ct_cnxn_timer_expired(struct rte_timer *rt, void *arg)
+{
+ /* the pointer to the rte_timer was actually a pointer
+ * to the cnxn data
+ */
+ struct rte_ct_cnxn_data *cd = (struct rte_ct_cnxn_data *)rt;
+ struct rte_ct_cnxn_tracker *ct = (struct rte_ct_cnxn_tracker *)arg;
+ int success = 0;
+
+ /*
+ * Check to see if the timer has "really" expired. If traffic occured
+ * since the timer was set, the timer needs be extended, so that timer
+ * expires the appropriate amount after that last packet.
+ */
+
+ uint64_t current_time = ct->timing_100ms_steps *
+ ct->timing_cycles_per_timing_step;
+
+ if (cd->expected_timeout >= current_time) {
+ uint64_t time_diff = cd->expected_timeout - current_time;
+
+ rte_ct_set_cnxn_timer(ct, cd, time_diff);
+ return;
+ }
+
+ if (cd->protocol == TCP_PROTOCOL) {
+ if (cd->state_used_for_timer == RTE_CT_TCP_TIME_WAIT ||
+ cd->state_used_for_timer == RTE_CT_TCP_CLOSE)
+ ct->counters->sessions_closed++;
+ else
+ ct->counters->sessions_timedout++;
+ /* if synproxied connection, free list of buffered
+ * packets if any
+ */
+
+ if (cd->ct_protocol.synproxy_data.synproxied)
+ rte_ct_release_buffered_packets(ct, cd);
+
+ } else if (cd->protocol == UDP_PROTOCOL)
+ ct->counters->sessions_closed++;
+ if (ct->counters->current_active_sessions > 0)
+ ct->counters->current_active_sessions--;
+
+ if (RTE_CT_TIMER_EXPIRED_DUMP) {
+ uint64_t percent = (cd->counters.packets_dropped * 10000) /
+ (cd->counters.packets_forwarded +
+ cd->counters.packets_dropped);
+
+ if (cd->protocol == TCP_PROTOCOL) {
+ printf("CnxnTrkr %s, timed-out TCP Connection: %p,",
+ ct->name, cd);
+ printf(" %s, pkts forwarded %"
+ PRIu64 ", pkts dropped %" PRIu64
+ ", drop%% %u.%u\n",
+ rte_ct_tcp_names[cd->state_used_for_timer],
+ cd->counters.packets_forwarded,
+ cd->counters.packets_dropped,
+ (uint32_t) (percent / 100),
+ (uint32_t) (percent % 100));
+ } else if (cd->protocol == UDP_PROTOCOL) {
+ printf("CnxnTrkr %s, Timed-out UDP Connection: %p,",
+ ct->name, cd);
+ printf(" %s, pkts forwarded %" PRIu64
+ ", pkts dropped %" PRIu64 ", drop%% %u.%u\n",
+ rte_ct_udp_names[cd->state_used_for_timer],
+ cd->counters.packets_forwarded,
+ cd->counters.packets_dropped,
+ (uint32_t) (percent / 100),
+ (uint32_t) (percent % 100));
+ }
+ }
+
+ success = rte_hash_del_key(ct->rhash, &cd->key);
+
+ if (success < 0) {
+ /* TODO: change to a log */
+ rte_ct_print_hashkey(cd->key);
+ }
+
+}
+
+struct rte_CT_counter_block *
+rte_ct_get_counter_address(struct rte_ct_cnxn_tracker *ct)
+{
+ return ct->counters;
+}
+
+int
+rte_ct_set_configuration_options(struct rte_ct_cnxn_tracker *ct,
+ char *name, char *value)
+{
+ /* check non-time values first */
+ int ival = atoi(value);
+
+ /* tcp_loose */
+ if (strcmp(name, "tcp_loose") == 0) {
+ ct->misc_options.tcp_loose = ival;
+ return 0;
+ }
+
+ /* tcp_be_liberal */
+ if (strcmp(name, "tcp_be_liberal") == 0) {
+ ct->misc_options.tcp_be_liberal = ival;
+ return 0;
+ }
+
+ /* tcp_max_retrans */
+ if (strcmp(name, "tcp_max_retrans") == 0) {
+ ct->misc_options.tcp_max_retrans = ival;
+ return 0;
+ }
+
+ uint64_t time_value = ival * ct->hertz;
+
+
+ /* configuration of timer values */
+
+ /* tcp_syn_sent */
+ if (strcmp(name, "tcp_syn_sent") == 0) {
+ if (time_value == 0)
+ return -1;
+ ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_SYN_SENT] =
+ time_value;
+ return 0;
+ }
+
+ /* tcp_syn_recv */
+ if (strcmp(name, "tcp_syn_recv") == 0) {
+ if (time_value == 0)
+ return -1;
+ ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_SYN_RECV] =
+ time_value;
+ return 0;
+ }
+
+ /* tcp_established */
+ if (strcmp(name, "tcp_established") == 0) {
+ if (time_value == 0)
+ return -1;
+ ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_ESTABLISHED] =
+ time_value;
+ return 0;
+ }
+
+ /* tcp_fin_wait */
+ if (strcmp(name, "tcp_fin_wait") == 0) {
+ if (time_value == 0)
+ return -1;
+ ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_FIN_WAIT] =
+ time_value;
+ return 0;
+ }
+
+ /* tcp_close_wait */
+ if (strcmp(name, "tcp_close_wait") == 0) {
+ if (time_value == 0)
+ return -1;
+ ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_CLOSE_WAIT] =
+ time_value;
+ return 0;
+ }
+
+ /* tcp_last_ack */
+ if (strcmp(name, "tcp_last_ack") == 0) {
+ if (time_value == 0)
+ return -1;
+ ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_LAST_ACK] =
+ time_value;
+ return 0;
+ }
+
+ /* tcp_time_wait */
+ if (strcmp(name, "tcp_time_wait") == 0) {
+ if (time_value == 0)
+ return -1;
+ ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_TIME_WAIT] =
+ time_value;
+ return 0;
+ }
+
+ /* tcp_close */
+ if (strcmp(name, "tcp_close") == 0) {
+ if (time_value == 0)
+ return -1;
+ ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_CLOSE] =
+ time_value;
+ return 0;
+ }
+
+ /* tcp_syn_sent_2 */
+ if (strcmp(name, "tcp_syn_sent_2") == 0) {
+ if (time_value == 0)
+ return -1;
+ ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_SYN_SENT_2] =
+ time_value;
+ return 0;
+ }
+
+ /* tcp_retrans */
+ if (strcmp(name, "tcp_retrans") == 0) {
+ if (time_value == 0)
+ return -1;
+ ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_RETRANS] =
+ time_value;
+ return 0;
+ }
+
+ /* tcp_unack */
+ if (strcmp(name, "tcp_unack") == 0) {
+ if (time_value == 0)
+ return -1;
+ ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_UNACK] =
+ time_value;
+ return 0;
+ }
+
+ /* udp_unreplied */
+ if (strcmp(name, "udp_unreplied") == 0) {
+ if (time_value == 0)
+ return -1;
+ ct->ct_timeout.udptimeout.udp_timeouts[RTE_CT_UDP_UNREPLIED] =
+ time_value;
+ return 0;
+ }
+
+ /* udp_replied */
+ if (strcmp(name, "udp_replied") == 0) {
+ if (time_value == 0)
+ return -1;
+ ct->ct_timeout.udptimeout.udp_timeouts[RTE_CT_UDP_REPLIED] =
+ time_value;
+ return 0;
+ }
+ return 1;
+}
+
+static void
+rte_ct_cnxn_tracker_batch_lookup_basic_type(
+ struct rte_ct_cnxn_tracker *ct,
+ struct rte_mbuf **pkts,
+ uint64_t *pkts_mask,
+ uint64_t no_new_cnxn_mask,
+ uint64_t *reply_pkt_mask,
+ uint64_t *hijack_mask,
+ uint8_t ip_hdr_size_bytes)
+{
+ /* bitmap of packets left to process */
+ uint64_t pkts_to_process = *pkts_mask;
+ /* bitmap of valid packets to return */
+ uint8_t compacting_map[RTE_HASH_LOOKUP_BULK_MAX];
+ /* for pkt, key in originators direction? */
+ uint8_t key_orig_dir[RTE_HASH_LOOKUP_BULK_MAX];
+ uint32_t packets_for_lookup = 0;
+ int32_t positions[RTE_HASH_LOOKUP_BULK_MAX];
+ uint32_t i;
+ struct rte_ct_cnxn_data new_cnxn_data;
+
+ if (CNXN_TRX_DEBUG > 1) {
+ printf("Enter cnxn tracker %p", ct);
+ printf(" synproxy batch lookup with packet mask %p\n",
+ (void *)*pkts_mask);
+ }
+
+ rte_ct_forget_new_connections(ct);
+ *reply_pkt_mask = 0;
+ *hijack_mask = 0;
+
+ /*
+ * Use bulk lookup into hash table for performance reasons. Cannot have
+ * "empty slots" in the bulk lookup,so need to create a compacted table.
+ */
+
+ switch (ip_hdr_size_bytes) {
+ case IPv4_HEADER_SIZE:
+ for (; pkts_to_process;) {
+ uint8_t pos = (uint8_t) __builtin_ctzll(
+ pkts_to_process);
+ /* bitmask representing only this packet */
+ uint64_t pkt_mask = 1LLU << pos;
+ /* remove this packet from remaining list */
+ pkts_to_process &= ~pkt_mask;
+
+ struct rte_mbuf *pkt = pkts[pos];
+
+
+ /* TCP and UDP ports at same offset, just use TCP for
+ * offset calculation
+ */
+ struct tcp_hdr *thdr = (struct tcp_hdr *)
+ RTE_MBUF_METADATA_UINT32_PTR(pkt,
+ (IP_START + ip_hdr_size_bytes));
+ uint16_t src_port = rte_bswap16(thdr->src_port);
+ uint16_t dst_port = rte_bswap16(thdr->dst_port);
+
+ struct ipv4_hdr *ihdr = (struct ipv4_hdr *)
+ RTE_MBUF_METADATA_UINT32_PTR(pkt, IP_START);
+ uint8_t proto = ihdr->next_proto_id;
+
+ if (!(proto == TCP_PROTOCOL || proto == UDP_PROTOCOL)) {
+ /* only tracking TCP and UDP at this time */
+ continue;
+ }
+
+ /*
+ * Load the addresses and ports, and convert from Intel
+ * to network byte order. Strictly speaking, it is not
+ * necessary to do this conversion, as this data is only
+ * used to create a hash key.
+ */
+ uint32_t src_addr = rte_bswap32(ihdr->src_addr);
+ uint32_t dst_addr = rte_bswap32(ihdr->dst_addr);
+
+ if (CNXN_TRX_DEBUG > 2) {
+ if (CNXN_TRX_DEBUG > 4)
+ rte_ct_cnxn_print_pkt(pkt,
+ IP_VERSION_4);
+ }
+ /* need to create compacted table of pointers to pass
+ * to bulk lookup
+ */
+
+ compacting_map[packets_for_lookup] = pos;
+ key_orig_dir[packets_for_lookup] =
+ rte_ct_create_cnxn_hashkey(&src_addr, &dst_addr,
+ src_port, dst_port,
+ proto,
+ &ct->hash_keys
+ [packets_for_lookup][0],
+ IP_VERSION_4);
+ packets_for_lookup++;
+ }
+ break;
+ case IPv6_HEADER_SIZE:
+ for (; pkts_to_process;) {
+ uint8_t pos = (uint8_t) __builtin_ctzll(
+ pkts_to_process);
+ /* bitmask representing only this packet */
+ uint64_t pkt_mask = 1LLU << pos;
+ /* remove this packet from remaining list */
+ pkts_to_process &= ~pkt_mask;
+
+ struct rte_mbuf *pkt = pkts[pos];
+
+
+ void *ip_hdr = RTE_MBUF_METADATA_UINT32_PTR(pkt,
+ IP_START);
+
+ /* TCP and UDP ports at same offset, just use TCP for
+ * offset calculation
+ */
+ struct tcp_hdr *thdr = (struct tcp_hdr *)
+ RTE_MBUF_METADATA_UINT32_PTR(pkt,
+ (IP_START + ip_hdr_size_bytes));
+ uint16_t src_port = rte_bswap16(thdr->src_port);
+ uint16_t dst_port = rte_bswap16(thdr->dst_port);
+
+ struct ipv6_hdr *ihdr = (struct ipv6_hdr *)ip_hdr;
+ uint8_t proto = ihdr->proto;
+
+ if (!(proto == TCP_PROTOCOL || proto == UDP_PROTOCOL)) {
+ /* only tracking TCP and UDP at this time */
+ continue;
+ }
+
+ if (CNXN_TRX_DEBUG > 2) {
+ if (CNXN_TRX_DEBUG > 4)
+ rte_ct_cnxn_print_pkt(pkt,
+ IP_VERSION_6);
+ }
+
+ /* need to create compacted table of pointers to pass
+ * to bulk lookup
+ */
+
+ compacting_map[packets_for_lookup] = pos;
+ key_orig_dir[packets_for_lookup] =
+ rte_ct_create_cnxn_hashkey(
+ (uint32_t *) ihdr->src_addr,
+ (uint32_t *) ihdr->dst_addr,
+ src_port, dst_port,
+ proto,
+ &ct->hash_keys
+ [packets_for_lookup][0],
+ IP_VERSION_6);
+ packets_for_lookup++;
+ }
+ break;
+ default:
+ break;
+ }
+ if (unlikely(packets_for_lookup == 0))
+ return; /* no suitable packet for lookup */
+
+ /* Clear all the data to make sure no stack garbage is in it */
+ memset(&new_cnxn_data, 0, sizeof(struct rte_ct_cnxn_data));
+
+ /* lookup all tcp & udp packets in the connection table */
+
+ int lookup_result = rte_hash_lookup_bulk(ct->rhash,
+ (const void **)&ct->hash_key_ptrs,
+ packets_for_lookup, &positions[0]);
+
+ if (unlikely(lookup_result < 0)) {
+ /* TODO: change a log */
+ printf("Unexpected hash table problem, discarding all packets");
+ *pkts_mask = 0;
+ return; /* unknown error, just discard all packets */
+ }
+ for (i = 0; i < packets_for_lookup; i++) {
+ /* index into hash table entries */
+ int hash_table_entry = positions[i];
+ /* index into packet table of this packet */
+ uint8_t pkt_index = compacting_map[i];
+ /* bitmask representing only this packet */
+ uint64_t pkt_mask = 1LLU << pkt_index;
+ uint8_t key_is_client_order = key_orig_dir[i];
+ uint32_t *key = ct->hash_key_ptrs[pkt_index];
+ uint8_t protocol = *(key + 9);
+ struct rte_mbuf *packet = pkts[pkt_index];
+ int no_new_cnxn = (pkt_mask & no_new_cnxn_mask) != 0;
+
+ /* rte_ct_print_hashkey(key); */
+
+ if (protocol == TCP_PROTOCOL) {
+ enum rte_ct_packet_action tcp_pkt_action;
+
+ tcp_pkt_action = rte_ct_handle_tcp_lookup(ct, packet,
+ pkt_index, key_is_client_order,
+ key, hash_table_entry, no_new_cnxn,
+ ip_hdr_size_bytes);
+
+ switch (tcp_pkt_action) {
+
+ case RTE_CT_SEND_CLIENT_SYNACK:
+ case RTE_CT_SEND_SERVER_ACK:
+ /* altered packet or copy must be returned
+ * to originator
+ */
+ *reply_pkt_mask |= pkt_mask;
+ /* FALL-THROUGH */
+
+ case RTE_CT_SEND_SERVER_SYN:
+ case RTE_CT_FORWARD_PACKET:
+ break;
+
+ case RTE_CT_HIJACK:
+ *hijack_mask |= pkt_mask;
+ break;
+
+ default:
+ /* bad packet, clear mask to drop */
+ *pkts_mask ^= pkt_mask;
+ ct->counters->pkts_drop++;
+ break;
+ }
+ /* rte_ct_cnxn_print_pkt(pkts[pkt_index]); */
+
+ } else { /* UDP entry */
+
+ if (hash_table_entry >= 0) {
+ /*
+ * connection found for this packet. Check that
+ * this is a valid packet for connection
+ */
+
+ struct rte_ct_cnxn_data *entry =
+ &ct->hash_table_entries[hash_table_entry];
+
+ if (rte_ct_udp_packet
+ (ct, entry, pkts[pkt_index],
+ key_is_client_order)) {
+ entry->counters.packets_forwarded++;
+ ct->counters->pkts_forwarded++;
+ }
+ } else {
+ /*
+ * connection not found in bulk hash lookup,
+ * but might have been added in this batch
+ */
+
+ struct rte_ct_cnxn_data *recent_entry =
+ rte_ct_search_new_connections(ct, key);
+
+ if (recent_entry != NULL) {
+ if (rte_ct_udp_packet(ct, recent_entry,
+ pkts[pkt_index],
+ key_is_client_order)) {
+ recent_entry->counters.
+ packets_forwarded++;
+ ct->counters->pkts_forwarded++;
+ }
+ } else {
+ /* no existing connection, try to add
+ * new one
+ */
+
+ if (no_new_cnxn) {
+ /* new cnxn not allowed, clear
+ * mask to drop
+ */
+ *pkts_mask ^= pkt_mask;
+ ct->counters->pkts_drop++;
+ ct->counters->
+ pkts_drop_invalid_conn++;
+ continue;
+ }
+
+ if (rte_ct_udp_new_connection(ct,
+ &new_cnxn_data, pkts[pkt_index])) {
+ /* This packet creates a
+ * connection
+ */
+ int32_t position =
+ rte_hash_add_key(ct->
+ rhash, key);
+
+ if (position < 0)
+ continue;
+
+ struct rte_ct_cnxn_data
+ *new_hash_entry = &ct->
+ hash_table_entries[position];
+
+ /*
+ *update fields in new_cnxn_data
+ * not set by "new_connection"
+ */
+
+ memcpy(new_cnxn_data.key, key,
+ sizeof(new_cnxn_data.key));
+
+ new_cnxn_data.
+ key_is_client_order
+ = key_is_client_order;
+ new_cnxn_data.protocol =
+ UDP_PROTOCOL;
+ rte_cnxn_ip_type(
+ &new_cnxn_data.type,
+ packet);
+ rte_memcpy(new_hash_entry,
+ &new_cnxn_data,
+ sizeof(struct
+ rte_ct_cnxn_data));
+
+ new_hash_entry->counters.
+ packets_forwarded = 1;
+ ct->counters->pkts_forwarded++;
+ new_hash_entry->counters.
+ packets_dropped = 0;
+ ct->counters->pkts_drop = 0;
+ ct->counters->
+ current_active_sessions++;
+ ct->counters->
+ sessions_activated++;
+
+ new_hash_entry->
+ state_used_for_timer
+ = RTE_CT_UDP_NONE;
+ rte_ct_set_cnxn_timer_for_udp(
+ ct,
+ new_hash_entry,
+ RTE_CT_UDP_UNREPLIED);
+
+ rte_ct_remember_new_connection(
+ ct,
+ new_hash_entry);
+ }
+ }
+
+ }
+
+ } /* UDP */
+ } /* packets_for_lookup */
+
+ if (CNXN_TRX_DEBUG > 1) {
+ printf("Exit cnxn tracker synproxy batch lookup with");
+ printf(" packet mask %p\n", (void *)*pkts_mask);
+ }
+}
diff --git a/common/VIL/conntrack/rte_cnxn_tracking.h b/common/VIL/conntrack/rte_cnxn_tracking.h
new file mode 100644
index 00000000..1efb60ef
--- /dev/null
+++ b/common/VIL/conntrack/rte_cnxn_tracking.h
@@ -0,0 +1,413 @@
+/*
+// Copyright (c) 2017 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#ifndef _CNXN_TRACKING_H
+#define _CNXN_TRACKING_H
+
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include <stdbool.h>
+
+
+#include <rte_hash.h>
+#include <rte_ether.h>
+
+#include "rte_ct_tcp.h"
+
+
+/**
+ * @file
+ * Connection Tracker
+ *
+ * A Connection Tracker tracks the status of TCP connections. By remembering
+ * keys pieces of data, such as connection state, sequence numbers seen, and
+ * transmission window size, it can determine if a give packet is valid, or
+ * invalid and should be discarded.
+ *
+ * The current interface is designed for use with ip_pipeline code.
+ */
+
+/*
+ * Opaque type definition for an instance of the connection tracker. It is
+ * possible to have multiple instances of connection tracking running, on one
+ * or more cores. All traffic for a TCP connection must be run through the same
+ * rte_ct_cnxn_tracker.
+ */
+
+/*
+ * The rte_ct_cnxn_tracker is an instance of a connection tracker.
+ */
+struct rte_ct_cnxn_tracker __rte_cache_aligned;
+
+extern int rte_CT_hi_counter_block_in_use;
+
+struct rte_CT_counter_block {
+ /* as long as a counter doesn't cross cache line, writes are atomic */
+ uint64_t current_active_sessions;
+ uint64_t sessions_activated; /* a SYN packet seen, or UDP */
+ /* a SYN packet re-opening a connection */
+ uint64_t sessions_reactivated;
+ /* SYN, SYN/ACK, ACK established a connection */
+ uint64_t sessions_established;
+ uint64_t sessions_closed;
+ uint64_t sessions_timedout;
+ uint64_t pkts_forwarded;
+ uint64_t pkts_drop;
+ uint64_t pkts_drop_invalid_conn;
+ uint64_t pkts_drop_invalid_state;
+ uint64_t pkts_drop_invalid_rst;
+ uint64_t pkts_drop_outof_window;
+} __rte_cache_aligned;
+
+struct rte_synproxy_helper {
+ uint64_t reply_pkt_mask;
+ uint64_t hijack_mask;
+ struct rte_mbuf **buffered_pkts_to_forward;
+ uint8_t num_buffered_pkts_to_forward;
+};
+
+struct rte_CT_helper {
+ uint64_t no_new_cnxn_mask;
+ uint64_t reply_pkt_mask;
+ uint64_t hijack_mask;
+ struct rte_mbuf **buffered_pkts_to_forward;
+ uint8_t num_buffered_pkts_to_forward;
+};
+
+#define MAX_CT_INSTANCES 24 /* max number fw threads, actual usually less*/
+
+extern struct rte_CT_counter_block rte_CT_counter_table[MAX_CT_INSTANCES]
+__rte_cache_aligned;
+
+/**
+ * Run the connection tracking for 1 to 64 packets.
+ *
+ * @param ct
+ * Instance of cnxn tracker to use.
+ * @param pkts
+ * Table of pointers to mbufs containing packets for connection tracking.
+ * Any packets which are not TCP/IP will be ignnored. A maximum of 64
+ * packets may be processed in a call.
+ * @param pkts_mask
+ * Bit map representing which table elements of "pkts" are valid mbuf
+ * pointers, where the least-significant bit of the map represents table
+ * element 0. There must be at least as many elements in the table as the
+ * highest order bit in the map. Valid table entries with a corresponding
+ * 0 in the bitmap will be ignored.
+ * @param ct_helper
+ * Pointer to rte_CT_helper structure which hold the connection tracker
+ * tracking information.
+ *
+ * @return
+ * Returns an updated bitmap that reflects which packets are valid and should
+ * be forwarded.
+ * Any bits representing invalid TCP packets are cleared.
+ * Any packets which are not TCP/IP are considered valid for this purpose.
+ */
+
+uint64_t
+rte_ct_cnxn_tracker_batch_lookup(
+ struct rte_ct_cnxn_tracker *ct,
+ struct rte_mbuf **pkts,
+ uint64_t pkts_mask,
+ struct rte_CT_helper *ct_helper);
+
+void
+rte_ct_cnxn_tracker_batch_lookup_type(
+ struct rte_ct_cnxn_tracker *ct,
+ struct rte_mbuf **pkts,
+ uint64_t *pkts_mask,
+ struct rte_CT_helper *ct_helper,
+ uint8_t ip_hdr_size_bytes);
+
+
+/**
+ * Run the connection tracking for 1 to 64 packets.
+ *
+ * @param ct
+ * Instance of cnxn tracker to use.
+ * @param pkts
+ * Table of pointers to mbufs containing packets for connection tracking.
+ * Any packets which are not TCP/IP will be ignnored. A maximum of 64
+ * packets may be processed in a call.
+ * @param pkts_mask
+ * Bit map representing which table elements of "pkts" are valid mbuf
+ * pointers, where the least-significant bit of the map represents table
+ * element 0. There must be at least as many elements in the table as the
+ * highest order bit in the map. Valid table entries with a corresponding
+ * 0 in the bitmap will be ignored.
+ * @param no_new_cnxn_mask
+ * Bit map representing which table elements of "pkts" are should be
+ * considered valid packets only if there is already an existing connection
+ * for this packet (i.e. same ip addresses, tcp/udp ports, and protocol).
+ * This mask must be a subset of "pkts_mask" (including all or none), and
+ * follows the same format. A 1 means must be existing connection, a 0 means
+ * a new connection setup (e.g. TCP SYN packet) is allowed, or this entry
+ * corresponds to a 0 in pkts_mask.
+ *
+ * @return
+ * Returns an updated bitmap that reflects which packets are valid and should
+ * be forwarded.
+ * Any bits representing invalid TCP packets are cleared.
+ * Any packets which are not TCP/IP are considered valid for this purpose.
+ */
+
+uint64_t
+rte_ct_cnxn_tracker_batch_lookup_with_new_cnxn_control(
+ struct rte_ct_cnxn_tracker *ct,
+ struct rte_mbuf **pkts,
+ uint64_t pkts_mask,
+ uint64_t no_new_cnxn_mask);
+
+
+/**
+* Run the connection tracking for 1 to 64 packets, with support for
+* synproxy.
+*
+* @param ct
+* Instance of cnxn tracker to use.
+* @param pkts
+* Table of pointers to mbufs containing packets for connection tracking.
+* Any packets which are not TCP/IP will be ignnored. A maximum of 64
+* packets may be processed in a call.
+* @param pkts_mask
+* Bit map representing which table elements of "pkts" are valid mbuf pointers,
+* where the least-significant bit of the map represents table element 0. There
+* must be at least as many elements in the table as the highest order bit in
+* the map. Valid table entries with a corresponding 0 in the bitmap will be
+* ignored.
+* @param reply_pkt_mask
+* Bit map representing which table elements of "pkts" have been altered to
+* reply messages for synproxy. These packets, or copies of them must be sent
+* back to the originator. IP and TCP headers have been altered, ethernet
+* header has not
+* @return
+* Returns an updated bitmap that reflects which packets are valid and should
+* be forwarded.Any bits representing invalid TCP packets are cleared.
+* Any packets which are not TCP/IP are considered valid for this purpose.
+*/
+
+
+uint64_t
+rte_ct_cnxn_tracker_batch_lookup_with_synproxy(
+ struct rte_ct_cnxn_tracker *ct,
+ struct rte_mbuf **pkts,
+ uint64_t pkts_mask,
+ struct rte_synproxy_helper *sp_helper);
+
+
+
+
+
+/**
+ * Synproxy might need to buffer client-side packets while the
+ * server-side of the proxy is still being set up. The packets
+ * are released when the server-side connection is complete.
+ * This routine is used to retrieve those packets. Packets are
+ * also released in a similar manner if there is a timeout
+ * during a synproxy setup. This routine should be called before
+ * immediately before any timeout handling, to get the list of
+ * packets (if any) to forward, and again immediately after timeout
+ * handling to get the list of packets (if any) to delete.
+ * Calling this routine removes the packets from synproxy.
+ *
+ * @param new_cnxn_tracker
+ * The connection tracker from which to retrieve the packets
+ *
+ * @return
+ * a linked list of packets to process, in order. The list is
+ * connected via a pointer stored in the mbuf in the offset
+ * given in the "pointer_offset" parameter to the routine:
+ * "rte_ct_initialize_cnxn_tracker_with_synproxy".
+ * If not packets currently available, returns NULL.
+ */
+
+struct rte_mbuf *
+rte_ct_get_buffered_synproxy_packets(struct rte_ct_cnxn_tracker *ct);
+
+
+/**
+ * Initialize a connection tracker instance before use.
+ *
+ * @param new_cnxn_tracker
+ * The connection tracker to initialize, allocated by the user.
+ * @param max_connection_count
+ * Maximum number of simultaneous connections supported.
+ * @param name
+ * A name to give to this connection tracker, for debug purposes
+ *
+ * @return
+ * - 0 if successful
+ * - negative if unsuccesful
+ */
+
+int
+rte_ct_initialize_cnxn_tracker_with_synproxy(
+ struct rte_ct_cnxn_tracker *new_cnxn_tracker,
+ uint32_t max_connection_count,
+ char *name,
+ uint16_t pointer_offset);
+
+/**
+ * Initialize a connection tracker instance before use with synproxy support.
+ *
+ * @param new_cnxn_tracker
+ * The connection tracker to initialize, allocated by the user.
+ * @param max_connection_count
+ * Maximum number of simultaneous connections supported.
+ * @param name
+ * A name to give to this connection tracker, for debug purposes
+ * @param pointer_offset
+ * An offset into the mbuf where the connection tracker can store two pointers.
+ *
+ * @return
+ * - 0 if successful
+ * - negative if unsuccesful
+ */
+
+int
+rte_ct_initialize_cnxn_tracker(
+ struct rte_ct_cnxn_tracker *new_cnxn_tracker,
+ uint32_t max_connection_count,
+ char *name);
+
+
+/**
+ * Free resources allocated by earlier call to rte_ct_initialize_cnxn_tracker()
+ *
+ * @param old_cnxn_tracker
+ * The connection tracker previously initialized.
+ *
+ * @return
+ * - 0 if successful
+ * - < 0 if unsuccesful
+ */
+
+int
+rte_ct_free_cnxn_tracker_resources(
+ struct rte_ct_cnxn_tracker *old_cnxn_tracker);
+
+
+/**
+ * Get size of opaque type rte_ct_cnxn_tracker in order to allocate an instance.
+ *
+ * @return
+ * Size in bytes of rte_ct_cnxn_tracker type
+ */
+
+int
+rte_ct_get_cnxn_tracker_size(void);
+
+/**
+ * Get address of counters kept by this instance.
+ *
+ * @param ct
+ * Instance of cnxn tracker.
+ *
+ */
+
+struct rte_CT_counter_block*
+rte_ct_get_counter_address(struct rte_ct_cnxn_tracker *ct);
+
+
+/**
+ * Process a configuration option supported in the config file.
+ * If a valid name/value pair, update the cnxn tracker.
+ *
+ * @param ct
+ * Instance of cnxn tracker.
+ *
+ * @param name
+ * Name of configuration option.
+ *
+ * @param value
+ * Value of configuration option.
+ *
+ * @return
+ * - 0 if successful
+ * - < 0 if unsuccesful
+ */
+
+int
+rte_ct_set_configuration_options(
+ struct rte_ct_cnxn_tracker *ct,
+ char *name,
+ char *value);
+
+/**
+ * Check for expired connection tracking timers, and delete any expired
+ * connections. This routine must be called in the loop that processes packets,
+ * to ensure that timeouts are handled synchronously with packet processing.
+ * More frequent calls means more accurate timing but more overhead.
+ *
+ * @param ct
+ * Instance of cnxn tracker to check timers.
+ *
+ */
+
+void
+rte_ct_handle_expired_timers(struct rte_ct_cnxn_tracker *ct);
+
+
+int
+rte_ct_get_IP_hdr_size(struct rte_mbuf *pkt);
+
+/**
+* Enable synproxy for this connection tracker.
+*
+* @param ct
+* Instance of cnxn tracker to enable.
+*
+*/
+
+void
+rte_ct_enable_synproxy(struct rte_ct_cnxn_tracker *ct);
+
+/**
+* Disable synproxy for this connection tracker.
+*
+* @param ct
+* Instance of cnxn tracker to disable.
+*
+*/
+
+void
+rte_ct_disable_synproxy(struct rte_ct_cnxn_tracker *ct);
+int
+rte_ct_initialize_default_timeouts(
+ struct rte_ct_cnxn_tracker *new_cnxn_tracker);
+
+uint8_t
+rte_ct_create_cnxn_hashkey(
+ uint32_t *src_addr,
+ uint32_t *dst_addr,
+ uint16_t src_port,
+ uint16_t dst_port,
+ uint8_t proto,
+ uint32_t *key,
+ uint8_t type);
+
+/* To get timer core id from CGNAPT timer thread*/
+#ifdef CT_CGNAT
+extern uint32_t get_timer_core_id(void);
+uint64_t cgnapt_ct_process(
+ struct rte_ct_cnxn_tracker *ct,
+ struct rte_mbuf **pkts,
+ uint64_t pkts_mask,
+ struct rte_CT_helper *ct_helper);
+#endif
+#endif
diff --git a/common/VIL/conntrack/rte_ct_synproxy.c b/common/VIL/conntrack/rte_ct_synproxy.c
new file mode 100644
index 00000000..967596d1
--- /dev/null
+++ b/common/VIL/conntrack/rte_ct_synproxy.c
@@ -0,0 +1,873 @@
+/*
+// Copyright (c) 2017 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <rte_common.h>
+#include <rte_malloc.h>
+#include <rte_ether.h>
+#include <rte_ip.h>
+#include <rte_udp.h>
+#include <rte_icmp.h>
+#include <rte_byteorder.h>
+#include <rte_cycles.h>
+
+#include "rte_ct_tcp.h"
+
+
+/*
+ * OVERVIEW:
+ * This module will behave as a proxy between an initiator (external client)
+ * and listener (internal server).
+ * (1) Proxy receives SYN from initiator, replies with spoofed SYN-ACK message
+ * No packet is sent to the lister at this time.
+ * (2) Proxy receives ACK from the initiator, so the connection request is
+ * considred valid. Proxy sends a spoofed SYN message to the listener.
+ * (3) Proxy receives SYN-ACK message from listener. Proxy replies to listener
+ * with a spoofed ACK message. The connection is considered established.
+ * (4) Traffic is exchanged between initiator and listener. Sequence and
+ * ack numbers translated appropriately by proxy.
+ */
+
+/*
+ * DETAILS, when SynProxy on:
+ * (1) receive initial SYN from client
+ * call CT, all new connections assigned spoofed (random) SEQ number
+ * packet re-purposed as SYN-ACK back to client with spoofed SEQ
+ * -> change ethernet, IP, and TCP headers, put on appropriate output ring
+ * (2) receive ACK packet from client
+ * connection request now considered valid
+ * packet re-purposed as SYN to server, using SEQ from original SYN
+ * -> change TCP header, put on output ring originally targetted
+ * (3) receive SYN-ACK packet from server
+ * connection now ESTABLISHED
+ * compute SEQ difference between spoofed SEQ and real server SEQ
+ * packet re-purposed as ACK to server
+ * -> change ethernet, IP, and TCP headers, put on appropriate output ring
+ * (4) all further packets flow normally, except SEQ and ACK numbers must be
+ * modified by SEQ diff (SEQ in server->client direction, ACK and SACK in
+ * client->server direction)
+ *
+ */
+
+#define META_DATA_OFFSET 128
+#define ETHERNET_START (META_DATA_OFFSET + RTE_PKTMBUF_HEADROOM)
+#define ETH_HDR_SIZE 14
+#define IP_START (ETHERNET_START + ETH_HDR_SIZE)
+#define PROTOCOL_START (IP_START + 9)
+#define IP_V4_HEADER_SIZE 20
+#define IP_V6_HEADER_SIZE 40
+#define TCP_START (IP_START + IP_V4_HEADER_SIZE)
+#define TCP_MIN_HDR_SIZE 20
+
+#define RTE_TCP_PROTO_ID 6
+#define RTE_SP_DEFAULT_TTL 64
+
+#define RTE_SYNPROXY_MAX_SPOOFED_PKTS 64
+
+#define RTE_TCP_SYN 0x02
+#define RTE_TCP_ACK 0x10
+#define RTE_TCP_SYN_ACK (RTE_TCP_SYN | RTE_TCP_ACK)
+
+#define RTE_SP_DEFAULT_WINDOW 29200
+#define RTE_CT_DEBUG_SPOOFED_SEQ 0
+#define RTE_DPDK_IS_16_4 0
+
+#define IP_VERSION_4 4
+#define IP_VERSION_6 6
+
+
+/* default TCP options */
+/* TODO: need to set in config file */
+
+struct rte_synproxy_options default_ipv4_synproxy_options = {
+ .options = RTE_SP_OPTIONS_MSS |
+ RTE_SP_OPTIONS_SACK_PERM |
+ RTE_SP_OPTIONS_WINDOW_SCALE,
+ .mss = 1460,
+ .window_scale = 7,
+ .initial_window = RTE_SP_DEFAULT_WINDOW
+};
+
+
+struct rte_synproxy_options default_ipv6_synproxy_options = {
+ .options = RTE_SP_OPTIONS_MSS |
+ RTE_SP_OPTIONS_SACK_PERM |
+ RTE_SP_OPTIONS_WINDOW_SCALE,
+ .mss = 1440,
+ .window_scale = 7,
+ .initial_window = RTE_SP_DEFAULT_WINDOW
+};
+
+/* IP/TCP header print for debugging */
+static __rte_unused void
+rte_ct_synproxy_print_pkt_info(struct rte_mbuf *pkt)
+{
+ struct ipv4_hdr *ihdr4 = (struct ipv4_hdr *)
+ RTE_MBUF_METADATA_UINT32_PTR(pkt, IP_START);
+ __rte_unused struct tcp_hdr *thdr = (struct tcp_hdr *)
+ RTE_MBUF_METADATA_UINT32_PTR(pkt, TCP_START);
+ uint32_t packet_length = rte_pktmbuf_pkt_len(pkt);
+
+ printf("\npacket length %u, ip length %u\n", packet_length,
+ rte_bswap16(ihdr4->total_length));
+ rte_pktmbuf_dump(stdout, pkt, 80);
+}
+
+static inline void
+rte_sp_incremental_tcp_chksum_update_32(
+ uint32_t num_before, /* in Intel order, not network order */
+ uint32_t num_after, /* in Intel order, not network order */
+
+ uint16_t *chksum) /* network order, e.g. pointer into header */
+{
+ uint32_t sum;
+
+ sum = ~rte_bswap16(*chksum) & 0xffff;
+ num_before = ~num_before;
+ sum += (num_before >> 16) + (num_before & 0xffff);
+ sum += (num_after >> 16) + (num_after & 0xffff);
+ sum = (sum >> 16) + (sum & 0xffff);
+ sum += (sum >> 16);
+ *chksum = rte_bswap16(~sum & 0xffff);
+}
+
+
+
+static inline uint32_t
+rte_sp_get_random_seq_number(void)
+{
+ return rte_get_tsc_cycles(); /* low 32 bits of timestamp*/
+}
+
+
+static int8_t rte_ct_ipversion(void *i_hdr)
+{
+ uint8_t *ihdr = (uint8_t *)i_hdr;
+ int8_t hdr_chk = *ihdr;
+
+ hdr_chk = hdr_chk >> 4;
+ if (hdr_chk == IP_VERSION_4 || hdr_chk == IP_VERSION_6)
+ return hdr_chk;
+ else
+ return -1;
+}
+
+static inline void
+rte_synproxy_adjust_pkt_length(struct rte_mbuf *pkt)
+{
+ uint16_t pkt_length = 0;
+ int ip_hdr_size_bytes = rte_ct_get_IP_hdr_size(pkt);
+ void *iphdr = RTE_MBUF_METADATA_UINT32_PTR(pkt, IP_START);
+
+ if (ip_hdr_size_bytes == IP_V4_HEADER_SIZE) {
+ struct ipv4_hdr *ihdr4 = (struct ipv4_hdr *)iphdr;
+
+ pkt_length = rte_bswap16(ihdr4->total_length) + ETH_HDR_SIZE;
+ } else if (ip_hdr_size_bytes == IP_V6_HEADER_SIZE) {
+ struct ipv6_hdr *ihdr6 = (struct ipv6_hdr *)iphdr;
+
+ pkt_length = rte_bswap16(ihdr6->payload_len) +
+ IP_V6_HEADER_SIZE + ETH_HDR_SIZE;
+ }
+ uint16_t mbuf_pkt_length = rte_pktmbuf_pkt_len(pkt);
+
+ if (pkt_length == mbuf_pkt_length)
+ return;
+
+ if (pkt_length < mbuf_pkt_length) {
+ rte_pktmbuf_trim(pkt, mbuf_pkt_length - pkt_length);
+ return;
+ }
+
+ /* pkt_length > mbuf_pkt_length */
+ rte_pktmbuf_append(pkt, pkt_length - mbuf_pkt_length);
+}
+
+static void
+rte_synproxy_build_ipv4_header(
+ struct ipv4_hdr *hdr4,
+ uint32_t src_addr,
+ uint32_t dst_addr,
+ uint16_t tcp_length)
+{
+ /* TODO: consider interface re-work, too many rte_bswapxx */
+ /* options are not supported, so header size is fixed */
+ hdr4->version_ihl = 0x45;
+ hdr4->type_of_service = 0;
+ hdr4->total_length = rte_bswap16(tcp_length + IP_V4_HEADER_SIZE);
+ hdr4->packet_id = 0;
+ /* set Don't fragment bit, Intel order */
+ hdr4->fragment_offset = 0x0040;
+ hdr4->time_to_live = RTE_SP_DEFAULT_TTL;
+ hdr4->next_proto_id = RTE_TCP_PROTO_ID;
+ /* checksum calculated later */
+ hdr4->src_addr = rte_bswap32(src_addr);
+ hdr4->dst_addr = rte_bswap32(dst_addr);
+}
+
+
+static void
+rte_synproxy_build_ipv6_header(
+ struct ipv6_hdr *hdr6,
+ uint8_t *src_addr,
+ uint8_t *dst_addr,
+ uint16_t tcp_length)
+{
+ /* TODO: consider interface re-work, too many rte_bswapxx */
+ /* options are not supported, so header size is fixed */
+ uint8_t temp_src[16];
+ uint8_t temp_dst[16];
+
+ hdr6->vtc_flow = 0x60; /* Intel Order */
+ hdr6->payload_len = rte_bswap16(tcp_length);
+ hdr6->proto = RTE_TCP_PROTO_ID;
+ hdr6->hop_limits = RTE_SP_DEFAULT_TTL;
+ /* checksum calculated later */
+
+ /* must copy to temps to avoid overwriting */
+ rte_mov16(temp_src, src_addr);
+ rte_mov16(temp_dst, dst_addr);
+ rte_mov16(hdr6->src_addr, temp_src);
+ rte_mov16(hdr6->dst_addr, temp_dst);
+}
+
+/* add options specified in t_opts to TCP header in packet. */
+
+static uint16_t
+rte_sp_add_tcp_options(struct tcp_hdr *thdr,
+ const struct rte_synproxy_options *t_opts)
+{
+ uint32_t *options_ptr = (uint32_t *)(thdr + 1);
+ uint32_t *saved_ptr = options_ptr;
+ uint8_t options = t_opts->options;
+ uint32_t option_bytes; /* options built in groups of 4 bytes */
+
+ if (options & RTE_SP_OPTIONS_MSS) {
+ option_bytes = (RTE_CT_TCPOPT_MSS << 24) |
+ (RTE_CT_TCPOLEN_MSS << 16) | t_opts->mss;
+ *options_ptr++ = rte_bswap32(option_bytes);
+ }
+
+ if (options & RTE_SP_OPTIONS_TIMESTAMP) {
+ /* if both timestamp and sack permitted options,
+ * pack together
+ */
+ if (options & RTE_SP_OPTIONS_SACK_PERM)
+ option_bytes = (RTE_CT_TCPOPT_SACK_PERM << 24) |
+ (RTE_CT_TCPOLEN_SACK_PERM << 16);
+ else
+ option_bytes = (RTE_CT_TCPOPT_NOP << 24) |
+ (RTE_CT_TCPOPT_NOP << 16);
+
+ option_bytes |= (RTE_CT_TCPOPT_TIMESTAMP << 8) |
+ RTE_CT_TCPOLEN_TIMESTAMP;
+ *options_ptr++ = rte_bswap32(option_bytes);
+ *options_ptr++ = rte_bswap32(t_opts->ts_val);
+ *options_ptr++ = rte_bswap32(t_opts->ts_echo_reply);
+ } else if (options & RTE_SP_OPTIONS_SACK_PERM) {
+ option_bytes = (RTE_CT_TCPOPT_NOP << 24) |
+ (RTE_CT_TCPOPT_NOP << 16) |
+ (RTE_CT_TCPOPT_SACK_PERM << 8) |
+ RTE_CT_TCPOLEN_SACK_PERM;
+ *options_ptr++ = rte_bswap32(option_bytes);
+ }
+
+ if (options & RTE_SP_OPTIONS_WINDOW_SCALE) {
+ option_bytes = (RTE_CT_TCPOPT_NOP << 24) |
+ (RTE_CT_TCPOPT_WINDOW << 16) |
+ (RTE_CT_TCPOLEN_WINDOW << 8) |
+ t_opts->window_scale;
+ *options_ptr++ = rte_bswap32(option_bytes);
+ }
+
+ /* compute the data offset field, which is size of total
+ * TCP header in 32 bit words
+ */
+ /* TODO: diff from options ptr to thdr */
+ uint16_t data_offset_bytes = (uint16_t)RTE_PTR_DIFF(options_ptr,
+ saved_ptr) + sizeof(struct tcp_hdr);
+ thdr->data_off = (data_offset_bytes >> 2) << 4;
+
+ return data_offset_bytes;
+}
+
+/* Build a TCP header.
+ * Note that the the tcp_hdr must be in the appropriate location
+ * in an mbuf
+ * TODO: consider interface re-work, too many rte_bswapxx
+ */
+static inline uint16_t
+rte_synproxy_build_tcp_header(
+ __rte_unused struct rte_mbuf *old_pkt,
+ struct tcp_hdr *t_hdr,
+ uint16_t src_port,
+ uint16_t dst_port,
+ uint32_t seq,
+ uint32_t ack,
+ uint8_t flags,
+ const struct rte_synproxy_options *t_opts,
+ uint8_t add_options)
+{
+ t_hdr->src_port = rte_bswap16(src_port);
+ t_hdr->dst_port = rte_bswap16(dst_port);
+ t_hdr->sent_seq = rte_bswap32(seq);
+ t_hdr->recv_ack = rte_bswap32(ack);
+
+ t_hdr->tcp_flags = flags;
+ t_hdr->rx_win = t_opts->initial_window;
+ /* checksum calculated later */
+ t_hdr->tcp_urp = 0;
+
+ /* add tcp header options, if applicable */
+
+ uint16_t new_tcp_hdr_size = TCP_MIN_HDR_SIZE;
+
+ if (add_options)
+ new_tcp_hdr_size = rte_sp_add_tcp_options(t_hdr, t_opts);
+ else
+ t_hdr->data_off = (TCP_MIN_HDR_SIZE >> 2) << 4;
+
+ return new_tcp_hdr_size;
+}
+
+static void
+rte_synproxy_compute_checksums(void *i_hdr, struct tcp_hdr *t_hdr)
+{
+ /*
+ * calculate IP and TCP checksums. Note that both checksum
+ * routines requirehecksum fields to be set to zero,
+ * and the the checksum is in the correct
+ * byte order, so no rte_bswap16 is required.
+ */
+
+ /* TODO: look into h/w computation of checksums */
+
+ int8_t hdr_chk = rte_ct_ipversion(i_hdr);
+
+ t_hdr->cksum = 0;
+
+ if (hdr_chk == IP_VERSION_4) {
+ struct ipv4_hdr *i4_hdr = (struct ipv4_hdr *)i_hdr;
+
+ i4_hdr->hdr_checksum = 0;
+ t_hdr->cksum = rte_ipv4_udptcp_cksum(i4_hdr, t_hdr);
+ i4_hdr->hdr_checksum = rte_ipv4_cksum(i4_hdr);
+ } else if (hdr_chk == IP_VERSION_6) {
+ struct ipv6_hdr *i6_hdr = (struct ipv6_hdr *)i_hdr;
+
+ t_hdr->cksum = rte_ipv6_udptcp_cksum(i6_hdr, t_hdr);
+ }
+}
+
+
+
+/*
+ * Building new packet headers:
+ * For IPv4 and IPv6 headers, no options and no fragmentation are supported.
+ * Header size is fixed.
+ * TCP header will (likely) have options, so header size is not fixed.
+ * TCP header will be built first, and size used in IP packet size calculation.
+ */
+void
+rte_sp_cvt_to_spoofed_client_synack(struct rte_ct_cnxn_data *cd,
+ struct rte_mbuf *old_pkt)
+{
+ /* old packet is syn from client. Change to a (spoofed)
+ * SYN-ACK to send back
+ */
+
+ int ip_hdr_size_bytes = rte_ct_get_IP_hdr_size(old_pkt);
+ void *iphdr = RTE_MBUF_METADATA_UINT32_PTR(old_pkt, IP_START);
+ struct tcp_hdr *thdr = (struct tcp_hdr *)
+ RTE_MBUF_METADATA_UINT32_PTR(old_pkt, IP_START +
+ ip_hdr_size_bytes);
+ uint16_t tcp_header_size;
+
+ /* get a spoofed sequence number and save in the connection data */
+ uint32_t new_seq = rte_sp_get_random_seq_number();
+
+ if (RTE_CT_DEBUG_SPOOFED_SEQ)
+ new_seq = 10; /* something simple to aid debugging */
+
+ cd->ct_protocol.synproxy_data.original_spoofed_seq = new_seq;
+
+ /* build the TCP header, including reversing the port numbers. */
+ tcp_header_size = rte_synproxy_build_tcp_header(old_pkt, thdr,
+ rte_bswap16(thdr->dst_port),
+ rte_bswap16(thdr->src_port),
+ new_seq, rte_bswap32(thdr->sent_seq) + 1,
+ RTE_TCP_SYN_ACK,
+ ip_hdr_size_bytes == IP_V4_HEADER_SIZE ?
+ &default_ipv4_synproxy_options :
+ &default_ipv6_synproxy_options, 1);
+
+ /* reverse the source and destination addresses in the IP hdr */
+ if (ip_hdr_size_bytes == IP_V4_HEADER_SIZE) {
+ struct ipv4_hdr *ihdr4 = (struct ipv4_hdr *)iphdr;
+
+ rte_synproxy_build_ipv4_header(ihdr4,
+ rte_bswap32(ihdr4->dst_addr),
+ rte_bswap32(ihdr4->src_addr), tcp_header_size);
+
+ } else if (ip_hdr_size_bytes == IP_V6_HEADER_SIZE) {
+ struct ipv6_hdr *ihdr6 = (struct ipv6_hdr *)iphdr;
+
+ rte_synproxy_build_ipv6_header(ihdr6,
+ (uint8_t *)ihdr6->dst_addr,
+ (uint8_t *)ihdr6->src_addr, tcp_header_size);
+ }
+ rte_synproxy_adjust_pkt_length(old_pkt);
+ /* compute checksums */
+ rte_synproxy_compute_checksums(iphdr, thdr);
+
+}
+
+
+void
+rte_sp_cvt_to_spoofed_server_syn(struct rte_ct_cnxn_data *cd,
+ struct rte_mbuf *old_pkt)
+{
+ /* old packet is ACK from client. Change to (spoofed)
+ * SYN to send to server
+ */
+
+ int ip_hdr_size_bytes = rte_ct_get_IP_hdr_size(old_pkt);
+ void *iphdr = RTE_MBUF_METADATA_UINT32_PTR(old_pkt, IP_START);
+ struct tcp_hdr *thdr = (struct tcp_hdr *)
+ RTE_MBUF_METADATA_UINT32_PTR(old_pkt, IP_START
+ + ip_hdr_size_bytes);
+ uint16_t tcp_header_size;
+
+ tcp_header_size = rte_synproxy_build_tcp_header(old_pkt, thdr,
+ rte_bswap16(thdr->src_port),
+ rte_bswap16(thdr->dst_port),
+ rte_bswap32(thdr->sent_seq) - 1, 0,
+ RTE_TCP_SYN,
+ &cd->ct_protocol.synproxy_data.cnxn_options, 1);
+
+ if (ip_hdr_size_bytes == IP_V4_HEADER_SIZE) {
+ struct ipv4_hdr *ihdr4 = (struct ipv4_hdr *)iphdr;
+
+ rte_synproxy_build_ipv4_header(ihdr4,
+ rte_bswap32(ihdr4->src_addr),
+ rte_bswap32(ihdr4->dst_addr), tcp_header_size);
+ } else if (ip_hdr_size_bytes == IP_V6_HEADER_SIZE) {
+ struct ipv6_hdr *ihdr6 = (struct ipv6_hdr *)iphdr;
+
+ rte_synproxy_build_ipv6_header(ihdr6,
+ (uint8_t *)ihdr6->src_addr,
+ (uint8_t *)ihdr6->dst_addr, tcp_header_size);
+ }
+
+ rte_synproxy_adjust_pkt_length(old_pkt);
+ /* compute checksums */
+ rte_synproxy_compute_checksums(iphdr, thdr);
+
+}
+
+void
+rte_sp_cvt_to_spoofed_server_ack(struct rte_ct_cnxn_data *cd,
+ struct rte_mbuf *old_pkt)
+{
+ /* old packet is SYN-ACK from server. Change to spoofed ACK and
+ * send back to server
+ */
+
+ int ip_hdr_size_bytes = rte_ct_get_IP_hdr_size(old_pkt);
+ void *iphdr = RTE_MBUF_METADATA_UINT32_PTR(old_pkt, IP_START);
+ struct tcp_hdr *thdr = (struct tcp_hdr *)
+ RTE_MBUF_METADATA_UINT32_PTR(old_pkt, IP_START +
+ ip_hdr_size_bytes);
+
+ /* read real seq out of SYN-ACK from server, and save the delta from
+ * the spoofed one
+ */
+ uint32_t real_seq = rte_bswap32(thdr->sent_seq);
+ uint16_t tcp_header_size;
+
+ cd->ct_protocol.synproxy_data.seq_diff =
+ real_seq - cd->ct_protocol.synproxy_data.original_spoofed_seq;
+
+ /* reverse the source and destination addresses */
+ tcp_header_size = rte_synproxy_build_tcp_header(old_pkt, thdr,
+ rte_bswap16(thdr->dst_port),
+ rte_bswap16(thdr->src_port),
+ rte_bswap32(thdr->recv_ack),
+ rte_bswap32(thdr->sent_seq) + 1, RTE_TCP_ACK,
+ &cd->ct_protocol.synproxy_data.cnxn_options, 0);
+
+ /* reverse the source and destination addresses in the IP hdr */
+ if (ip_hdr_size_bytes == IP_V4_HEADER_SIZE) {
+ struct ipv4_hdr *ihdr4 = (struct ipv4_hdr *)iphdr;
+
+ rte_synproxy_build_ipv4_header(ihdr4,
+ rte_bswap32(ihdr4->dst_addr),
+ rte_bswap32(ihdr4->src_addr), tcp_header_size);
+
+ } else if (ip_hdr_size_bytes == IP_V6_HEADER_SIZE) {
+ struct ipv6_hdr *ihdr6 = (struct ipv6_hdr *)iphdr;
+
+ rte_synproxy_build_ipv6_header(ihdr6,
+ (uint8_t *)ihdr6->dst_addr,
+ (uint8_t *)ihdr6->src_addr, tcp_header_size);
+ }
+ rte_synproxy_adjust_pkt_length(old_pkt);
+ /* compute checksums */
+ rte_synproxy_compute_checksums(iphdr, thdr);
+}
+
+/*
+ * if running synproxy and both halves of the proxied connection has been
+ * established, need adjust the seq or ack value of the packet.
+ * The value is adjusted by the difference between the spoofed server
+ * initial sequence number and the real server sequence number.
+ * In the client -> server direction, the ack must be increased by the
+ * difference before the window check.
+ * In the server -> client direction, the seq must be decreased by the
+ * difference after the window check.
+ */
+
+
+void
+rte_sp_adjust_server_seq_after_window_check(
+ struct rte_ct_cnxn_data *cd,
+ __rte_unused void *i_hdr,
+ struct tcp_hdr *thdr,
+ enum rte_ct_pkt_direction dir)
+{
+ uint32_t num_before, num_after;
+
+ if (!cd->ct_protocol.synproxy_data.cnxn_established)
+ return;
+
+ if (dir == RTE_CT_DIR_ORIGINAL)
+ return; /*wrong direction */
+
+
+ /* update appropriate number (seq or ack) in header */
+ num_before = rte_bswap32(thdr->sent_seq);
+ num_after = num_before - cd->ct_protocol.synproxy_data.seq_diff;
+ thdr->sent_seq = rte_bswap32(num_after);
+
+ rte_sp_incremental_tcp_chksum_update_32(num_before, num_after,
+ &thdr->cksum);
+}
+
+
+static void
+rte_sp_adjust_client_sack_entries(
+ struct tcp_hdr *thdr,
+ uint32_t diff)
+{
+ uint32_t num_before, num_after;
+ uint32_t *sack_ptr;
+ uint8_t sack_blk_size;
+ uint16_t dataoff_in_bytes = (thdr->data_off & 0xf0) >> 2;
+ uint16_t length = dataoff_in_bytes - sizeof(struct tcp_hdr);
+
+ if (!length)
+ return;
+
+ uint8_t *options_ptr = (uint8_t *)(thdr + 1);
+
+ while (length > 0) {
+ uint8_t opcode = *options_ptr;
+ uint8_t opsize = options_ptr[1];
+ int i;
+
+ switch (opcode) {
+
+ case RTE_CT_TCPOPT_EOL:
+ return; /* end of options */
+
+ case RTE_CT_TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
+ length--;
+ options_ptr++;
+ continue;
+
+ case RTE_CT_TCPOPT_SACK:
+ /*
+ * SACK (selective ACK) contains a block of 1 to 4
+ * entries of 8 bytes each. Each entry is a pair of
+ * 32 bit numbers. This block follows the usual 2
+ * bytes for opcode and opsize. Thus, the entire SACK
+ * option must be 10, 18, 26 or 34 bytes long.
+ */
+
+ sack_blk_size = opsize - 2;
+ /* start of entries */
+ sack_ptr = (uint32_t *)(options_ptr + 2);
+ /* count of 32 bit elements */
+ int num_acks = sack_blk_size >> 2;
+
+ if (unlikely(sack_blk_size > 32 ||
+ ((sack_blk_size & 0x3) != 0))) {
+ printf("Sack block parsing failure\n");
+ return;
+ }
+
+ for (i = 0; i < num_acks; i++) {
+ num_before = rte_bswap32(*sack_ptr);
+ num_after = num_before + diff;
+ *sack_ptr = rte_bswap32(num_after);
+ sack_ptr++;
+ rte_sp_incremental_tcp_chksum_update_32(
+ num_before,
+ num_after,
+ &thdr->cksum);
+ }
+
+ return;
+ default:
+ break;
+ }
+ if ((opsize < 2) || (opsize > length)) {
+ printf("ERROR!, opsize %i, length %i\n",
+ opsize, length);
+ return;
+ }
+
+ options_ptr += opsize;
+ length -= opsize;
+ }
+}
+
+void
+rte_sp_adjust_client_ack_before_window_check(
+ struct rte_ct_cnxn_data *cd,
+ __rte_unused void *i_hdr,
+ struct tcp_hdr *thdr,
+ enum rte_ct_pkt_direction dir)
+{
+ uint32_t num_before, num_after;
+
+ if (!cd->ct_protocol.synproxy_data.cnxn_established)
+ return;
+
+ if (dir != RTE_CT_DIR_ORIGINAL)
+ return; /*wrong direction */
+
+
+ /* first update appropriate number (seq or ack) in header */
+ num_before = rte_bswap32(thdr->recv_ack);
+ num_after = num_before + cd->ct_protocol.synproxy_data.seq_diff;
+ thdr->recv_ack = rte_bswap32(num_after);
+ rte_sp_incremental_tcp_chksum_update_32(num_before,
+ num_after, &thdr->cksum);
+
+ /* update SACK entries in header if any */
+
+ if (1) { /* TODO: check if sack permitted before calling */
+ rte_sp_adjust_client_sack_entries(thdr,
+ cd->ct_protocol.synproxy_data.seq_diff);
+ /* note that tcp hdr checksum adjusted in above sack
+ * entries routine call
+ */
+ }
+}
+
+
+
+
+/* parse the tcp header options, if any, and save interesting ones */
+static void
+rte_sp_parse_tcp_options(
+ uint8_t *options_ptr,
+ uint16_t length,
+ struct rte_synproxy_options *t_opts)
+{
+ int opsize;
+
+ t_opts->options = 0;
+
+ while (length > 0) {
+ uint8_t opcode = *options_ptr++;
+
+ if (opcode == RTE_CT_TCPOPT_EOL)
+ return;
+
+ if (opcode == RTE_CT_TCPOPT_NOP) {
+ length--;
+ continue; /* skip adjustments at loop bottom */
+ }
+
+ opsize = *options_ptr++;
+
+ if (unlikely(opsize < 2 || opsize > length)) {
+ /* TODO: Change printf to log */
+ printf("parsing error, opsize: %i, length: %i\n",
+ opsize, length);
+ return;
+ }
+
+ switch (opcode) {
+
+ case RTE_CT_TCPOPT_MSS:
+ if (opsize == RTE_CT_TCPOLEN_MSS) {
+ uint16_t *mss_ptr = (uint16_t *)options_ptr;
+
+ t_opts->mss = rte_bswap16(*mss_ptr);
+ t_opts->options |= RTE_SP_OPTIONS_MSS;
+ }
+ break;
+
+ case RTE_CT_TCPOPT_WINDOW:
+ if (opsize == RTE_CT_TCPOLEN_WINDOW) {
+ t_opts->window_scale = RTE_MIN(*options_ptr,
+ RTE_CT_MAX_TCP_WINDOW_SCALE);
+ t_opts->options |= RTE_SP_OPTIONS_WINDOW_SCALE;
+ }
+ break;
+
+ case RTE_CT_TCPOPT_TIMESTAMP:
+ if (opsize == RTE_CT_TCPOLEN_TIMESTAMP) {
+ uint32_t *ts_val_ptr = (uint32_t *)options_ptr;
+ uint32_t *ts_ecr_ptr =
+ (uint32_t *)(options_ptr + 4);
+ t_opts->ts_val = rte_bswap32(*ts_val_ptr);
+ t_opts->ts_echo_reply =
+ rte_bswap32(*ts_ecr_ptr);
+ t_opts->options |= RTE_SP_OPTIONS_TIMESTAMP;
+ }
+ break;
+
+ case RTE_CT_TCPOPT_SACK_PERM:
+ if (opsize == RTE_CT_TCPOLEN_SACK_PERM)
+ t_opts->options |= RTE_SP_OPTIONS_SACK_PERM;
+ break;
+
+ default:
+ break;
+ }
+
+ options_ptr += opsize - 2;
+ length -= opsize;
+
+ }
+}
+
+/* parse the tcp header options, if any, and save interesting ones in t_opts */
+void
+rte_sp_parse_options(struct rte_mbuf *pkt, struct rte_ct_cnxn_data *cd)
+{
+ /*uint16_t ip_hdr_length = rte_sp_get_ip_header_size(pkt);
+ * skip over IPv4 or IPv6 header
+ */
+ int ip_hdr_length = rte_ct_get_IP_hdr_size(pkt);
+ struct tcp_hdr *thdr = (struct tcp_hdr *)
+ RTE_MBUF_METADATA_UINT32_PTR(pkt, IP_START + ip_hdr_length);
+ uint8_t *opt_ptr = RTE_MBUF_METADATA_UINT8_PTR(pkt,
+ (IP_START + ip_hdr_length + sizeof(struct tcp_hdr)));
+
+ struct rte_synproxy_options *t_opts =
+ &cd->ct_protocol.synproxy_data.cnxn_options;
+ int length_in_bytes =
+ ((thdr->data_off & 0xf0) >> 2) - sizeof(struct tcp_hdr);
+
+ rte_sp_parse_tcp_options(opt_ptr, length_in_bytes, t_opts);
+ t_opts->initial_window = thdr->rx_win;
+}
+
+
+
+
+struct rte_mbuf *
+rte_ct_get_buffered_synproxy_packets(
+ struct rte_ct_cnxn_tracker *ct)
+{
+ struct rte_mbuf *trkr_list = ct->buffered_pkt_list;
+
+ ct->buffered_pkt_list = NULL;
+ return trkr_list;
+}
+
+
+
+void rte_ct_enable_synproxy(struct rte_ct_cnxn_tracker *ct)
+{
+ ct->misc_options.synproxy_enabled = 1;
+ printf("rte_ct_enable_synproxy = %d\n",
+ ct->misc_options.synproxy_enabled);
+}
+
+void rte_ct_disable_synproxy(struct rte_ct_cnxn_tracker *ct)
+{
+ ct->misc_options.synproxy_enabled = 0;
+ //printf("rte_ct_disable_synproxy = %d\n",
+ // ct->misc_options.synproxy_enabled);
+}
+
+void
+rte_ct_buffer_packet(
+ struct rte_ct_cnxn_tracker *ct,
+ struct rte_ct_cnxn_data *cd,
+ struct rte_mbuf *pkt)
+{
+ /*
+ * Add packet to list of buffered packets for the connection.
+ * List is built in reverse of order received by adding to front.
+ * List will later be reversed to maintain order of arrival.
+ */
+
+ struct rte_mbuf **next = (struct rte_mbuf **)
+ RTE_MBUF_METADATA_UINT64_PTR(pkt,
+ ct->pointer_offset);
+ *next = cd->ct_protocol.synproxy_data.buffered_pkt_list;
+ cd->ct_protocol.synproxy_data.buffered_pkt_list = pkt;
+}
+
+void
+rte_ct_release_buffered_packets(
+ struct rte_ct_cnxn_tracker *ct,
+ struct rte_ct_cnxn_data *cd)
+{
+ struct rte_mbuf *cnxn_list =
+ cd->ct_protocol.synproxy_data.buffered_pkt_list;
+
+ if (cnxn_list == NULL)
+ return;
+
+ cd->ct_protocol.synproxy_data.buffered_pkt_list = NULL;
+
+ struct rte_mbuf *trkr_list = ct->buffered_pkt_list;
+
+ if (trkr_list == NULL)
+ return;
+ /*
+ * walk the cnxn_list, and add to front of trkr_list, reversing order
+ * and thus restoring orginal order. Order between different
+ * connections is irrelevant.
+ */
+ while (cnxn_list != NULL) {
+ struct rte_mbuf *old_next;
+
+ struct rte_mbuf **next = (struct rte_mbuf **)
+ RTE_MBUF_METADATA_UINT64_PTR(cnxn_list,
+ ct->pointer_offset);
+
+ old_next = *next; /* save next cd packet */
+ *next = trkr_list;/* make this cd packet point to ct list */
+ trkr_list = cnxn_list;/* make the cd packet head of ct list */
+ cnxn_list = old_next; /* advance along cd list */
+ }
+ ct->buffered_pkt_list = trkr_list;
+}
diff --git a/common/VIL/conntrack/rte_ct_tcp.c b/common/VIL/conntrack/rte_ct_tcp.c
new file mode 100644
index 00000000..073c63ed
--- /dev/null
+++ b/common/VIL/conntrack/rte_ct_tcp.c
@@ -0,0 +1,1116 @@
+/*
+// Copyright (c) 2017 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include <stdlib.h>
+#include <string.h>
+#include <immintrin.h>
+#include <inttypes.h>
+#include "rte_ct_tcp.h"
+#include "rte_cnxn_tracking.h"
+
+/* uint32_t CT_DEBUG = 1; */ /* Can be used to conditionally turn of debug */
+#define CT_DEBUG 0
+#define STATE_TRACKING 0
+#define RTE_CT_ASSERT 0
+
+/* constants for mbuff manipulation */
+#define META_DATA_OFFSET 128
+#define RTE_PKTMBUF_HEADROOM 128 /* where is this defined ? */
+#define ETHERNET_START (META_DATA_OFFSET + RTE_PKTMBUF_HEADROOM)
+#define ETH_HDR_SIZE 14
+#define IP_START (ETHERNET_START + ETH_HDR_SIZE)
+
+#define IPv4_HEADER_SIZE 20
+#define IPv6_HEADER_SIZE 40
+
+#define IP_VERSION_4 4
+#define IP_VERSION_6 6
+
+#define rte_after(seq2, seq1) rte_before(seq1, seq2)
+static inline uint8_t rte_before(uint32_t seq1, uint32_t seq2)
+{
+ return (int32_t) (seq1 - seq2) < 0;
+}
+
+/* short state names for defining state table */
+
+#define ctNO RTE_CT_TCP_NONE
+#define ctSS RTE_CT_TCP_SYN_SENT
+#define ctSR RTE_CT_TCP_SYN_RECV
+#define ctES RTE_CT_TCP_ESTABLISHED
+#define ctFW RTE_CT_TCP_FIN_WAIT
+#define ctCW RTE_CT_TCP_CLOSE_WAIT
+#define ctLA RTE_CT_TCP_LAST_ACK
+#define ctTW RTE_CT_TCP_TIME_WAIT
+#define ctCL RTE_CT_TCP_CLOSE
+#define ctS2 RTE_CT_TCP_SYN_SENT_2
+#define ctIV RTE_CT_TCP_MAX
+#define ctIG RTE_CT_TCP_IGNORE
+
+static const uint8_t rte_ct_tcp_state_table[2][6][RTE_CT_TCP_MAX] = {
+ { /* "client" direction, i.e. first SYN sent */
+ /* ctNO, ctSS, ctSR, ctES, ctFW, ctCW, ctLA, ctTW, ctCL, ctS2 */
+ /* syn */ {ctSS, ctSS, ctIG, ctIG, ctIG, ctIG, ctIG, ctSS, ctSS,
+ ctS2},
+
+ /* ctNO, ctSS, ctSR, ctES, ctFW, ctCW, ctLA, ctTW, ctCL, ctS2 */
+ /* synack */ {ctIV, ctIV, ctSR, ctIV, ctIV, ctIV, ctIV, ctIV, ctIV,
+ ctSR},
+
+ /* ctNO, ctSS, ctSR, ctES, ctFW, ctCW, ctLA, ctTW, ctCL, ctS2 */
+ /* fin */ {ctIV, ctIV, ctFW, ctFW, ctLA, ctLA, ctLA, ctTW, ctCL,
+ ctIV},
+ /* ctNO, ctSS, ctSR, ctES, ctFW, ctCW, ctLA, ctTW, ctCL, ctS2 */
+ /* ack */ {ctES, ctIV, ctES, ctES, ctCW, ctCW, ctTW, ctTW, ctCL,
+ ctIV},
+
+ /* ctNO, ctSS, ctSR, ctES, ctFW, ctCW, ctLA, ctTW, ctCL, ctS2 */
+ /* rst */ {ctIV, ctCL, ctCL, ctCL, ctCL, ctCL, ctCL, ctCL, ctCL,
+ ctCL},
+ /* ill */ {ctIV, ctIV, ctIV, ctIV, ctIV, ctIV, ctIV, ctIV, ctIV, ctIV}
+ },
+
+ { /* "server" direction */
+ /* ctNO, ctSS, ctSR, ctES, ctFW, ctCW, ctLA, ctTW, ctCL, ctS2 */
+ /* syn */ {ctIV, ctS2, ctIV, ctIV, ctIV, ctIV, ctIV, ctSS, ctIV,
+ ctS2},
+
+ /* ctNO, ctSS, ctSR, ctES, ctFW, ctCW, ctLA, ctTW, ctCL, ctS2 */
+ /* synack */ {ctIV, ctSR, ctIG, ctIG, ctIG, ctIG, ctIG, ctIG, ctIG,
+ ctSR},
+
+ /* ctNO, ctSS, ctSR, ctES, ctFW, ctCW, ctLA, ctTW, ctCL, ctS2 */
+ /* fin */ {ctIV, ctIV, ctFW, ctFW, ctLA, ctLA, ctLA, ctTW, ctCL,
+ ctIV},
+
+ /* ctNO, ctSS, ctSR, ctES, ctFW, ctCW, ctLA, ctTW, ctCL, ctS2 */
+ /* ack */ {ctIV, ctIG, ctSR, ctES, ctCW, ctCW, ctTW, ctTW, ctCL,
+ ctIG},
+
+ /* ctNO, ctSS, ctSR, ctES, ctFW, ctCW, ctLA, ctTW, ctCL, ctS2 */
+ /* rst */ {ctIV, ctCL, ctCL, ctCL, ctCL, ctCL, ctCL, ctCL, ctCL,
+ ctCL},
+ /* ill */ {ctIV, ctIV, ctIV, ctIV, ctIV, ctIV, ctIV, ctIV, ctIV, ctIV}
+ }
+};
+
+/* What TCP flags are set from RST/SYN/FIN/ACK. */
+enum rte_tcp_flag {
+ RTE_CT_TCP_SYN_FLAG,
+ RTE_CT_TCP_SAK_FLAG, /* SYN ACK */
+ RTE_CT_TCP_FIN_FLAG,
+ RTE_CT_TCP_ACK_FLAG,
+ RTE_CT_TCP_RST_FLAG,
+ RTE_CT_TCP_ILL_FLAG,
+};
+
+static uint8_t rte_ct_tcp_flags_to_state_table_index[16] = {
+ /* A R S F */
+ RTE_CT_TCP_ILL_FLAG, /* 0 0 0 0 */
+ RTE_CT_TCP_FIN_FLAG, /* 0 0 0 1 */
+ RTE_CT_TCP_SYN_FLAG, /* 0 0 1 0 */
+ RTE_CT_TCP_ILL_FLAG, /* 0 0 1 1 */
+ RTE_CT_TCP_RST_FLAG, /* 0 1 0 0 */
+ RTE_CT_TCP_RST_FLAG, /* 0 1 0 1 */
+ RTE_CT_TCP_RST_FLAG, /* 0 1 1 0 */
+ RTE_CT_TCP_ILL_FLAG, /* 0 1 1 1 */
+
+ RTE_CT_TCP_ACK_FLAG, /* 1 0 0 0 */
+ RTE_CT_TCP_FIN_FLAG, /* 1 0 0 1 */
+ RTE_CT_TCP_SAK_FLAG, /* 1 0 1 0 */
+ RTE_CT_TCP_ILL_FLAG, /* 1 0 1 1 */
+ RTE_CT_TCP_RST_FLAG, /* 1 1 0 0 */
+ RTE_CT_TCP_ILL_FLAG, /* 1 1 0 1 */
+ RTE_CT_TCP_RST_FLAG, /* 1 1 1 0 */
+ RTE_CT_TCP_ILL_FLAG, /* 1 1 1 1 */
+};
+
+static inline uint8_t
+rte_ct_get_index(uint8_t tcp_flags)
+{
+ uint8_t important_flags;
+
+ tcp_flags &= 0x3f; /* clear off optional flags */
+ important_flags = ((tcp_flags & 0x10) >> 1) | (tcp_flags & 7);
+ /* should be _pext_u32(tcp_flags, 0x17) */
+
+ if (unlikely((tcp_flags == 0) || (tcp_flags == 0x3f)))
+ /* these known as null and christmas tree respectively */
+ return RTE_CT_TCP_ILL_FLAG;
+
+ return rte_ct_tcp_flags_to_state_table_index[important_flags];
+
+}
+
+static inline int
+rte_ct_either_direction_has_flags(struct rte_ct_cnxn_data *cd, uint8_t flags)
+{
+ return ((cd->ct_protocol.tcp_ct_data.seen[0].flags | cd->
+ ct_protocol.tcp_ct_data.seen[1].flags) & flags) != 0;
+}
+
+static inline uint32_t rte_ct_seq_plus_length(struct rte_mbuf *pkt,
+ uint8_t ip_hdr_size)
+{
+ uint16_t pkt_length = 0;
+ struct tcp_hdr *tcpheader =
+ (struct tcp_hdr *)RTE_MBUF_METADATA_UINT32_PTR(pkt,
+ (IP_START +
+ ip_hdr_size));
+ uint32_t tcp_hdr_size = (tcpheader->data_off & 0xf0) >> 2;
+
+ void *ip_hdr = RTE_MBUF_METADATA_UINT32_PTR(pkt, IP_START);
+
+ if (ip_hdr_size == IPv4_HEADER_SIZE) {
+ struct ipv4_hdr *ihdr = (struct ipv4_hdr *)ip_hdr;
+
+ pkt_length = rte_bswap16(ihdr->total_length);
+ }
+ if (ip_hdr_size == IPv6_HEADER_SIZE) {
+ struct ipv6_hdr *ihdr = (struct ipv6_hdr *)ip_hdr;
+
+ pkt_length = rte_bswap16(ihdr->payload_len) + IPv6_HEADER_SIZE;
+ }
+
+ /*
+ * Return sequence number plus the length of TCP segment (payload).
+ * SYN & FIN are each considered one byte, but it is illegal
+ * to have them together in one header (checked elsewhere)
+ */
+
+
+ return rte_bswap32(tcpheader->sent_seq) +
+ pkt_length - ip_hdr_size - tcp_hdr_size +
+ ((tcpheader->tcp_flags & (RTE_CT_TCPHDR_SYN | RTE_CT_TCPHDR_FIN)) !=
+ 0 ? 1 : 0);
+
+}
+
+static void
+rte_ct_check_for_scaling_and_sack_perm(
+ struct rte_mbuf *pkt,
+ struct rte_ct_tcp_state *state,
+ uint8_t ip_hdr_size)
+{
+
+ struct tcp_hdr *tcpheader =
+ (struct tcp_hdr *)RTE_MBUF_METADATA_UINT32_PTR(pkt,
+ (IP_START +
+ ip_hdr_size));
+ uint32_t dataoff_in_bytes = (tcpheader->data_off & 0xf0) >> 2;
+ uint32_t length = dataoff_in_bytes - sizeof(struct tcp_hdr);
+
+ state->scale = 0;
+ state->flags = 0;
+
+ if (length == 0)
+ /* no header options */
+ return;
+ uint8_t *options_ptr =
+ RTE_MBUF_METADATA_UINT8_PTR(pkt,
+ (IP_START + ip_hdr_size +
+ sizeof(struct tcp_hdr)));
+
+ while (length > 0) {
+ uint8_t option = *options_ptr;
+ uint8_t opsize = options_ptr[1];
+ /* opsize reset for NOPs below */
+
+ switch (option) {
+
+ case RTE_CT_TCPOPT_EOL:
+ /* end of options */
+ return;
+
+ case RTE_CT_TCPOPT_NOP:
+ options_ptr++;
+ length--;
+ continue;
+
+ case RTE_CT_TCPOPT_SACK_PERM:
+ if (opsize == RTE_CT_TCPOLEN_SACK_PERM)
+ state->flags |= RTE_CT_TCP_FLAG_SACK_PERM;
+ break;
+
+ case RTE_CT_TCPOPT_WINDOW:
+ if (opsize == RTE_CT_TCPOLEN_WINDOW) {
+ state->scale =
+ RTE_MIN(options_ptr[2],
+ RTE_CT_MAX_TCP_WINDOW_SCALE);
+ state->flags |= RTE_CT_TCP_FLAG_WINDOW_SCALE;
+ }
+ break;
+
+ default:
+ break;
+
+ }
+
+ if ((opsize < 2) || (opsize > length)) {
+ /* something wrong */
+ printf("scaling_and_sack_perm:something wrong\n");
+ return;
+ }
+ options_ptr += opsize;
+ length -= opsize;
+
+ }
+}
+
+static void
+rte_ct_tcpdisplay_hdr(struct tcp_hdr *tcpheader)
+{
+ printf("Tcp header: src_port=%d", rte_bswap16(tcpheader->src_port));
+ printf(", dst_port=%d", rte_bswap16(tcpheader->dst_port));
+ printf(", sent_seq=%u", rte_bswap32(tcpheader->sent_seq));
+ printf(", recv_ack=%u", rte_bswap32(tcpheader->recv_ack));
+ printf(",data_off=%d", tcpheader->data_off / 16);
+ printf(",tcp_flags=%02x", tcpheader->tcp_flags);
+ printf(", rx_win=%d\n", rte_bswap16(tcpheader->rx_win));
+
+}
+
+static inline void
+rte_ct_clear_cnxn_data(__rte_unused struct rte_ct_cnxn_tracker *ct,
+ struct rte_ct_cnxn_data *cd,
+ __rte_unused struct rte_mbuf *pkt)
+{
+ /* clear all tcp connection data, then set up individual fields */
+
+ memset(&cd->ct_protocol.tcp_ct_data, 0,
+ sizeof(cd->ct_protocol.tcp_ct_data));
+ cd->ct_protocol.tcp_ct_data.last_index = RTE_CT_TCP_ILL_FLAG;
+
+}
+
+enum rte_ct_packet_action
+rte_ct_tcp_new_connection(
+ struct rte_ct_cnxn_tracker *ct,
+ struct rte_ct_cnxn_data *cd,
+ struct rte_mbuf *pkt,
+ int use_synproxy,
+ uint8_t ip_hdr_size)
+{
+ struct tcp_hdr *tcpheader =
+ (struct tcp_hdr *)RTE_MBUF_METADATA_UINT32_PTR(pkt,
+ (IP_START + ip_hdr_size));
+
+ enum rte_ct_tcp_states new_state;
+ uint8_t index;
+ struct rte_ct_tcp_state *sender =
+ &cd->ct_protocol.tcp_ct_data.seen[RTE_CT_DIR_ORIGINAL];
+ struct rte_ct_tcp_state *receiver =
+ &cd->ct_protocol.tcp_ct_data.seen[RTE_CT_DIR_REPLY];
+ uint16_t win;
+
+ if (CT_DEBUG)
+ rte_ct_tcpdisplay_hdr(tcpheader);
+
+ index = rte_ct_get_index(tcpheader->tcp_flags);
+ new_state = rte_ct_tcp_state_table[0][index][RTE_CT_TCP_NONE];
+
+ if (unlikely(new_state >= RTE_CT_TCP_MAX)) {
+ if (CT_DEBUG)
+ printf("invalid new state with flags %02x\n",
+ tcpheader->tcp_flags);
+ return RTE_CT_DROP_PACKET;
+ }
+ /*
+ * A normal connection starts with a SYN packet. However, it is possible
+ * that an onginging connection has been routed here somehow. Support
+ * for these connections is optional.
+ */
+
+ if (unlikely((new_state != RTE_CT_TCP_SYN_SENT
+ && ct->misc_options.tcp_loose == 0))) {
+ /* Not a standard connection start and not supporting
+ * onging connections. */
+ return RTE_CT_DROP_PACKET;
+ }
+
+ if (CT_DEBUG)
+ printf(" new connection with state %s\n",
+ rte_ct_tcp_names[new_state]);
+
+ /* clear all tcp connection data, then set up individual fields */
+ rte_ct_clear_cnxn_data(ct, cd, pkt);
+ cd->ct_protocol.tcp_ct_data.state = new_state;
+
+ sender->end = sender->maxend = rte_ct_seq_plus_length(pkt, ip_hdr_size);
+ win = rte_bswap16(tcpheader->rx_win);
+ sender->maxwin = RTE_MAX(win, (uint32_t)1);
+
+ if (likely(new_state == RTE_CT_TCP_SYN_SENT)) {
+ /* check for window scaling and selective ACK */
+ rte_ct_check_for_scaling_and_sack_perm(pkt, sender,
+ ip_hdr_size);
+
+ cd->ct_protocol.synproxy_data.synproxied = use_synproxy;
+
+ if (use_synproxy) {
+ /*
+ * new connection from client using synproxy. The proxy
+ * must send back a SYN-ACK
+ */
+
+
+ if (CT_DEBUG > 2)
+ printf("synproxy sending SYN-ACK to client\n");
+
+ return RTE_CT_SEND_CLIENT_SYNACK;
+ }
+ } else {
+ /*
+ * An ongoing connection. Make a very liberal connection since
+ * all the original set up data is lost. Assume SACK and
+ * liberal window checking to handle unknown window scaling.
+ */
+
+ sender->maxend += sender->maxwin;
+ sender->flags = receiver->flags =
+ (RTE_CT_TCP_FLAG_SACK_PERM | RTE_CT_TCP_FLAG_BE_LIBERAL);
+ }
+
+ if (CT_DEBUG > 0) {
+ printf("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i",
+ sender->end, sender->maxend, sender->maxwin,
+ sender->scale);
+ printf(" receiver end=%u maxend=%u maxwin=%u scale=%i\n",
+ receiver->end, receiver->maxend,
+ receiver->maxwin,
+ receiver->scale);
+ }
+
+ return RTE_CT_OPEN_CONNECTION;
+}
+
+static uint32_t
+rte_ct_tcp_sack(struct rte_mbuf *pkt, uint8_t ip_hdr_size)
+{
+ struct tcp_hdr *tcpheader =
+ (struct tcp_hdr *)RTE_MBUF_METADATA_UINT32_PTR(pkt,
+ (IP_START +
+ ip_hdr_size));
+ uint16_t dataoff_in_bytes = (tcpheader->data_off & 0xf0) >> 2;
+ uint16_t length = dataoff_in_bytes - sizeof(struct tcp_hdr);
+ uint32_t sack = rte_bswap32(tcpheader->recv_ack);
+
+ if (unlikely(!length))
+ return sack;
+
+ uint8_t *options_ptr = RTE_MBUF_METADATA_UINT8_PTR(pkt,
+ (IP_START + ip_hdr_size + sizeof(struct tcp_hdr)));
+
+ while (length > 0) {
+ uint8_t opcode = *options_ptr;
+ uint8_t opsize = options_ptr[1];
+ int i;
+ uint32_t *sack_ptr;
+
+ switch (opcode) {
+ case RTE_CT_TCPOPT_TIMESTAMP:
+ /* common "solo" option, check first */
+ break;
+
+ case RTE_CT_TCPOPT_EOL:
+ return sack; /* end of options */
+
+ case RTE_CT_TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
+ length--;
+ options_ptr++;
+ continue;
+
+ case RTE_CT_TCPOPT_SACK:
+ /*
+ * SACK (selective ACK) contains a block of
+ * 1 to 4 entries of 8 bytes each.
+ * Each entry is a pair of 32 bit numbers.
+ * This block follows the usual 2
+ * bytes for opcode and opsize. Thus,
+ * the entire SACK option must be 10, 18,
+ * 26 or 34 bytes long.
+ */
+ if ((opsize >= (RTE_CT_TCPOLEN_PER_SACK_ENTRY + 2)) &&
+ ((opsize - 2) %
+ RTE_CT_TCPOLEN_PER_SACK_ENTRY) == 0) {
+ /* skip over opcode and size, and point to
+ * 2nd 32 bits in entry */
+ options_ptr += 6;
+ for (i = 0; i < (opsize - 2); i +=
+ RTE_CT_TCPOLEN_PER_SACK_ENTRY) {
+ sack_ptr =
+ (uint32_t *) &options_ptr[i];
+ uint32_t ack = rte_bswap32(*sack_ptr);
+
+ if (rte_after(ack, sack))
+ sack = ack;
+ }
+ return sack;
+ }
+ break;
+ default:
+ break;
+ }
+ if ((opsize < 2) || (opsize > length)) {
+ printf("rte_ct_tcp_sack: something wrong, opsize %i,",
+ opsize);
+ printf(" length %i\n", length);
+ return sack;
+ }
+ options_ptr += opsize;
+ length -= opsize;
+ }
+ return sack;
+}
+
+/*
+ * if this is a retransmission of last packet, increment retransmission count,
+ * otherwise record this as last packet.
+ */
+static inline void
+rte_ct_check_for_retransmissions(
+ struct rte_ct_tcp *state,
+ uint8_t dir,
+ uint32_t seq,
+ uint32_t ack,
+ uint32_t end,
+ uint16_t win)
+{
+ if (state->last_dir == dir
+ && state->last_seq == seq
+ && state->last_ack == ack
+ && state->last_end == end && state->last_win == win)
+ state->retrans++;
+ else {
+ state->last_dir = dir;
+ state->last_seq = seq;
+ state->last_ack = ack;
+ state->last_end = end;
+ state->last_win = win;
+ state->retrans = 0;
+ }
+}
+
+/*
+ * Verify that the sequence number in the given packet is within the valid
+ * range at this point in the connection
+ */
+static uint8_t
+rte_ct_tcp_in_window(
+ struct rte_ct_cnxn_data *cd,
+ struct rte_ct_cnxn_tracker *ct,
+ struct rte_ct_tcp *state,
+ enum rte_ct_pkt_direction dir,
+ uint8_t index,
+ struct rte_mbuf *pkt,
+ uint8_t ip_hdr_size)
+{
+ struct rte_ct_tcp_state *sender = &state->seen[dir];
+ struct rte_ct_tcp_state *receiver = &state->seen[!dir];
+ uint32_t seq, ack, sack, end, win, swin;
+ uint8_t in_recv_win, tcp_flags;
+ enum rte_ct_packet_action res;
+
+ void *iphdr = RTE_MBUF_METADATA_UINT32_PTR(pkt, IP_START);
+ struct tcp_hdr *tcpheader =
+ (struct tcp_hdr *)RTE_MBUF_METADATA_UINT32_PTR(pkt,
+ (IP_START + ip_hdr_size));
+
+ if (cd->ct_protocol.synproxy_data.synproxied)
+ rte_sp_adjust_client_ack_before_window_check(cd, iphdr,
+ tcpheader, dir);
+
+
+ seq = rte_bswap32(tcpheader->sent_seq);
+ ack = sack = rte_bswap32(tcpheader->recv_ack);
+ win = rte_bswap16(tcpheader->rx_win);
+ end = rte_ct_seq_plus_length(pkt, ip_hdr_size);
+ tcp_flags = tcpheader->tcp_flags;
+
+ if (receiver->flags & RTE_CT_TCP_FLAG_SACK_PERM)
+ sack = rte_ct_tcp_sack(pkt, ip_hdr_size);
+
+ if (unlikely(sender->maxwin == 0)) {
+ /* First packet for sender, initialize data. */
+ if (tcp_flags & RTE_CT_TCPHDR_SYN) {
+ /*
+ * SYN-ACK in reply to a SYN
+ * or SYN from reply direction in simultaneous open.
+ */
+ sender->end = sender->maxend = end;
+ sender->maxwin = RTE_MAX(win, (uint32_t)1);
+
+ rte_ct_check_for_scaling_and_sack_perm(pkt, sender,
+ ip_hdr_size);
+
+ /*
+ * RFC 1323: Both sides must send Window Scale option
+ * to enable scaling in either direction.
+ */
+ if ((sender->
+ flags & receiver->flags &
+ RTE_CT_TCP_FLAG_WINDOW_SCALE) == 0)
+ sender->scale = receiver->scale = 0;
+
+ if (!(tcp_flags & RTE_CT_TCPHDR_ACK))
+ /* Simultaneous open */
+ return 1;
+ } else {
+ /*
+ * In the middle of a connection with no setup data.
+ * Use available data from the packet.
+ */
+ sender->end = end;
+ swin = win << sender->scale;
+ sender->maxwin = (swin == 0 ? 1 : swin);
+ sender->maxend = end + sender->maxwin;
+ /*
+ * We haven't seen traffic in the other direction yet
+ * but we have to tweak window tracking to pass III
+ * and IV until that happens.
+ */
+ if (receiver->maxwin == 0)
+ receiver->end = receiver->maxend = sack;
+ }
+ }
+ /* if sender unititialized */
+ else if (((cd->ct_protocol.tcp_ct_data.state == RTE_CT_TCP_SYN_SENT &&
+ dir == RTE_CT_DIR_ORIGINAL) ||
+ (cd->ct_protocol.tcp_ct_data.state == RTE_CT_TCP_SYN_RECV &&
+ dir == RTE_CT_DIR_REPLY)) && rte_after(end, sender->end)) {
+ /*
+ * RFC 793: "if a TCP is reinitialized ... then it need
+ * not wait at all; it must only be sure to use sequence
+ * numbers larger than those recently used."
+ */
+ sender->end = sender->maxend = end;
+ sender->maxwin = RTE_MAX(win, (uint32_t)1);
+
+ rte_ct_check_for_scaling_and_sack_perm(pkt, sender,
+ ip_hdr_size);
+ }
+ /* If no ACK, just pretend there was. */
+ if (!(tcp_flags & RTE_CT_TCPHDR_ACK) ||
+ (((tcp_flags & RTE_CT_TCPHDR_RST_ACK) ==
+ RTE_CT_TCPHDR_RST_ACK) && (ack == 0))) {
+ /* Bad TCP Stacks */
+ ack = sack = receiver->end;
+ }
+
+ if ((tcp_flags & RTE_CT_TCPHDR_RST) && seq == 0 &&
+ cd->ct_protocol.tcp_ct_data.state == RTE_CT_TCP_SYN_SENT)
+ /* RST sent answering SYN. */
+ seq = end = sender->end;
+
+ /* Is the ending sequence in the receive window (if available)? */
+ in_recv_win = !receiver->maxwin ||
+ rte_after(end, sender->end - receiver->maxwin - 1);
+
+ if (rte_before(seq, sender->maxend + 1) && in_recv_win &&
+ rte_before(sack, receiver->end + 1) &&
+ rte_after(sack,
+ receiver->end - RTE_MAX(sender->maxwin,
+ (uint32_t)RTE_MAX_ACKWIN_CONST) - 1)) {
+ /*
+ * Apply window scaling (RFC 1323). Only valid if both
+ * directions sent this option in a SYN packet,
+ * so ignore until not a SYN packet. Scale will be
+ * set to zero if connection set up but no valid scale is there.
+ */
+ if (!(tcp_flags & RTE_CT_TCPHDR_SYN))
+ win <<= sender->scale;
+
+ /* Update sender data. */
+ swin = win + (sack - ack);
+ sender->maxwin = RTE_MAX(sender->maxwin, swin);
+
+ if (rte_after(end, sender->end)) {
+ sender->end = end;
+ sender->flags |= RTE_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
+ }
+
+ if (tcp_flags & RTE_CT_TCPHDR_ACK) {
+ if (!(sender->flags & RTE_CT_TCP_FLAG_MAXACK_SET)) {
+ sender->maxack = ack;
+ sender->flags |= RTE_CT_TCP_FLAG_MAXACK_SET;
+ } else if (rte_after(ack, sender->maxack))
+ sender->maxack = ack;
+ }
+
+ /* Update receiver data. */
+ if (receiver->maxwin != 0 && rte_after(end, sender->maxend))
+ receiver->maxwin += end - sender->maxend;
+
+ if (rte_after(sack + win, receiver->maxend - 1))
+ receiver->maxend = sack + RTE_MAX(win, (uint32_t)1);
+
+ if (ack == receiver->end)
+ receiver->flags &= ~RTE_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
+
+ /* If this packet has an ACK, it may be a retransmission. */
+ if (index == RTE_CT_TCP_ACK_FLAG)
+ rte_ct_check_for_retransmissions(state, dir, seq, ack,
+ end, win);
+ res = 1;
+ } else {
+ res = (sender->flags & RTE_CT_TCP_FLAG_BE_LIBERAL ||
+ ct->misc_options.tcp_be_liberal);
+ }
+
+ if (CT_DEBUG) {
+ if (!res) {
+ /* CT_DEBUG = 0; */
+ printf("tcp_in_window FAILED for %p\n", cd);
+ printf("rte_before(%u, %u + 1) is %d\n",
+ seq, sender->maxend + 1,
+ rte_before(seq, sender->maxend + 1));
+ printf("!%u || rte_after(%u, %u - %u - 1) is %d\n",
+ receiver->maxwin, end, sender->end,
+ receiver->maxwin, in_recv_win);
+ printf("rte_before(%u, %u + 1) is %d\n", sack,
+ receiver->end, rte_before(sack,
+ receiver->end + 1));
+ printf
+ ("rte_after(%u,(%u - RTE_MAX(%u, %u) - 1))) is%d\n",
+ sack, receiver->end, sender->maxwin,
+ RTE_MAX_ACKWIN_CONST, rte_after(sack,
+ receiver->end - RTE_MAX(sender->maxwin,
+ (uint32_t)RTE_MAX_ACKWIN_CONST)
+ - 1));
+
+ }
+ }
+ if (cd->ct_protocol.synproxy_data.synproxied)
+ rte_sp_adjust_server_seq_after_window_check(cd, iphdr,
+ tcpheader, dir);
+ return res;
+}
+
+/*for the given two FSM states,return the one with the smallest timeout value*/
+static inline uint8_t
+rte_ct_choose_min_timeout_state(
+ struct rte_ct_cnxn_tracker *ct,
+ uint8_t state1,
+ uint8_t state2)
+{
+ if (ct->ct_timeout.tcptimeout.tcp_timeouts[state1] <
+ ct->ct_timeout.tcptimeout.tcp_timeouts[state2])
+ return state1;
+ else
+ return state2;
+}
+
+
+/* Returns verdict for packet */
+enum rte_ct_packet_action
+rte_ct_verify_tcp_packet(
+ struct rte_ct_cnxn_tracker *ct,
+ struct rte_ct_cnxn_data *cd,
+ struct rte_mbuf *pkt,
+ uint8_t key_was_flipped,
+ uint8_t ip_hdr_size)
+{
+ struct tcp_hdr *tcpheader = (struct tcp_hdr *)
+ RTE_MBUF_METADATA_UINT32_PTR(pkt, (IP_START + ip_hdr_size));
+
+ enum rte_ct_tcp_states new_state, old_state;
+ enum rte_ct_pkt_direction dir;
+ uint8_t index;
+
+ /* state whose timeout value will be used. In odd cases,
+ * not always current state */
+ uint8_t timeout_state;
+
+ dir = (cd->key_is_client_order == !key_was_flipped);
+
+ if (cd->ct_protocol.synproxy_data.synproxied &&
+ cd->ct_protocol.synproxy_data.half_established &&
+ !cd->ct_protocol.synproxy_data.cnxn_established &&
+ dir == RTE_CT_DIR_ORIGINAL) {
+ /*
+ * Packet from client, but only client side of this connection
+ * has been set up. Buffer packet until server side of
+ * connection complete.
+ */
+ rte_ct_buffer_packet(ct, cd, pkt);
+ return RTE_CT_HIJACK;
+ }
+
+ uint32_t recv_ack = rte_bswap32(tcpheader->recv_ack);
+ uint32_t sent_seq = rte_bswap32(tcpheader->sent_seq);
+
+ int check_window = 1;
+ enum rte_ct_packet_action return_action = RTE_CT_FORWARD_PACKET;
+
+ /* rte_ct_tcpdisplay_hdr(tcpheader); */
+
+ old_state = cd->ct_protocol.tcp_ct_data.state;
+ index = rte_ct_get_index(tcpheader->tcp_flags);
+ new_state = rte_ct_tcp_state_table[dir][index][old_state];
+
+ if (new_state == RTE_CT_TCP_MAX) {
+ if (CT_DEBUG) {
+ printf("!!!!invalid state transition from %s ",
+ rte_ct_tcp_names[old_state]);
+ printf("with flags 0x%02x\n",
+ tcpheader->tcp_flags);
+ }
+
+ ct->counters->pkts_drop_invalid_state++;
+ return RTE_CT_DROP_PACKET;
+ }
+
+ if (STATE_TRACKING && new_state != old_state)
+ printf(" new state %s\n", rte_ct_tcp_names[new_state]);
+
+ switch (new_state) {
+
+ case RTE_CT_TCP_ESTABLISHED:
+
+ if (cd->ct_protocol.synproxy_data.synproxied &&
+ !cd->ct_protocol.synproxy_data.half_established &&
+ (old_state == RTE_CT_TCP_SYN_RECV)) {
+ /*
+ * During synproxy setup, ESTABLISHED state entered by
+ * ACK arriving from client. The proxy must now send a
+ * spoofed SYN to the server.
+ * Reset the state to RTE_CT_TCP_SYN_SENT.
+ */
+
+ if (STATE_TRACKING) {
+ printf(" synproxy first half-cnxn complete,");
+ printf(" new state %s\n",
+ rte_ct_tcp_names[RTE_CT_TCP_SYN_SENT]);
+ }
+ cd->ct_protocol.synproxy_data.half_established = true;
+
+ rte_sp_cvt_to_spoofed_server_syn(cd, pkt);
+ rte_ct_clear_cnxn_data(ct, cd, pkt);
+ cd->ct_protocol.tcp_ct_data.state = RTE_CT_TCP_SYN_SENT;
+
+ struct rte_ct_tcp_state *sender =
+ &cd->ct_protocol.tcp_ct_data.
+ seen[RTE_CT_DIR_ORIGINAL];
+ uint16_t win = rte_bswap16(tcpheader->rx_win);
+
+ sender->end = sender->maxend =
+ rte_ct_seq_plus_length(pkt, ip_hdr_size);
+ sender->maxwin = RTE_MAX(win, (uint32_t)1);
+ rte_ct_check_for_scaling_and_sack_perm(pkt, sender,
+ ip_hdr_size);
+ /* TODO seq number code */
+ rte_ct_set_cnxn_timer_for_tcp(ct, cd,
+ RTE_CT_TCP_SYN_SENT);
+ return RTE_CT_SEND_SERVER_SYN;
+ }
+
+
+ case RTE_CT_TCP_SYN_RECV:
+
+ if (cd->ct_protocol.synproxy_data.synproxied &&
+ cd->ct_protocol.synproxy_data.half_established &&
+ !cd->ct_protocol.synproxy_data.cnxn_established) {
+ /*
+ * The reply SYN/ACK has been received from the server.
+ * The connection can now be considered established,
+ * even though an ACK stills needs to be sent to
+ * the server.
+ */
+
+ if (!rte_ct_tcp_in_window(cd, ct,
+ &cd->ct_protocol.tcp_ct_data,
+ dir, index, pkt, ip_hdr_size)) {
+ ct->counters->pkts_drop_outof_window++;
+ return RTE_CT_DROP_PACKET;
+ }
+
+ if (STATE_TRACKING) {
+ printf("synproxy full cnxn complete,");
+ printf(" new state %s\n", rte_ct_tcp_names
+ [RTE_CT_TCP_ESTABLISHED]);
+ }
+
+ /*
+ * Convert the packet to an ack to return to the server.
+ * This routine also saves the real sequence number
+ * from the server.
+ */
+
+ rte_sp_cvt_to_spoofed_server_ack(cd, pkt);
+
+ index = rte_ct_get_index(tcpheader->tcp_flags);
+
+ if (!rte_ct_tcp_in_window(cd, ct,
+ &cd->ct_protocol.tcp_ct_data,
+ !dir, index, pkt, ip_hdr_size)) {
+ ct->counters->pkts_drop_outof_window++;
+ return RTE_CT_DROP_PACKET;
+ }
+
+ /* good packets, OK to update state */
+
+ cd->ct_protocol.tcp_ct_data.state =
+ RTE_CT_TCP_ESTABLISHED;
+ ct->counters->sessions_established++;
+ cd->ct_protocol.synproxy_data.cnxn_established = true;
+ cd->ct_protocol.tcp_ct_data.last_index = index;
+ cd->ct_protocol.tcp_ct_data.last_dir = !dir;
+
+ rte_ct_set_cnxn_timer_for_tcp(ct, cd,
+ RTE_CT_TCP_ESTABLISHED);
+ rte_ct_release_buffered_packets(ct, cd);
+
+ return RTE_CT_SEND_SERVER_ACK;
+ }
+
+ case RTE_CT_TCP_SYN_SENT:
+
+ /*
+ * A connection that is actively closed goes to TIME-WAIT state.
+ * It can be re-opened (before it times out) by a SYN packet.
+ */
+
+ if (old_state < RTE_CT_TCP_TIME_WAIT)
+ break;
+ /*
+ * Due to previous check and state machine transitions,
+ * old state must be RTE_CT_TCP_TIME_WAIT or RTE_CT_TCP_CLOSE .
+ * Need to re-open connection.
+ */
+
+ return RTE_CT_REOPEN_CNXN_AND_FORWARD_PACKET;
+
+ case RTE_CT_TCP_IGNORE:
+
+ /*
+ * Ignored packets usually mean the connection data is
+ * out of sync with client/server. Ignore, but forward
+ * these packets since they may be valid for the connection.
+ * If the ignored packet is invalid, the receiver will send
+ * an RST which should get the connection entry back in sync.
+ */
+
+ /*
+ * However, if connection is running synproxy and the full
+ * connection is not yet established, there is no where
+ * for test packets to go so drop these packets.
+ */
+
+ if (cd->ct_protocol.synproxy_data.synproxied &&
+ !cd->ct_protocol.synproxy_data.cnxn_established)
+ return RTE_CT_DROP_PACKET;
+
+ if (index == RTE_CT_TCP_SAK_FLAG &&
+ cd->ct_protocol.tcp_ct_data.last_index ==
+ RTE_CT_TCP_SYN_FLAG
+ && cd->ct_protocol.tcp_ct_data.last_dir != dir
+ && recv_ack == cd->ct_protocol.tcp_ct_data.last_end) {
+ /*
+ * SYN/ACK in reply direction acknowledging a SYN
+ * earlier ignored as invalid.Client and server in sync,
+ * but connection tracker is not. Use previous values
+ * to get back in sync.
+ */
+
+ struct rte_ct_tcp_state *last_seen =
+ &cd->ct_protocol.tcp_ct_data.seen[cd->ct_protocol.
+ tcp_ct_data.
+ last_dir];
+
+ /* reset new and old states to what they should
+ * have been */
+ old_state = RTE_CT_TCP_SYN_SENT;
+ new_state = RTE_CT_TCP_SYN_RECV;
+
+ last_seen->end = cd->ct_protocol.tcp_ct_data.last_end;
+ last_seen->maxend =
+ cd->ct_protocol.tcp_ct_data.last_end;
+ last_seen->maxwin =
+ RTE_MAX(cd->ct_protocol.tcp_ct_data.last_win,
+ (uint32_t)1);
+ last_seen->scale =
+ cd->ct_protocol.tcp_ct_data.last_wscale;
+ cd->ct_protocol.tcp_ct_data.last_flags &=
+ ~RTE_CT_EXP_CHALLENGE_ACK;
+ last_seen->flags =
+ cd->ct_protocol.tcp_ct_data.last_flags;
+ memset(&cd->ct_protocol.tcp_ct_data.seen[dir], 0,
+ sizeof(struct rte_ct_tcp_state));
+ break;
+ }
+
+ cd->ct_protocol.tcp_ct_data.last_index = index;
+ cd->ct_protocol.tcp_ct_data.last_dir = dir;
+ cd->ct_protocol.tcp_ct_data.last_seq = sent_seq;
+ cd->ct_protocol.tcp_ct_data.last_end =
+ rte_ct_seq_plus_length(pkt, ip_hdr_size);
+ cd->ct_protocol.tcp_ct_data.last_win =
+ rte_bswap16(tcpheader->rx_win);
+
+ /*
+ * An orinal SYN. Client and the server may be in sync, but
+ * the tracker is not . Annotate
+ * the TCP options and let the packet go through. If it is a
+ * valid SYN packet, the server will reply with a SYN/ACK, and
+ * then we'll get in sync. Otherwise, the server potentially
+ * responds with a challenge ACK if implementing RFC5961.
+ */
+ if (index == RTE_CT_TCP_SYN_FLAG &&
+ dir == RTE_CT_DIR_ORIGINAL) {
+ struct rte_ct_tcp_state seen;
+
+ /* call following to set "flag" and "scale" fields */
+ rte_ct_check_for_scaling_and_sack_perm(pkt, &seen,
+ ip_hdr_size);
+
+ /* only possible flags set for scling and sack */
+ cd->ct_protocol.tcp_ct_data.last_flags = seen.flags;
+ cd->ct_protocol.tcp_ct_data.last_wscale =
+ (seen.flags & RTE_CT_TCP_FLAG_WINDOW_SCALE) == 0 ?
+ 0 : seen.scale;
+
+ /*
+ * Mark the potential for RFC5961 challenge ACK,
+ * this pose a special problem for LAST_ACK state
+ * as ACK is intrepretated as ACKing last FIN.
+ */
+ if (old_state == RTE_CT_TCP_LAST_ACK)
+ cd->ct_protocol.tcp_ct_data.last_flags |=
+ RTE_CT_EXP_CHALLENGE_ACK;
+ }
+ return RTE_CT_FORWARD_PACKET;
+
+ case RTE_CT_TCP_TIME_WAIT:
+ /*
+ * RFC5961 compliance cause stack to send "challenge-ACK" in
+ * response to unneeded SYNs. Do not treat this as acking
+ * last FIN.
+ */
+ if (old_state == RTE_CT_TCP_LAST_ACK &&
+ index == RTE_CT_TCP_ACK_FLAG &&
+ cd->ct_protocol.tcp_ct_data.last_dir != dir &&
+ cd->ct_protocol.tcp_ct_data.last_index ==
+ RTE_CT_TCP_SYN_FLAG
+ && (cd->ct_protocol.tcp_ct_data.
+ last_flags & RTE_CT_EXP_CHALLENGE_ACK)) {
+ /* Detected RFC5961 challenge ACK */
+ cd->ct_protocol.tcp_ct_data.last_flags &=
+ ~RTE_CT_EXP_CHALLENGE_ACK;
+ return RTE_CT_FORWARD_PACKET; /* Don't change state */
+ }
+ break;
+
+ case RTE_CT_TCP_CLOSE:
+
+ if (index == RTE_CT_TCP_RST_FLAG) {
+ /*
+ * Can only transition to CLOSE state with an RST,
+ * but can remain in
+ * CLOSE state with ACK, FIN, or RST. Do special checks.
+ */
+
+ if ((cd->ct_protocol.tcp_ct_data.seen[!dir].flags &
+ RTE_CT_TCP_FLAG_MAXACK_SET) &&
+ rte_before(sent_seq, cd->ct_protocol.
+ tcp_ct_data.seen[!dir].maxack)) {
+
+ ct->counters->pkts_drop_invalid_rst++;
+ /* Invalid RST */
+ return RTE_CT_DROP_PACKET;
+ }
+
+ if (((cd->connstatus == RTE_SEEN_REPLY_CONN &&
+ cd->ct_protocol.tcp_ct_data.last_index ==
+ RTE_CT_TCP_SYN_FLAG) ||
+ (cd->connstatus != RTE_ASSURED_CONN &&
+ cd->ct_protocol.tcp_ct_data.last_index ==
+ RTE_CT_TCP_ACK_FLAG)) &&
+ recv_ack ==
+ cd->ct_protocol.tcp_ct_data.last_end) {
+ /* RST sent to invalid SYN or ACK previously
+ * let through */
+ check_window = 0;
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ if (likely(check_window)) {
+ if (unlikely(!rte_ct_tcp_in_window(cd, ct,
+ &cd->ct_protocol.tcp_ct_data,
+ dir, index,
+ pkt, ip_hdr_size))) {
+ ct->counters->pkts_drop_outof_window++;
+ return RTE_CT_DROP_PACKET;
+ }
+ }
+
+ if (new_state == RTE_CT_TCP_ESTABLISHED &&
+ old_state != RTE_CT_TCP_ESTABLISHED)
+ /* only increment for first state transition to established */
+ /* synproxy established count handled elswhere */
+ ct->counters->sessions_established++;
+ /* From this point on, all packets are in-window */
+ cd->ct_protocol.tcp_ct_data.last_index = index;
+ cd->ct_protocol.tcp_ct_data.last_dir = dir;
+
+ if (index == RTE_CT_TCP_SAK_FLAG)
+ cd->connstatus = RTE_SEEN_REPLY_CONN;
+
+ timeout_state = new_state;
+
+ if (cd->ct_protocol.tcp_ct_data.retrans >=
+ ct->misc_options.tcp_max_retrans)
+ timeout_state =
+ rte_ct_choose_min_timeout_state(ct, timeout_state,
+ RTE_CT_TCP_RETRANS);
+ else if (rte_ct_either_direction_has_flags(cd,
+ RTE_CT_TCP_FLAG_DATA_UNACKNOWLEDGED))
+ timeout_state =
+ rte_ct_choose_min_timeout_state(ct, timeout_state,
+ RTE_CT_TCP_UNACK);
+
+ if (cd->connstatus != RTE_SEEN_REPLY_CONN) {
+ if (tcpheader->tcp_flags & RTE_CT_TCPHDR_RST) {
+ /*
+ * if only reply seen is RST, there is not an
+ * established connection, so just destroy
+ * connection now.
+ */
+
+ return RTE_CT_DESTROY_CNXN_AND_FORWARD_PACKET;
+ }
+ /* ESTABLISHED without SEEN_REPLY, i.e. mid-connection
+ pickup with loose=1. Avoid large ESTABLISHED timeout. */
+ if (new_state == RTE_CT_TCP_ESTABLISHED)
+ timeout_state = rte_ct_choose_min_timeout_state(ct,
+ timeout_state,
+ RTE_CT_TCP_UNACK);
+
+ } else if (cd->connstatus != RTE_ASSURED_CONN &&
+ (old_state == RTE_CT_TCP_SYN_RECV
+ || old_state == RTE_CT_TCP_ESTABLISHED)
+ && new_state == RTE_CT_TCP_ESTABLISHED)
+ cd->connstatus = RTE_ASSURED_CONN;
+
+ cd->ct_protocol.tcp_ct_data.state = new_state;
+ rte_ct_set_cnxn_timer_for_tcp(ct, cd, timeout_state);
+
+ return return_action;
+}
diff --git a/common/VIL/conntrack/rte_ct_tcp.h b/common/VIL/conntrack/rte_ct_tcp.h
new file mode 100644
index 00000000..391200c6
--- /dev/null
+++ b/common/VIL/conntrack/rte_ct_tcp.h
@@ -0,0 +1,484 @@
+/*
+// Copyright (c) 2017 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#ifndef __INCLUDE_RTE_CT_TCP_H__
+#define __INCLUDE_RTE_CT_TCP_H__
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include <rte_tcp.h>
+#include <rte_port.h>
+#include <rte_timer.h>
+#include <rte_ip.h>
+#include <rte_tcp.h>
+#include <rte_udp.h>
+#include <rte_port.h>
+#include <rte_byteorder.h>
+#include "rte_cnxn_tracking.h"
+
+/* AN INNER, PRIVATE INTERFACE FOR RTE_CNXN_TRACKING */
+
+/* constants for TCP options */
+
+#define RTE_CT_TCPOPT_EOL 0 /* End of options */
+#define RTE_CT_TCPOPT_NOP 1 /* Padding */
+#define RTE_CT_TCPOPT_MSS 2 /* Segment size negotiating */
+#define RTE_CT_TCPOPT_WINDOW 3 /* Window scaling */
+#define RTE_CT_TCPOPT_SACK_PERM 4 /* SACK Permitted */
+#define RTE_CT_TCPOPT_SACK 5 /* SACK Block */
+#define RTE_CT_TCPOPT_TIMESTAMP 8 /* RTT estimations */
+
+#define RTE_CT_TCPOLEN_MSS 4
+#define RTE_CT_TCPOLEN_WINDOW 3
+#define RTE_CT_TCPOLEN_SACK_PERM 2
+#define RTE_CT_TCPOLEN_TIMESTAMP 10
+#define RTE_CT_TCPOLEN_PER_SACK_ENTRY 8
+
+#define RTE_CT_TCPOLEN_MSS_ALIGNED 4
+#define RTE_CT_TCPOLEN_WINDOW_ALIGNED 4
+#define RTE_CT_TCPOLEN_SACK_PERM_ALIGNED 4
+#define RTE_CT_TCPOLEN_TIMESTAMP_ALIGNED 12
+
+#define RTE_CT_MAX_TCP_WINDOW_SCALE 14
+
+#define RTE_SP_OPTIONS_MSS 1
+#define RTE_SP_OPTIONS_WINDOW_SCALE 2
+#define RTE_SP_OPTIONS_TIMESTAMP 4
+#define RTE_SP_OPTIONS_SACK_PERM 8
+
+
+enum rte_ct_packet_action {
+ RTE_CT_OPEN_CONNECTION,
+ RTE_CT_DROP_PACKET,
+ RTE_CT_FORWARD_PACKET,
+ RTE_CT_DESTROY_CNXN_AND_FORWARD_PACKET,
+ RTE_CT_REOPEN_CNXN_AND_FORWARD_PACKET,
+ RTE_CT_SEND_CLIENT_SYNACK,
+ RTE_CT_SEND_SERVER_SYN,
+ RTE_CT_SEND_SERVER_ACK,
+ RTE_CT_HIJACK
+};
+
+enum rte_ct_connstatus {
+ RTE_INIT_CONN,
+ RTE_SEEN_REPLY_CONN,
+ RTE_ASSURED_CONN
+};
+
+/* TCP tracking. */
+
+static const char *const rte_ct_tcp_names[] = {
+ "NONE",
+ "SYN_SENT",
+ "SYN_RECV",
+ "ESTABLISHED",
+ "FIN_WAIT",
+ "CLOSE_WAIT",
+ "LAST_ACK",
+ "TIME_WAIT",
+ "CLOSE",
+ "SYN_SENT2",
+ "RETRANS",
+ "UNACK",
+ "IGNORE"
+};
+
+static const char *const rte_ct_udp_names[] = {
+ "NONE_UDP",
+ "UNREPLIED",
+ "REPLIED"
+};
+
+/* Fixme: what about big packets? */
+#define RTE_MAX_ACKWIN_CONST 66000
+
+/* Window scaling is advertised by the sender */
+#define RTE_CT_TCP_FLAG_WINDOW_SCALE 0x01
+
+/* SACK is permitted by the sender */
+#define RTE_CT_TCP_FLAG_SACK_PERM 0x02
+
+/* This sender sent FIN first */
+#define RTE_CT_TCP_FLAG_CLOSE_INIT 0x04
+
+/* Be liberal in window checking */
+#define RTE_CT_TCP_FLAG_BE_LIBERAL 0x08
+
+/* Has unacknowledged data */
+#define RTE_CT_TCP_FLAG_DATA_UNACKNOWLEDGED 0x10
+
+/* The field td_maxack has been set */
+#define RTE_CT_TCP_FLAG_MAXACK_SET 0x20
+/* Marks possibility for expected RFC5961 challenge ACK */
+#define RTE_CT_EXP_CHALLENGE_ACK 0x40
+
+
+
+/* TCP header flags of interest */
+#define RTE_CT_TCPHDR_FIN 0x01
+#define RTE_CT_TCPHDR_SYN 0x02
+#define RTE_CT_TCPHDR_RST 0x04
+#define RTE_CT_TCPHDR_ACK 0x10
+
+#define RTE_CT_TCPHDR_RST_ACK (RTE_CT_TCPHDR_RST | RTE_CT_TCPHDR_ACK)
+
+
+
+/* state machine values. Note that order is important as relative checks made */
+enum rte_ct_tcp_states {
+ RTE_CT_TCP_NONE,
+ RTE_CT_TCP_SYN_SENT,
+ RTE_CT_TCP_SYN_RECV,
+ RTE_CT_TCP_ESTABLISHED,
+ RTE_CT_TCP_FIN_WAIT,
+ RTE_CT_TCP_CLOSE_WAIT,
+ RTE_CT_TCP_LAST_ACK,
+ RTE_CT_TCP_TIME_WAIT,
+ RTE_CT_TCP_CLOSE,
+ RTE_CT_TCP_SYN_SENT_2,
+ RTE_CT_TCP_RETRANS,
+ RTE_CT_TCP_UNACK,
+ RTE_CT_TCP_IGNORE
+};
+
+enum rte_ct_udp_states {
+ RTE_CT_UDP_NONE,
+ RTE_CT_UDP_UNREPLIED,
+ RTE_CT_UDP_REPLIED,
+ RTE_CT_UDP_MAX
+};
+
+
+
+#define RTE_CT_TCP_MAX RTE_CT_TCP_UNACK
+
+enum rte_ct_pkt_direction {
+ RTE_CT_DIR_ORIGINAL,
+ RTE_CT_DIR_REPLY
+};
+
+struct rte_ct_tcp_state {
+ uint32_t end; /* max of seq + len */
+ uint32_t maxend; /* max of ack + max(win, 1) */
+ uint32_t maxwin; /* max(win) */
+ uint32_t maxack; /* max of ack */
+ uint8_t scale; /* window scale factor */
+ uint8_t flags; /* per direction options */
+};
+
+struct rte_synproxy_options {
+ uint8_t options;
+ uint8_t window_scale;
+ uint16_t mss;
+ uint32_t ts_val;
+ uint32_t ts_echo_reply;
+ uint16_t initial_window;
+};
+
+struct ct_sp_cnxn_data {
+ /* buffer client pkt while waiting on server setup,
+ * store in reverse order
+ */
+ struct rte_mbuf *buffered_pkt_list;
+ uint32_t original_spoofed_seq;
+ /* difference between spoofed and real seq from server */
+ uint32_t seq_diff;
+ struct rte_synproxy_options cnxn_options;
+ /* non-zero if this connection created using synproxy */
+ uint8_t synproxied;
+ bool half_established;
+ /* non-zero after both half-connections established */
+ bool cnxn_established;
+};
+
+struct rte_ct_tcp {
+ struct rte_ct_tcp_state seen[2]; /* connection parms per direction */
+ uint8_t state;
+ uint8_t last_dir; /* Direction of the last packet
+ * (TODO: enum ip_conntrack_dir)
+ */
+ uint8_t retrans; /* Number of retransmitted packets */
+ uint8_t last_index; /* Index of the last packet */
+ uint32_t last_seq; /* Last seq number seen in dir */
+ uint32_t last_ack; /* Last seq number seen opposite dir */
+ uint32_t last_end; /* Last seq + len */
+ uint16_t last_win; /* Last window seen in dir */
+ /* For SYN packets while we may be out-of-sync */
+ uint8_t last_wscale; /* Last window scaling factor seen */
+ uint8_t last_flags; /* Last flags set */
+};
+
+/*
+ * rte_ct_cnxn_counters holds all the connection-specicif counters.
+ * TODO: Make available in public interface
+ */
+
+struct rte_ct_cnxn_counters {
+ uint64_t packets_received;//Added for CT-NAT
+ uint64_t packets_forwarded;
+ uint64_t packets_dropped;
+};
+
+struct rte_ct_proto {
+ struct rte_ct_tcp tcp_ct_data; /* TCP specific data fields*/
+ struct ct_sp_cnxn_data synproxy_data;
+};
+
+
+/*
+ * rte_ct_cnxn_data contains all the data for a TCP connection. This include
+ * state data as necessary for verifying the validity of TCP packets. In
+ * addition, it holds data necessary for implementing the TCP timers.
+ */
+
+struct rte_ct_cnxn_data {
+ /* The timer will be kept as part of the cnxn_data. When it fires, the
+ * pointer to the timer can be cast as the pointer to the cnxn_data
+ */
+ struct rte_timer timer; /* !!!!! IMPORTANT: Keep as first field !!!!! */
+
+ struct rte_ct_cnxn_counters counters;
+
+ /* full key stored here to allow the timer to remove the connection */
+ /* TODO: Consider storing key signature as well to speed up deletions.*/
+ uint32_t key[10];
+
+ struct rte_ct_proto ct_protocol;
+
+ /* the 100 ms timing step that a packet was seen for connection */
+ uint64_t expected_timeout;
+
+ /* Abstract states also used for timer values, e.g. RTE_CT_TCP_UNACK*/
+ uint8_t state_used_for_timer;
+
+ /* used to compute the "direction" of the packet */
+ uint8_t key_is_client_order;
+ uint8_t connstatus;
+ uint8_t protocol;
+ /* used to store the type of packet ipv4 or ipv6 */
+ uint8_t type;
+ //#ifdef FTP_ALG
+ // Bypass flag to indicate that ALG checking is no more needed;
+ uint8_t alg_bypass_flag;
+ // Can we use key_is_client_order for direction checking
+ uint8_t server_direction;
+ int16_t tcpSeqdiff;
+ // PORT = 0, PASV = 1
+ uint8_t ftp_session_type;
+ uint32_t tcp_payload_size;
+ int16_t seq_client;
+ int16_t ack_client;
+ int16_t seq_server;
+ int16_t ack_server;
+ //#endif
+} __rte_cache_aligned;
+
+
+#define RTE_CT_TCP_MAX_RETRANS 3
+
+struct rte_ct_tcptimeout {
+ /* a table of timeouts for each state of TCP */
+ uint64_t tcp_timeouts[RTE_CT_TCP_MAX + 1];
+};
+
+
+struct rte_ct_misc_options {
+ uint8_t synproxy_enabled;
+ uint32_t tcp_loose;
+ uint32_t tcp_be_liberal;
+ uint32_t tcp_max_retrans;
+};
+
+struct rte_ct_udptimeout {
+ uint64_t udp_timeouts[RTE_CT_UDP_MAX + 1];
+};
+
+struct rte_ct_timeout {
+ struct rte_ct_tcptimeout tcptimeout;
+ struct rte_ct_udptimeout udptimeout;
+};
+
+struct rte_ct_cnxn_tracker {
+ struct rte_hash *rhash;
+
+ /*
+ * Data for bulk hash lookup. Use this memory as temporary space.
+ * Too big for stack (64*16 bytes)
+ */
+ uint32_t hash_keys[RTE_HASH_LOOKUP_BULK_MAX][10];
+
+ /* table of pointers to above, for bulk hash lookup */
+ void *hash_key_ptrs[RTE_HASH_LOOKUP_BULK_MAX];
+ #ifdef CT_CGNAT
+ uint32_t positions[RTE_HASH_LOOKUP_BULK_MAX];/*added for ALG*/
+ #endif
+ /* hash table and timer storage */
+ uint32_t num_cnxn_entries;
+
+ /*
+ * pointer to data space used for hash table, "num_cnxn_entries" long.
+ * Memory allocated during initialization.
+ */
+ struct rte_ct_cnxn_data *hash_table_entries;
+ struct rte_CT_counter_block *counters;
+
+ uint64_t hertz;
+ uint64_t timing_cycles_per_timing_step;
+ uint64_t timing_100ms_steps;
+ uint64_t timing_100ms_steps_previous;
+ uint64_t timing_last_time;
+ struct rte_ct_timeout ct_timeout;
+ struct rte_ct_misc_options misc_options;
+
+ char name[16];
+ struct rte_ct_cnxn_data *new_connections[64];
+ struct rte_mbuf *buffered_pkt_list;
+ int latest_connection;
+ /* offset into mbuf where synnproxy can store a pointer */
+ uint16_t pointer_offset;
+} __rte_cache_aligned;
+
+/*
+ * Returns a value stating if this is a valid TCP open connection attempt.
+ * If valid, updates cnxn with any data fields it need to save.
+ */
+
+enum rte_ct_packet_action
+rte_ct_tcp_new_connection(
+ struct rte_ct_cnxn_tracker *inst,
+ struct rte_ct_cnxn_data *cnxn,
+ struct rte_mbuf *pkt,
+ int use_synproxy,
+ uint8_t ip_hdr_size);
+
+/*
+* Returns a value stating if this is a valid TCP packet for the give connection.
+* If valid, updates cnxn with any data fields it need to save.
+*/
+
+enum rte_ct_packet_action
+rte_ct_verify_tcp_packet(
+ struct rte_ct_cnxn_tracker *inst,
+ struct rte_ct_cnxn_data *cnxn,
+ struct rte_mbuf *pkt,
+ uint8_t key_was_flipped,
+ uint8_t ip_hdr_size);
+
+/*
+* Returns a value stating if this is a valid UDP open connection attempt.
+* If valid, updates cnxn with any data fields it need to save.
+*/
+
+uint8_t
+rte_ct_udp_new_connection(
+ struct rte_ct_cnxn_tracker *ct,
+ struct rte_ct_cnxn_data *cd,
+ struct rte_mbuf *pkt);
+
+/*
+* Returns a value stating if this is a valid UDP packet for the give connection.
+* If valid, updates cnxn with any data fields it need to save.
+*/
+
+enum rte_ct_packet_action
+rte_ct_udp_packet(
+ struct rte_ct_cnxn_tracker *ct,
+ struct rte_ct_cnxn_data *cd,
+ struct rte_mbuf *pkt,
+ uint8_t key_was_flipped);
+
+
+/*
+ * For the given connection, set a timeout based on the given state. If the
+ * timer is already set, this call will reset the timer with a new value.
+ */
+
+void
+rte_ct_set_cnxn_timer_for_tcp(
+ struct rte_ct_cnxn_tracker *ct,
+ struct rte_ct_cnxn_data *cd,
+ uint8_t tcp_state);
+
+void
+rte_ct_set_cnxn_timer_for_udp(
+ struct rte_ct_cnxn_tracker *ct,
+ struct rte_ct_cnxn_data *cd,
+ uint8_t tcp_state);
+
+/* Cancel timer associated with the connection. Safe to call if no timer set.*/
+void rte_ct_cancel_cnxn_timer(struct rte_ct_cnxn_data *cd);
+
+
+/*
+ * SYNPROXY related routines. Detailed comments are available in
+ * rte_ct_synproxy.c where they are implemented.
+ */
+
+
+/* these 3 routines convert a received packet to a different one */
+
+void
+rte_sp_cvt_to_spoofed_client_synack(struct rte_ct_cnxn_data *cd,
+ struct rte_mbuf *old_pkt);
+
+void
+rte_sp_cvt_to_spoofed_server_syn(struct rte_ct_cnxn_data *cd,
+ struct rte_mbuf *old_pkt);
+
+void
+rte_sp_cvt_to_spoofed_server_ack(struct rte_ct_cnxn_data *cd,
+ struct rte_mbuf *old_pkt);
+
+/* These two routines adjust seq or ack numbers,
+ * as part of the proxy mechanism
+ */
+
+void
+rte_sp_adjust_client_ack_before_window_check(
+ struct rte_ct_cnxn_data *cd,
+ void *i_hdr,
+ struct tcp_hdr *thdr,
+ enum rte_ct_pkt_direction dir);
+
+void
+rte_sp_adjust_server_seq_after_window_check(
+ struct rte_ct_cnxn_data *cd,
+ void *i_hdr,
+ struct tcp_hdr *thdr,
+ enum rte_ct_pkt_direction dir);
+
+
+
+/* parse tcp options and save in t_opts */
+void
+rte_sp_parse_options(struct rte_mbuf *pkt, struct rte_ct_cnxn_data *cd);
+
+
+/* these two routines deal with packet buffering */
+
+void
+rte_ct_buffer_packet(
+ struct rte_ct_cnxn_tracker *ct,
+ struct rte_ct_cnxn_data *cd,
+ struct rte_mbuf *pkt);
+
+void
+ rte_ct_release_buffered_packets(
+ struct rte_ct_cnxn_tracker *ct,
+ struct rte_ct_cnxn_data *cd);
+
+#endif /* TCPCONNTRACK_H */
diff --git a/common/VIL/conntrack/rte_ct_udp.c b/common/VIL/conntrack/rte_ct_udp.c
new file mode 100644
index 00000000..88f3a9a4
--- /dev/null
+++ b/common/VIL/conntrack/rte_ct_udp.c
@@ -0,0 +1,49 @@
+/*
+// Copyright (c) 2017 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include <stdlib.h>
+#include <string.h>
+#include "rte_ct_tcp.h"
+#include "rte_cnxn_tracking.h"
+
+uint8_t rte_ct_udp_new_connection(__rte_unused struct rte_ct_cnxn_tracker *ct,
+ struct rte_ct_cnxn_data *cd,
+ __rte_unused struct rte_mbuf *pkt)
+{
+ /* printf("New connection UDP packet received\n"); */
+ cd->connstatus = RTE_INIT_CONN;
+ return 1;
+}
+enum rte_ct_packet_action rte_ct_udp_packet(struct rte_ct_cnxn_tracker *ct,
+ struct rte_ct_cnxn_data *cd,
+ __rte_unused struct rte_mbuf *pkt,
+ uint8_t key_was_flipped)
+{
+ enum rte_ct_pkt_direction dir;
+
+ dir = (cd->key_is_client_order == !key_was_flipped);
+ /* printf("packet received verify"); */
+ if (dir == RTE_CT_DIR_REPLY &&
+ cd->connstatus == RTE_INIT_CONN) {
+ rte_ct_set_cnxn_timer_for_udp(ct, cd, RTE_CT_UDP_REPLIED);
+ cd->connstatus = RTE_ASSURED_CONN;
+ } else if (dir == RTE_CT_DIR_REPLY &&
+ cd->connstatus == RTE_ASSURED_CONN)
+ rte_ct_set_cnxn_timer_for_udp(ct, cd, RTE_CT_UDP_REPLIED);
+ else
+ rte_ct_set_cnxn_timer_for_udp(ct, cd, RTE_CT_UDP_UNREPLIED);
+ return RTE_CT_FORWARD_PACKET;
+}