diff options
author | Deepak S <deepak.s@linux.intel.com> | 2017-07-13 21:26:50 -0700 |
---|---|---|
committer | Deepak S <deepak.s@linux.intel.com> | 2017-07-14 04:58:47 -0700 |
commit | 7286b2518ec8e4398b512ce95def9166a7af2e4a (patch) | |
tree | c93ef65d9e73e8893ccecb720152e16aae96a8b6 /VNFs/DPPD-PROX/handle_ipv6_tunnel.c | |
parent | adcb79da90176b27224eeb1d00aa0e611ef85a9b (diff) |
Adding PROX (Packet pROcessing eXecution engine) VNF to sampleVNF
JIRA: SAMPLEVNF-55
PROX is a DPDK-based application implementing Telco use-cases such as
a simplified BRAS/BNG, a light-weight AFTR, etc. It also allows configuring
finer-grained network functions such as QoS, routing and load-balancing.
(We are moving PROX version v039 to sampleVNF
https://01.org/intel-data-plane-performance-demonstrators/prox-overview)
Change-Id: Ia3cb02cf0e49ac5596e922c197ff7e010293d033
Signed-off-by: Deepak S <deepak.s@linux.intel.com>
Diffstat (limited to 'VNFs/DPPD-PROX/handle_ipv6_tunnel.c')
-rw-r--r-- | VNFs/DPPD-PROX/handle_ipv6_tunnel.c | 466 |
1 files changed, 466 insertions, 0 deletions
diff --git a/VNFs/DPPD-PROX/handle_ipv6_tunnel.c b/VNFs/DPPD-PROX/handle_ipv6_tunnel.c new file mode 100644 index 00000000..a92f9cdc --- /dev/null +++ b/VNFs/DPPD-PROX/handle_ipv6_tunnel.c @@ -0,0 +1,466 @@ +/* +// Copyright (c) 2010-2017 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#include <rte_ip.h> +#include <rte_udp.h> +#include <rte_tcp.h> +#include <rte_table_hash.h> +#include <rte_ether.h> +#include <rte_version.h> +#include <rte_byteorder.h> + +#include "prox_lua.h" +#include "prox_lua_types.h" + +#include "tx_pkt.h" +#include "task_init.h" +#include "task_base.h" +#include "prox_port_cfg.h" +#include "prefetch.h" +#include "lconf.h" +#include "hash_utils.h" +#include "etypes.h" +#include "prox_cksum.h" +#include "defines.h" +#include "log.h" +#include "quit.h" +#include "prox_cfg.h" +#include "parse_utils.h" +#include "cfgfile.h" +#include "prox_shared.h" + +#if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0) +#define IPPROTO_IPIP IPPROTO_IPV4 +#endif + +struct ipv6_tun_dest { + struct ipv6_addr dst_addr; + struct ether_addr dst_mac; +}; + +typedef enum ipv6_tun_dir_t { + TUNNEL_DIR_ENCAP = 0, + TUNNEL_DIR_DECAP = 1, +} ipv6_tun_dir_t; + +struct task_ipv6_tun_base { + struct task_base base; + struct ether_addr src_mac; + uint8_t core_nb; + uint64_t keys[64]; + struct rte_mbuf* fake_packets[64]; + uint16_t lookup_port_mask; // Mask used before looking up the port + void* lookup_table; // Fast lookup table for bindings + uint32_t 
runtime_flags; + int offload_crc; +}; + +struct task_ipv6_decap { + struct task_ipv6_tun_base base; + struct ether_addr dst_mac; +}; + +struct task_ipv6_encap { + struct task_ipv6_tun_base base; + uint32_t ipaddr; + struct ipv6_addr local_endpoint_addr; + uint8_t tunnel_hop_limit; +}; + +#define IPv6_VERSION 6 +#ifndef IPPROTO_IPV4 +#define IPPROTO_IPV4 4 +#endif + +#define MAKE_KEY_FROM_FIELDS(ipv4_addr, port, port_mask) ( ((uint64_t)ipv4_addr << 16) | (port & port_mask) ) + +static int handle_ipv6_decap_bulk(struct task_base* tbase, struct rte_mbuf** rx_mbuf, const uint16_t n_pkts); +static int handle_ipv6_encap_bulk(struct task_base* tbase, struct rte_mbuf** rx_mbuf, const uint16_t n_pkts); + +static void init_lookup_table(struct task_ipv6_tun_base* ptask, struct task_args *targ) +{ + const int socket_id = rte_lcore_to_socket_id(targ->lconf->id); + + /* The lookup table is a per-core data structure to reduce the + memory footprint and improve cache utilization. Since + operations on the hash table are not safe, the data + structure can't be used on a per socket or on a system wide + basis. */ + ptask->lookup_table = prox_sh_find_core(targ->lconf->id, "ipv6_binding_table"); + if (NULL == ptask->lookup_table) { + struct ipv6_tun_binding_table *table; + PROX_PANIC(!strcmp(targ->tun_bindings, ""), "No tun bindings specified\n"); + int ret = lua_to_ip6_tun_binding(prox_lua(), GLOBAL, targ->tun_bindings, socket_id, &table); + PROX_PANIC(ret, "Failed to read tun_bindings config:\n %s\n", get_lua_to_errors()); + + struct rte_table_hash_key8_ext_params table_hash_params = { + .n_entries = (table->num_binding_entries * 4), + .n_entries_ext = (table->num_binding_entries * 2) >> 1, + .f_hash = hash_crc32, + .seed = 0, + .signature_offset = HASH_METADATA_OFFSET(8), // Ignored for dosig tables + .key_offset = HASH_METADATA_OFFSET(0), + }; + plogx_info("IPv6 Tunnel allocating lookup table on socket %d\n", socket_id); + ptask->lookup_table = rte_table_hash_key8_ext_dosig_ops. 
+ f_create(&table_hash_params, socket_id, sizeof(struct ipv6_tun_dest)); + PROX_PANIC(ptask->lookup_table == NULL, "Error creating IPv6 Tunnel lookup table"); + + for (unsigned idx = 0; idx < table->num_binding_entries; idx++) { + int key_found = 0; + void* entry_in_hash = NULL; + struct ipv6_tun_dest data; + struct ipv6_tun_binding_entry* entry = &table->entry[idx]; + uint64_t key = MAKE_KEY_FROM_FIELDS(rte_cpu_to_be_32(entry->public_ipv4), entry->public_port, ptask->lookup_port_mask); + rte_memcpy(&data.dst_addr, &entry->endpoint_addr, sizeof(struct ipv6_addr)); + rte_memcpy(&data.dst_mac, &entry->next_hop_mac, sizeof(struct ether_addr)); + + int ret = rte_table_hash_key8_ext_dosig_ops.f_add(ptask->lookup_table, &key, &data, &key_found, &entry_in_hash); + PROX_PANIC(ret, "Error adding entry (%d) to binding lookup table", idx); + PROX_PANIC(key_found, "key_found!!! for idx=%d\n", idx); + +#ifdef DBG_IPV6_TUN_BINDING + plog_info("Bind: %x:0x%x (port_mask 0x%x) key=0x%"PRIx64"\n", entry->public_ipv4, entry->public_port, ptask->lookup_port_mask, key); + plog_info(" -> "IPv6_BYTES_FMT" ("MAC_BYTES_FMT")\n", IPv6_BYTES(entry->endpoint_addr.bytes), MAC_BYTES(entry->next_hop_mac.addr_bytes)); + plog_info(" -> "IPv6_BYTES_FMT" ("MAC_BYTES_FMT")\n", IPv6_BYTES(data.dst_addr.bytes), MAC_BYTES(data.dst_mac.addr_bytes)); + plog_info(" -> entry_in_hash=%p\n", entry_in_hash); +#endif + } + plogx_info("IPv6 Tunnel created %d lookup table entries\n", table->num_binding_entries); + + prox_sh_add_core(targ->lconf->id, "ipv6_binding_table", ptask->lookup_table); + } +} + +static void init_task_ipv6_tun_base(struct task_ipv6_tun_base* tun_base, struct task_args* targ) +{ + memcpy(&tun_base->src_mac, find_reachable_port(targ), sizeof(tun_base->src_mac)); + + tun_base->lookup_port_mask = targ->lookup_port_mask; // Mask used before looking up the port + + init_lookup_table(tun_base, targ); + + for (uint32_t i = 0; i < 64; ++i) { + tun_base->fake_packets[i] = (struct 
rte_mbuf*)((uint8_t*)&tun_base->keys[i] - sizeof (struct rte_mbuf)); + } + + plogx_info("IPv6 Tunnel MAC="MAC_BYTES_FMT" port_mask=0x%x\n", + MAC_BYTES(tun_base->src_mac.addr_bytes), tun_base->lookup_port_mask); + + struct prox_port_cfg *port = find_reachable_port(targ); + if (port) { + tun_base->offload_crc = port->capabilities.tx_offload_cksum; + } +} + +static void init_task_ipv6_decap(struct task_base* tbase, struct task_args* targ) +{ + struct task_ipv6_decap* tun_task = (struct task_ipv6_decap*)tbase; + struct task_ipv6_tun_base* tun_base = (struct task_ipv6_tun_base*)tun_task; + + init_task_ipv6_tun_base(tun_base, targ); + tun_base->runtime_flags = targ->runtime_flags; + + memcpy(&tun_task->dst_mac, &targ->edaddr, sizeof(tun_task->dst_mac)); +} + +static void init_task_ipv6_encap(struct task_base* tbase, struct task_args* targ) +{ + struct task_ipv6_encap* tun_task = (struct task_ipv6_encap*)tbase; + struct task_ipv6_tun_base *tun_base = (struct task_ipv6_tun_base*)tun_task; + + init_task_ipv6_tun_base(tun_base, targ); + + rte_memcpy(&tun_task->local_endpoint_addr, &targ->local_ipv6, sizeof(tun_task->local_endpoint_addr)); + tun_task->tunnel_hop_limit = targ->tunnel_hop_limit; + tun_base->runtime_flags = targ->runtime_flags; +} + +static struct task_init task_init_ipv6_decap = { + .mode_str = "ipv6_decap", + .init = init_task_ipv6_decap, + .handle = handle_ipv6_decap_bulk, + .size = sizeof(struct task_ipv6_decap) +}; + +static struct task_init task_init_ipv6_encap = { + .mode_str = "ipv6_encap", + .init = init_task_ipv6_encap, + .handle = handle_ipv6_encap_bulk, + .size = sizeof(struct task_ipv6_encap) +}; + +__attribute__((constructor)) static void reg_task_ipv6_decap(void) +{ + reg_task(&task_init_ipv6_decap); +} + +__attribute__((constructor)) static void reg_task_ipv6_encap(void) +{ + reg_task(&task_init_ipv6_encap); +} + +static inline uint8_t handle_ipv6_decap(struct task_ipv6_decap* ptask, struct rte_mbuf* rx_mbuf, struct ipv6_tun_dest* tun_dest); 
+static inline uint8_t handle_ipv6_encap(struct task_ipv6_encap* ptask, struct rte_mbuf* rx_mbuf, struct ipv6_tun_dest* tun_dest); + +static inline int extract_key_fields( __attribute__((unused)) struct task_ipv6_tun_base* ptask, struct ipv4_hdr* pip4, ipv6_tun_dir_t dir, uint32_t* pAddr, uint16_t* pPort) +{ + *pAddr = (dir == TUNNEL_DIR_DECAP) ? pip4->src_addr : pip4->dst_addr; + + if (pip4->next_proto_id == IPPROTO_UDP) { + struct udp_hdr* pudp = (struct udp_hdr *)(pip4 + 1); + *pPort = rte_be_to_cpu_16((dir == TUNNEL_DIR_DECAP) ? pudp->src_port : pudp->dst_port); + } + else if (pip4->next_proto_id == IPPROTO_TCP) { + struct tcp_hdr* ptcp = (struct tcp_hdr *)(pip4 + 1); + *pPort = rte_be_to_cpu_16((dir == TUNNEL_DIR_DECAP) ? ptcp->src_port : ptcp->dst_port); + } + else { + plog_warn("IPv6 Tunnel: IPv4 packet of unexpected type proto_id=0x%x\n", pip4->next_proto_id); + *pPort = 0xffff; + return -1; + } + + return 0; +} + +static inline void extract_key(struct task_ipv6_tun_base* ptask, struct ipv4_hdr* pip4, ipv6_tun_dir_t dir, uint64_t* pkey) +{ + uint32_t lookup_addr; + uint16_t lookup_port; + + if (unlikely( extract_key_fields(ptask, pip4, dir, &lookup_addr, &lookup_port))) { + plog_warn("IPv6 Tunnel: Unable to extract fields from packet\n"); + *pkey = 0xffffffffL; + return; + } + + *pkey = MAKE_KEY_FROM_FIELDS(lookup_addr, lookup_port, ptask->lookup_port_mask); +} + +static inline struct ipv4_hdr* get_ipv4_decap(struct rte_mbuf *mbuf) +{ + struct ether_hdr* peth = rte_pktmbuf_mtod(mbuf, struct ether_hdr *); + struct ipv6_hdr* pip6 = (struct ipv6_hdr *)(peth + 1); + struct ipv4_hdr* pip4 = (struct ipv4_hdr*) (pip6 + 1); // TODO - Skip Option headers + + return pip4; +} + +static inline struct ipv4_hdr* get_ipv4_encap(struct rte_mbuf *mbuf) +{ + struct ether_hdr* peth = rte_pktmbuf_mtod(mbuf, struct ether_hdr *); + struct ipv4_hdr* pip4 = (struct ipv4_hdr *)(peth + 1); + + return pip4; +} + +static inline void extract_key_decap(struct task_ipv6_tun_base* ptask, 
struct rte_mbuf *mbuf, uint64_t* pkey) +{ + extract_key(ptask, get_ipv4_decap(mbuf), TUNNEL_DIR_DECAP, pkey); +} + +static inline void extract_key_decap_bulk(struct task_ipv6_tun_base* ptask, struct rte_mbuf **mbufs, uint16_t n_pkts) +{ + for (uint16_t j = 0; j < n_pkts; ++j) { + extract_key_decap(ptask, mbufs[j], &ptask->keys[j]); + } +} + +static inline void extract_key_encap(struct task_ipv6_tun_base* ptask, struct rte_mbuf *mbuf, uint64_t* pkey) +{ + extract_key(ptask, get_ipv4_encap(mbuf), TUNNEL_DIR_ENCAP, pkey); +} + +static inline void extract_key_encap_bulk(struct task_ipv6_tun_base* ptask, struct rte_mbuf **mbufs, uint16_t n_pkts) +{ + for (uint16_t j = 0; j < n_pkts; ++j) { + extract_key_encap(ptask, mbufs[j], &ptask->keys[j]); + } +} + +__attribute__((cold)) static void handle_error(struct task_ipv6_tun_base* ptask, struct rte_mbuf* mbuf, ipv6_tun_dir_t dir) +{ + uint32_t lookup_addr; + uint16_t lookup_port; + uint64_t key; + + struct ipv4_hdr* pip4 = (dir == TUNNEL_DIR_DECAP) ? get_ipv4_decap(mbuf) : get_ipv4_encap(mbuf); + extract_key_fields(ptask, pip4, dir, &lookup_addr, &lookup_port); + extract_key(ptask, pip4, dir, &key); + + plog_warn("IPv6 Tunnel (%s) lookup failed for "IPv4_BYTES_FMT":%d [key=0x%"PRIx64"]\n", + (dir == TUNNEL_DIR_DECAP) ? 
"decap" : "encap", + IPv4_BYTES(((unsigned char*)&lookup_addr)), lookup_port, key); +} + +static int handle_ipv6_decap_bulk(struct task_base* tbase, struct rte_mbuf** mbufs, const uint16_t n_pkts) +{ + struct task_ipv6_decap* task = (struct task_ipv6_decap *)tbase; + uint64_t pkts_mask = RTE_LEN2MASK(n_pkts, uint64_t); + struct ipv6_tun_dest* entries[64]; + uint8_t out[MAX_PKT_BURST]; + uint64_t lookup_hit_mask; + uint16_t n_kept = 0; + + prefetch_pkts(mbufs, n_pkts); + + // Lookup to verify packets are valid for their respective tunnels (their sending lwB4) + extract_key_decap_bulk(&task->base, mbufs, n_pkts); + rte_table_hash_key8_ext_dosig_ops.f_lookup(task->base.lookup_table, task->base.fake_packets, pkts_mask, &lookup_hit_mask, (void**)entries); + + if (likely(lookup_hit_mask == pkts_mask)) { + for (uint16_t j = 0; j < n_pkts; ++j) { + out[j] = handle_ipv6_decap(task, mbufs[j], entries[j]); + } + } + else { + for (uint16_t j = 0; j < n_pkts; ++j) { + if (unlikely(!((lookup_hit_mask >> j) & 0x1))) { + handle_error(&task->base, mbufs[j], TUNNEL_DIR_DECAP); + out[j] = OUT_DISCARD; + continue; + } + out[j] = handle_ipv6_decap(task, mbufs[j], entries[j]); + } + } + + return task->base.base.tx_pkt(tbase, mbufs, n_pkts, out); +} + +static int handle_ipv6_encap_bulk(struct task_base* tbase, struct rte_mbuf** mbufs, const uint16_t n_pkts) +{ + struct task_ipv6_encap* task = (struct task_ipv6_encap *)tbase; + uint64_t pkts_mask = RTE_LEN2MASK(n_pkts, uint64_t); + struct ipv6_tun_dest* entries[64]; + uint64_t lookup_hit_mask; + uint8_t out[MAX_PKT_BURST]; + uint16_t n_kept = 0; + + prefetch_first(mbufs, n_pkts); + + extract_key_encap_bulk(&task->base, mbufs, n_pkts); + rte_table_hash_key8_ext_dosig_ops.f_lookup(task->base.lookup_table, task->base.fake_packets, pkts_mask, &lookup_hit_mask, (void**)entries); + + if (likely(lookup_hit_mask == pkts_mask)) { + for (uint16_t j = 0; j < n_pkts; ++j) { + out[j] = handle_ipv6_encap(task, mbufs[j], entries[j]); + } + } + else { + 
for (uint16_t j = 0; j < n_pkts; ++j) { + if (unlikely(!((lookup_hit_mask >> j) & 0x1))) { + handle_error(&task->base, mbufs[j], TUNNEL_DIR_ENCAP); + out[j] = OUT_DISCARD; + continue; + } + out[j] = handle_ipv6_encap(task, mbufs[j], entries[j]); + } + } + + return task->base.base.tx_pkt(tbase, mbufs, n_pkts, out); +} + +static inline uint8_t handle_ipv6_decap(struct task_ipv6_decap* ptask, struct rte_mbuf* rx_mbuf, __attribute__((unused)) struct ipv6_tun_dest* tun_dest) +{ + struct ether_hdr* peth = rte_pktmbuf_mtod(rx_mbuf, struct ether_hdr *); + + if (unlikely(peth->ether_type != ETYPE_IPv6)) { + plog_warn("Received non IPv6 packet on ipv6 tunnel port\n"); + // Drop packet + return OUT_DISCARD; + } + + struct ipv6_hdr* pip6 = (struct ipv6_hdr *)(peth + 1); + int ipv6_hdr_len = sizeof(struct ipv6_hdr); + + // TODO - Skip over any IPv6 Extension Header: + // If pip6->next_header is in (0, 43, 44, 50, 51, 60, 135), skip ahead pip->hdr_ext_len + // bytes and repeat. Increase ipv6_hdr_len with as much, each time. 
+ + if (unlikely(pip6->proto != IPPROTO_IPIP)) { + plog_warn("Received non IPv4 content within IPv6 tunnel packet\n"); + // Drop packet + return OUT_DISCARD; + } + + // Discard IPv6 encapsulation + rte_pktmbuf_adj(rx_mbuf, ipv6_hdr_len); + peth = rte_pktmbuf_mtod(rx_mbuf, struct ether_hdr *); + + // Restore Ethernet header + ether_addr_copy(&ptask->base.src_mac, &peth->s_addr); + ether_addr_copy(&ptask->dst_mac, &peth->d_addr); + peth->ether_type = ETYPE_IPv4; + + return 0; +} + +static inline uint8_t handle_ipv6_encap(struct task_ipv6_encap* ptask, struct rte_mbuf* rx_mbuf, __attribute__((unused)) struct ipv6_tun_dest* tun_dest) +{ + //plog_info("Found tunnel endpoint:"IPv6_BYTES_FMT" ("MAC_BYTES_FMT")\n", IPv6_BYTES(tun_dest->dst_addr), MAC_BYTES(tun_dest->dst_mac.addr_bytes)); + + struct ether_hdr* peth = (struct ether_hdr *)(rte_pktmbuf_mtod(rx_mbuf, struct ether_hdr *)); + struct ipv4_hdr* pip4 = (struct ipv4_hdr *)(peth + 1); + uint16_t ipv4_length = rte_be_to_cpu_16(pip4->total_length); + struct task_ipv6_tun_base* tun_base = (struct task_ipv6_tun_base*)ptask; + + if (unlikely((pip4->version_ihl >> 4) != 4)) { + plog_warn("Received non IPv4 packet at ipv6 tunnel input\n"); + // Drop packet + return OUT_DISCARD; + } + + if (pip4->time_to_live) { + pip4->time_to_live--; + } + else { + plog_info("TTL = 0 => Dropping\n"); + return OUT_DISCARD; + } + pip4->hdr_checksum = 0; + + // Remove padding if any (we don't want to encapsulate garbage at end of IPv4 packet) + int padding = rte_pktmbuf_pkt_len(rx_mbuf) - (ipv4_length + sizeof(struct ether_hdr)); + if (unlikely(padding > 0)) { + rte_pktmbuf_trim(rx_mbuf, padding); + } + + // Encapsulate + const int extra_space = sizeof(struct ipv6_hdr); + peth = (struct ether_hdr *)rte_pktmbuf_prepend(rx_mbuf, extra_space); + + // Ethernet Header + ether_addr_copy(&ptask->base.src_mac, &peth->s_addr); + ether_addr_copy(&tun_dest->dst_mac, &peth->d_addr); + peth->ether_type = ETYPE_IPv6; + + // Set up IPv6 Header + struct 
ipv6_hdr* pip6 = (struct ipv6_hdr *)(peth + 1); + pip6->vtc_flow = rte_cpu_to_be_32(IPv6_VERSION << 28); + pip6->proto = IPPROTO_IPIP; + pip6->payload_len = rte_cpu_to_be_16(ipv4_length); + pip6->hop_limits = ptask->tunnel_hop_limit; + rte_memcpy(pip6->dst_addr, &tun_dest->dst_addr, sizeof(pip6->dst_addr)); + rte_memcpy(pip6->src_addr, &ptask->local_endpoint_addr, sizeof(pip6->src_addr)); + + if (tun_base->runtime_flags & TASK_TX_CRC) { + // We modified the TTL in the IPv4 header, hence have to recompute the IPv4 checksum +#define TUNNEL_L2_LEN (sizeof(struct ether_hdr) + sizeof(struct ipv6_hdr)) + prox_ip_cksum(rx_mbuf, pip4, TUNNEL_L2_LEN, sizeof(struct ipv4_hdr), ptask->base.offload_crc); + } + return 0; +} |