Diffstat (limited to 'VNFs/DPPD-PROX/packet_utils.c')
-rw-r--r--  VNFs/DPPD-PROX/packet_utils.c  286
1 files changed, 224 insertions, 62 deletions
diff --git a/VNFs/DPPD-PROX/packet_utils.c b/VNFs/DPPD-PROX/packet_utils.c
index e06529c4..04746130 100644
--- a/VNFs/DPPD-PROX/packet_utils.c
+++ b/VNFs/DPPD-PROX/packet_utils.c
@@ -17,6 +17,8 @@
#include <rte_lcore.h>
#include <rte_hash.h>
#include <rte_hash_crc.h>
+#include <rte_lpm.h>
+
#include "task_base.h"
#include "lconf.h"
#include "prefetch.h"
@@ -25,6 +27,11 @@
#include "handle_master.h"
#include "prox_port_cfg.h"
#include "packet_utils.h"
+#include "prox_shared.h"
+#include "prox_lua.h"
+#include "hash_entry_types.h"
+#include "prox_compat.h"
+#include "tx_pkt.h"
static inline int find_ip(struct ether_hdr_arp *pkt, uint16_t len, uint32_t *ip_dst)
{
@@ -74,17 +81,92 @@ static inline int find_ip(struct ether_hdr_arp *pkt, uint16_t len, uint32_t *ip_
We should check arp_update_time in the master process. This would also require the generating task to clear its arp ring
to avoid sending many ARP while starting after a long stop.
We could also check for arp_timeout in the master so that dataplane has only to check whether MAC is available
- but this would require either thread safety, or the the exchange of information between master and generating core.
+ but this would require either thread safety, or the exchange of information between master and generating core.
*/
+static inline int add_key_and_send_arp(struct rte_hash *ip_hash, uint32_t *ip_dst, struct arp_table *entries, uint64_t tsc, uint64_t hz, uint32_t arp_update_time, prox_next_hop_index_type nh, uint64_t **time)
+{
+ int ret = rte_hash_add_key(ip_hash, (const void *)ip_dst);
+ if (unlikely(ret < 0)) {
+ // No reason to send ARP, as the reply would be ignored anyhow
+ plogx_err("Unable to add ip "IPv4_BYTES_FMT" in mac_hash\n", IP4(*ip_dst));
+ return DROP_MBUF;
+ } else {
+ entries[ret].ip = *ip_dst;
+ entries[ret].nh = nh;
+ *time = &entries[ret].arp_update_time;
+ }
+ return SEND_ARP;
+}
+
+static inline int update_mac_and_send_mbuf(struct arp_table *entry, prox_rte_ether_addr *mac, uint64_t tsc, uint64_t hz, uint32_t arp_update_time, uint64_t **time)
+{
+ if (likely((tsc < entry->arp_update_time) && (tsc < entry->arp_timeout))) {
+ memcpy(mac, &entry->mac, sizeof(prox_rte_ether_addr));
+ return SEND_MBUF;
+ } else if (tsc > entry->arp_update_time) {
+ // Long time since we last sent an ARP => send one
+ *time = &entry->arp_update_time;
+ if (tsc < entry->arp_timeout) {
+ // MAC is valid in the table => send also the mbuf
+ memcpy(mac, &entry->mac, sizeof(prox_rte_ether_addr));
+ return SEND_MBUF_AND_ARP;
+ } else {
+ // MAC still unknown, or timed out => only send ARP
+ return SEND_ARP;
+ }
+ }
+ // MAC is unknown and we already sent an ARP recently, drop mbuf and wait for ARP reply
+ return DROP_MBUF;
+}
+
int write_dst_mac(struct task_base *tbase, struct rte_mbuf *mbuf, uint32_t *ip_dst, uint64_t **time)
{
const uint64_t hz = rte_get_tsc_hz();
struct ether_hdr_arp *packet = rte_pktmbuf_mtod(mbuf, struct ether_hdr_arp *);
prox_rte_ether_addr *mac = &packet->ether_hdr.d_addr;
+ prox_next_hop_index_type next_hop_index;
uint64_t tsc = rte_rdtsc();
struct l3_base *l3 = &(tbase->l3);
+
+ // First find the next hop
+ if (l3->ipv4_lpm) {
+ // A routing table was configured
+ // If a gw (gateway_ipv4) is also specified, it is only used as the default gw, i.e. with the lowest priority (shortest prefix)
+ // This falls out automatically from LPM longest-prefix matching
+ uint16_t len = rte_pktmbuf_pkt_len(mbuf);
+ if (find_ip(packet, len, ip_dst) != 0) {
+ // Unable to find IP address => non-IP packet => send it as is
+ return SEND_MBUF;
+ }
+ if (unlikely(rte_lpm_lookup(l3->ipv4_lpm, rte_bswap32(*ip_dst), &next_hop_index) != 0)) {
+ plog_err("No route to IP "IPv4_BYTES_FMT"\n", IP4(*ip_dst));
+ return DROP_MBUF;
+ }
+ struct arp_table *entry = &l3->next_hops[next_hop_index];
+
+ if (entry->ip) {
+ *ip_dst = entry->ip;
+ } else {
+ // no next ip: this is a local route
+ next_hop_index = MAX_HOP_INDEX;
+ }
+ // Find IP in lookup table. Send ARP if not found
+ int ret = rte_hash_lookup(l3->ip_hash, (const void *)ip_dst);
+ if (unlikely(ret < 0)) {
+ // IP not found, try to send an ARP
+ return add_key_and_send_arp(l3->ip_hash, ip_dst, l3->arp_table, tsc, hz, l3->arp_update_time, next_hop_index, time);
+ } else {
+ if (entry->ip)
+ return update_mac_and_send_mbuf(entry, mac, tsc, hz, l3->arp_update_time, time);
+ else
+ return update_mac_and_send_mbuf(&l3->arp_table[ret], mac, tsc, hz, l3->arp_update_time, time);
+ }
+ return 0;
+ }
+ // No Routing table specified: only a local ip and maybe a gateway
+ // Old default behavior: if a gw is specified, ALL packets go to this gateway (even those we could send without the gw)
if (l3->gw.ip) {
if (likely((l3->flags & FLAG_DST_MAC_KNOWN) && (tsc < l3->gw.arp_update_time) && (tsc < l3->gw.arp_timeout))) {
memcpy(mac, &l3->gw.mac, sizeof(prox_rte_ether_addr));
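Aside on the LPM behaviour the routing comments above rely on: a more specific route always wins the lookup, so a gateway installed with the shortest prefix is only hit when nothing longer matches. A minimal, self-contained rte_lpm sketch (not part of this patch; it assumes rte_eal_init() has already run, and table sizes and addresses are arbitrary):

    /* Sketch only: longest-prefix match picks the most specific entry. */
    #include <rte_lpm.h>

    static void lpm_priority_sketch(void)
    {
        struct rte_lpm_config cfg = { .max_rules = 64, .number_tbl8s = 256, .flags = 0 };
        struct rte_lpm *lpm = rte_lpm_create("lpm_sketch", 0, &cfg);
        uint32_t nh;

        /* index 0 = gateway on a short prefix, index 1 = more specific route.
         * rte_lpm takes host-order addresses, hence the rte_bswap32() calls
         * in the patch above. */
        rte_lpm_add(lpm, 0x0a000000 /* 10.0.0.0 */, 8, 0);
        rte_lpm_add(lpm, 0x0a010000 /* 10.1.0.0 */, 16, 1);

        rte_lpm_lookup(lpm, 0x0a010203 /* 10.1.2.3  */, &nh); /* nh == 1: the /16 wins      */
        rte_lpm_lookup(lpm, 0x0a630001 /* 10.99.0.1 */, &nh); /* nh == 0: falls back to /8  */
        rte_lpm_free(lpm);
    }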
@@ -117,25 +199,7 @@ int write_dst_mac(struct task_base *tbase, struct rte_mbuf *mbuf, uint32_t *ip_d
for (unsigned int idx = 0; idx < l3->n_pkts; idx++) {
if (*ip_dst == l3->optimized_arp_table[idx].ip) {
// IP address already in table
- if ((tsc < l3->optimized_arp_table[idx].arp_update_time) && (tsc < l3->optimized_arp_table[idx].arp_timeout)) {
- // MAC address was recently updated in table, use it
- memcpy(mac, &l3->optimized_arp_table[idx].mac, sizeof(prox_rte_ether_addr));
- return SEND_MBUF;
- } else if (tsc > l3->optimized_arp_table[idx].arp_update_time) {
- // ARP not sent since a long time, send ARP
- *time = &l3->optimized_arp_table[idx].arp_update_time;
- if (tsc < l3->optimized_arp_table[idx].arp_timeout) {
- // MAC still valid => also send mbuf
- memcpy(mac, &l3->optimized_arp_table[idx].mac, sizeof(prox_rte_ether_addr));
- return SEND_MBUF_AND_ARP;
- } else {
- // MAC unvalid => only send ARP
- return SEND_ARP;
- }
- } else {
- // ARP timeout elapsed, MAC not valid anymore but waiting for ARP reply
- return DROP_MBUF;
- }
+ return update_mac_and_send_mbuf(&l3->optimized_arp_table[idx], mac, tsc, hz, l3->arp_update_time, time);
}
}
// IP address not found in table
@@ -156,7 +220,7 @@ int write_dst_mac(struct task_base *tbase, struct rte_mbuf *mbuf, uint32_t *ip_d
// If it happens, we still send the ARP as easier:
// If the ARP corresponds to this error, the ARP reply will be ignored
// If ARP does not correspond to this error/ip, then ARP reply will be handled.
- plogx_err("Unable add ip %d.%d.%d.%d in mac_hash (already %d entries)\n", IP4(ip), idx);
+ plogx_err("Unable add ip "IPv4_BYTES_FMT" in mac_hash (already %d entries)\n", IP4(ip), idx);
} else {
memcpy(&l3->arp_table[ret], &l3->optimized_arp_table[idx], sizeof(struct arp_table));
}
@@ -167,35 +231,10 @@ int write_dst_mac(struct task_base *tbase, struct rte_mbuf *mbuf, uint32_t *ip_d
int ret = rte_hash_lookup(l3->ip_hash, (const void *)ip_dst);
if (unlikely(ret < 0)) {
// IP not found, try to send an ARP
- int ret = rte_hash_add_key(l3->ip_hash, (const void *)ip_dst);
- if (ret < 0) {
- // No reason to send ARP, as reply would be anyhow ignored
- plogx_err("Unable to add ip %d.%d.%d.%d in mac_hash\n", IP4(*ip_dst));
- return DROP_MBUF;
- } else {
- l3->arp_table[ret].ip = *ip_dst;
- *time = &l3->arp_table[ret].arp_update_time;
- }
- return SEND_ARP;
+ return add_key_and_send_arp(l3->ip_hash, ip_dst, l3->arp_table, tsc, hz, l3->arp_update_time, MAX_HOP_INDEX, time);
} else {
// IP has been found
- if (likely((tsc < l3->arp_table[ret].arp_update_time) && (tsc < l3->arp_table[ret].arp_timeout))) {
- // MAC still valid and ARP sent recently
- memcpy(mac, &l3->arp_table[ret].mac, sizeof(prox_rte_ether_addr));
- return SEND_MBUF;
- } else if (tsc > l3->arp_table[ret].arp_update_time) {
- // ARP not sent since a long time, send ARP
- *time = &l3->arp_table[ret].arp_update_time;
- if (tsc < l3->arp_table[ret].arp_timeout) {
- // MAC still valid => send also MBUF
- memcpy(mac, &l3->arp_table[ret].mac, sizeof(prox_rte_ether_addr));
- return SEND_MBUF_AND_ARP;
- } else {
- return SEND_ARP;
- }
- } else {
- return DROP_MBUF;
- }
+ return update_mac_and_send_mbuf(&l3->arp_table[ret], mac, tsc, hz, l3->arp_update_time, time);
}
}
// Should not happen
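For reference, the verdicts returned by update_mac_and_send_mbuf() reduce to comparing the current TSC against the per-entry arp_update_time and arp_timeout. A minimal, stand-alone sketch of that decision (local names, not the enums from packet_utils.h):

    /* Sketch only (not part of the patch): same verdict logic, reduced to
     * the three timestamps involved. */
    #include <assert.h>
    #include <stdint.h>

    enum { DROP_MBUF_X, SEND_MBUF_X, SEND_ARP_X, SEND_MBUF_AND_ARP_X };

    static int verdict(uint64_t tsc, uint64_t arp_update_time, uint64_t arp_timeout)
    {
        if (tsc < arp_update_time && tsc < arp_timeout)
            return SEND_MBUF_X;                 /* MAC fresh, ARP sent recently       */
        if (tsc > arp_update_time)
            return tsc < arp_timeout ?
                SEND_MBUF_AND_ARP_X :           /* MAC still valid, refresh the ARP   */
                SEND_ARP_X;                     /* MAC expired or never learned       */
        return DROP_MBUF_X;                     /* ARP already in flight, keep waiting */
    }

    int main(void)
    {
        assert(verdict(100, 200, 300) == SEND_MBUF_X);
        assert(verdict(250, 200, 300) == SEND_MBUF_AND_ARP_X);
        assert(verdict(350, 200, 300) == SEND_ARP_X);
        assert(verdict(150, 200, 100) == DROP_MBUF_X);
        return 0;
    }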
@@ -248,6 +287,7 @@ void task_start_l3(struct task_base *tbase, struct task_args *targ)
const int NB_ARP_MBUF = 1024;
const int ARP_MBUF_SIZE = 2048;
const int NB_CACHE_ARP_MBUF = 256;
+ const int socket_id = rte_lcore_to_socket_id(targ->lconf->id);
struct prox_port_cfg *port = find_reachable_port(targ);
if (port && (tbase->l3.arp_pool == NULL)) {
@@ -257,6 +297,45 @@ void task_start_l3(struct task_base *tbase, struct task_args *targ)
tbase->local_ipv4 = rte_be_to_cpu_32(targ->local_ipv4);
register_ip_to_ctrl_plane(tbase->l3.tmaster, tbase->local_ipv4, tbase->l3.reachable_port_id, targ->lconf->id, targ->id);
}
+ if (strcmp(targ->route_table, "") != 0) {
+ struct lpm4 *lpm;
+ int ret;
+
+ PROX_PANIC(tbase->local_ipv4 == 0, "missing local_ipv4 while route table is specified in L3 mode\n");
+
+ // LPM might be modified at runtime => do not share it with other cores
+ ret = lua_to_lpm4(prox_lua(), GLOBAL, targ->route_table, socket_id, &lpm);
+ PROX_PANIC(ret, "Failed to load IPv4 LPM:\n%s\n", get_lua_to_errors());
+
+ tbase->l3.ipv4_lpm = lpm->rte_lpm;
+ tbase->l3.next_hops = prox_zmalloc(sizeof(*tbase->l3.next_hops) * MAX_HOP_INDEX, socket_id);
+ PROX_PANIC(tbase->l3.next_hops == NULL, "Could not allocate memory for next hop\n");
+
+ for (uint32_t i = 0; i < MAX_HOP_INDEX; i++) {
+ if (!lpm->next_hops[i].ip_dst)
+ continue;
+ tbase->l3.nb_gws++;
+ tbase->l3.next_hops[i].ip = rte_bswap32(lpm->next_hops[i].ip_dst);
+ int tx_port = lpm->next_hops[i].mac_port.out_idx;
+ // gen only supports one port right now .... hence port = 0
+ if ((tx_port > targ->nb_txports - 1) && (tx_port > targ->nb_txrings - 1)) {
+ PROX_PANIC(1, "Routing Table contains port %d but only %d tx ports / %d rings\n", tx_port, targ->nb_txports, targ->nb_txrings);
+ }
+ }
+ plog_info("Using routing table %s in l3 mode, with %d gateways\n", targ->route_table, tbase->l3.nb_gws);
+
+ // The last-but-one "next_hop_index" is not a gateway; it marks direct (local) routes
+ tbase->l3.next_hops[tbase->l3.nb_gws].ip = 0;
+ ret = rte_lpm_add(tbase->l3.ipv4_lpm, targ->local_ipv4, targ->local_prefix, tbase->l3.nb_gws++);
+ PROX_PANIC(ret, "Failed to add local_ipv4 "IPv4_BYTES_FMT"/%d to lpm\n", IP4(tbase->local_ipv4), targ->local_prefix);
+ // Last "next_hop_index" is default gw
+ tbase->l3.next_hops[tbase->l3.nb_gws].ip = rte_bswap32(targ->gateway_ipv4);
+ if (targ->gateway_ipv4) {
+ ret = rte_lpm_add(tbase->l3.ipv4_lpm, targ->gateway_ipv4, 0, tbase->l3.nb_gws++);
+ PROX_PANIC(ret, "Failed to add gateway_ipv4 "IPv4_BYTES_FMT"/%d to lpm\n", IP4(tbase->l3.gw.ip), 0);
+ }
+ }
+
master_init_vdev(tbase->l3.tmaster, tbase->l3.reachable_port_id, targ->lconf->id, targ->id);
name[3]++;
struct rte_mempool *ret = rte_mempool_create(name, NB_ARP_MBUF, ARP_MBUF_SIZE, NB_CACHE_ARP_MBUF,
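The next_hops[] table built above is indexed by the LPM next-hop value: entries loaded from the route table carry a gateway IP, the entry added for local_ipv4 has ip == 0 (direct route), and the last entry holds the default gateway. A tiny sketch (simplified types, not the PROX structs) of how write_dst_mac() then picks the address to ARP for:

    /* Sketch only: 'struct nh_entry' stands in for PROX's arp_table entries. */
    #include <stdint.h>

    struct nh_entry { uint32_t ip; /* 0 means "direct route" */ };

    static uint32_t arp_target(const struct nh_entry *next_hops,
                               uint32_t nh_index, uint32_t pkt_dst_ip)
    {
        /* Via a gateway: ARP for the gateway.
         * Direct route: ARP for the packet's own destination. */
        return next_hops[nh_index].ip ? next_hops[nh_index].ip : pkt_dst_ip;
    }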
@@ -284,8 +363,13 @@ void task_set_local_ip(struct task_base *tbase, uint32_t ip)
static void reset_arp_update_time(struct l3_base *l3, uint32_t ip)
{
uint32_t idx;
- plogx_info("\tMAC entry for IP "IPv4_BYTES_FMT" timeout in kernel\n", IP4(ip));
- if (ip == l3->gw.ip) {
+ plogx_dbg("MAC entry for IP "IPv4_BYTES_FMT" timeout in kernel\n", IP4(ip));
+
+ if (l3->ipv4_lpm) {
+ int ret = rte_hash_lookup(l3->ip_hash, (const void *)&ip);
+ if (ret >= 0)
+ l3->arp_table[ret].arp_update_time = 0;
+ } else if (ip == l3->gw.ip) {
l3->gw.arp_update_time = 0;
} else if (l3->n_pkts < 4) {
for (idx = 0; idx < l3->n_pkts; idx++) {
@@ -304,17 +388,34 @@ static void reset_arp_update_time(struct l3_base *l3, uint32_t ip)
return;
}
+static prox_next_hop_index_type get_nh_index(struct task_base *tbase, uint32_t gw_ip)
+{
+ // Check if gateway already exists
+ for (prox_next_hop_index_type i = 0; i < tbase->l3.nb_gws; i++) {
+ if (tbase->l3.next_hops[i].ip == gw_ip) {
+ return i;
+ }
+ }
+ if (tbase->l3.nb_gws < MAX_HOP_INDEX) {
+ tbase->l3.next_hops[tbase->l3.nb_gws].ip = gw_ip;
+ tbase->l3.nb_gws++;
+ return tbase->l3.nb_gws - 1;
+ } else
+ return MAX_HOP_INDEX;
+}
void handle_ctrl_plane_pkts(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts)
{
uint8_t out[1];
const uint64_t hz = rte_get_tsc_hz();
- uint32_t ip, ip_dst, idx;
- int j;
+ uint32_t ip, ip_dst, idx, gateway_ip, prefix;
+ prox_next_hop_index_type gateway_index;
+ int j, ret, modified_route;
uint16_t command;
struct ether_hdr_arp *hdr;
struct l3_base *l3 = &tbase->l3;
uint64_t tsc= rte_rdtsc();
- uint64_t update_time = l3->arp_timeout * hz / 1000;
+ uint64_t arp_timeout = l3->arp_timeout * hz / 1000;
+ uint32_t nh;
for (j = 0; j < n_pkts; ++j) {
PREFETCH0(mbufs[j]);
@@ -328,6 +429,38 @@ void handle_ctrl_plane_pkts(struct task_base *tbase, struct rte_mbuf **mbufs, ui
command = mbufs[j]->udata64 & 0xFFFF;
plogx_dbg("\tReceived %s mbuf %p\n", actions_string[command], mbufs[j]);
switch(command) {
+ case ROUTE_ADD_FROM_CTRL:
+ ip = ctrl_ring_get_ip(mbufs[j]);
+ gateway_ip = ctrl_ring_get_gateway_ip(mbufs[j]);
+ prefix = ctrl_ring_get_prefix(mbufs[j]);
+ gateway_index = get_nh_index(tbase, gateway_ip);
+ if (gateway_index >= MAX_HOP_INDEX) {
+ plog_err("Unable to find or define gateway index - too many\n");
+ return;
+ }
+ modified_route = rte_lpm_is_rule_present(tbase->l3.ipv4_lpm, rte_bswap32(ip), prefix, &nh);
+ ret = rte_lpm_add(tbase->l3.ipv4_lpm, rte_bswap32(ip), prefix, gateway_index);
+ if (ret < 0) {
+ plog_err("Failed to add route to "IPv4_BYTES_FMT"/%d using "IPv4_BYTES_FMT"(index = %d)\n", IP4(ip), prefix, IP4(gateway_ip), gateway_index);
+ } else if (modified_route)
+ plogx_dbg("Modified route to "IPv4_BYTES_FMT"/%d using "IPv4_BYTES_FMT"(index = %d) (was using "IPv4_BYTES_FMT"(index = %d)\n", IP4(ip), prefix, IP4(gateway_ip), gateway_index, IP4(tbase->l3.next_hops[nh].ip), nh);
+ else {
+ plogx_dbg("Added new route to "IPv4_BYTES_FMT"/%d using "IPv4_BYTES_FMT"(index = %d)\n", IP4(ip), prefix, IP4(gateway_ip), gateway_index);
+ }
+ break;
+ case ROUTE_DEL_FROM_CTRL:
+ ip = ctrl_ring_get_ip(mbufs[j]);
+ prefix = ctrl_ring_get_prefix(mbufs[j]);
+
+ ret = rte_lpm_is_rule_present(tbase->l3.ipv4_lpm, rte_bswap32(ip), prefix, &nh);
+ if (ret > 0) {
+ ret = rte_lpm_delete(tbase->l3.ipv4_lpm, rte_bswap32(ip), prefix);
+ if (ret < 0) {
+ plog_err("Failed to add rule\n");
+ }
+ plog_info("Deleting route to "IPv4_BYTES_FMT"/%d\n", IP4(ip), prefix);
+ }
+ break;
case UPDATE_FROM_CTRL:
hdr = rte_pktmbuf_mtod(mbufs[j], struct ether_hdr_arp *);
ip = (mbufs[j]->udata64 >> 32) & 0xFFFFFFFF;
@@ -337,16 +470,33 @@ void handle_ctrl_plane_pkts(struct task_base *tbase, struct rte_mbuf **mbufs, ui
// This will cause us to send new ARP request
// However, as arp_timeout not touched, we should continue sending our regular IP packets
reset_arp_update_time(l3, ip);
- plogx_info("\tTimeout for MAC entry for IP "IPv4_BYTES_FMT"\n", IP4(ip));
return;
} else
plogx_dbg("\tUpdating MAC entry for IP "IPv4_BYTES_FMT" with MAC "MAC_BYTES_FMT"\n",
IP4(ip), MAC_BYTES(hdr->arp.data.sha.addr_bytes));
- if (ip == l3->gw.ip) {
+
+ if (l3->ipv4_lpm) {
+ uint32_t nh;
+ struct arp_table *entry;
+ ret = rte_hash_add_key(l3->ip_hash, (const void *)&ip);
+ if (ret < 0) {
+ plogx_info("Unable add ip "IPv4_BYTES_FMT" in mac_hash\n", IP4(ip));
+ } else if ((nh = l3->arp_table[ret].nh) != MAX_HOP_INDEX) {
+ entry = &l3->next_hops[nh];
+ memcpy(&entry->mac, &(hdr->arp.data.sha), sizeof(prox_rte_ether_addr));
+ entry->arp_timeout = tsc + arp_timeout;
+ update_arp_update_time(l3, &entry->arp_update_time, l3->arp_update_time);
+ } else {
+ memcpy(&l3->arp_table[ret].mac, &(hdr->arp.data.sha), sizeof(prox_rte_ether_addr));
+ l3->arp_table[ret].arp_timeout = tsc + arp_timeout;
+ update_arp_update_time(l3, &l3->arp_table[ret].arp_update_time, l3->arp_update_time);
+ }
+ }
+ else if (ip == l3->gw.ip) {
// MAC address of the gateway
memcpy(&l3->gw.mac, &hdr->arp.data.sha, 6);
l3->flags |= FLAG_DST_MAC_KNOWN;
- l3->gw.arp_timeout = tsc + update_time;
+ l3->gw.arp_timeout = tsc + arp_timeout;
update_arp_update_time(l3, &l3->gw.arp_update_time, l3->arp_update_time);
} else if (l3->n_pkts < 4) {
// Few packets tracked - should be faster to loop through them than using a hash table
@@ -357,28 +507,40 @@ void handle_ctrl_plane_pkts(struct task_base *tbase, struct rte_mbuf **mbufs, ui
}
if (idx < l3->n_pkts) {
memcpy(&l3->optimized_arp_table[idx].mac, &(hdr->arp.data.sha), sizeof(prox_rte_ether_addr));
- l3->optimized_arp_table[idx].arp_timeout = tsc + update_time;
+ l3->optimized_arp_table[idx].arp_timeout = tsc + arp_timeout;
update_arp_update_time(l3, &l3->optimized_arp_table[idx].arp_update_time, l3->arp_update_time);
}
} else {
- int ret = rte_hash_add_key(l3->ip_hash, (const void *)&ip);
+ ret = rte_hash_add_key(l3->ip_hash, (const void *)&ip);
if (ret < 0) {
- plogx_info("Unable add ip %d.%d.%d.%d in mac_hash\n", IP4(ip));
+ plogx_info("Unable add ip "IPv4_BYTES_FMT" in mac_hash\n", IP4(ip));
} else {
memcpy(&l3->arp_table[ret].mac, &(hdr->arp.data.sha), sizeof(prox_rte_ether_addr));
- l3->arp_table[ret].arp_timeout = tsc + update_time;
+ l3->arp_table[ret].arp_timeout = tsc + arp_timeout;
update_arp_update_time(l3, &l3->arp_table[ret].arp_update_time, l3->arp_update_time);
}
}
tx_drop(mbufs[j]);
break;
case ARP_REPLY_FROM_CTRL:
- case ICMP_FROM_CTRL:
case ARP_REQ_FROM_CTRL:
+ out[0] = 0;
+ // tx_ctrlplane_pkt does not drop packets
+ plogx_dbg("\tForwarding (ARP) packet from master\n");
+ tbase->aux->tx_ctrlplane_pkt(tbase, &mbufs[j], 1, out);
+ TASK_STATS_ADD_TX_NON_DP(&tbase->aux->stats, 1);
+ break;
+ case ICMP_FROM_CTRL:
+ out[0] = 0;
+ // tx_ctrlplane_pkt does not drop packets
+ plogx_dbg("\tForwarding (PING) packet from master\n");
+ tbase->aux->tx_ctrlplane_pkt(tbase, &mbufs[j], 1, out);
+ TASK_STATS_ADD_TX_NON_DP(&tbase->aux->stats, 1);
+ break;
case PKT_FROM_TAP:
out[0] = 0;
// tx_ctrlplane_pkt does not drop packets
- plogx_dbg("\tForwarding (ARP/PING) packet from master\n");
+ plogx_dbg("\tForwarding TAP packet from master\n");
tbase->aux->tx_ctrlplane_pkt(tbase, &mbufs[j], 1, out);
TASK_STATS_ADD_TX_NON_DP(&tbase->aux->stats, 1);
break;