summaryrefslogtreecommitdiffstats
path: root/VNFs/DPPD-PROX/packet_utils.c
diff options
context:
space:
mode:
Diffstat (limited to 'VNFs/DPPD-PROX/packet_utils.c')
-rw-r--r--VNFs/DPPD-PROX/packet_utils.c461
1 files changed, 379 insertions, 82 deletions
diff --git a/VNFs/DPPD-PROX/packet_utils.c b/VNFs/DPPD-PROX/packet_utils.c
index 08178d8..466dd48 100644
--- a/VNFs/DPPD-PROX/packet_utils.c
+++ b/VNFs/DPPD-PROX/packet_utils.c
@@ -31,6 +31,9 @@
#include "prox_lua.h"
#include "hash_entry_types.h"
#include "prox_compat.h"
+#include "prox_cfg.h"
+#include "defines.h"
+#include "prox_ipv6.h"
#include "tx_pkt.h"
static inline int find_ip(struct ether_hdr_arp *pkt, uint16_t len, uint32_t *ip_dst)
@@ -76,15 +79,91 @@ static inline int find_ip(struct ether_hdr_arp *pkt, uint16_t len, uint32_t *ip_
return -1;
}
+/* Skip the Ethernet header and any stacked VLAN tags; for an IPv6 packet whose header fits in len, copy the
+ * destination address into ip_dst and return a pointer to the in-packet source address. NULL otherwise. */
+static inline struct ipv6_addr *find_ip6(prox_rte_ether_hdr *pkt, uint16_t len, struct ipv6_addr *ip_dst)
+{
+	prox_rte_vlan_hdr *vlan_hdr;
+	prox_rte_ipv6_hdr *ip;
+	uint16_t ether_type = pkt->ether_type;
+	uint16_t l2_len = sizeof(prox_rte_ether_hdr);
+
+	// Unstack VLAN tags, for as long as the next tag still lies inside the packet
+	while (((ether_type == ETYPE_8021ad) || (ether_type == ETYPE_VLAN)) && (l2_len + sizeof(prox_rte_vlan_hdr) < len)) {
+		vlan_hdr = (prox_rte_vlan_hdr *)((uint8_t *)pkt + l2_len);
+		l2_len += sizeof(prox_rte_vlan_hdr);
+		ether_type = vlan_hdr->eth_proto;
+	}
+
+	switch (ether_type) {
+	case ETYPE_IPv6:
+		break;
+	case ETYPE_MPLSU:
+	case ETYPE_MPLSM:
+		// In case of MPLS, next hop MAC is based on MPLS, not destination IP
+	case ETYPE_IPv4:
+	case ETYPE_EoGRE:
+	case ETYPE_ARP:
+		return NULL;
+	default:
+		plog_warn("Unsupported packet type %x - CRC might be wrong\n", ether_type);
+		return NULL;
+	}
+
+	// The IPv6 header must fit entirely within the len bytes of the packet
+	if (l2_len + sizeof(prox_rte_ipv6_hdr) > len)
+		return NULL;
+
+	ip = (prox_rte_ipv6_hdr *)((uint8_t *)pkt + l2_len);
+	// TODO: implement LPM => replace ip_dst by next hop IP DST
+	memcpy(ip_dst, &ip->dst_addr, sizeof(struct ipv6_addr));
+	// The returned pointer refers to the packet buffer itself, not to a copy
+	return (struct ipv6_addr *)&ip->src_addr;
+}
+
+static void send_unsollicited_neighbour_advertisement(struct task_base *tbase, struct task_args *targ)
+{
+	// Take one mbuf from the ARP/ND pool, let build_neighbour_advertisement() fill it (PROX_UNSOLLICITED), then send it via tx_ctrlplane_pkt
+	const uint8_t port_id = tbase->l3.reachable_port_id;
+	struct rte_mbuf *na_mbuf;
+	uint8_t tx_out = 0;
+
+	if (unlikely(rte_mempool_get(tbase->l3.arp_nd_pool, (void **)&na_mbuf) != 0)) {
+		plog_err("Failed to get a mbuf from arp/ndp mempool\n");
+		return;
+	}
+	na_mbuf->port = port_id;
+	build_neighbour_advertisement(tbase->l3.tmaster, na_mbuf, &prox_port_cfg[port_id].eth_addr, &targ->local_ipv6, PROX_UNSOLLICITED);
+	tbase->aux->tx_ctrlplane_pkt(tbase, &na_mbuf, 1, &tx_out);
+	TASK_STATS_ADD_TX_NON_DP(&tbase->aux->stats, 1);
+}
+
+static void send_router_sollicitation(struct task_base *tbase, struct task_args *targ)
+{
+	// Take one mbuf from the ARP/ND pool, let build_router_sollicitation() fill it, then send it via tx_ctrlplane_pkt
+	const uint8_t port_id = tbase->l3.reachable_port_id;
+	struct rte_mbuf *rs_mbuf;
+	uint8_t tx_out = 0;
+
+	if (unlikely(rte_mempool_get(tbase->l3.arp_nd_pool, (void **)&rs_mbuf) != 0)) {
+		plog_err("Failed to get a mbuf from arp/ndp mempool\n");
+		return;
+	}
+	rs_mbuf->port = port_id;
+	build_router_sollicitation(rs_mbuf, &prox_port_cfg[port_id].eth_addr, &targ->local_ipv6);
+	tbase->aux->tx_ctrlplane_pkt(tbase, &rs_mbuf, 1, &tx_out);
+	TASK_STATS_ADD_TX_NON_DP(&tbase->aux->stats, 1);
+}
+
/* This implementation could be improved: instead of checking each time we send a packet whether we need also
to send an ARP, we should only check whether the MAC is valid.
- We should check arp_update_time in the master process. This would also require the generating task to clear its arp ring
+ We should check arp_ndp_retransmit_timeout in the master process. This would also require the generating task to clear its arp ring
to avoid sending many ARP while starting after a long stop.
- We could also check for arp_timeout in the master so that dataplane has only to check whether MAC is available
- but this would require either thread safety, or the exchange of information between master and generating core.
-*/
+ We could also check for reachable_timeout in the master so that dataplane has only to check whether MAC is available
+ but this would require either thread safety, or the exchange of information between master and generating core.
+ */
-static inline int add_key_and_send_arp(struct rte_hash *ip_hash, uint32_t *ip_dst, struct arp_table *entries, uint64_t tsc, uint64_t hz, uint32_t arp_update_time, prox_next_hop_index_type nh, uint64_t **time)
+static inline int add_key_and_send_arp(struct rte_hash *ip_hash, uint32_t *ip_dst, struct arp_table *entries, uint64_t tsc, uint64_t hz, uint32_t arp_ndp_retransmit_timeout, prox_next_hop_index_type nh, uint64_t **time)
{
int ret = rte_hash_add_key(ip_hash, (const void *)ip_dst);
if (unlikely(ret < 0)) {
@@ -94,26 +173,26 @@ static inline int add_key_and_send_arp(struct rte_hash *ip_hash, uint32_t *ip_ds
} else {
entries[ret].ip = *ip_dst;
entries[ret].nh = nh;
- *time = &entries[ret].arp_update_time;
+ *time = &entries[ret].arp_ndp_retransmit_timeout;
}
- return SEND_ARP;
+ return SEND_ARP_ND;
}
-static inline int update_mac_and_send_mbuf(struct arp_table *entry, prox_rte_ether_addr *mac, uint64_t tsc, uint64_t hz, uint32_t arp_update_time, uint64_t **time)
+static inline int update_mac_and_send_mbuf(struct arp_table *entry, prox_rte_ether_addr *mac, uint64_t tsc, uint64_t hz, uint32_t arp_ndp_retransmit_timeout, uint64_t **time)
{
- if (likely((tsc < entry->arp_update_time) && (tsc < entry->arp_timeout))) {
+ if (likely((tsc < entry->arp_ndp_retransmit_timeout) && (tsc < entry->reachable_timeout))) {
memcpy(mac, &entry->mac, sizeof(prox_rte_ether_addr));
return SEND_MBUF;
- } else if (tsc > entry->arp_update_time) {
+ } else if (tsc > entry->arp_ndp_retransmit_timeout) {
// long time since we have sent an arp, send arp
- *time = &entry->arp_update_time;
- if (tsc < entry->arp_timeout){
+ *time = &entry->arp_ndp_retransmit_timeout;
+ if (tsc < entry->reachable_timeout){
// MAC is valid in the table => send also the mbuf
memcpy(mac, &entry->mac, sizeof(prox_rte_ether_addr));
- return SEND_MBUF_AND_ARP;
+ return SEND_MBUF_AND_ARP_ND;
} else {
// MAC still unknown, or timed out => only send ARP
- return SEND_ARP;
+ return SEND_ARP_ND;
}
}
// MAC is unknown and we already sent an ARP recently, drop mbuf and wait for ARP reply
@@ -154,7 +233,7 @@ int write_dst_mac(struct task_base *tbase, struct rte_mbuf *mbuf, uint32_t *ip_d
if (entry->ip) {
*ip_dst = entry->ip;
- return update_mac_and_send_mbuf(entry, mac, tsc, hz, l3->arp_update_time, time);
+ return update_mac_and_send_mbuf(entry, mac, tsc, hz, l3->arp_ndp_retransmit_timeout, time);
}
// no next ip: this is a local route
@@ -162,30 +241,32 @@ int write_dst_mac(struct task_base *tbase, struct rte_mbuf *mbuf, uint32_t *ip_d
int ret = rte_hash_lookup(l3->ip_hash, (const void *)ip_dst);
if (unlikely(ret < 0)) {
// IP not found, try to send an ARP
- return add_key_and_send_arp(l3->ip_hash, ip_dst, l3->arp_table, tsc, hz, l3->arp_update_time, MAX_HOP_INDEX, time);
+ return add_key_and_send_arp(l3->ip_hash, ip_dst, l3->arp_table, tsc, hz, l3->arp_ndp_retransmit_timeout, MAX_HOP_INDEX, time);
} else {
- return update_mac_and_send_mbuf(&l3->arp_table[ret], mac, tsc, hz, l3->arp_update_time, time);
+ return update_mac_and_send_mbuf(&l3->arp_table[ret], mac, tsc, hz, l3->arp_ndp_retransmit_timeout, time);
}
return 0;
}
// No Routing table specified: only a local ip and maybe a gateway
// Old default behavior: if a gw is specified, ALL packets go to this gateway (even those we could send w/o the gw
if (l3->gw.ip) {
- if (likely((l3->flags & FLAG_DST_MAC_KNOWN) && (tsc < l3->gw.arp_update_time) && (tsc < l3->gw.arp_timeout))) {
+ if (likely((l3->flags & FLAG_DST_MAC_KNOWN) && (tsc < l3->gw.arp_ndp_retransmit_timeout) && (tsc < l3->gw.reachable_timeout))) {
memcpy(mac, &l3->gw.mac, sizeof(prox_rte_ether_addr));
return SEND_MBUF;
- } else if (tsc > l3->gw.arp_update_time) {
+ } else if (tsc > l3->gw.arp_ndp_retransmit_timeout) {
// long time since we have successfully sent an arp, send arp
- // If sending ARP failed (ring full) then arp_update_time is not updated to avoid having to wait 1 sec to send ARP REQ again
- *time = &l3->gw.arp_update_time;
+ // If sending ARP failed (ring full) then arp_ndp_retransmit_timeout is not updated to avoid having to wait 1 sec to send ARP REQ again
+ *time = &l3->gw.arp_ndp_retransmit_timeout;
+ l3->gw.arp_ndp_retransmit_timeout = tsc + l3->arp_ndp_retransmit_timeout * hz / 1000;
+
*ip_dst = l3->gw.ip;
- if ((l3->flags & FLAG_DST_MAC_KNOWN) && (tsc < l3->gw.arp_timeout)){
+ if ((l3->flags & FLAG_DST_MAC_KNOWN) && (tsc < l3->gw.reachable_timeout)){
// MAC is valid in the table => send also the mbuf
memcpy(mac, &l3->gw.mac, sizeof(prox_rte_ether_addr));
- return SEND_MBUF_AND_ARP;
+ return SEND_MBUF_AND_ARP_ND;
} else {
// MAC still unknown, or timed out => only send ARP
- return SEND_ARP;
+ return SEND_ARP_ND;
}
} else {
// MAC is unknown and we already sent an ARP recently, drop mbuf and wait for ARP reply
@@ -201,17 +282,16 @@ int write_dst_mac(struct task_base *tbase, struct rte_mbuf *mbuf, uint32_t *ip_d
if (likely(l3->n_pkts < 4)) {
for (unsigned int idx = 0; idx < l3->n_pkts; idx++) {
if (*ip_dst == l3->optimized_arp_table[idx].ip) {
- // IP address already in table
- return update_mac_and_send_mbuf(&l3->optimized_arp_table[idx], mac, tsc, hz, l3->arp_update_time, time);
+ return update_mac_and_send_mbuf(&l3->optimized_arp_table[idx], mac, tsc, hz, l3->arp_ndp_retransmit_timeout, time);
}
}
// IP address not found in table
l3->optimized_arp_table[l3->n_pkts].ip = *ip_dst;
- *time = &l3->optimized_arp_table[l3->n_pkts].arp_update_time;
+ *time = &l3->optimized_arp_table[l3->n_pkts].arp_ndp_retransmit_timeout;
l3->n_pkts++;
if (l3->n_pkts < 4) {
- return SEND_ARP;
+ return SEND_ARP_ND;
}
// We have too many IP addresses to search linearly; lets use hash table instead => copy all entries in hash table
@@ -228,16 +308,137 @@ int write_dst_mac(struct task_base *tbase, struct rte_mbuf *mbuf, uint32_t *ip_d
memcpy(&l3->arp_table[ret], &l3->optimized_arp_table[idx], sizeof(struct arp_table));
}
}
- return SEND_ARP;
+ return SEND_ARP_ND;
} else {
// Find IP in lookup table. Send ARP if not found
int ret = rte_hash_lookup(l3->ip_hash, (const void *)ip_dst);
if (unlikely(ret < 0)) {
// IP not found, try to send an ARP
- return add_key_and_send_arp(l3->ip_hash, ip_dst, &l3->arp_table[ret], tsc, hz, l3->arp_update_time, MAX_HOP_INDEX, time);
+ return add_key_and_send_arp(l3->ip_hash, ip_dst, &l3->arp_table[ret], tsc, hz, l3->arp_ndp_retransmit_timeout, MAX_HOP_INDEX, time);
+ } else {
+ // IP has been found
+ return update_mac_and_send_mbuf(&l3->arp_table[ret], mac, tsc, hz, l3->arp_ndp_retransmit_timeout, time);
+ }
+ }
+ // Should not happen
+ return DROP_MBUF;
+}
+
+int write_ip6_dst_mac(struct task_base *tbase, struct rte_mbuf *mbuf, struct ipv6_addr *ip_dst)
+{
+ const uint64_t hz = rte_get_tsc_hz();
+ prox_rte_ether_hdr *packet = rte_pktmbuf_mtod(mbuf, prox_rte_ether_hdr *);
+ prox_rte_ether_addr *mac = &packet->d_addr;
+ struct ipv6_addr *used_ip_src;
+
+ uint64_t tsc = rte_rdtsc();
+ uint16_t len = rte_pktmbuf_pkt_len(mbuf);
+
+ struct ipv6_addr *pkt_src_ip6;
+ if ((pkt_src_ip6 = find_ip6(packet, len, ip_dst)) == NULL) {
+ // Unable to find IP address => non IP packet => send it as is
+ return SEND_MBUF;
+ }
+ struct l3_base *l3 = &(tbase->l3);
+ if (memcmp(&l3->local_ipv6, ip_dst, 8) == 0) {
+ // Same prefix as local -> use local
+ used_ip_src = &l3->local_ipv6;
+ } else if (memcmp(&l3->global_ipv6 , &null_addr, 16) != 0) {
+ // Global IP is defined -> use it
+ used_ip_src = &l3->global_ipv6;
+ } else {
+ plog_info("Error as trying to send a packet to "IPv6_BYTES_FMT" using "IPv6_BYTES_FMT" (local)\n", IPv6_BYTES(ip_dst->bytes), IPv6_BYTES(l3->local_ipv6.bytes));
+ return DROP_MBUF;
+ }
+
+ memcpy(pkt_src_ip6, used_ip_src, sizeof(struct ipv6_addr));
+ if (likely(l3->n_pkts < 4)) {
+ for (unsigned int idx = 0; idx < l3->n_pkts; idx++) {
+ if (memcmp(ip_dst, &l3->optimized_arp_table[idx].ip6, sizeof(struct ipv6_addr)) == 0) {
+ // IP address already in table
+ if ((tsc < l3->optimized_arp_table[idx].arp_ndp_retransmit_timeout) && (tsc < l3->optimized_arp_table[idx].reachable_timeout)) {
+ // MAC address was recently updated in table, use it
+ // plog_dbg("Valid MAC address found => send packet\n");
+ memcpy(mac, &l3->optimized_arp_table[idx].mac, sizeof(prox_rte_ether_addr));
+ return SEND_MBUF;
+ } else if (tsc > l3->optimized_arp_table[idx].arp_ndp_retransmit_timeout) {
+ // NDP not sent since a long time, send NDP
+ l3->optimized_arp_table[idx].arp_ndp_retransmit_timeout = tsc + l3->arp_ndp_retransmit_timeout * hz / 1000;
+ if (tsc < l3->optimized_arp_table[idx].reachable_timeout) {
+ // MAC still valid => also send mbuf
+ plog_dbg("Valid MAC found but NDP retransmit timeout => send packet and NDP\n");
+ memcpy(mac, &l3->optimized_arp_table[idx].mac, sizeof(prox_rte_ether_addr));
+ return SEND_MBUF_AND_ARP_ND;
+ } else {
+ plog_dbg("Unknown MAC => send NDP but cannot send packet\n");
+ // MAC invalid => only send NDP
+ return SEND_ARP_ND;
+ }
+ } else {
+ // NDP timeout elapsed, MAC not valid anymore but waiting for NDP reply
+ // plog_dbg("NDP reachable timeout elapsed - waiting for NDP reply\n");
+ return DROP_MBUF;
+ }
+ }
+ }
+ // IP address not found in table
+ memcpy(&l3->optimized_arp_table[l3->n_pkts].ip6, ip_dst, sizeof(struct ipv6_addr));
+ l3->optimized_arp_table[l3->n_pkts].arp_ndp_retransmit_timeout = tsc + l3->arp_ndp_retransmit_timeout * hz / 1000;
+ l3->n_pkts++;
+
+ if (l3->n_pkts < 4) {
+ return SEND_ARP_ND;
+ }
+
+ // We have too many IP addresses to search linearly; lets use hash table instead => copy all entries in hash table
+ for (uint32_t idx = 0; idx < l3->n_pkts; idx++) {
+ struct ipv6_addr *ip6 = &l3->optimized_arp_table[idx].ip6;
+ int ret = rte_hash_add_key(l3->ip6_hash, (const void *)ip6);
+ if (ret < 0) {
+ // This should not happen as few entries so far.
+ // If it happens, we still send the NDP as easier:
+ // If the NDP corresponds to this error, the NDP reply will be ignored
+ // If NDP does not correspond to this error/ip, then NDP reply will be handled.
+ plogx_err("Unable add ip "IPv6_BYTES_FMT" in mac_hash (already %d entries)\n", IPv6_BYTES(ip6->bytes), idx);
+ } else {
+ memcpy(&l3->arp_table[ret], &l3->optimized_arp_table[idx], sizeof(struct arp_table));
+ }
+ }
+ return SEND_ARP_ND;
+ } else {
+ // Find IP in lookup table. Send ND if not found
+ int ret = rte_hash_lookup(l3->ip6_hash, (const void *)ip_dst);
+ if (unlikely(ret < 0)) {
+ // IP not found, try to send an ND
+ int ret = rte_hash_add_key(l3->ip6_hash, (const void *)ip_dst);
+ if (ret < 0) {
+ // No reason to send NDP, as reply would be anyhow ignored
+ plogx_err("Unable to add ip "IPv6_BYTES_FMT" in mac_hash\n", IPv6_BYTES(ip_dst->bytes));
+ return DROP_MBUF;
+ } else {
+ memcpy(&l3->arp_table[ret].ip6, ip_dst, sizeof(struct ipv6_addr));
+ l3->arp_table[ret].arp_ndp_retransmit_timeout = tsc + l3->arp_ndp_retransmit_timeout * hz / 1000;
+ }
+ return SEND_ARP_ND;
} else {
// IP has been found
- return update_mac_and_send_mbuf(&l3->arp_table[ret], mac, tsc, hz, l3->arp_update_time, time);
+ if (likely((tsc < l3->arp_table[ret].arp_ndp_retransmit_timeout) && (tsc < l3->arp_table[ret].reachable_timeout))) {
+ // MAC still valid and NDP sent recently
+ memcpy(mac, &l3->arp_table[ret].mac, sizeof(prox_rte_ether_addr));
+ return SEND_MBUF;
+ } else if (tsc > l3->arp_table[ret].arp_ndp_retransmit_timeout) {
+ // NDP not sent since a long time, send NDP
+ l3->arp_table[ret].arp_ndp_retransmit_timeout = tsc + l3->arp_ndp_retransmit_timeout * hz / 1000;
+ if (tsc < l3->arp_table[ret].reachable_timeout) {
+ // MAC still valid => send also MBUF
+ memcpy(mac, &l3->arp_table[ret].mac, sizeof(prox_rte_ether_addr));
+ return SEND_MBUF_AND_ARP_ND;
+ } else {
+ return SEND_ARP_ND;
+ }
+ } else {
+ return DROP_MBUF;
+ }
}
}
// Should not happen
@@ -260,12 +461,22 @@ void task_init_l3(struct task_base *tbase, struct task_args *targ)
.hash_func = rte_hash_crc,
.hash_func_init_val = 0,
};
- tbase->l3.ip_hash = rte_hash_create(&hash_params);
- PROX_PANIC(tbase->l3.ip_hash == NULL, "Failed to set up ip hash table\n");
+ if (targ->flags & TASK_ARG_L3) {
+ plog_info("\tInitializing L3 (IPv4)\n");
+ tbase->l3.ip_hash = rte_hash_create(&hash_params);
+ PROX_PANIC(tbase->l3.ip_hash == NULL, "Failed to set up ip hash table\n");
+ hash_name[0]++;
+ }
+ if (targ->flags & TASK_ARG_NDP) {
+ plog_info("\tInitializing NDP (IPv6)\n");
+ hash_params.key_len = sizeof(struct ipv6_addr);
+ tbase->l3.ip6_hash = rte_hash_create(&hash_params);
+ PROX_PANIC(tbase->l3.ip6_hash == NULL, "Failed to set up ip hash table\n");
+ }
tbase->l3.arp_table = (struct arp_table *)prox_zmalloc(n_entries * sizeof(struct arp_table), socket_id);
- PROX_PANIC(tbase->l3.arp_table == NULL, "Failed to allocate memory for %u entries in arp table\n", n_entries);
- plog_info("\tarp table, with %d entries of size %ld\n", n_entries, sizeof(struct l3_base));
+ PROX_PANIC(tbase->l3.arp_table == NULL, "Failed to allocate memory for %u entries in arp/ndp table\n", n_entries);
+ plog_info("\tarp/ndp table, with %d entries of size %ld\n", n_entries, sizeof(struct l3_base));
targ->lconf->ctrl_func_p[targ->task] = handle_ctrl_plane_pkts;
targ->lconf->ctrl_timeout = freq_to_tsc(targ->ctrl_freq);
@@ -275,36 +486,36 @@ void task_init_l3(struct task_base *tbase, struct task_args *targ)
tbase->l3.task_id = targ->id;
tbase->l3.tmaster = targ->tmaster;
tbase->l3.seed = (uint)rte_rdtsc();
- if (targ->arp_timeout != 0)
- tbase->l3.arp_timeout = targ->arp_timeout;
+ if (targ->reachable_timeout != 0)
+ tbase->l3.reachable_timeout = targ->reachable_timeout;
else
- tbase->l3.arp_timeout = DEFAULT_ARP_TIMEOUT;
- if (targ->arp_update_time != 0)
- tbase->l3.arp_update_time = targ->arp_update_time;
+ tbase->l3.reachable_timeout = DEFAULT_ARP_TIMEOUT;
+ if (targ->arp_ndp_retransmit_timeout != 0)
+ tbase->l3.arp_ndp_retransmit_timeout = targ->arp_ndp_retransmit_timeout;
else
- tbase->l3.arp_update_time = DEFAULT_ARP_UPDATE_TIME;
+ tbase->l3.arp_ndp_retransmit_timeout = DEFAULT_ARP_UPDATE_TIME;
}
void task_start_l3(struct task_base *tbase, struct task_args *targ)
{
- const int NB_ARP_MBUF = 1024;
- const int ARP_MBUF_SIZE = 2048;
- const int NB_CACHE_ARP_MBUF = 256;
const int socket_id = rte_lcore_to_socket_id(targ->lconf->id);
+ const int NB_ARP_ND_MBUF = 1024;
+ const int ARP_ND_MBUF_SIZE = 2048;
+ const int NB_CACHE_ARP_ND_MBUF = 256;
struct prox_port_cfg *port = find_reachable_port(targ);
- if (port && (tbase->l3.arp_pool == NULL)) {
+ if (port && (tbase->l3.arp_nd_pool == NULL)) {
static char name[] = "arp0_pool";
tbase->l3.reachable_port_id = port - prox_port_cfg;
if (targ->local_ipv4) {
- tbase->local_ipv4 = rte_be_to_cpu_32(targ->local_ipv4);
- register_ip_to_ctrl_plane(tbase->l3.tmaster, tbase->local_ipv4, tbase->l3.reachable_port_id, targ->lconf->id, targ->id);
+ tbase->l3.local_ipv4 = rte_be_to_cpu_32(targ->local_ipv4);
+ register_ip_to_ctrl_plane(tbase->l3.tmaster, tbase->l3.local_ipv4, tbase->l3.reachable_port_id, targ->lconf->id, targ->id);
}
if (strcmp(targ->route_table, "") != 0) {
struct lpm4 *lpm;
int ret;
- PROX_PANIC(tbase->local_ipv4 == 0, "missing local_ipv4 will route table is specified in L3 mode\n");
+ PROX_PANIC(tbase->l3.local_ipv4 == 0, "missing local_ipv4 will route table is specified in L3 mode\n");
// LPM might be modified runtime => do not share with other cores
ret = lua_to_lpm4(prox_lua(), GLOBAL, targ->route_table, socket_id, &lpm);
@@ -330,7 +541,7 @@ void task_start_l3(struct task_base *tbase, struct task_args *targ)
// Last but one "next_hop_index" is not a gateway but direct routes
tbase->l3.next_hops[tbase->l3.nb_gws].ip = 0;
ret = rte_lpm_add(tbase->l3.ipv4_lpm, targ->local_ipv4, targ->local_prefix, tbase->l3.nb_gws++);
- PROX_PANIC(ret, "Failed to add local_ipv4 "IPv4_BYTES_FMT"/%d to lpm\n", IP4(tbase->local_ipv4), targ->local_prefix);
+ PROX_PANIC(ret, "Failed to add local_ipv4 "IPv4_BYTES_FMT"/%d to lpm\n", IP4(tbase->l3.local_ipv4), targ->local_prefix);
// Last "next_hop_index" is default gw
tbase->l3.next_hops[tbase->l3.nb_gws].ip = rte_bswap32(targ->gateway_ipv4);
if (targ->gateway_ipv4) {
@@ -340,15 +551,50 @@ void task_start_l3(struct task_base *tbase, struct task_args *targ)
}
master_init_vdev(tbase->l3.tmaster, tbase->l3.reachable_port_id, targ->lconf->id, targ->id);
+
+ // Create IPv6 addr if none were configured
+ if (targ->flags & TASK_ARG_NDP) {
+ if (!memcmp(&targ->local_ipv6, &null_addr, 16)) {
+ set_link_local(&targ->local_ipv6);
+ set_EUI(&targ->local_ipv6, &port->eth_addr);
+ }
+ plog_info("\tCore %d, task %d, local IPv6 addr is "IPv6_BYTES_FMT" (%s)\n",
+ targ->lconf->id, targ->id,
+ IPv6_BYTES(targ->local_ipv6.bytes),
+ IP6_Canonical(&targ->local_ipv6));
+ memcpy(&tbase->l3.local_ipv6, &targ->local_ipv6, sizeof(struct ipv6_addr));
+
+ if (memcmp(&targ->global_ipv6, &null_addr, sizeof(struct ipv6_addr))) {
+ memcpy(&tbase->l3.global_ipv6, &targ->global_ipv6, sizeof(struct ipv6_addr));
+ plog_info("\tCore %d, task %d, global IPv6 addr is "IPv6_BYTES_FMT" (%s)\n",
+ targ->lconf->id, targ->id,
+ IPv6_BYTES(targ->global_ipv6.bytes),
+ IP6_Canonical(&targ->global_ipv6));
+ }
+ if (targ->ipv6_router)
+ register_router_to_ctrl_plane(tbase->l3.tmaster, tbase->l3.reachable_port_id, targ->lconf->id, targ->id, &targ->local_ipv6, &targ->global_ipv6, &targ->router_prefix);
+ else
+ register_node_to_ctrl_plane(tbase->l3.tmaster, &targ->local_ipv6, &targ->global_ipv6, tbase->l3.reachable_port_id, targ->lconf->id, targ->id);
+ }
+
name[3]++;
- struct rte_mempool *ret = rte_mempool_create(name, NB_ARP_MBUF, ARP_MBUF_SIZE, NB_CACHE_ARP_MBUF,
+ struct rte_mempool *ret = rte_mempool_create(name, NB_ARP_ND_MBUF, ARP_ND_MBUF_SIZE, NB_CACHE_ARP_ND_MBUF,
sizeof(struct rte_pktmbuf_pool_private), rte_pktmbuf_pool_init, NULL, rte_pktmbuf_init, 0,
rte_socket_id(), 0);
- PROX_PANIC(ret == NULL, "Failed to allocate ARP memory pool on socket %u with %u elements\n",
- rte_socket_id(), NB_ARP_MBUF);
- plog_info("\t\tMempool %p (%s) size = %u * %u cache %u, socket %d\n", ret, name, NB_ARP_MBUF,
- ARP_MBUF_SIZE, NB_CACHE_ARP_MBUF, rte_socket_id());
- tbase->l3.arp_pool = ret;
+ PROX_PANIC(ret == NULL, "Failed to allocate ARP/ND memory pool on socket %u with %u elements\n",
+ rte_socket_id(), NB_ARP_ND_MBUF);
+ plog_info("\tMempool %p (%s) size = %u * %u cache %u, socket %d (for ARP/ND)\n", ret, name, NB_ARP_ND_MBUF,
+ ARP_ND_MBUF_SIZE, NB_CACHE_ARP_ND_MBUF, rte_socket_id());
+ tbase->l3.arp_nd_pool = ret;
+ if ((targ->flags & TASK_ARG_NDP) && (!targ->ipv6_router)) {
+ plog_info("Sending Router Sollicitation\n");
+ send_router_sollicitation(tbase, targ);
+ }
+ if ((targ->flags & TASK_ARG_NDP) && (targ->flags & TASK_ARG_SEND_NA_AT_STARTUP)) {
+ plog_info("Sending unsollicited Neighbour Advertisement\n");
+ send_unsollicited_neighbour_advertisement(tbase, targ);
+
+ }
}
}
@@ -360,10 +606,10 @@ void task_set_gateway_ip(struct task_base *tbase, uint32_t ip)
void task_set_local_ip(struct task_base *tbase, uint32_t ip)
{
- tbase->local_ipv4 = ip;
+ tbase->l3.local_ipv4 = ip; // Overwrites the task's local IPv4 address, now kept in the l3 sub-structure. NOTE(review): task_start_l3 stores this field via rte_be_to_cpu_32() - confirm callers pass the same byte order here
}
-static void reset_arp_update_time(struct l3_base *l3, uint32_t ip)
+static void reset_arp_ndp_retransmit_timeout(struct l3_base *l3, uint32_t ip)
{
uint32_t idx;
plogx_dbg("MAC entry for IP "IPv4_BYTES_FMT" timeout in kernel\n", IP4(ip));
@@ -371,9 +617,9 @@ static void reset_arp_update_time(struct l3_base *l3, uint32_t ip)
if (l3->ipv4_lpm) {
int ret = rte_hash_lookup(l3->ip_hash, (const void *)&ip);
if (ret >= 0)
- l3->arp_table[ret].arp_update_time = 0;
+ l3->arp_table[ret].arp_ndp_retransmit_timeout = 0;
} else if (ip == l3->gw.ip) {
- l3->gw.arp_update_time = 0;
+ l3->gw.arp_ndp_retransmit_timeout = 0;
} else if (l3->n_pkts < 4) {
for (idx = 0; idx < l3->n_pkts; idx++) {
uint32_t ip_dst = l3->optimized_arp_table[idx].ip;
@@ -381,12 +627,12 @@ static void reset_arp_update_time(struct l3_base *l3, uint32_t ip)
break;
}
if (idx < l3->n_pkts) {
- l3->optimized_arp_table[idx].arp_update_time = 0;
+ l3->optimized_arp_table[idx].arp_ndp_retransmit_timeout = 0;
}
} else {
int ret = rte_hash_lookup(l3->ip_hash, (const void *)&ip);
if (ret >= 0)
- l3->arp_table[ret].arp_update_time = 0;
+ l3->arp_table[ret].arp_ndp_retransmit_timeout = 0;
}
return;
}
@@ -413,15 +659,18 @@ void handle_ctrl_plane_pkts(struct task_base *tbase, struct rte_mbuf **mbufs, ui
uint32_t ip, ip_dst, idx, gateway_ip, prefix;
prox_next_hop_index_type gateway_index;
int j, ret, modified_route;
+ uint64_t addr;
+ struct ipv6_addr *ip6, *ip6_dst;
uint16_t command;
prox_rte_ether_hdr *hdr;
struct ether_hdr_arp *hdr_arp;
struct l3_base *l3 = &tbase->l3;
uint64_t tsc= rte_rdtsc();
- uint64_t arp_timeout = l3->arp_timeout * hz / 1000;
+ uint64_t reachable_timeout = l3->reachable_timeout * hz / 1000;
uint32_t nh;
prox_rte_ipv4_hdr *pip;
prox_rte_udp_hdr *udp_hdr;
+ uint8_t port = tbase->l3.reachable_port_id;
for (j = 0; j < n_pkts; ++j) {
PREFETCH0(mbufs[j]);
@@ -434,10 +683,10 @@ void handle_ctrl_plane_pkts(struct task_base *tbase, struct rte_mbuf **mbufs, ui
pip = NULL;
udp_hdr = NULL;
out[0] = OUT_HANDLED;
- command = mbufs[j]->udata64 & 0xFFFF;
+ command = get_command(mbufs[j]);
plogx_dbg("\tReceived %s mbuf %p\n", actions_string[command], mbufs[j]);
switch(command) {
- case ROUTE_ADD_FROM_CTRL:
+ case ROUTE_ADD_FROM_MASTER:
ip = ctrl_ring_get_ip(mbufs[j]);
gateway_ip = ctrl_ring_get_gateway_ip(mbufs[j]);
prefix = ctrl_ring_get_prefix(mbufs[j]);
@@ -457,7 +706,7 @@ void handle_ctrl_plane_pkts(struct task_base *tbase, struct rte_mbuf **mbufs, ui
}
tx_drop(mbufs[j]);
break;
- case ROUTE_DEL_FROM_CTRL:
+ case ROUTE_DEL_FROM_MASTER:
ip = ctrl_ring_get_ip(mbufs[j]);
prefix = ctrl_ring_get_prefix(mbufs[j]);
@@ -471,15 +720,15 @@ void handle_ctrl_plane_pkts(struct task_base *tbase, struct rte_mbuf **mbufs, ui
}
tx_drop(mbufs[j]);
break;
- case UPDATE_FROM_CTRL:
+ case MAC_INFO_FROM_MASTER:
hdr_arp = rte_pktmbuf_mtod(mbufs[j], struct ether_hdr_arp *);
- ip = (mbufs[j]->udata64 >> 32) & 0xFFFFFFFF;
+ ip = get_ip(mbufs[j]);
if (prox_rte_is_zero_ether_addr(&hdr_arp->arp.data.sha)) {
// MAC timeout or deleted from kernel table => reset update_time
// This will cause us to send new ARP request
- // However, as arp_timeout not touched, we should continue sending our regular IP packets
- reset_arp_update_time(l3, ip);
+ // However, as reachable_timeout not touched, we should continue sending our regular IP packets
+ reset_arp_ndp_retransmit_timeout(l3, ip);
return;
} else
plogx_dbg("\tUpdating MAC entry for IP "IPv4_BYTES_FMT" with MAC "MAC_BYTES_FMT"\n",
@@ -494,20 +743,20 @@ void handle_ctrl_plane_pkts(struct task_base *tbase, struct rte_mbuf **mbufs, ui
} else if ((nh = l3->arp_table[ret].nh) != MAX_HOP_INDEX) {
entry = &l3->next_hops[nh];
memcpy(&entry->mac, &(hdr_arp->arp.data.sha), sizeof(prox_rte_ether_addr));
- entry->arp_timeout = tsc + arp_timeout;
- update_arp_update_time(l3, &entry->arp_update_time, l3->arp_update_time);
+ entry->reachable_timeout = tsc + reachable_timeout;
+ update_arp_ndp_retransmit_timeout(l3, &entry->arp_ndp_retransmit_timeout, l3->arp_ndp_retransmit_timeout);
} else {
memcpy(&l3->arp_table[ret].mac, &(hdr_arp->arp.data.sha), sizeof(prox_rte_ether_addr));
- l3->arp_table[ret].arp_timeout = tsc + arp_timeout;
- update_arp_update_time(l3, &l3->arp_table[ret].arp_update_time, l3->arp_update_time);
+ l3->arp_table[ret].reachable_timeout = tsc + reachable_timeout;
+ update_arp_ndp_retransmit_timeout(l3, &l3->arp_table[ret].arp_ndp_retransmit_timeout, l3->arp_ndp_retransmit_timeout);
}
}
else if (ip == l3->gw.ip) {
// MAC address of the gateway
memcpy(&l3->gw.mac, &hdr_arp->arp.data.sha, 6);
l3->flags |= FLAG_DST_MAC_KNOWN;
- l3->gw.arp_timeout = tsc + arp_timeout;
- update_arp_update_time(l3, &l3->gw.arp_update_time, l3->arp_update_time);
+ l3->gw.reachable_timeout = tsc + reachable_timeout;
+ update_arp_ndp_retransmit_timeout(l3, &l3->gw.arp_ndp_retransmit_timeout, l3->arp_ndp_retransmit_timeout);
} else if (l3->n_pkts < 4) {
// Few packets tracked - should be faster to loop through them thean using a hash table
for (idx = 0; idx < l3->n_pkts; idx++) {
@@ -517,8 +766,8 @@ void handle_ctrl_plane_pkts(struct task_base *tbase, struct rte_mbuf **mbufs, ui
}
if (idx < l3->n_pkts) {
memcpy(&l3->optimized_arp_table[idx].mac, &(hdr_arp->arp.data.sha), sizeof(prox_rte_ether_addr));
- l3->optimized_arp_table[idx].arp_timeout = tsc + arp_timeout;
- update_arp_update_time(l3, &l3->optimized_arp_table[idx].arp_update_time, l3->arp_update_time);
+ l3->optimized_arp_table[idx].reachable_timeout = tsc + reachable_timeout;
+ update_arp_ndp_retransmit_timeout(l3, &l3->optimized_arp_table[idx].arp_ndp_retransmit_timeout, l3->arp_ndp_retransmit_timeout);
}
} else {
ret = rte_hash_add_key(l3->ip_hash, (const void *)&ip);
@@ -526,21 +775,49 @@ void handle_ctrl_plane_pkts(struct task_base *tbase, struct rte_mbuf **mbufs, ui
plogx_info("Unable add ip "IPv4_BYTES_FMT" in mac_hash\n", IP4(ip));
} else {
memcpy(&l3->arp_table[ret].mac, &(hdr_arp->arp.data.sha), sizeof(prox_rte_ether_addr));
- l3->arp_table[ret].arp_timeout = tsc + arp_timeout;
- update_arp_update_time(l3, &l3->arp_table[ret].arp_update_time, l3->arp_update_time);
+ l3->arp_table[ret].reachable_timeout = tsc + reachable_timeout;
+ update_arp_ndp_retransmit_timeout(l3, &l3->arp_table[ret].arp_ndp_retransmit_timeout, l3->arp_ndp_retransmit_timeout);
+ }
+ }
+ tx_drop(mbufs[j]);
+ break;
+ case MAC_INFO_FROM_MASTER_FOR_IPV6:
+ ip6 = ctrl_ring_get_ipv6_addr(mbufs[j]);
+ uint64_t data = ctrl_ring_get_data(mbufs[j]);
+
+ if (l3->n_pkts < 4) {
+ // Few packets tracked - should be faster to loop through them than using a hash table
+ for (idx = 0; idx < l3->n_pkts; idx++) {
+ ip6_dst = &l3->optimized_arp_table[idx].ip6;
+ if (memcmp(ip6_dst, ip6, sizeof(struct ipv6_addr)) == 0)
+ break;
+ }
+ if (idx < l3->n_pkts) {
+ // IP found; this is a reply for one of our requests!
+ memcpy(&l3->optimized_arp_table[idx].mac, &data, sizeof(prox_rte_ether_addr));
+ l3->optimized_arp_table[idx].reachable_timeout = tsc + l3->reachable_timeout * hz / 1000;
+ }
+ } else {
+ int ret = rte_hash_add_key(l3->ip6_hash, (const void *)ip6);
+ if (ret < 0) {
+ plogx_info("Unable add ip "IPv6_BYTES_FMT" in mac_hash\n", IPv6_BYTES(ip6->bytes));
+ } else {
+ memcpy(&l3->arp_table[ret].mac, &data, sizeof(prox_rte_ether_addr));
+ l3->arp_table[ret].reachable_timeout = tsc + l3->reachable_timeout * hz / 1000;
}
}
tx_drop(mbufs[j]);
break;
- case ARP_REPLY_FROM_CTRL:
- case ARP_REQ_FROM_CTRL:
+ case SEND_NDP_FROM_MASTER:
+ case SEND_ARP_REQUEST_FROM_MASTER:
+ case SEND_ARP_REPLY_FROM_MASTER:
out[0] = 0;
// tx_ctrlplane_pkt does not drop packets
plogx_dbg("\tForwarding (ARP) packet from master\n");
tbase->aux->tx_ctrlplane_pkt(tbase, &mbufs[j], 1, out);
TASK_STATS_ADD_TX_NON_DP(&tbase->aux->stats, 1);
break;
- case ICMP_FROM_CTRL:
+ case SEND_ICMP_FROM_MASTER:
out[0] = 0;
// tx_ctrlplane_pkt does not drop packets
plogx_dbg("\tForwarding (PING) packet from master\n");
@@ -584,6 +861,26 @@ void handle_ctrl_plane_pkts(struct task_base *tbase, struct rte_mbuf **mbufs, ui
tbase->aux->tx_ctrlplane_pkt(tbase, &mbufs[j], 1, out);
TASK_STATS_ADD_TX_NON_DP(&tbase->aux->stats, 1);
break;
+ case IPV6_INFO_FROM_MASTER:
+ // addr = ctrl_ring_get_data(mbufs[j]);
+ ip6 = ctrl_ring_get_ipv6_addr(mbufs[j]);
+ if (memcmp(&l3->global_ipv6 , &null_addr, 16) == 0) {
+ memcpy(&l3->global_ipv6, ip6, sizeof(struct ipv6_addr));
+ plog_info("Core %d task %d received global IP "IPv6_BYTES_FMT"\n", l3->core_id, l3->task_id, IPv6_BYTES(ip6->bytes));
+ } else if (memcmp(&l3->global_ipv6, ip6, 8) == 0) {
+ if (l3->prefix_printed == 0) {
+ plog_info("Core %d task %d received expected prefix "IPv6_PREFIX_FMT"\n", l3->core_id, l3->task_id, IPv6_PREFIX(ip6->bytes));
+ l3->prefix_printed = 1;
+ }
+ } else {
+ plog_warn("Core %d task %d received unexpected prefix "IPv6_PREFIX_FMT", IP = "IPv6_PREFIX_FMT"\n", l3->core_id, l3->task_id, IPv6_PREFIX(ip6->bytes), IPv6_PREFIX(l3->global_ipv6.bytes));
+ }
+ tx_drop(mbufs[j]);
+ break;
+ default:
+ plog_err("Unexpected message received: %d\n", command);
+ tx_drop(mbufs[j]);
+ break;
}
}
}