diff options
Diffstat (limited to 'VNFs/DPPD-PROX/handle_master.c')
-rw-r--r-- | VNFs/DPPD-PROX/handle_master.c | 590 |
1 files changed, 485 insertions, 105 deletions
diff --git a/VNFs/DPPD-PROX/handle_master.c b/VNFs/DPPD-PROX/handle_master.c index ce5c0bc5..a528a681 100644 --- a/VNFs/DPPD-PROX/handle_master.c +++ b/VNFs/DPPD-PROX/handle_master.c @@ -19,7 +19,6 @@ #include <sys/socket.h> #include <linux/netlink.h> #include <linux/rtnetlink.h> -#include <poll.h> #include <net/if.h> #include <rte_hash.h> @@ -44,6 +43,8 @@ #include "input.h" #include "tx_pkt.h" #include "defines.h" +#include "prox_ipv6.h" +#include "packet_utils.h" #define PROX_MAX_ARP_REQUESTS 32 // Maximum number of tasks requesting the same MAC address #define NETLINK_BUF_SIZE 16384 @@ -51,18 +52,24 @@ static char netlink_buf[NETLINK_BUF_SIZE]; const char *actions_string[] = { - "UPDATE_FROM_CTRL", // Controlplane sending a MAC update to dataplane - "ROUTE_ADD_FROM_CTRL", // Controlplane sending a new route to dataplane - "ROUTE_DEL_FROM_CTRL", // Controlplane deleting a new route from dataplane - "SEND_ARP_REQUEST_FROM_CTRL", // Controlplane requesting dataplane to send ARP request - "SEND_ARP_REPLY_FROM_CTRL", // Controlplane requesting dataplane to send ARP reply - "SEND_ICMP_FROM_CTRL", // Controlplane requesting dataplane to send ICMP message - "SEND_BGP_FROM_CTRL", // Controlplane requesting dataplane to send BGP message - "ARP_TO_CTRL", // ARP sent by datplane to Controlpane for handling - "ICMP_TO_CTRL", // ICMP sent by datplane to Controlpane for handling - "BGP_TO_CTRL", // BGP sent by datplane to Controlpane for handling - "REQ_MAC_TO_CTRL", // Dataplane requesting MAC resolution to Controlplane + "MAC_INFO_FROM_MASTER", // Controlplane sending a MAC update to dataplane + "MAC_INFO_FROM_MASTER_FOR_IPV6",// Controlplane sending a MAC update to dataplane + "IPV6_INFO_FROM_MASTER", // Controlplane IPv6 Global IP info to dataplane + "ROUTE_ADD_FROM_MASTER", // Controlplane sending a new route to dataplane + "ROUTE_DEL_FROM_MASTER", // Controlplane deleting a new route from dataplane + "SEND_ARP_REQUEST_FROM_MASTER", // Controlplane requesting dataplane 
to send ARP request + "SEND_ARP_REPLY_FROM_MASTER", // Controlplane requesting dataplane to send ARP reply + "SEND_NDP_FROM_MASTER", // Controlplane requesting dataplane to send NDP + "SEND_ICMP_FROM_MASTER", // Controlplane requesting dataplane to send ICMP message + "SEND_BGP_FROM_MASTER", // Controlplane requesting dataplane to send BGP message + "ARP_PKT_FROM_NET_TO_MASTER", // ARP sent by dataplane to Controlplane for handling + "NDP_PKT_FROM_NET_TO_MASTER", // NDP sent by dataplane to Controlplane for handling + "ICMP_TO_MASTER", // ICMP sent by dataplane to Controlplane for handling + "BGP_TO_MASTER", // BGP sent by dataplane to Controlplane for handling + "IP4_REQ_MAC_TO_MASTER", // Dataplane requesting MAC resolution to Controlplane + "IP6_REQ_MAC_TO_MASTER", // Dataplane requesting MAC resolution to Controlplane "PKT_FROM_TAP" // Packet received by Controlplane from kernel and forwarded to dataplane for sending + }; static struct my_arp_t arp_reply = { @@ -80,68 +87,64 @@ static struct my_arp_t arp_request = { .oper = 0x100 }; -struct ip_table { - prox_rte_ether_addr mac; - struct rte_ring *ring; -}; - -struct external_ip_table { - prox_rte_ether_addr mac; - struct rte_ring *rings[PROX_MAX_ARP_REQUESTS]; - uint16_t nb_requests; -}; - -struct port_table { - prox_rte_ether_addr mac; - struct rte_ring *ring; - uint32_t ip; - uint8_t port; - uint8_t flags; - uint64_t last_echo_req_rcvd_tsc; - uint64_t last_echo_rep_rcvd_tsc; - uint32_t n_echo_req; - uint32_t n_echo_rep; -}; - -struct task_master { - struct task_base base; - struct rte_ring *ctrl_rx_ring; - struct rte_ring **ctrl_tx_rings; - struct ip_table *internal_ip_table; - struct external_ip_table *external_ip_table; - struct rte_hash *external_ip_hash; - struct rte_hash *internal_ip_hash; - struct port_table internal_port_table[PROX_MAX_PORTS]; - struct vdev all_vdev[PROX_MAX_PORTS]; - int max_vdev_id; - struct pollfd arp_fds; - struct pollfd route_fds; -}; - struct ip_port { uint32_t ip; uint8_t port; } 
__attribute__((packed)); -static inline uint8_t get_command(struct rte_mbuf *mbuf) -{ - return mbuf->udata64 & 0xFF; -} -static inline uint8_t get_task(struct rte_mbuf *mbuf) -{ - return (mbuf->udata64 >> 8) & 0xFF; -} -static inline uint8_t get_core(struct rte_mbuf *mbuf) -{ - return (mbuf->udata64 >> 16) & 0xFF; -} -static inline uint8_t get_port(struct rte_mbuf *mbuf) +struct ip6_port { + struct ipv6_addr ip6; + uint8_t port; +} __attribute__((packed)); + +void register_router_to_ctrl_plane(struct task_base *tbase, uint8_t port_id, uint8_t core_id, uint8_t task_id, struct ipv6_addr *local_ipv6_addr, struct ipv6_addr *global_ipv6_addr, struct ipv6_addr *router_prefix) { - return mbuf->port; + struct task_master *task = (struct task_master *)tbase; + // Reject an out-of-range port before internal_port_table[port_id] is written + if (port_id >= PROX_MAX_PORTS) { + plog_err("Unable to register router, port %d\n", port_id); + return; + } + task->internal_port_table[port_id].flags |= IPV6_ROUTER; + memcpy(&task->internal_port_table[port_id].router_prefix, router_prefix, sizeof(struct ipv6_addr)); + register_node_to_ctrl_plane(tbase, local_ipv6_addr, global_ipv6_addr, port_id, core_id, task_id); } -static inline uint32_t get_ip(struct rte_mbuf *mbuf) + +void register_node_to_ctrl_plane(struct task_base *tbase, struct ipv6_addr *local_ipv6_addr, struct ipv6_addr *global_ipv6_addr, uint8_t port_id, uint8_t core_id, uint8_t task_id) { - return (mbuf->udata64 >> 32) & 0xFFFFFFFF; + struct task_master *task = (struct task_master *)tbase; + // Validate port_id first: the flags lookup below indexes internal_port_table[port_id] + if (port_id >= PROX_MAX_PORTS) { + plog_err("Unable to register router, port %d\n", port_id); + return; + } + if (task->internal_port_table[port_id].flags & IPV6_ROUTER) + plogx_dbg("\tregistering router with port %d core %d and task %d\n", port_id, core_id, task_id); + else + plogx_dbg("\tregistering node with port %d core %d and task %d\n", port_id, core_id, task_id); + + task->internal_port_table[port_id].ring = task->ctrl_tx_rings[core_id * MAX_TASKS_PER_CORE + task_id]; + memcpy(&task->internal_port_table[port_id].mac, &prox_port_cfg[port_id].eth_addr, sizeof(prox_rte_ether_addr)); + 
memcpy(&task->internal_port_table[port_id].local_ipv6_addr, local_ipv6_addr, sizeof(struct ipv6_addr)); + if (memcmp(local_ipv6_addr, &prox_cfg.random_ip, sizeof(struct ipv6_addr)) == 0) { + task->internal_port_table[port_id].flags |= HANDLE_RANDOM_LOCAL_IP_FLAG; + return; + } + memcpy(&task->internal_port_table[port_id].global_ipv6_addr, global_ipv6_addr, sizeof(struct ipv6_addr)); + if (memcmp(global_ipv6_addr, &prox_cfg.random_ip, sizeof(struct ipv6_addr)) == 0) { + task->internal_port_table[port_id].flags |= HANDLE_RANDOM_GLOBAL_IP_FLAG; + return; + } + struct ip6_port key; + memcpy(&key.ip6, local_ipv6_addr, sizeof(struct ipv6_addr)); + key.port = port_id; + int ret = rte_hash_add_key(task->internal_ip6_hash, (const void *)&key); + if (unlikely(ret < 0)) { + plog_err("Unable to register ip "IPv6_BYTES_FMT"\n", IPv6_BYTES(local_ipv6_addr->bytes)); + return; + } + memcpy(&key.ip6, global_ipv6_addr, sizeof(struct ipv6_addr)); + ret = rte_hash_add_key(task->internal_ip6_hash, (const void *)&key); + if (unlikely(ret < 0)) { + plog_err("Unable to register ip "IPv6_BYTES_FMT"\n", IPv6_BYTES(global_ipv6_addr->bytes)); + return; + } + memcpy(&task->internal_ip6_table[ret].mac, &prox_port_cfg[port_id].eth_addr, sizeof(prox_rte_ether_addr)); + task->internal_ip6_table[ret].ring = task->ctrl_tx_rings[core_id * MAX_TASKS_PER_CORE + task_id]; } void master_init_vdev(struct task_base *tbase, uint8_t port_id, uint8_t core_id, uint8_t task_id) @@ -188,7 +191,7 @@ void register_ip_to_ctrl_plane(struct task_base *tbase, uint32_t ip, uint8_t por return; } - /* TODO - stoe multiple rings if multiple cores able to handle IP + /* TODO - store multiple rings if multiple cores able to handle IP Remove them when such cores are stopped and de-register IP */ task->internal_port_table[port_id].ring = task->ctrl_tx_rings[core_id * MAX_TASKS_PER_CORE + task_id]; @@ -209,7 +212,6 @@ void register_ip_to_ctrl_plane(struct task_base *tbase, uint32_t ip, uint8_t por } 
memcpy(&task->internal_ip_table[ret].mac, &prox_port_cfg[port_id].eth_addr, sizeof(prox_rte_ether_addr)); task->internal_ip_table[ret].ring = task->ctrl_tx_rings[core_id * MAX_TASKS_PER_CORE + task_id]; - } static inline void handle_arp_reply(struct task_base *tbase, struct rte_mbuf *mbuf) @@ -232,7 +234,7 @@ static inline void handle_arp_reply(struct task_base *tbase, struct rte_mbuf *mb rte_mbuf_refcnt_set(mbuf, nb_requests); for (int i = 0; i < nb_requests; i++) { struct rte_ring *ring = task->external_ip_table[ret].rings[i]; - tx_ring_ip(tbase, ring, UPDATE_FROM_CTRL, mbuf, key); + tx_ring_ip(tbase, ring, MAC_INFO_FROM_MASTER, mbuf, key); } task->external_ip_table[ret].nb_requests = 0; } else { @@ -258,7 +260,7 @@ static inline void handle_arp_request(struct task_base *tbase, struct rte_mbuf * create_mac(hdr_arp, &mac); mbuf->ol_flags &= ~(PKT_TX_IP_CKSUM|PKT_TX_UDP_CKSUM); build_arp_reply(hdr_arp, &mac); - tx_ring(tbase, ring, ARP_REPLY_FROM_CTRL, mbuf); + tx_ring(tbase, ring, SEND_ARP_REPLY_FROM_MASTER, mbuf); return; } @@ -273,7 +275,7 @@ static inline void handle_arp_request(struct task_base *tbase, struct rte_mbuf * struct rte_ring *ring = task->internal_ip_table[ret].ring; mbuf->ol_flags &= ~(PKT_TX_IP_CKSUM|PKT_TX_UDP_CKSUM); build_arp_reply(hdr_arp, &task->internal_ip_table[ret].mac); - tx_ring(tbase, ring, ARP_REPLY_FROM_CTRL, mbuf); + tx_ring(tbase, ring, SEND_ARP_REPLY_FROM_MASTER, mbuf); } } @@ -337,7 +339,7 @@ static inline void handle_unknown_ip(struct task_base *tbase, struct rte_mbuf *m // We send an ARP request even if one was just sent (and not yet answered) by another task mbuf->ol_flags &= ~(PKT_TX_IP_CKSUM|PKT_TX_UDP_CKSUM); build_arp_request(mbuf, &task->internal_port_table[port].mac, ip_dst, ip_src); - tx_ring(tbase, ring, ARP_REQ_FROM_CTRL, mbuf); + tx_ring(tbase, ring, SEND_ARP_REQUEST_FROM_MASTER, mbuf); } static inline void build_icmp_reply_message(struct task_base *tbase, struct rte_mbuf *mbuf) @@ -365,7 +367,7 @@ static inline void 
build_icmp_reply_message(struct task_base *tbase, struct rte_ } else { struct rte_ring *ring = task->internal_ip_table[ret].ring; mbuf->ol_flags &= ~(PKT_TX_IP_CKSUM|PKT_TX_UDP_CKSUM); - tx_ring(tbase, ring, ICMP_FROM_CTRL, mbuf); + tx_ring(tbase, ring, SEND_ICMP_FROM_MASTER, mbuf); } } @@ -411,10 +413,306 @@ static inline void handle_icmp(struct task_base *tbase, struct rte_mbuf *mbuf) return; } -static inline void handle_message(struct task_base *tbase, struct rte_mbuf *mbuf, int ring_id) +static inline void handle_unknown_ip6(struct task_base *tbase, struct rte_mbuf *mbuf) { struct task_master *task = (struct task_master *)tbase; struct ether_hdr_arp *hdr_arp = rte_pktmbuf_mtod(mbuf, struct ether_hdr_arp *); + uint8_t port = get_port(mbuf); + struct ipv6_addr *ip_dst = ctrl_ring_get_ipv6_addr(mbuf); + int ret1, ret2, i; + + plogx_dbg("\tMaster trying to find MAC of external IP "IPv6_BYTES_FMT" for port %d\n", IPv6_BYTES(ip_dst->bytes), port); + if (unlikely(port >= PROX_MAX_PORTS)) { + plogx_dbg("Port %d not found", port); + tx_drop(mbuf); + return; + } + struct ipv6_addr *local_ip_src = &task->internal_port_table[port].local_ipv6_addr; + struct ipv6_addr *global_ip_src = &task->internal_port_table[port].global_ipv6_addr; + struct ipv6_addr *ip_src; + if (memcmp(local_ip_src, ip_dst, 8) == 0) + ip_src = local_ip_src; + else if (memcmp(global_ip_src, &null_addr, 16)) + ip_src = global_ip_src; + else { + plogx_dbg("Unable to find a src ip for dst ip "IPv6_BYTES_FMT"\n", IPv6_BYTES(ip_dst->bytes)); + tx_drop(mbuf); + return; + } + struct rte_ring *ring = task->ctrl_tx_rings[get_core(mbuf) * MAX_TASKS_PER_CORE + get_task(mbuf)]; + + if (ring == NULL) { + plogx_dbg("Port %d not registered", port); + tx_drop(mbuf); + return; + } + + ret2 = rte_hash_add_key(task->external_ip6_hash, (const void *)ip_dst); + if (unlikely(ret2 < 0)) { + plogx_dbg("Unable to add IP "IPv6_BYTES_FMT" in external_ip6_hash\n", IPv6_BYTES(ip_dst->bytes)); + tx_drop(mbuf); + return; + } + + // 
If multiple tasks requesting the same info, we will need to send a reply to all of them + // However if one task sends multiple requests to the same IP (e.g. because it is not answering) + // then we should not send multiple replies to the same task + if (task->external_ip6_table[ret2].nb_requests >= PROX_MAX_ARP_REQUESTS) { + // This can only happen if really many tasks requests the same IP + plogx_dbg("Unable to add request for IP "IPv6_BYTES_FMT" in external_ip6_table\n", IPv6_BYTES(ip_dst->bytes)); + tx_drop(mbuf); + return; + } + for (i = 0; i < task->external_ip6_table[ret2].nb_requests; i++) { + if (task->external_ip6_table[ret2].rings[i] == ring) + break; + } + if (i >= task->external_ip6_table[ret2].nb_requests) { + // If this is a new request i.e. a new task requesting a new IP + task->external_ip6_table[ret2].rings[task->external_ip6_table[ret2].nb_requests] = ring; + task->external_ip6_table[ret2].nb_requests++; + // Only needed for first request - but avoid test and copy the same 6 bytes + // In most cases we will only have one request per IP. 
+ //memcpy(&task->external_ip6_table[ret2].mac, &task->internal_port_table[port].mac, sizeof(prox_rte_ether_addr)); + } + + // As timers are not handled by master, we might send an NS request even if one was just sent + // (and not yet answered) by another task + build_neighbour_sollicitation(mbuf, &task->internal_port_table[port].mac, ip_dst, ip_src); + tx_ring(tbase, ring, SEND_NDP_FROM_MASTER, mbuf); +} + +static inline void handle_rs(struct task_base *tbase, struct rte_mbuf *mbuf) +{ + struct task_master *task = (struct task_master *)tbase; + prox_rte_ether_hdr *hdr = rte_pktmbuf_mtod(mbuf, prox_rte_ether_hdr *); + prox_rte_ipv6_hdr *ipv6_hdr = (prox_rte_ipv6_hdr *)(hdr + 1); + int i, ret; + uint8_t port = get_port(mbuf); + + if (task->internal_port_table[port].flags & IPV6_ROUTER) { + plogx_dbg("\tMaster handling Router Solicitation from ip "IPv6_BYTES_FMT" on port %d\n", IPv6_BYTES(ipv6_hdr->src_addr), port); + struct rte_ring *ring = task->internal_port_table[port].ring; + build_router_advertisement(mbuf, &prox_port_cfg[port].eth_addr, &task->internal_port_table[port].local_ipv6_addr, &task->internal_port_table[port].router_prefix); + tx_ring(tbase, ring, SEND_NDP_FROM_MASTER, mbuf); + return; + } + // Port is not flagged IPV6_ROUTER: no advertisement is sent, so release the mbuf instead of leaking it + tx_drop(mbuf); +} + +static inline void handle_ra(struct task_base *tbase, struct rte_mbuf *mbuf) +{ + struct task_master *task = (struct task_master *)tbase; + prox_rte_ether_hdr *hdr = rte_pktmbuf_mtod(mbuf, prox_rte_ether_hdr *); + prox_rte_ipv6_hdr *ipv6_hdr = (prox_rte_ipv6_hdr *)(hdr + 1); + int i, ret, send = 0; + uint8_t port = get_port(mbuf); + struct rte_ring *ring = task->internal_port_table[port].ring; + + plog_dbg("Master handling Router Advertisement from ip "IPv6_BYTES_FMT" on port %d - len = %d; payload_len = %d\n", IPv6_BYTES(ipv6_hdr->src_addr), port, rte_pktmbuf_pkt_len(mbuf), rte_be_to_cpu_16(ipv6_hdr->payload_len)); + if (rte_be_to_cpu_16(ipv6_hdr->payload_len) + sizeof(prox_rte_ipv6_hdr) + sizeof(prox_rte_ether_hdr) > rte_pktmbuf_pkt_len(mbuf)) { + 
plog_err("Unexpected length received: pkt_len = %d, ipv6 hdr length = %ld, ipv6 payload len = %d\n", rte_pktmbuf_pkt_len(mbuf), sizeof(prox_rte_ipv6_hdr), rte_be_to_cpu_16(ipv6_hdr->payload_len)); + tx_drop(mbuf); + return; + } + if (ring == NULL) { + plog_info("TX side not initialized yet => dropping\n"); + tx_drop(mbuf); + return; + } + int16_t option_len = rte_be_to_cpu_16(ipv6_hdr->payload_len) - sizeof(struct icmpv6_RA) + sizeof(struct icmpv6_option); + struct icmpv6_RA *router_advertisement = (struct icmpv6_RA *)(ipv6_hdr + 1); + struct icmpv6_option *option = (struct icmpv6_option *)&router_advertisement->options; + struct icmpv6_prefix_option *prefix_option; + while(option_len > 0) { + uint8_t type = option->type; + switch(type) { + case ICMPv6_source_link_layer_address: + plog_dbg("\tOption %d = Source Link Layer Address\n", type); + break; + case ICMPv6_prefix_information: + prefix_option = (struct icmpv6_prefix_option *)option; + plog_dbg("\tOption %d = Prefix Information = %s\n", type, IP6_Canonical(&prefix_option->prefix)); + send = 1; + break; + case ICMPv6_mtu: + plog_dbg("\tOption %d = MTU\n", type); + break; + default: + plog_dbg("\tOption %d = Unknown Option\n", type); + break; + } + if ((option->length == 0) || (option->length *8 > option_len)) { + plog_err("Unexpected option length (%d) received in option %d: %d\n", option->length, option->type, option->length); + send = 0; + break; + } + option_len -=option->length * 8; + option = (struct icmpv6_option *)(((uint8_t *)option) + option->length * 8); + } + if (send) { + struct ipv6_addr global_ipv6; + memcpy(&global_ipv6, &prefix_option->prefix, sizeof(struct ipv6_addr)); + set_EUI(&global_ipv6, &task->internal_port_table[port].mac); + tx_ring_ip6(tbase, ring, IPV6_INFO_FROM_MASTER, mbuf, &global_ipv6); + } else + tx_drop(mbuf); +} + +static inline void handle_ns(struct task_base *tbase, struct rte_mbuf *mbuf) +{ + struct task_master *task = (struct task_master *)tbase; + prox_rte_ether_hdr *hdr = 
rte_pktmbuf_mtod(mbuf, prox_rte_ether_hdr *); + prox_rte_ipv6_hdr *ipv6_hdr = (prox_rte_ipv6_hdr *)(hdr + 1); + struct icmpv6_NS *neighbour_sollicitation = (struct icmpv6_NS *)(ipv6_hdr + 1); + int i, ret; + uint8_t port = get_port(mbuf); + struct rte_ring *ring = task->internal_port_table[port].ring; + + plog_dbg("Master handling Neighbour Sollicitation for ip "IPv6_BYTES_FMT" on port %d - len = %d; payload_len = %d\n", IPv6_BYTES(neighbour_sollicitation->target_address.bytes), port, rte_pktmbuf_pkt_len(mbuf), rte_be_to_cpu_16(ipv6_hdr->payload_len)); + if (rte_be_to_cpu_16(ipv6_hdr->payload_len) + sizeof(prox_rte_ipv6_hdr) + sizeof(prox_rte_ether_hdr) > rte_pktmbuf_pkt_len(mbuf)) { + plog_err("Unexpected length received: pkt_len = %d, ipv6 hdr length = %ld, ipv6 payload len = %d\n", rte_pktmbuf_pkt_len(mbuf), sizeof(prox_rte_ipv6_hdr), rte_be_to_cpu_16(ipv6_hdr->payload_len)); + tx_drop(mbuf); + return; + } + int16_t option_len = rte_be_to_cpu_16(ipv6_hdr->payload_len) - sizeof(struct icmpv6_NS) + sizeof(struct icmpv6_option); + struct icmpv6_option *option = (struct icmpv6_option *)&neighbour_sollicitation->options; + while(option_len > 0) { + uint8_t type = option->type; + switch(type) { + case ICMPv6_source_link_layer_address: + plog_dbg("Option %d = Source Link Layer Address\n", type); + break; + default: + plog_dbg("Option %d = Unknown Option\n", type); + break; + } + if ((option->length == 0) || (option->length *8 > option_len)) { + plog_err("Unexpected option length (%d) received in option %d: %d\n", option->length, option->type, option->length); + tx_drop(mbuf); + return; + } + option_len -=option->length * 8; + option = (struct icmpv6_option *)(((uint8_t *)option) + option->length * 8); + } + struct ip6_port key; + memcpy(&key.ip6, &neighbour_sollicitation->target_address, sizeof(struct ipv6_addr)); + key.port = port; + + if (memcmp(&neighbour_sollicitation->target_address, &task->internal_port_table[port].local_ipv6_addr, 8) == 0) { + // Local IP + if 
(task->internal_port_table[port].flags & HANDLE_RANDOM_LOCAL_IP_FLAG) { + prox_rte_ether_addr mac; + plogx_dbg("\tMaster handling NS request for ip "IPv6_BYTES_FMT" on port %d which supports random ip\n", IPv6_BYTES(key.ip6.bytes), key.port); + struct rte_ring *ring = task->internal_port_table[port].ring; + create_mac_from_EUI(&key.ip6, &mac); + build_neighbour_advertisement(tbase, mbuf, &mac, &task->internal_port_table[port].local_ipv6_addr, PROX_SOLLICITED); + tx_ring(tbase, ring, SEND_NDP_FROM_MASTER, mbuf); + return; + } + } else { + if (task->internal_port_table[port].flags & HANDLE_RANDOM_GLOBAL_IP_FLAG) { + prox_rte_ether_addr mac; + plogx_dbg("\tMaster handling NS request for ip "IPv6_BYTES_FMT" on port %d which supports random ip\n", IPv6_BYTES(key.ip6.bytes), key.port); + struct rte_ring *ring = task->internal_port_table[port].ring; + create_mac_from_EUI(&key.ip6, &mac); + build_neighbour_advertisement(tbase, mbuf, &mac, &task->internal_port_table[port].global_ipv6_addr, PROX_SOLLICITED); + tx_ring(tbase, ring, SEND_NDP_FROM_MASTER, mbuf); + return; + } + } + + ret = rte_hash_lookup(task->internal_ip6_hash, (const void *)&key); + if (unlikely(ret < 0)) { + // entry not found for this IP. 
+ plogx_dbg("Master ignoring Neighbour Sollicitation received on un-registered IP "IPv6_BYTES_FMT" on port %d\n", IPv6_BYTES(key.ip6.bytes), port); + tx_drop(mbuf); + } else { + struct rte_ring *ring = task->internal_ip6_table[ret].ring; + build_neighbour_advertisement(tbase, mbuf, &task->internal_ip6_table[ret].mac, &key.ip6, PROX_SOLLICITED); + tx_ring(tbase, ring, SEND_NDP_FROM_MASTER, mbuf); + } +} + +static inline void handle_na(struct task_base *tbase, struct rte_mbuf *mbuf) +{ + struct task_master *task = (struct task_master *)tbase; + prox_rte_ether_hdr *hdr = rte_pktmbuf_mtod(mbuf, prox_rte_ether_hdr *); + prox_rte_ipv6_hdr *ipv6_hdr = (prox_rte_ipv6_hdr *)(hdr + 1); + struct icmpv6_NA *neighbour_advertisement = (struct icmpv6_NA *)(ipv6_hdr + 1); + int i, ret; + uint8_t port = get_port(mbuf); + struct rte_ring *ring = task->internal_port_table[port].ring; + + plog_dbg("Master handling Neighbour Advertisement for ip "IPv6_BYTES_FMT" on port %d - len = %d; payload_len = %d\n", IPv6_BYTES(neighbour_advertisement->destination_address.bytes), port, rte_pktmbuf_pkt_len(mbuf), rte_be_to_cpu_16(ipv6_hdr->payload_len)); + if (rte_be_to_cpu_16(ipv6_hdr->payload_len) + sizeof(prox_rte_ipv6_hdr) + sizeof(prox_rte_ether_hdr) > rte_pktmbuf_pkt_len(mbuf)) { + plog_err("Unexpected length received: pkt_len = %d, ipv6 hdr length = %ld, ipv6 payload len = %d\n", rte_pktmbuf_pkt_len(mbuf), sizeof(prox_rte_ipv6_hdr), rte_be_to_cpu_16(ipv6_hdr->payload_len)); + tx_drop(mbuf); + return; + } + int16_t option_len = rte_be_to_cpu_16(ipv6_hdr->payload_len) - sizeof(struct icmpv6_NA) + sizeof(struct icmpv6_option); + struct icmpv6_option *option = (struct icmpv6_option *)&neighbour_advertisement->options; + uint8_t *target_address = NULL; + while(option_len > 0) { + uint8_t type = option->type; + switch(type) { + case ICMPv6_source_link_layer_address: + plog_dbg("Option %d = Source Link Layer Address\n", type); + break; + case ICMPv6_target_link_layer_address: + if (option->length 
!= 1) { + plog_err("Unexpected option length = %u for Target Link Layer Address\n", option->length); + break; + } + target_address = option->data; + plog_dbg("Option %d = Target Link Layer Address = "MAC_BYTES_FMT"\n", type, MAC_BYTES(target_address)); + break; + default: + plog_dbg("Option %d = Unknown Option\n", type); + break; + } + if ((option->length == 0) || (option->length *8 > option_len)) { + plog_err("Unexpected option length (%d) received in option %d: %d\n", option->length, option->type, option->length); + tx_drop(mbuf); + return; + } + option_len -=option->length * 8; + option = (struct icmpv6_option *)(((uint8_t *)option) + option->length * 8); + } + + if (target_address == NULL) { + // No Target Link Layer Address option was found: stop here, target_address is dereferenced below + tx_drop(mbuf); + return; + } + struct ether_hdr_arp *hdr_arp = rte_pktmbuf_mtod(mbuf, struct ether_hdr_arp *); + struct ipv6_addr *key = &neighbour_advertisement->destination_address; + + ret = rte_hash_lookup(task->external_ip6_hash, (const void *)key); + if (unlikely(ret < 0)) { + // entry not found for this IP: we did not ask a request, delete the reply + tx_drop(mbuf); + } else { + // entry found for this IP + uint16_t nb_requests = task->external_ip6_table[ret].nb_requests; + //memcpy(&hdr->d_addr.addr_bytes, &task->external_ip6_table[ret].mac, sizeof(prox_rte_ether_addr)); + // If we receive a request from multiple task for the same IP, then we update all tasks + if (task->external_ip6_table[ret].nb_requests) { + rte_mbuf_refcnt_set(mbuf, nb_requests); + for (int i = 0; i < nb_requests; i++) { + struct rte_ring *ring = task->external_ip6_table[ret].rings[i]; + tx_ring_ip6_data(tbase, ring, MAC_INFO_FROM_MASTER_FOR_IPV6, mbuf, &neighbour_advertisement->destination_address, *(uint64_t *)target_address); + } + task->external_ip6_table[ret].nb_requests = 0; + } else { + tx_drop(mbuf); + } + } +} + +static inline void handle_message(struct task_base *tbase, struct rte_mbuf *mbuf, int ring_id) +{ + struct task_master *task = (struct task_master *)tbase; + struct ether_hdr_arp 
*hdr_arp; + prox_rte_ether_hdr *hdr; + struct icmpv6 *icmpv6; int command = get_command(mbuf); uint8_t port = get_port(mbuf); uint32_t ip; @@ -422,7 +720,7 @@ static inline void handle_message(struct task_base *tbase, struct rte_mbuf *mbuf plogx_dbg("\tMaster received %s (%x) from mbuf %p\n", actions_string[command], command, mbuf); switch(command) { - case BGP_TO_CTRL: + case BGP_TO_MASTER: if (vdev_port != NO_VDEV_PORT) { // If a virtual (net_tap) device is attached, send the (BGP) packet to this device // The kernel will receive and handle it. @@ -432,7 +730,7 @@ static inline void handle_message(struct task_base *tbase, struct rte_mbuf *mbuf } tx_drop(mbuf); break; - case ICMP_TO_CTRL: + case ICMP_TO_MASTER: if (vdev_port != NO_VDEV_PORT) { // If a virtual (net_tap) device is attached, send the (PING) packet to this device // The kernel will receive and handle it. @@ -442,7 +740,7 @@ static inline void handle_message(struct task_base *tbase, struct rte_mbuf *mbuf } handle_icmp(tbase, mbuf); break; - case ARP_TO_CTRL: + case ARP_PKT_FROM_NET_TO_MASTER: if (vdev_port != NO_VDEV_PORT) { // If a virtual (net_tap) device is attached, send the (ARP) packet to this device // The kernel will receive and handle it. 
@@ -450,8 +748,9 @@ static inline void handle_message(struct task_base *tbase, struct rte_mbuf *mbuf int n = rte_eth_tx_burst(prox_port_cfg[port].dpdk_mapping, 0, &mbuf, 1); return; } + hdr_arp = rte_pktmbuf_mtod(mbuf, struct ether_hdr_arp *); if (hdr_arp->ether_hdr.ether_type != ETYPE_ARP) { - plog_err("\tUnexpected message received: ARP_TO_CTRL with ether_type %x\n", hdr_arp->ether_hdr.ether_type); + plog_err("\tUnexpected message received: ARP_PKT_FROM_NET_TO_MASTER with ether_type %x\n", hdr_arp->ether_hdr.ether_type); tx_drop(mbuf); return; } else if (arp_is_gratuitous(hdr_arp)) { @@ -469,7 +768,7 @@ static inline void handle_message(struct task_base *tbase, struct rte_mbuf *mbuf return; } break; - case REQ_MAC_TO_CTRL: + case IP4_REQ_MAC_TO_MASTER: if (vdev_port != NO_VDEV_PORT) { // We send a packet to the kernel with the proper destnation IP address and our src IP address // This means that if a generator sends packets from many sources all ARP will still @@ -489,9 +788,9 @@ static inline void handle_message(struct task_base *tbase, struct rte_mbuf *mbuf int ret = rte_hash_lookup(task->external_ip_hash, (const void *)&ip); if ((ret >= 0) && (!prox_rte_is_zero_ether_addr(&task->external_ip_table[ret].mac))) { memcpy(&hdr_arp->arp.data.sha, &task->external_ip_table[ret].mac, sizeof(prox_rte_ether_addr)); - plogx_dbg("\tMaster ready to send UPDATE_FROM_CTRL ip "IPv4_BYTES_FMT" with mac "MAC_BYTES_FMT"\n", + plogx_dbg("\tMaster ready to send MAC_INFO_FROM_MASTER ip "IPv4_BYTES_FMT" with mac "MAC_BYTES_FMT"\n", IP4(ip), MAC_BYTES(hdr_arp->arp.data.sha.addr_bytes)); - tx_ring_ip(tbase, ring, UPDATE_FROM_CTRL, mbuf, ip); + tx_ring_ip(tbase, ring, MAC_INFO_FROM_MASTER, mbuf, ip); return; } @@ -508,6 +807,61 @@ static inline void handle_message(struct task_base *tbase, struct rte_mbuf *mbuf } handle_unknown_ip(tbase, mbuf); break; + case IP6_REQ_MAC_TO_MASTER: + handle_unknown_ip6(tbase, mbuf); + break; + case NDP_PKT_FROM_NET_TO_MASTER: + hdr = 
rte_pktmbuf_mtod(mbuf, prox_rte_ether_hdr *); + prox_rte_ipv6_hdr *ipv6_hdr = (prox_rte_ipv6_hdr *)(hdr + 1); + if (unlikely((hdr->ether_type != ETYPE_IPv6) || (ipv6_hdr->proto != ICMPv6))) { + // Should not happen + if (hdr->ether_type != ETYPE_IPv6) + plog_err("\tUnexpected message received: NDP_PKT_FROM_NET_TO_MASTER with ether_type %x\n", hdr->ether_type); + else + plog_err("\tUnexpected message received: NDP_PKT_FROM_NET_TO_MASTER with ether_type %x and proto %x\n", hdr->ether_type, ipv6_hdr->proto); + tx_drop(mbuf); + return; + } + icmpv6 = (struct icmpv6 *)(ipv6_hdr + 1); + switch (icmpv6->type) { + case ICMPv6_DU: + plog_err("IPV6 ICMPV6 Destination Unreachable\n"); + tx_drop(mbuf); + break; + case ICMPv6_PTB: + plog_err("IPV6 ICMPV6 packet too big\n"); + tx_drop(mbuf); + break; + case ICMPv6_TE: + plog_err("IPV6 ICMPV6 Time Exceeded\n"); + tx_drop(mbuf); + break; + case ICMPv6_PaPr: + plog_err("IPV6 ICMPV6 Parameter Problem\n"); + tx_drop(mbuf); + break; + case ICMPv6_RS: + handle_rs(tbase, mbuf); + break; + case ICMPv6_RA: + handle_ra(tbase, mbuf); + break; + case ICMPv6_NS: + handle_ns(tbase, mbuf); + break; + case ICMPv6_NA: + handle_na(tbase, mbuf); + break; + case ICMPv6_RE: + plog_err("IPV6 ICMPV6 Redirect not handled\n"); + tx_drop(mbuf); + break; + default: + plog_err("Unexpected type %d in IPV6 ICMPV6\n", icmpv6->type); + tx_drop(mbuf); + break; + } + break; default: plogx_dbg("\tMaster received unexpected message\n"); tx_drop(mbuf); @@ -517,7 +871,6 @@ static inline void handle_message(struct task_base *tbase, struct rte_mbuf *mbuf void init_ctrl_plane(struct task_base *tbase) { - prox_cfg.flags |= DSF_CTRL_PLANE_ENABLED; struct task_master *task = (struct task_master *)tbase; int socket_id = rte_lcore_to_socket_id(prox_cfg.master); uint32_t n_entries = MAX_ARP_ENTRIES * 4; @@ -527,25 +880,52 @@ void init_ctrl_plane(struct task_base *tbase) struct rte_hash_parameters hash_params = { .name = hash_name, .entries = n_entries, - .key_len = 
sizeof(uint32_t), .hash_func = rte_hash_crc, .hash_func_init_val = 0, }; - task->external_ip_hash = rte_hash_create(&hash_params); - PROX_PANIC(task->external_ip_hash == NULL, "Failed to set up external ip hash\n"); - plog_info("\texternal ip hash table allocated, with %d entries of size %d\n", hash_params.entries, hash_params.key_len); - task->external_ip_table = (struct external_ip_table *)prox_zmalloc(n_entries * sizeof(struct external_ip_table), socket_id); - PROX_PANIC(task->external_ip_table == NULL, "Failed to allocate memory for %u entries in external ip table\n", n_entries); - plog_info("\texternal ip table, with %d entries of size %ld\n", n_entries, sizeof(struct external_ip_table)); - - hash_name[0]++; - hash_params.key_len = sizeof(struct ip_port); - task->internal_ip_hash = rte_hash_create(&hash_params); - PROX_PANIC(task->internal_ip_hash == NULL, "Failed to set up internal ip hash\n"); - plog_info("\tinternal ip hash table allocated, with %d entries of size %d\n", hash_params.entries, hash_params.key_len); - task->internal_ip_table = (struct ip_table *)prox_zmalloc(n_entries * sizeof(struct ip_table), socket_id); - PROX_PANIC(task->internal_ip_table == NULL, "Failed to allocate memory for %u entries in internal ip table\n", n_entries); - plog_info("\tinternal ip table, with %d entries of size %ld\n", n_entries, sizeof(struct ip_table)); + if (prox_cfg.flags & DSF_L3_ENABLED) { + hash_params.key_len = sizeof(uint32_t); + task->external_ip_hash = rte_hash_create(&hash_params); + PROX_PANIC(task->external_ip_hash == NULL, "Failed to set up external ip hash\n"); + plog_info("\texternal ip hash table allocated, with %d entries of size %d\n", hash_params.entries, hash_params.key_len); + hash_name[0]++; + + task->external_ip_table = (struct external_ip_table *)prox_zmalloc(n_entries * sizeof(struct external_ip_table), socket_id); + PROX_PANIC(task->external_ip_table == NULL, "Failed to allocate memory for %u entries in external ip table\n", n_entries); + 
plog_info("\texternal ip table, with %d entries of size %ld\n", n_entries, sizeof(struct external_ip_table)); + + hash_params.key_len = sizeof(struct ip_port); + task->internal_ip_hash = rte_hash_create(&hash_params); + PROX_PANIC(task->internal_ip_hash == NULL, "Failed to set up internal ip hash\n"); + plog_info("\tinternal ip hash table allocated, with %d entries of size %d\n", hash_params.entries, hash_params.key_len); + hash_name[0]++; + + task->internal_ip_table = (struct ip_table *)prox_zmalloc(n_entries * sizeof(struct ip_table), socket_id); + PROX_PANIC(task->internal_ip_table == NULL, "Failed to allocate memory for %u entries in internal ip table\n", n_entries); + plog_info("\tinternal ip table, with %d entries of size %ld\n", n_entries, sizeof(struct ip_table)); + } + + if (prox_cfg.flags & DSF_NDP_ENABLED) { + hash_params.key_len = sizeof(struct ipv6_addr); + task->external_ip6_hash = rte_hash_create(&hash_params); + PROX_PANIC(task->external_ip6_hash == NULL, "Failed to set up external ip6 hash\n"); + plog_info("\texternal ip6 hash table allocated, with %d entries of size %d\n", hash_params.entries, hash_params.key_len); + hash_name[0]++; + + task->external_ip6_table = (struct external_ip_table *)prox_zmalloc(n_entries * sizeof(struct external_ip_table), socket_id); + PROX_PANIC(task->external_ip6_table == NULL, "Failed to allocate memory for %u entries in external ip6 table\n", n_entries); + plog_info("\texternal ip6_table, with %d entries of size %ld\n", n_entries, sizeof(struct external_ip_table)); + + hash_params.key_len = sizeof(struct ip6_port); + task->internal_ip6_hash = rte_hash_create(&hash_params); + PROX_PANIC(task->internal_ip6_hash == NULL, "Failed to set up internal ip6 hash\n"); + plog_info("\tinternal ip6 hash table allocated, with %d entries of size %d\n", hash_params.entries, hash_params.key_len); + hash_name[0]++; + + task->internal_ip6_table = (struct ip_table *)prox_zmalloc(n_entries * sizeof(struct ip_table), socket_id); + 
PROX_PANIC(task->internal_ip6_table == NULL, "Failed to allocate memory for %u entries in internal ip6 table\n", n_entries); + plog_info("\tinternal ip6 table, with %d entries of size %ld\n", n_entries, sizeof(struct ip_table)); + } int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); PROX_PANIC(fd < 0, "Failed to open netlink socket: %d\n", errno); @@ -574,7 +954,7 @@ void init_ctrl_plane(struct task_base *tbase) task->route_fds.events = POLL_IN; plog_info("\tRTMGRP_IPV4_ROUTE netlink group bound; fd = %d\n", fd); - static char name[] = "master_arp_pool"; + static char name[] = "master_arp_nd_pool"; const int NB_ARP_MBUF = 1024; const int ARP_MBUF_SIZE = 2048; const int NB_CACHE_ARP_MBUF = 256; @@ -585,7 +965,7 @@ void init_ctrl_plane(struct task_base *tbase) rte_socket_id(), NB_ARP_MBUF); plog_info("\t\tMempool %p (%s) size = %u * %u cache %u, socket %d\n", ret, name, NB_ARP_MBUF, ARP_MBUF_SIZE, NB_CACHE_ARP_MBUF, rte_socket_id()); - tbase->l3.arp_pool = ret; + tbase->l3.arp_nd_pool = ret; } static void handle_route_event(struct task_base *tbase) @@ -638,13 +1018,13 @@ static void handle_route_event(struct task_base *tbase) } } int dpdk_vdev_port = -1; - for (int i = 0; i< rte_eth_dev_count(); i++) { + for (int i = 0; i< prox_rte_eth_dev_count_avail(); i++) { if (strcmp(prox_port_cfg[i].name, interface_name) == 0) dpdk_vdev_port = i; } if (dpdk_vdev_port != -1) { plogx_info("Received netlink message on tap interface %s for IP "IPv4_BYTES_FMT"/%d, Gateway "IPv4_BYTES_FMT"\n", interface_name, IP4(ip), dst_len, IP4(gw_ip)); - int ret1 = rte_mempool_get(tbase->l3.arp_pool, (void **)mbufs); + int ret1 = rte_mempool_get(tbase->l3.arp_nd_pool, (void **)mbufs); if (unlikely(ret1 != 0)) { plog_err("Unable to allocate a mbuf for master to core communication\n"); return; @@ -726,7 +1106,7 @@ static void handle_arp_event(struct task_base *tbase) memcpy(&task->external_ip_table[ret].mac, &mac, sizeof(prox_rte_ether_addr)); // If we receive a request from multiple task for 
the same IP, then we update all tasks - int ret1 = rte_mempool_get(tbase->l3.arp_pool, (void **)mbufs); + int ret1 = rte_mempool_get(tbase->l3.arp_nd_pool, (void **)mbufs); if (unlikely(ret1 != 0)) { plog_err("Unable to allocate a mbuf for master to core communication\n"); return; @@ -736,8 +1116,8 @@ static void handle_arp_event(struct task_base *tbase) struct rte_ring *ring = task->external_ip_table[ret].rings[i]; struct ether_hdr_arp *hdr = rte_pktmbuf_mtod(mbufs[0], struct ether_hdr_arp *); memcpy(&hdr->arp.data.sha, &mac, sizeof(prox_rte_ether_addr)); - tx_ring_ip(tbase, ring, UPDATE_FROM_CTRL, mbufs[0], ip); - plog_dbg("UPDATE_FROM_CTRL ip "IPv4_BYTES_FMT" with mac "MAC_BYTES_FMT"\n", IP4(ip), MAC_BYTES(mac.addr_bytes)); + tx_ring_ip(tbase, ring, MAC_INFO_FROM_MASTER, mbufs[0], ip); + plog_dbg("MAC_INFO_FROM_MASTER ip "IPv4_BYTES_FMT" with mac "MAC_BYTES_FMT"\n", IP4(ip), MAC_BYTES(mac.addr_bytes)); } task->external_ip_table[ret].nb_requests = 0; return; |