diff options
author | Xavier Simonart <xavier.simonart@intel.com> | 2020-04-24 21:52:12 +0200 |
---|---|---|
committer | Xavier Simonart <xavier.simonart@intel.com> | 2020-05-29 23:28:44 +0200 |
commit | ca250755c6ecad89fc30507a4c6707eedc658f5d (patch) | |
tree | c3a573bc038ba7872e0a19b4927c1ae96803fe68 /VNFs | |
parent | fa869940dd9bb459ac599fe80c26c9d3e720fd31 (diff) |
Added support for netlink
With this commit, ARP and ICMP messages are forwarded to the kernel
when vdev tap devices are enabled together with PROX L3 mode.
ICMP support has also been added to the master task (i.e. PROX L3 mode) and to
the swap task (i.e. when the L3 submode is not enabled).
Change-Id: Ie6bf52cbae7171bfca041ff18651d4ec866f44cd
Signed-off-by: Xavier Simonart <xavier.simonart@intel.com>
Diffstat (limited to 'VNFs')
-rw-r--r-- | VNFs/DPPD-PROX/handle_master.c | 353 | ||||
-rw-r--r-- | VNFs/DPPD-PROX/handle_master.h | 2 | ||||
-rw-r--r-- | VNFs/DPPD-PROX/handle_swap.c | 56 | ||||
-rw-r--r-- | VNFs/DPPD-PROX/packet_utils.c | 44 | ||||
-rw-r--r-- | VNFs/DPPD-PROX/prox_args.c | 3 | ||||
-rw-r--r-- | VNFs/DPPD-PROX/prox_cfg.h | 1 | ||||
-rw-r--r-- | VNFs/DPPD-PROX/prox_compat.h | 10 | ||||
-rw-r--r-- | VNFs/DPPD-PROX/rx_pkt.c | 40 |
8 files changed, 438 insertions, 71 deletions
diff --git a/VNFs/DPPD-PROX/handle_master.c b/VNFs/DPPD-PROX/handle_master.c index de0c0d51..fcc39eb4 100644 --- a/VNFs/DPPD-PROX/handle_master.c +++ b/VNFs/DPPD-PROX/handle_master.c @@ -15,11 +15,17 @@ */ #include <fcntl.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> +#include <poll.h> #include <rte_hash.h> #include <rte_hash_crc.h> -#include "prox_cfg.h" +#include <rte_ether.h> +#include "prox_cfg.h" #include "prox_globals.h" #include "rx_pkt.h" #include "arp.h" @@ -28,7 +34,6 @@ #include "mbuf_utils.h" #include "etypes.h" #include "defaults.h" -#include "prox_cfg.h" #include "prox_malloc.h" #include "quit.h" #include "task_init.h" @@ -40,9 +45,20 @@ #include "defines.h" #define PROX_MAX_ARP_REQUESTS 32 // Maximum number of tasks requesting the same MAC address -#define SET_NON_BLOCKING(X) fcntl(X, F_SETFL, fcntl(X, F_GETFL) | O_NONBLOCK); - -const char *actions_string[] = {"UPDATE_FROM_CTRL", "SEND_ARP_REQUEST_FROM_CTRL", "SEND_ARP_REPLY_FROM_CTRL", "HANDLE_ARP_TO_CTRL", "REQ_MAC_TO_CTRL", "PKT_FROM_TAP"}; +#define NETLINK_BUF_SIZE 16384 + +static char netlink_buf[NETLINK_BUF_SIZE]; + +const char *actions_string[] = { + "UPDATE_FROM_CTRL", // Controlplane sending a MAC update to dataplane + "SEND_ARP_REQUEST_FROM_CTRL", // Controlplane requesting dataplane to send ARP request + "SEND_ARP_REPLY_FROM_CTRL", // Controlplane requesting dataplane to send ARP reply + "SEND_ICMP_FROM_CTRL", // Controlplane requesting dataplane to send ICMP message + "ARP_TO_CTRL", // ARP sent by datplane to Controlpane for handling + "ICMP_TO_CTRL", // ICMP sent by datplane to Controlpane for handling + "REQ_MAC_TO_CTRL", // Dataplane requesting MAC resolution to Controlplane + "PKT_FROM_TAP" // Packet received by Controlplane from kernel and forwarded to dataplane for sending +}; static struct my_arp_t arp_reply = { .htype = 0x100, @@ -76,6 +92,10 @@ struct port_table { uint32_t ip; uint8_t port; uint8_t flags; + uint64_t 
last_echo_req_rcvd_tsc; + uint64_t last_echo_rep_rcvd_tsc; + uint32_t n_echo_req; + uint32_t n_echo_rep; }; struct task_master { @@ -89,6 +109,7 @@ struct task_master { struct port_table internal_port_table[PROX_MAX_PORTS]; struct vdev all_vdev[PROX_MAX_PORTS]; int max_vdev_id; + struct pollfd arp_fds; }; struct ip_port { @@ -121,15 +142,23 @@ void master_init_vdev(struct task_base *tbase, uint8_t port_id, uint8_t core_id, { struct task_master *task = (struct task_master *)tbase; uint8_t vdev_port = prox_port_cfg[port_id].dpdk_mapping; - int rc; + int rc, i; if (vdev_port != NO_VDEV_PORT) { + for (i = 0; i < task->max_vdev_id; i++) { + if (task->all_vdev[i].port_id == vdev_port) + break; + } + if (i < task->max_vdev_id) { + // Already initialized (e.g. by another core handling the same port). + return; + } task->all_vdev[task->max_vdev_id].port_id = vdev_port; task->all_vdev[task->max_vdev_id].ring = task->ctrl_tx_rings[core_id * MAX_TASKS_PER_CORE + task_id]; struct sockaddr_in dst, src; src.sin_family = AF_INET; src.sin_addr.s_addr = prox_port_cfg[vdev_port].ip; - src.sin_port = 5000; + src.sin_port = rte_cpu_to_be_16(5000); int fd = socket(AF_INET, SOCK_DGRAM, 0); PROX_PANIC(fd < 0, "Failed to open socket(AF_INET, SOCK_DGRAM, 0)\n"); @@ -137,7 +166,7 @@ void master_init_vdev(struct task_base *tbase, uint8_t port_id, uint8_t core_id, rc = bind(fd,(struct sockaddr *)&src, sizeof(struct sockaddr_in)); PROX_PANIC(rc, "Failed to bind("IPv4_BYTES_FMT":%d): errno = %d\n", IPv4_BYTES(((uint8_t*)&src.sin_addr.s_addr)), src.sin_port, errno); plog_info("DPDK port %d bound("IPv4_BYTES_FMT":%d) to fd %d\n", port_id, IPv4_BYTES(((uint8_t*)&src.sin_addr.s_addr)), src.sin_port, fd); - SET_NON_BLOCKING(fd); + fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_NONBLOCK); task->max_vdev_id++; } } @@ -146,10 +175,10 @@ void register_ip_to_ctrl_plane(struct task_base *tbase, uint32_t ip, uint8_t por { struct task_master *task = (struct task_master *)tbase; struct ip_port key; - 
plogx_info("\tregistering IP %d.%d.%d.%d with port %d core %d and task %d\n", IP4(ip), port_id, core_id, task_id); + plogx_info("\tregistering IP "IPv4_BYTES_FMT" with port %d core %d and task %d\n", IP4(ip), port_id, core_id, task_id); if (port_id >= PROX_MAX_PORTS) { - plog_err("Unable to register ip %d.%d.%d.%d, port %d\n", IP4(ip), port_id); + plog_err("Unable to register ip "IPv4_BYTES_FMT", port %d\n", IP4(ip), port_id); return; } @@ -157,7 +186,7 @@ void register_ip_to_ctrl_plane(struct task_base *tbase, uint32_t ip, uint8_t por Remove them when such cores are stopped and de-register IP */ task->internal_port_table[port_id].ring = task->ctrl_tx_rings[core_id * MAX_TASKS_PER_CORE + task_id]; - memcpy(&task->internal_port_table[port_id].mac, &prox_port_cfg[port_id].eth_addr, 6); + memcpy(&task->internal_port_table[port_id].mac, &prox_port_cfg[port_id].eth_addr, sizeof(prox_rte_ether_addr)); task->internal_port_table[port_id].ip = ip; if (ip == RANDOM_IP) { @@ -169,10 +198,10 @@ void register_ip_to_ctrl_plane(struct task_base *tbase, uint32_t ip, uint8_t por key.port = port_id; int ret = rte_hash_add_key(task->internal_ip_hash, (const void *)&key); if (unlikely(ret < 0)) { - plog_err("Unable to register ip %d.%d.%d.%d\n", IP4(ip)); + plog_err("Unable to register ip "IPv4_BYTES_FMT"\n", IP4(ip)); return; } - memcpy(&task->internal_ip_table[ret].mac, &prox_port_cfg[port_id].eth_addr, 6); + memcpy(&task->internal_ip_table[ret].mac, &prox_port_cfg[port_id].eth_addr, sizeof(prox_rte_ether_addr)); task->internal_ip_table[ret].ring = task->ctrl_tx_rings[core_id * MAX_TASKS_PER_CORE + task_id]; } @@ -183,7 +212,7 @@ static inline void handle_arp_reply(struct task_base *tbase, struct rte_mbuf *mb struct ether_hdr_arp *hdr_arp = rte_pktmbuf_mtod(mbuf, struct ether_hdr_arp *); int i, ret; uint32_t key = hdr_arp->arp.data.spa; - plogx_dbg("\tMaster handling ARP reply for ip %d.%d.%d.%d\n", IP4(key)); + plogx_dbg("\tMaster handling ARP reply for ip "IPv4_BYTES_FMT"\n", 
IP4(key)); ret = rte_hash_lookup(task->external_ip_hash, (const void *)&key); if (unlikely(ret < 0)) { @@ -192,7 +221,6 @@ static inline void handle_arp_reply(struct task_base *tbase, struct rte_mbuf *mb } else { // entry found for this IP uint16_t nb_requests = task->external_ip_table[ret].nb_requests; - memcpy(&hdr_arp->ether_hdr.d_addr.addr_bytes, &task->external_ip_table[ret].mac, 6); // If we receive a request from multiple task for the same IP, then we update all tasks if (task->external_ip_table[ret].nb_requests) { rte_mbuf_refcnt_set(mbuf, nb_requests); @@ -219,7 +247,7 @@ static inline void handle_arp_request(struct task_base *tbase, struct rte_mbuf * key.port = port; if (task->internal_port_table[port].flags & HANDLE_RANDOM_IP_FLAG) { prox_rte_ether_addr mac; - plogx_dbg("\tMaster handling ARP request for ip %d.%d.%d.%d on port %d which supports random ip\n", IP4(key.ip), key.port); + plogx_dbg("\tMaster handling ARP request for ip "IPv4_BYTES_FMT" on port %d which supports random ip\n", IP4(key.ip), key.port); struct rte_ring *ring = task->internal_port_table[port].ring; create_mac(hdr_arp, &mac); mbuf->ol_flags &= ~(PKT_TX_IP_CKSUM|PKT_TX_UDP_CKSUM); @@ -228,12 +256,12 @@ static inline void handle_arp_request(struct task_base *tbase, struct rte_mbuf * return; } - plogx_dbg("\tMaster handling ARP request for ip %d.%d.%d.%d\n", IP4(key.ip)); + plogx_dbg("\tMaster handling ARP request for ip "IPv4_BYTES_FMT"\n", IP4(key.ip)); ret = rte_hash_lookup(task->internal_ip_hash, (const void *)&key); if (unlikely(ret < 0)) { // entry not found for this IP. 
- plogx_dbg("Master ignoring ARP REQUEST received on un-registered IP %d.%d.%d.%d on port %d\n", IP4(hdr_arp->arp.data.tpa), port); + plogx_dbg("Master ignoring ARP REQUEST received on un-registered IP "IPv4_BYTES_FMT" on port %d\n", IP4(hdr_arp->arp.data.tpa), port); tx_drop(mbuf); } else { struct rte_ring *ring = task->internal_ip_table[ret].ring; @@ -243,15 +271,46 @@ static inline void handle_arp_request(struct task_base *tbase, struct rte_mbuf * } } +static inline int record_request(struct task_base *tbase, uint32_t ip_dst, uint8_t port, struct rte_ring *ring) +{ + struct task_master *task = (struct task_master *)tbase; + int ret = rte_hash_add_key(task->external_ip_hash, (const void *)&ip_dst); + int i; + + if (unlikely(ret < 0)) { + // entry not found for this IP: delete the reply + plogx_dbg("Unable to add IP "IPv4_BYTES_FMT" in external_ip_hash\n", IP4(ip_dst)); + return -1; + } + + // If multiple tasks requesting the same info, we will need to send a reply to all of them + // However if one task sends multiple requests to the same IP (e.g. because it is not answering) + // then we should not send multiple replies to the same task + if (task->external_ip_table[ret].nb_requests >= PROX_MAX_ARP_REQUESTS) { + // This can only happen if really many tasks requests the same IP + plogx_dbg("Unable to add request for IP "IPv4_BYTES_FMT" in external_ip_table\n", IP4(ip_dst)); + return -1; + } + for (i = 0; i < task->external_ip_table[ret].nb_requests; i++) { + if (task->external_ip_table[ret].rings[i] == ring) + break; + } + if (i >= task->external_ip_table[ret].nb_requests) { + // If this is a new request i.e. 
a new task requesting a new IP + task->external_ip_table[ret].rings[task->external_ip_table[ret].nb_requests] = ring; + task->external_ip_table[ret].nb_requests++; + } + return 0; +} + static inline void handle_unknown_ip(struct task_base *tbase, struct rte_mbuf *mbuf) { struct task_master *task = (struct task_master *)tbase; struct ether_hdr_arp *hdr_arp = rte_pktmbuf_mtod(mbuf, struct ether_hdr_arp *); uint8_t port = get_port(mbuf); uint32_t ip_dst = get_ip(mbuf); - int ret1, ret2, i; - plogx_dbg("\tMaster handling unknown ip %d.%d.%d.%d for port %d\n", IP4(ip_dst), port); + plogx_dbg("\tMaster handling unknown ip "IPv4_BYTES_FMT" for port %d\n", IP4(ip_dst), port); if (unlikely(port >= PROX_MAX_PORTS)) { plogx_dbg("Port %d not found", port); tx_drop(mbuf); @@ -266,40 +325,85 @@ static inline void handle_unknown_ip(struct task_base *tbase, struct rte_mbuf *m return; } - ret2 = rte_hash_add_key(task->external_ip_hash, (const void *)&ip_dst); - if (unlikely(ret2 < 0)) { - // entry not found for this IP: delete the reply - plogx_dbg("Unable to add IP %d.%d.%d.%d in external_ip_hash\n", IP4(ip_dst)); + if (record_request(tbase, ip_dst, port, ring) < 0) { tx_drop(mbuf); return; } + // We send an ARP request even if one was just sent (and not yet answered) by another task + mbuf->ol_flags &= ~(PKT_TX_IP_CKSUM|PKT_TX_UDP_CKSUM); + build_arp_request(mbuf, &task->internal_port_table[port].mac, ip_dst, ip_src); + tx_ring(tbase, ring, ARP_REQ_FROM_CTRL, mbuf); +} - // If multiple tasks requesting the same info, we will need to send a reply to all of them - // However if one task sends multiple requests to the same IP (e.g. 
because it is not answering) - // then we should not send multiple replies to the same task - if (task->external_ip_table[ret2].nb_requests >= PROX_MAX_ARP_REQUESTS) { - // This can only happen if really many tasks requests the same IP - plogx_dbg("Unable to add request for IP %d.%d.%d.%d in external_ip_table\n", IP4(ip_dst)); +static inline void build_icmp_reply_message(struct task_base *tbase, struct rte_mbuf *mbuf) +{ + struct task_master *task = (struct task_master *)tbase; + struct ip_port key; + key.port = mbuf->port; + prox_rte_ether_hdr *hdr = rte_pktmbuf_mtod(mbuf, prox_rte_ether_hdr *); + prox_rte_ether_addr dst_mac; + prox_rte_ether_addr_copy(&hdr->s_addr, &dst_mac); + prox_rte_ether_addr_copy(&hdr->d_addr, &hdr->s_addr); + prox_rte_ether_addr_copy(&dst_mac, &hdr->d_addr); + prox_rte_ipv4_hdr *ip_hdr = (prox_rte_ipv4_hdr *)(hdr + 1); + key.ip = ip_hdr->dst_addr; + ip_hdr->dst_addr = ip_hdr->src_addr; + ip_hdr->src_addr = key.ip; + prox_rte_icmp_hdr *picmp = (prox_rte_icmp_hdr *)(ip_hdr + 1); + picmp->icmp_type = PROX_RTE_IP_ICMP_ECHO_REPLY; + + int ret = rte_hash_lookup(task->internal_ip_hash, (const void *)&key); + if (unlikely(ret < 0)) { + // entry not found for this IP. 
+ plogx_dbg("Master ignoring ICMP received on un-registered IP "IPv4_BYTES_FMT" on port %d\n", IP4(key.ip), mbuf->port); + tx_drop(mbuf); + } else { + struct rte_ring *ring = task->internal_ip_table[ret].ring; + mbuf->ol_flags &= ~(PKT_TX_IP_CKSUM|PKT_TX_UDP_CKSUM); + tx_ring(tbase, ring, ICMP_FROM_CTRL, mbuf); + } +} + +static inline void handle_icmp(struct task_base *tbase, struct rte_mbuf *mbuf) +{ + struct task_master *task = (struct task_master *)tbase; + uint8_t port_id = mbuf->port; + struct port_table *port = &task->internal_port_table[port_id]; + prox_rte_ether_hdr *hdr = rte_pktmbuf_mtod(mbuf, prox_rte_ether_hdr *); + if (hdr->ether_type != ETYPE_IPv4) { tx_drop(mbuf); return; } - for (i = 0; i < task->external_ip_table[ret2].nb_requests; i++) { - if (task->external_ip_table[ret2].rings[i] == ring) - break; + prox_rte_ipv4_hdr *ip_hdr = (prox_rte_ipv4_hdr *)(hdr + 1); + if (ip_hdr->next_proto_id != IPPROTO_ICMP) { + tx_drop(mbuf); + return; } - if (i >= task->external_ip_table[ret2].nb_requests) { - // If this is a new request i.e. a new task requesting a new IP - task->external_ip_table[ret2].rings[task->external_ip_table[ret2].nb_requests] = ring; - task->external_ip_table[ret2].nb_requests++; - // Only needed for first request - but avoid test and copy the same 6 bytes - // In most cases we will only have one request per IP. 
- memcpy(&task->external_ip_table[ret2].mac, &task->internal_port_table[port].mac, 6); + if (ip_hdr->dst_addr != port->ip) { + tx_drop(mbuf); + return; } - // We send an ARP request even if one was just sent (and not yet answered) by another task - mbuf->ol_flags &= ~(PKT_TX_IP_CKSUM|PKT_TX_UDP_CKSUM); - build_arp_request(mbuf, &task->internal_port_table[port].mac, ip_dst, ip_src); - tx_ring(tbase, ring, ARP_REQ_FROM_CTRL, mbuf); + prox_rte_icmp_hdr *picmp = (prox_rte_icmp_hdr *)(ip_hdr + 1); + uint8_t type = picmp->icmp_type; + if (type == PROX_RTE_IP_ICMP_ECHO_REQUEST) { + port->n_echo_req++; + if (rte_rdtsc() - port->last_echo_req_rcvd_tsc > rte_get_tsc_hz()) { + plog_dbg("Received %u Echo Request on IP "IPv4_BYTES_FMT" (last received from IP "IPv4_BYTES_FMT")\n", port->n_echo_req, IPv4_BYTES(((uint8_t*)&ip_hdr->dst_addr)), IPv4_BYTES(((uint8_t*)&ip_hdr->src_addr))); + port->n_echo_req = 0; + port->last_echo_req_rcvd_tsc = rte_rdtsc(); + } + build_icmp_reply_message(tbase, mbuf); + } else if (type == PROX_RTE_IP_ICMP_ECHO_REPLY) { + port->n_echo_rep++; + if (rte_rdtsc() - port->last_echo_rep_rcvd_tsc > rte_get_tsc_hz()) { + plog_info("Received %u Echo Reply on IP "IPv4_BYTES_FMT" (last received from IP "IPv4_BYTES_FMT")\n", port->n_echo_rep, IPv4_BYTES(((uint8_t*)&ip_hdr->dst_addr)), IPv4_BYTES(((uint8_t*)&ip_hdr->src_addr))); + port->n_echo_rep = 0; + port->last_echo_rep_rcvd_tsc = rte_rdtsc(); + } + } + tx_drop(mbuf); + return; } static inline void handle_message(struct task_base *tbase, struct rte_mbuf *mbuf, int ring_id) @@ -313,16 +417,27 @@ static inline void handle_message(struct task_base *tbase, struct rte_mbuf *mbuf plogx_dbg("\tMaster received %s (%x) from mbuf %p\n", actions_string[command], command, mbuf); switch(command) { + case ICMP_TO_CTRL: + if (vdev_port != NO_VDEV_PORT) { + // If a virtual (net_tap) device is attached, send the (PING) packet to this device + // The kernel will receive and handle it. 
+ plogx_dbg("\tMaster forwarding packet to TAP\n"); + int n = rte_eth_tx_burst(prox_port_cfg[port].dpdk_mapping, 0, &mbuf, 1); + return; + } + handle_icmp(tbase, mbuf); + break; case ARP_TO_CTRL: if (vdev_port != NO_VDEV_PORT) { // If a virtual (net_tap) device is attached, send the (ARP) packet to this device // The kernel will receive and handle it. + plogx_dbg("\tMaster forwarding packet to TAP\n"); int n = rte_eth_tx_burst(prox_port_cfg[port].dpdk_mapping, 0, &mbuf, 1); return; } if (hdr_arp->ether_hdr.ether_type != ETYPE_ARP) { - tx_drop(mbuf); plog_err("\tUnexpected message received: ARP_TO_CTRL with ether_type %x\n", hdr_arp->ether_hdr.ether_type); + tx_drop(mbuf); return; } else if (arp_is_gratuitous(hdr_arp)) { plog_info("\tReceived gratuitous packet \n"); @@ -346,18 +461,34 @@ static inline void handle_message(struct task_base *tbase, struct rte_mbuf *mbuf // be sent from the same IP src. This might be a limitation. // This prevent to have to open as many sockets as there are sources MAC addresses // We also always use the same UDP ports - as the packet will finally not leave the system anyhow - // Content of udp might be garbage - we do not care. - prox_rte_ether_hdr *hdr = rte_pktmbuf_mtod(mbuf, prox_rte_ether_hdr *); - prox_rte_ipv4_hdr *ip_hdr = (prox_rte_ipv4_hdr *)(hdr + 1); - prox_rte_udp_hdr *udp = (prox_rte_udp_hdr *)(ip_hdr + 1); + struct ether_hdr_arp *hdr_arp = rte_pktmbuf_mtod(mbuf, struct ether_hdr_arp *); + uint32_t ip = get_ip(mbuf); + struct rte_ring *ring = task->ctrl_tx_rings[get_core(mbuf) * MAX_TASKS_PER_CORE + get_task(mbuf)]; + + // First check whether MAC address is not already in kernel MAC table. + // If present in our hash with a non-null MAC, then present in kernel. A null MAC + // might just mean that we sent a request. + // If MAC present in kernel, do not send a packet towards the kernel to try to generate + // an ARP request, as the kernel would not generate it. 
+ int ret = rte_hash_lookup(task->external_ip_hash, (const void *)&ip); + if ((ret >= 0) && (!prox_rte_is_zero_ether_addr(&task->external_ip_table[ret].mac))) { + memcpy(&hdr_arp->arp.data.sha, &task->external_ip_table[ret].mac, sizeof(prox_rte_ether_addr)); + plogx_dbg("\tMaster ready to send UPDATE_FROM_CTRL ip "IPv4_BYTES_FMT" with mac "MAC_BYTES_FMT"\n", + IP4(ip), MAC_BYTES(hdr_arp->arp.data.sha.addr_bytes)); + tx_ring_ip(tbase, ring, UPDATE_FROM_CTRL, mbuf, ip); + return; + } struct sockaddr_in dst; dst.sin_family = AF_INET; - dst.sin_addr.s_addr = ip_hdr->dst_addr; - dst.sin_port = 5000; - int n = sendto(prox_port_cfg[vdev_port].fd, (char*)(udp + 1), 18, 0, (struct sockaddr *)&dst, sizeof(struct sockaddr_in)); - plog_dbg("Sent %d bytes to "IPv4_BYTES_FMT" using fd %d\n", n, IPv4_BYTES(((uint8_t*)&ip_hdr->dst_addr)), prox_port_cfg[vdev_port].fd); + dst.sin_addr.s_addr = ip; + dst.sin_port = rte_cpu_to_be_16(5000); + int n = sendto(prox_port_cfg[vdev_port].fd, (char*)(&ip), 0, 0, (struct sockaddr *)&dst, sizeof(struct sockaddr_in)); + plogx_dbg("\tSent %d bytes to TAP IP "IPv4_BYTES_FMT" using fd %d\n", n, IPv4_BYTES(((uint8_t*)&ip)), prox_port_cfg[vdev_port].fd); + + record_request(tbase, ip, port, ring); + tx_drop(mbuf); break; } handle_unknown_ip(tbase, mbuf); @@ -373,9 +504,10 @@ void init_ctrl_plane(struct task_base *tbase) { prox_cfg.flags |= DSF_CTRL_PLANE_ENABLED; struct task_master *task = (struct task_master *)tbase; - int socket = rte_lcore_to_socket_id(prox_cfg.master); + int socket_id = rte_lcore_to_socket_id(prox_cfg.master); uint32_t n_entries = MAX_ARP_ENTRIES * 4; static char hash_name[30]; + sprintf(hash_name, "A%03d_hash_arp_table", prox_cfg.master); struct rte_hash_parameters hash_params = { .name = hash_name, @@ -387,7 +519,7 @@ void init_ctrl_plane(struct task_base *tbase) task->external_ip_hash = rte_hash_create(&hash_params); PROX_PANIC(task->external_ip_hash == NULL, "Failed to set up external ip hash\n"); plog_info("\texternal ip hash 
table allocated, with %d entries of size %d\n", hash_params.entries, hash_params.key_len); - task->external_ip_table = (struct external_ip_table *)prox_zmalloc(n_entries * sizeof(struct external_ip_table), socket); + task->external_ip_table = (struct external_ip_table *)prox_zmalloc(n_entries * sizeof(struct external_ip_table), socket_id); PROX_PANIC(task->external_ip_table == NULL, "Failed to allocate memory for %u entries in external ip table\n", n_entries); plog_info("\texternal ip table, with %d entries of size %ld\n", n_entries, sizeof(struct external_ip_table)); @@ -396,9 +528,35 @@ void init_ctrl_plane(struct task_base *tbase) task->internal_ip_hash = rte_hash_create(&hash_params); PROX_PANIC(task->internal_ip_hash == NULL, "Failed to set up internal ip hash\n"); plog_info("\tinternal ip hash table allocated, with %d entries of size %d\n", hash_params.entries, hash_params.key_len); - task->internal_ip_table = (struct ip_table *)prox_zmalloc(n_entries * sizeof(struct ip_table), socket); + task->internal_ip_table = (struct ip_table *)prox_zmalloc(n_entries * sizeof(struct ip_table), socket_id); PROX_PANIC(task->internal_ip_table == NULL, "Failed to allocate memory for %u entries in internal ip table\n", n_entries); plog_info("\tinternal ip table, with %d entries of size %ld\n", n_entries, sizeof(struct ip_table)); + + int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + PROX_PANIC(fd < 0, "Failed to open netlink socket: %d\n", errno); + fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_NONBLOCK); + + struct sockaddr_nl sockaddr; + memset(&sockaddr, 0, sizeof(struct sockaddr_nl)); + sockaddr.nl_family = AF_NETLINK; + sockaddr.nl_groups = RTMGRP_NEIGH | RTMGRP_NOTIFY; + int rc = bind(fd, (struct sockaddr *)&sockaddr, sizeof(struct sockaddr_nl)); + PROX_PANIC(rc < 0, "Failed to bind to RTMGRP_NEIGH netlink group\n"); + task->arp_fds.fd = fd; + task->arp_fds.events = POLL_IN; + plog_info("\tRTMGRP_NEIGH netlink group bound; fd = %d\n", fd); + static char name[] = 
"master_arp_pool"; + const int NB_ARP_MBUF = 1024; + const int ARP_MBUF_SIZE = 2048; + const int NB_CACHE_ARP_MBUF = 256; + struct rte_mempool *ret = rte_mempool_create(name, NB_ARP_MBUF, ARP_MBUF_SIZE, NB_CACHE_ARP_MBUF, + sizeof(struct rte_pktmbuf_pool_private), rte_pktmbuf_pool_init, NULL, rte_pktmbuf_init, 0, + rte_socket_id(), 0); + PROX_PANIC(ret == NULL, "Failed to allocate ARP memory pool on socket %u with %u elements\n", + rte_socket_id(), NB_ARP_MBUF); + plog_info("\t\tMempool %p (%s) size = %u * %u cache %u, socket %d\n", ret, name, NB_ARP_MBUF, + ARP_MBUF_SIZE, NB_CACHE_ARP_MBUF, rte_socket_id()); + tbase->l3.arp_pool = ret; } static int handle_ctrl_plane_f(struct task_base *tbase, __attribute__((unused)) struct rte_mbuf **mbuf, uint16_t n_pkts) @@ -424,6 +582,89 @@ static int handle_ctrl_plane_f(struct task_base *tbase, __attribute__((unused)) } ret +=n; } + if (poll(&task->arp_fds, 1, prox_cfg.poll_timeout) == POLL_IN) { + struct nlmsghdr * nl_hdr; + int fd = task->arp_fds.fd; + int len; + uint32_t ip = 0; + prox_rte_ether_addr mac; + memset(&mac, 0, sizeof(mac)); + len = recv(fd, netlink_buf, sizeof(netlink_buf), 0); + if (len < 0) { + plog_err("Failed to recv from netlink: %d\n", errno); + return errno; + } + nl_hdr = (struct nlmsghdr *)netlink_buf; + if (nl_hdr->nlmsg_flags & NLM_F_MULTI) { + plog_err("Unexpected multipart netlink message\n"); + return -1; + } + if ((nl_hdr->nlmsg_type != RTM_NEWNEIGH) && (nl_hdr->nlmsg_type != RTM_DELNEIGH)) + return 0; + + struct ndmsg *ndmsg = (struct ndmsg *)NLMSG_DATA(nl_hdr); + int ndm_family = ndmsg->ndm_family; + struct rtattr *rta = (struct rtattr *)RTM_RTA(ndmsg); + int rtl = RTM_PAYLOAD(nl_hdr); + for (; RTA_OK(rta, rtl); rta = RTA_NEXT(rta, rtl)) { + switch (rta->rta_type) { + case NDA_DST: + ip = *((uint32_t *)RTA_DATA(rta)); + break; + case NDA_LLADDR: + mac = *((prox_rte_ether_addr *)(uint64_t *)RTA_DATA(rta)); + break; + default: + break; + } + } + ret = rte_hash_lookup(task->external_ip_hash, 
(const void *)&ip); + if (unlikely(ret < 0)) { + // entry not found for this IP: we did not ask a request. + // This can happen if the kernel updated the ARP table when receiving an ARP_REQUEST + // We must record this, as the ARP entry is now in the kernel table + if (prox_rte_is_zero_ether_addr(&mac)) { + // Timeout or MAC deleted from kernel MAC table + int ret = rte_hash_del_key(task->external_ip_hash, (const void *)&ip); + plogx_dbg("ip "IPv4_BYTES_FMT" removed from external_ip_hash\n", IP4(ip)); + return 0; + } + int ret = rte_hash_add_key(task->external_ip_hash, (const void *)&ip); + if (unlikely(ret < 0)) { + // entry not found for this IP: Ignore the reply. This can happen for instance for + // an IP used by management plane. + plogx_dbg("IP "IPv4_BYTES_FMT" not found in external_ip_hash and unable to add it\n", IP4(ip)); + return -1; + } + memcpy(&task->external_ip_table[ret].mac, &mac, sizeof(prox_rte_ether_addr)); + plogx_dbg("ip "IPv4_BYTES_FMT" added in external_ip_hash with mac "MAC_BYTES_FMT"\n", IP4(ip), MAC_BYTES(mac.addr_bytes)); + return 0; + } + + // entry found for this IP + uint16_t nb_requests = task->external_ip_table[ret].nb_requests; + if (nb_requests == 0) { + return 0; + } + + memcpy(&task->external_ip_table[ret].mac, &mac, sizeof(prox_rte_ether_addr)); + + // If we receive a request from multiple task for the same IP, then we update all tasks + ret = rte_mempool_get(tbase->l3.arp_pool, (void **)mbufs); + if (unlikely(ret != 0)) { + plog_err("Unable to allocate a mbuf for master to core communication\n"); + return -1; + } + rte_mbuf_refcnt_set(mbufs[0], nb_requests); + for (int i = 0; i < nb_requests; i++) { + struct rte_ring *ring = task->external_ip_table[ret].rings[i]; + struct ether_hdr_arp *hdr = rte_pktmbuf_mtod(mbufs[0], struct ether_hdr_arp *); + memcpy(&hdr->arp.data.sha, &mac, sizeof(prox_rte_ether_addr)); + tx_ring_ip(tbase, ring, UPDATE_FROM_CTRL, mbufs[0], ip); + plog_dbg("UPDATE_FROM_CTRL ip "IPv4_BYTES_FMT" with mac 
"MAC_BYTES_FMT"\n", IP4(ip), MAC_BYTES(mac.addr_bytes)); + } + task->external_ip_table[ret].nb_requests = 0; + } return ret; } diff --git a/VNFs/DPPD-PROX/handle_master.h b/VNFs/DPPD-PROX/handle_master.h index 7772dd32..6ce51854 100644 --- a/VNFs/DPPD-PROX/handle_master.h +++ b/VNFs/DPPD-PROX/handle_master.h @@ -21,7 +21,9 @@ enum arp_actions { UPDATE_FROM_CTRL, ARP_REQ_FROM_CTRL, ARP_REPLY_FROM_CTRL, + ICMP_FROM_CTRL, ARP_TO_CTRL, + ICMP_TO_CTRL, REQ_MAC_TO_CTRL, PKT_FROM_TAP, MAX_ACTIONS diff --git a/VNFs/DPPD-PROX/handle_swap.c b/VNFs/DPPD-PROX/handle_swap.c index 457c2fac..a7a153a4 100644 --- a/VNFs/DPPD-PROX/handle_swap.c +++ b/VNFs/DPPD-PROX/handle_swap.c @@ -1,5 +1,5 @@ /* -// Copyright (c) 2010-2017 Intel Corporation +// Copyright (c) 2010-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -26,6 +26,7 @@ #include "qinq.h" #include "gre.h" #include "prefetch.h" +#include "defines.h" #include "igmp.h" #include "prox_cksum.h" #include "prox_compat.h" @@ -38,6 +39,10 @@ struct task_swap { uint8_t src_dst_mac[12]; uint32_t local_ipv4; int offload_crc; + uint64_t last_echo_req_rcvd_tsc; + uint64_t last_echo_rep_rcvd_tsc; + uint32_t n_echo_req; + uint32_t n_echo_rep; }; #define NB_IGMP_MBUF 1024 @@ -79,6 +84,21 @@ static inline void build_mcast_mac(uint32_t ip, prox_rte_ether_addr *dst_mac) memcpy(dst_mac, &mac, sizeof(prox_rte_ether_addr)); } +static inline void build_icmp_reply_message(struct task_base *tbase, struct rte_mbuf *mbuf) +{ + struct task_swap *task = (struct task_swap *)tbase; + prox_rte_ether_hdr *hdr = rte_pktmbuf_mtod(mbuf, prox_rte_ether_hdr *); + prox_rte_ether_addr dst_mac; + prox_rte_ether_addr_copy(&hdr->s_addr, &dst_mac); + prox_rte_ether_addr_copy(&hdr->d_addr, &hdr->s_addr); + prox_rte_ether_addr_copy(&dst_mac, &hdr->d_addr); + prox_rte_ipv4_hdr *ip_hdr = (prox_rte_ipv4_hdr *)(hdr + 1); + ip_hdr->dst_addr = ip_hdr->src_addr; + 
ip_hdr->src_addr = task->local_ipv4; + prox_rte_icmp_hdr *picmp = (prox_rte_icmp_hdr *)(ip_hdr + 1); + picmp->icmp_type = PROX_RTE_IP_ICMP_ECHO_REPLY; +} + static inline void build_igmp_message(struct task_base *tbase, struct rte_mbuf *mbuf, uint32_t ip, uint8_t igmp_message) { struct task_swap *task = (struct task_swap *)tbase; @@ -131,6 +151,7 @@ static int handle_swap_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, ui prox_rte_vlan_hdr *vlan; uint16_t j; struct igmpv2_hdr *pigmp; + prox_rte_icmp_hdr *picmp; uint8_t type; for (j = 0; j < n_pkts; ++j) { @@ -239,6 +260,39 @@ static int handle_swap_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, ui udp_hdr->src_port = port; write_src_and_dst_mac(task, mbufs[j]); break; + case IPPROTO_ICMP: + picmp = (prox_rte_icmp_hdr *)(ip_hdr + 1); + type = picmp->icmp_type; + if (type == PROX_RTE_IP_ICMP_ECHO_REQUEST) { + if (ip_hdr->dst_addr == task->local_ipv4) { + task->n_echo_req++; + if (rte_rdtsc() - task->last_echo_req_rcvd_tsc > rte_get_tsc_hz()) { + plog_info("Received %u Echo Request on IP "IPv4_BYTES_FMT" (last received from IP "IPv4_BYTES_FMT")\n", task->n_echo_req, IPv4_BYTES(((uint8_t*)&ip_hdr->dst_addr)), IPv4_BYTES(((uint8_t*)&ip_hdr->src_addr))); + task->n_echo_req = 0; + task->last_echo_req_rcvd_tsc = rte_rdtsc(); + } + build_icmp_reply_message(tbase, mbufs[j]); + } else { + out[j] = OUT_DISCARD; + continue; + } + } else if (type == PROX_RTE_IP_ICMP_ECHO_REPLY) { + if (ip_hdr->dst_addr == task->local_ipv4) { + task->n_echo_rep++; + if (rte_rdtsc() - task->last_echo_rep_rcvd_tsc > rte_get_tsc_hz()) { + plog_info("Received %u Echo Reply on IP "IPv4_BYTES_FMT" (last received from IP "IPv4_BYTES_FMT")\n", task->n_echo_rep, IPv4_BYTES(((uint8_t*)&ip_hdr->dst_addr)), IPv4_BYTES(((uint8_t*)&ip_hdr->src_addr))); + task->n_echo_rep = 0; + task->last_echo_rep_rcvd_tsc = rte_rdtsc(); + } + } else { + out[j] = OUT_DISCARD; + continue; + } + } else { + out[j] = OUT_DISCARD; + continue; + } + break; case 
IPPROTO_IGMP: pigmp = (struct igmpv2_hdr *)(ip_hdr + 1); // TODO: check packet len diff --git a/VNFs/DPPD-PROX/packet_utils.c b/VNFs/DPPD-PROX/packet_utils.c index b0bc6da9..fb467555 100644 --- a/VNFs/DPPD-PROX/packet_utils.c +++ b/VNFs/DPPD-PROX/packet_utils.c @@ -21,6 +21,7 @@ #include "lconf.h" #include "prefetch.h" #include "log.h" +#include "defines.h" #include "handle_master.h" #include "prox_port_cfg.h" @@ -277,6 +278,29 @@ void task_set_local_ip(struct task_base *tbase, uint32_t ip) tbase->local_ipv4 = ip; } +static void reset_arp_update_time(struct l3_base *l3, uint32_t ip) +{ + uint32_t idx; + plogx_info("\tMAC entry for IP "IPv4_BYTES_FMT" timeout in kernel\n", IP4(ip)); + if (ip == l3->gw.ip) { + l3->gw.arp_update_time = 0; + } else if (l3->n_pkts < 4) { + for (idx = 0; idx < l3->n_pkts; idx++) { + uint32_t ip_dst = l3->optimized_arp_table[idx].ip; + if (ip_dst == ip) + break; + } + if (idx < l3->n_pkts) { + l3->optimized_arp_table[idx].arp_update_time = 0; + } + } else { + int ret = rte_hash_lookup(l3->ip_hash, (const void *)&ip); + if (ret >= 0) + l3->arp_table[ret].arp_update_time = 0; + } + return; +} + void handle_ctrl_plane_pkts(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts) { uint8_t out[1]; @@ -287,6 +311,7 @@ void handle_ctrl_plane_pkts(struct task_base *tbase, struct rte_mbuf **mbufs, ui struct ether_hdr_arp *hdr; struct l3_base *l3 = &tbase->l3; uint64_t tsc= rte_rdtsc(); + uint64_t update_time = l3->arp_timeout * hz / 1000; for (j = 0; j < n_pkts; ++j) { PREFETCH0(mbufs[j]); @@ -304,11 +329,21 @@ void handle_ctrl_plane_pkts(struct task_base *tbase, struct rte_mbuf **mbufs, ui hdr = rte_pktmbuf_mtod(mbufs[j], struct ether_hdr_arp *); ip = (mbufs[j]->udata64 >> 32) & 0xFFFFFFFF; + if (prox_rte_is_zero_ether_addr(&hdr->arp.data.sha)) { + // MAC timeout or deleted from kernel table => reset update_time + // This will cause us to send new ARP request + // However, as arp_timeout not touched, we should continue sending our 
regular IP packets + reset_arp_update_time(l3, ip); + plogx_info("\tTimeout for MAC entry for IP "IPv4_BYTES_FMT"\n", IP4(ip)); + return; + } else + plogx_dbg("\tUpdating MAC entry for IP "IPv4_BYTES_FMT" with MAC "MAC_BYTES_FMT"\n", + IP4(ip), MAC_BYTES(hdr->arp.data.sha.addr_bytes)); if (ip == l3->gw.ip) { // MAC address of the gateway memcpy(&l3->gw.mac, &hdr->arp.data.sha, 6); l3->flags |= FLAG_DST_MAC_KNOWN; - l3->gw.arp_timeout = tsc + l3->arp_timeout * hz / 1000; + l3->gw.arp_timeout = tsc + update_time; } else if (l3->n_pkts < 4) { // Few packets tracked - should be faster to loop through them thean using a hash table for (idx = 0; idx < l3->n_pkts; idx++) { @@ -317,9 +352,8 @@ void handle_ctrl_plane_pkts(struct task_base *tbase, struct rte_mbuf **mbufs, ui break; } if (idx < l3->n_pkts) { - // IP not found; this is a reply while we never asked for the request! memcpy(&l3->optimized_arp_table[idx].mac, &(hdr->arp.data.sha), sizeof(prox_rte_ether_addr)); - l3->optimized_arp_table[idx].arp_timeout = tsc + l3->arp_timeout * hz / 1000; + l3->optimized_arp_table[idx].arp_timeout = tsc + update_time; } } else { int ret = rte_hash_add_key(l3->ip_hash, (const void *)&ip); @@ -327,16 +361,18 @@ void handle_ctrl_plane_pkts(struct task_base *tbase, struct rte_mbuf **mbufs, ui plogx_info("Unable add ip %d.%d.%d.%d in mac_hash\n", IP4(ip)); } else { memcpy(&l3->arp_table[ret].mac, &(hdr->arp.data.sha), sizeof(prox_rte_ether_addr)); - l3->arp_table[ret].arp_timeout = tsc + l3->arp_timeout * hz / 1000; + l3->arp_table[ret].arp_timeout = tsc + update_time; } } tx_drop(mbufs[j]); break; case ARP_REPLY_FROM_CTRL: + case ICMP_FROM_CTRL: case ARP_REQ_FROM_CTRL: case PKT_FROM_TAP: out[0] = 0; // tx_ctrlplane_pkt does not drop packets + plogx_dbg("\tForwarding (ARP/PING) packet from master\n"); tbase->aux->tx_ctrlplane_pkt(tbase, &mbufs[j], 1, out); TASK_STATS_ADD_TX_NON_DP(&tbase->aux->stats, 1); break; diff --git a/VNFs/DPPD-PROX/prox_args.c b/VNFs/DPPD-PROX/prox_args.c index 
f4b84890..41082209 100644 --- a/VNFs/DPPD-PROX/prox_args.c +++ b/VNFs/DPPD-PROX/prox_args.c @@ -338,6 +338,9 @@ static int get_global_cfg(__attribute__((unused))unsigned sindex, char *str, voi if (STR_EQ(str, "enable bypass")) { return parse_flag(&pset->flags, DSF_ENABLE_BYPASS, pkey); } + if (STR_EQ(str, "poll timeout")) { + return parse_int(&pset->poll_timeout, pkey); + } if (STR_EQ(str, "heartbeat timeout")) { return parse_int(&pset->heartbeat_timeout, pkey); } diff --git a/VNFs/DPPD-PROX/prox_cfg.h b/VNFs/DPPD-PROX/prox_cfg.h index 9d5f25f7..8c4bd6ca 100644 --- a/VNFs/DPPD-PROX/prox_cfg.h +++ b/VNFs/DPPD-PROX/prox_cfg.h @@ -67,6 +67,7 @@ struct prox_cfg { uint32_t logbuf_pos; char *logbuf; uint32_t heartbeat_timeout; + uint32_t poll_timeout; uint64_t heartbeat_tsc; }; diff --git a/VNFs/DPPD-PROX/prox_compat.h b/VNFs/DPPD-PROX/prox_compat.h index 7cf0add9..e181cd8e 100644 --- a/VNFs/DPPD-PROX/prox_compat.h +++ b/VNFs/DPPD-PROX/prox_compat.h @@ -1,5 +1,5 @@ /* -// Copyright (c) 2010-2017 Intel Corporation +// Copyright (c) 2010-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -161,6 +161,8 @@ typedef struct vxlan_gpe_hdr prox_rte_vxlan_gpe_hdr; #define PROX_RTE_TCP_FIN_FLAG TCP_FIN_FLAG #define PROX_RTE_TCP_RST_FLAG TCP_RST_FLAG #define PROX_RTE_TCP_ACK_FLAG TCP_ACK_FLAG +#define PROX_RTE_IP_ICMP_ECHO_REPLY IP_ICMP_ECHO_REPLY +#define PROX_RTE_IP_ICMP_ECHO_REQUEST IP_ICMP_ECHO_REQUEST #define prox_rte_ether_addr_copy ether_addr_copy #define prox_rte_eth_random_addr eth_random_addr @@ -172,6 +174,7 @@ typedef struct ether_hdr prox_rte_ether_hdr; typedef struct vlan_hdr prox_rte_vlan_hdr; typedef struct udp_hdr prox_rte_udp_hdr; typedef struct tcp_hdr prox_rte_tcp_hdr; +typedef struct icmp_hdr prox_rte_icmp_hdr; #ifndef RTE_SCHED_BE_QUEUES_PER_PIPE #define RTE_SCHED_BE_QUEUES_PER_PIPE RTE_SCHED_QUEUES_PER_PIPE @@ -179,6 +182,7 @@ typedef struct tcp_hdr prox_rte_tcp_hdr; #define PROX_RTE_IS_IPV4_MCAST IS_IPV4_MCAST #define prox_rte_is_same_ether_addr is_same_ether_addr +#define prox_rte_is_zero_ether_addr is_zero_ether_addr #else #define PROX_RTE_ETHER_CRC_LEN RTE_ETHER_CRC_LEN @@ -189,6 +193,8 @@ typedef struct tcp_hdr prox_rte_tcp_hdr; #define PROX_RTE_TCP_FIN_FLAG RTE_TCP_FIN_FLAG #define PROX_RTE_TCP_RST_FLAG RTE_TCP_RST_FLAG #define PROX_RTE_TCP_ACK_FLAG RTE_TCP_ACK_FLAG +#define PROX_RTE_IP_ICMP_ECHO_REPLY RTE_IP_ICMP_ECHO_REPLY +#define PROX_RTE_IP_ICMP_ECHO_REQUEST RTE_IP_ICMP_ECHO_REQUEST #define prox_rte_ether_addr_copy rte_ether_addr_copy #define prox_rte_eth_random_addr rte_eth_random_addr @@ -201,9 +207,11 @@ typedef struct rte_vlan_hdr prox_rte_vlan_hdr; typedef struct rte_vxlan_gpe_hdr prox_rte_vxlan_gpe_hdr; typedef struct rte_udp_hdr prox_rte_udp_hdr; typedef struct rte_tcp_hdr prox_rte_tcp_hdr; +typedef struct rte_icmp_hdr prox_rte_icmp_hdr; #define PROX_RTE_IS_IPV4_MCAST RTE_IS_IPV4_MCAST #define prox_rte_is_same_ether_addr rte_is_same_ether_addr +#define prox_rte_is_zero_ether_addr rte_is_zero_ether_addr #endif diff --git a/VNFs/DPPD-PROX/rx_pkt.c b/VNFs/DPPD-PROX/rx_pkt.c index 075069c8..4832066a 100644 --- 
a/VNFs/DPPD-PROX/rx_pkt.c +++ b/VNFs/DPPD-PROX/rx_pkt.c @@ -1,5 +1,5 @@ /* -// Copyright (c) 2010-2017 Intel Corporation +// Copyright (c) 2010-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -125,16 +125,27 @@ static uint16_t rx_pkt_hw_param(struct task_base *tbase, struct rte_mbuf ***mbuf if (l3) { struct rte_mbuf **mbufs = *mbufs_ptr; int i; - struct ether_hdr_arp *hdr[MAX_PKT_BURST]; + struct ether_hdr_arp *hdr_arp[MAX_PKT_BURST]; + prox_rte_ether_hdr *hdr; for (i = 0; i < nb_rx; i++) { PREFETCH0(mbufs[i]); } for (i = 0; i < nb_rx; i++) { - hdr[i] = rte_pktmbuf_mtod(mbufs[i], struct ether_hdr_arp *); - PREFETCH0(hdr[i]); + hdr_arp[i] = rte_pktmbuf_mtod(mbufs[i], struct ether_hdr_arp *); + PREFETCH0(hdr_arp[i]); } for (i = 0; i < nb_rx; i++) { - if (unlikely(hdr[i]->ether_hdr.ether_type == ETYPE_ARP)) { + if (likely(hdr_arp[i]->ether_hdr.ether_type == ETYPE_IPv4)) { + hdr = (prox_rte_ether_hdr *)hdr_arp[i]; + prox_rte_ipv4_hdr *pip = (prox_rte_ipv4_hdr *)(hdr + 1); + if (pip->next_proto_id == IPPROTO_ICMP) { + dump_l3(tbase, mbufs[i]); + tx_ring(tbase, tbase->l3.ctrl_plane_ring, ICMP_TO_CTRL, mbufs[i]); + skip++; + } else if (unlikely(skip)) { + mbufs[i - skip] = mbufs[i]; + } + } else if (unlikely(hdr_arp[i]->ether_hdr.ether_type == ETYPE_ARP)) { dump_l3(tbase, mbufs[i]); tx_ring(tbase, tbase->l3.ctrl_plane_ring, ARP_TO_CTRL, mbufs[i]); skip++; @@ -181,16 +192,27 @@ static inline uint16_t rx_pkt_hw1_param(struct task_base *tbase, struct rte_mbuf if (l3) { struct rte_mbuf **mbufs = *mbufs_ptr; int i; - struct ether_hdr_arp *hdr[MAX_PKT_BURST]; + struct ether_hdr_arp *hdr_arp[MAX_PKT_BURST]; + prox_rte_ether_hdr *hdr; for (i = 0; i < nb_rx; i++) { PREFETCH0(mbufs[i]); } for (i = 0; i < nb_rx; i++) { - hdr[i] = rte_pktmbuf_mtod(mbufs[i], struct ether_hdr_arp *); - PREFETCH0(hdr[i]); + hdr_arp[i] = rte_pktmbuf_mtod(mbufs[i], struct ether_hdr_arp *); + 
PREFETCH0(hdr_arp[i]); } for (i = 0; i < nb_rx; i++) { - if (unlikely(hdr[i]->ether_hdr.ether_type == ETYPE_ARP)) { + if (likely(hdr_arp[i]->ether_hdr.ether_type == ETYPE_IPv4)) { + hdr = (prox_rte_ether_hdr *)hdr_arp[i]; + prox_rte_ipv4_hdr *pip = (prox_rte_ipv4_hdr *)(hdr + 1); + if (pip->next_proto_id == IPPROTO_ICMP) { + dump_l3(tbase, mbufs[i]); + tx_ring(tbase, tbase->l3.ctrl_plane_ring, ICMP_TO_CTRL, mbufs[i]); + skip++; + } else if (unlikely(skip)) { + mbufs[i - skip] = mbufs[i]; + } + } else if (unlikely(hdr_arp[i]->ether_hdr.ether_type == ETYPE_ARP)) { dump_l3(tbase, mbufs[i]); tx_ring(tbase, tbase->l3.ctrl_plane_ring, ARP_TO_CTRL, mbufs[i]); skip++; |