From 08fee9c5d2e1d1f3fe14d00683c2a4b7a17e7876 Mon Sep 17 00:00:00 2001 From: Xavier Simonart Date: Sat, 2 May 2020 21:51:24 +0200 Subject: Added initial support for BGP. Through this commit, BGP messages are forwarded to the tap device. Netlink messages are enabled to receive route updates. In addition, generating tasks can also specify a routing table which will be used when sending packets. The routes initialized by the routing table can be changed through the reception of BGP messages. Change-Id: I187ba9a921885cbc9b209aae5fb654309e3388b8 Signed-off-by: Xavier Simonart --- VNFs/DPPD-PROX/gen/gen_tap.cfg | 69 ++++++++++ VNFs/DPPD-PROX/gen/l3-ipv4.lua | 29 ++++ VNFs/DPPD-PROX/gen_tap.cfg | 64 --------- VNFs/DPPD-PROX/handle_master.c | 270 +++++++++++++++++++++++++------------ VNFs/DPPD-PROX/handle_master.h | 4 + VNFs/DPPD-PROX/packet_utils.c | 286 +++++++++++++++++++++++++++++++--------- VNFs/DPPD-PROX/packet_utils.h | 10 +- VNFs/DPPD-PROX/prox_args.c | 15 ++- VNFs/DPPD-PROX/prox_compat.h | 6 + VNFs/DPPD-PROX/prox_lua_types.c | 14 +- VNFs/DPPD-PROX/rx_pkt.c | 13 ++ VNFs/DPPD-PROX/task_init.h | 1 + VNFs/DPPD-PROX/tx_pkt.c | 70 ++++++++++ VNFs/DPPD-PROX/tx_pkt.h | 17 +++ 14 files changed, 651 insertions(+), 217 deletions(-) create mode 100644 VNFs/DPPD-PROX/gen/gen_tap.cfg create mode 100644 VNFs/DPPD-PROX/gen/l3-ipv4.lua delete mode 100644 VNFs/DPPD-PROX/gen_tap.cfg (limited to 'VNFs') diff --git a/VNFs/DPPD-PROX/gen/gen_tap.cfg b/VNFs/DPPD-PROX/gen/gen_tap.cfg new file mode 100644 index 00000000..60239681 --- /dev/null +++ b/VNFs/DPPD-PROX/gen/gen_tap.cfg @@ -0,0 +1,69 @@ +;; +;; Copyright (c) 2020 Intel Corporation +;; +;; Licensed under the Apache License, Version 2.0 (the "License"); +;; you may not use this file except in compliance with the License. 
+;; You may obtain a copy of the License at +;; +;; http://www.apache.org/licenses/LICENSE-2.0 +;; +;; Unless required by applicable law or agreed to in writing, software +;; distributed under the License is distributed on an "AS IS" BASIS, +;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +;; See the License for the specific language governing permissions and +;; limitations under the License. +;; + +[eal options] +-n=4 ; force number of memory channels +no-output=no ; disable DPDK debug output + +[lua] +lpm4 = dofile("l3-ipv4.lua") + +[port 0] +name=p0 +vdev=gen_tap +local ipv4=$ip1 + +[defaults] +mempool size=16K + +[global] +start time=5 +name=Basic Gen + +[variables] +$hex_ip1=c0 a8 7a 7e +$hex_ip2=c0 a8 7b 7f +$ip1=192.168.122.126 +$ip2=192.168.123.127 + +[core 0s0] +mode=master + +[core 1] +name=p0 +task=0 +mode=gen +sub mode=l3 +tx port=p0 +route table=lpm4 +bps=1250000000 +pkt inline=00 00 01 00 00 01 00 00 02 00 00 02 08 00 45 00 00 1c 00 01 00 00 40 11 f7 7d ${hex_ip1} ${hex_ip2} 13 88 13 88 00 08 55 7b +pkt size=60 +lat pos=42 +packet id pos=46 +min bulk size=8 +local ipv4=${ip1}/24 + +[core 2] +name=nop +task=0 +mode=lat +sub mode=l3 +rx port=p0 +drop=no +lat pos=42 +packet id pos=46 +local ipv4=${ip1}/24 diff --git a/VNFs/DPPD-PROX/gen/l3-ipv4.lua b/VNFs/DPPD-PROX/gen/l3-ipv4.lua new file mode 100644 index 00000000..1c988341 --- /dev/null +++ b/VNFs/DPPD-PROX/gen/l3-ipv4.lua @@ -0,0 +1,29 @@ +-- +-- Copyright (c) 2010-2017 Intel Corporation +-- +-- Licensed under the Apache License, Version 2.0 (the "License"); +-- you may not use this file except in compliance with the License. +-- You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +local lpm4 = {} +lpm4.next_hops = { + {id = 0, port_id = 0, ip = ip("192.168.122.240")}, + {id = 1, port_id = 0, ip = ip("192.168.122.246")}, + {id = 2, port_id = 0, ip = ip("192.168.122.247")} +} + +lpm4.routes = { + {cidr = {ip = ip("192.168.123.0"), depth = 24}, next_hop_id = 0}, + {cidr = {ip = ip("192.168.124.0"), depth = 24}, next_hop_id = 1}, + {cidr = {ip = ip("192.168.125.0"), depth = 24}, next_hop_id = 2}, +} +return lpm4 diff --git a/VNFs/DPPD-PROX/gen_tap.cfg b/VNFs/DPPD-PROX/gen_tap.cfg deleted file mode 100644 index fd74672e..00000000 --- a/VNFs/DPPD-PROX/gen_tap.cfg +++ /dev/null @@ -1,64 +0,0 @@ -;; -;; Copyright (c) 2020 Intel Corporation -;; -;; Licensed under the Apache License, Version 2.0 (the "License"); -;; you may not use this file except in compliance with the License. -;; You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. 
-;; - -[eal options] --n=4 ; force number of memory channels -no-output=no ; disable DPDK debug output - -[port 0] -name=p0 -vdev=gen_tap -local ipv4=$ip1 - -[defaults] -mempool size=16K - -[global] -start time=5 -name=Basic Gen - -[variables] -$hex_ip1=0a 0a 0a 01 -$hex_ip2=0a 0a 0a 02 -$ip1=10.10.10.1 -$ip2=10.10.10.2 - -[core 0s0] -mode=master - -[core 1s0] -name=p0 -task=0 -mode=gen -sub mode=l3 -tx port=p0 -bps=1250000000 -pkt inline=00 00 01 00 00 01 00 00 02 00 00 02 08 00 45 00 00 1c 00 01 00 00 40 11 f7 7d ${hex_ip1} ${hex_ip2} 13 88 13 88 00 08 55 7b -pkt size=60 -lat pos=42 -packet id pos=46 -min bulk size=8 - -[core 2s0] -name=nop -task=0 -mode=lat -sub mode=l3 -rx port=p0 -drop=no -lat pos=42 -packet id pos=46 -local ipv4=${ip1} diff --git a/VNFs/DPPD-PROX/handle_master.c b/VNFs/DPPD-PROX/handle_master.c index b6b123ce..263f0c8f 100644 --- a/VNFs/DPPD-PROX/handle_master.c +++ b/VNFs/DPPD-PROX/handle_master.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -51,11 +52,15 @@ static char netlink_buf[NETLINK_BUF_SIZE]; const char *actions_string[] = { "UPDATE_FROM_CTRL", // Controlplane sending a MAC update to dataplane + "ROUTE_ADD_FROM_CTRL", // Controlplane sending a new route to dataplane + "ROUTE_DEL_FROM_CTRL", // Controlplane deleting a new route from dataplane "SEND_ARP_REQUEST_FROM_CTRL", // Controlplane requesting dataplane to send ARP request "SEND_ARP_REPLY_FROM_CTRL", // Controlplane requesting dataplane to send ARP reply "SEND_ICMP_FROM_CTRL", // Controlplane requesting dataplane to send ICMP message + "SEND_BGP_FROM_CTRL", // Controlplane requesting dataplane to send BGP message "ARP_TO_CTRL", // ARP sent by datplane to Controlpane for handling "ICMP_TO_CTRL", // ICMP sent by datplane to Controlpane for handling + "BGP_TO_CTRL", // BGP sent by datplane to Controlpane for handling "REQ_MAC_TO_CTRL", // Dataplane requesting MAC resolution to Controlplane "PKT_FROM_TAP" // Packet received by Controlplane from kernel 
and forwarded to dataplane for sending }; @@ -110,6 +115,7 @@ struct task_master { struct vdev all_vdev[PROX_MAX_PORTS]; int max_vdev_id; struct pollfd arp_fds; + struct pollfd route_fds; }; struct ip_port { @@ -278,7 +284,6 @@ static inline int record_request(struct task_base *tbase, uint32_t ip_dst, uint8 int i; if (unlikely(ret < 0)) { - // entry not found for this IP: delete the reply plogx_dbg("Unable to add IP "IPv4_BYTES_FMT" in external_ip_hash\n", IP4(ip_dst)); return -1; } @@ -417,6 +422,16 @@ static inline void handle_message(struct task_base *tbase, struct rte_mbuf *mbuf plogx_dbg("\tMaster received %s (%x) from mbuf %p\n", actions_string[command], command, mbuf); switch(command) { + case BGP_TO_CTRL: + if (vdev_port != NO_VDEV_PORT) { + // If a virtual (net_tap) device is attached, send the (BGP) packet to this device + // The kernel will receive and handle it. + plogx_dbg("\tMaster forwarding BGP packet to TAP\n"); + int n = rte_eth_tx_burst(prox_port_cfg[port].dpdk_mapping, 0, &mbuf, 1); + return; + } + tx_drop(mbuf); + break; case ICMP_TO_CTRL: if (vdev_port != NO_VDEV_PORT) { // If a virtual (net_tap) device is attached, send the (PING) packet to this device @@ -545,6 +560,20 @@ void init_ctrl_plane(struct task_base *tbase) task->arp_fds.fd = fd; task->arp_fds.events = POLL_IN; plog_info("\tRTMGRP_NEIGH netlink group bound; fd = %d\n", fd); + + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + PROX_PANIC(fd < 0, "Failed to open netlink socket: %d\n", errno); + fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_NONBLOCK); + struct sockaddr_nl sockaddr2; + memset(&sockaddr2, 0, sizeof(struct sockaddr_nl)); + sockaddr2.nl_family = AF_NETLINK; + sockaddr2.nl_groups = RTMGRP_IPV6_ROUTE | RTMGRP_IPV4_ROUTE | RTMGRP_NOTIFY; + rc = bind(fd, (struct sockaddr *)&sockaddr2, sizeof(struct sockaddr_nl)); + PROX_PANIC(rc < 0, "Failed to bind to RTMGRP_NEIGH netlink group\n"); + task->route_fds.fd = fd; + task->route_fds.events = POLL_IN; + 
plog_info("\tRTMGRP_IPV4_ROUTE netlink group bound; fd = %d\n", fd); + static char name[] = "master_arp_pool"; const int NB_ARP_MBUF = 1024; const int ARP_MBUF_SIZE = 2048; @@ -559,6 +588,161 @@ void init_ctrl_plane(struct task_base *tbase) tbase->l3.arp_pool = ret; } +static void handle_route_event(struct task_base *tbase) +{ + struct task_master *task = (struct task_master *)tbase; + struct rte_mbuf *mbufs[MAX_RING_BURST]; + int fd = task->route_fds.fd, interface_index, mask = -1; + char interface_name[IF_NAMESIZE] = {0}; + int len = recv(fd, netlink_buf, sizeof(netlink_buf), 0); + uint32_t ip = 0, gw_ip = 0; + if (len < 0) { + plog_err("Failed to recv from netlink: %d\n", errno); + return; + } + struct nlmsghdr * nl_hdr = (struct nlmsghdr *)netlink_buf; + if (nl_hdr->nlmsg_flags & NLM_F_MULTI) { + plog_err("Unexpected multipart netlink message\n"); + return; + } + if ((nl_hdr->nlmsg_type != RTM_NEWROUTE) && (nl_hdr->nlmsg_type != RTM_DELROUTE)) + return; + + struct rtmsg *rtmsg = (struct rtmsg *)NLMSG_DATA(nl_hdr); + int rtm_family = rtmsg->rtm_family; + if ((rtm_family == AF_INET) && (rtmsg->rtm_table != RT_TABLE_MAIN) &&(rtmsg->rtm_table != RT_TABLE_LOCAL)) + return; + int dst_len = rtmsg->rtm_dst_len; + + struct rtattr *rta = (struct rtattr *)RTM_RTA(rtmsg); + int rtl = RTM_PAYLOAD(nl_hdr); + for (; RTA_OK(rta, rtl); rta = RTA_NEXT(rta, rtl)) { + switch (rta->rta_type) { + case RTA_DST: + ip = *((uint32_t *)RTA_DATA(rta)); + break; + case RTA_OIF: + interface_index = *((int *)RTA_DATA(rta)); + if (if_indextoname(interface_index, interface_name) == NULL) { + plog_info("Unknown Interface Index %d\n", interface_index); + } + break; + case RTA_METRICS: + mask = *((int *)RTA_DATA(rta)); + break; + case RTA_GATEWAY: + gw_ip = *((uint32_t *)RTA_DATA(rta)); + break; + default: + break; + } + } + int dpdk_vdev_port = -1; + for (int i = 0; i< rte_eth_dev_count(); i++) { + if (strcmp(prox_port_cfg[i].name, interface_name) == 0) + dpdk_vdev_port = i; + } + if 
(dpdk_vdev_port != -1) { + plogx_info("Received netlink message on tap interface %s for IP "IPv4_BYTES_FMT"/%d, Gateway "IPv4_BYTES_FMT"\n", interface_name, IP4(ip), dst_len, IP4(gw_ip)); + int ret1 = rte_mempool_get(tbase->l3.arp_pool, (void **)mbufs); + if (unlikely(ret1 != 0)) { + plog_err("Unable to allocate a mbuf for master to core communication\n"); + return; + } + int dpdk_port = prox_port_cfg[dpdk_vdev_port].dpdk_mapping; + tx_ring_route(tbase, task->internal_port_table[dpdk_port].ring, (nl_hdr->nlmsg_type == RTM_NEWROUTE), mbufs[0], ip, gw_ip, dst_len); + } else + plog_info("Received netlink message on unknown interface %s for IP "IPv4_BYTES_FMT"/%d, Gateway "IPv4_BYTES_FMT"\n", interface_name[0] ? interface_name:"", IP4(ip), dst_len, IP4(gw_ip)); + return; +} + +static void handle_arp_event(struct task_base *tbase) +{ + struct task_master *task = (struct task_master *)tbase; + struct rte_mbuf *mbufs[MAX_RING_BURST]; + struct nlmsghdr * nl_hdr; + int fd = task->arp_fds.fd; + int len, ret; + uint32_t ip = 0; + prox_rte_ether_addr mac; + memset(&mac, 0, sizeof(mac)); + len = recv(fd, netlink_buf, sizeof(netlink_buf), 0); + if (len < 0) { + plog_err("Failed to recv from netlink: %d\n", errno); + return; + } + nl_hdr = (struct nlmsghdr *)netlink_buf; + if (nl_hdr->nlmsg_flags & NLM_F_MULTI) { + plog_err("Unexpected multipart netlink message\n"); + return; + } + if ((nl_hdr->nlmsg_type != RTM_NEWNEIGH) && (nl_hdr->nlmsg_type != RTM_DELNEIGH)) + return; + + struct ndmsg *ndmsg = (struct ndmsg *)NLMSG_DATA(nl_hdr); + int ndm_family = ndmsg->ndm_family; + struct rtattr *rta = (struct rtattr *)RTM_RTA(ndmsg); + int rtl = RTM_PAYLOAD(nl_hdr); + for (; RTA_OK(rta, rtl); rta = RTA_NEXT(rta, rtl)) { + switch (rta->rta_type) { + case NDA_DST: + ip = *((uint32_t *)RTA_DATA(rta)); + break; + case NDA_LLADDR: + mac = *((prox_rte_ether_addr *)(uint64_t *)RTA_DATA(rta)); + break; + default: + break; + } + } + plogx_info("Received netlink ip "IPv4_BYTES_FMT" with mac 
"MAC_BYTES_FMT"\n", IP4(ip), MAC_BYTES(mac.addr_bytes)); + ret = rte_hash_lookup(task->external_ip_hash, (const void *)&ip); + if (unlikely(ret < 0)) { + // entry not found for this IP: we did not ask a request. + // This can happen if the kernel updated the ARP table when receiving an ARP_REQUEST + // We must record this, as the ARP entry is now in the kernel table + if (prox_rte_is_zero_ether_addr(&mac)) { + // Timeout or MAC deleted from kernel MAC table + int ret = rte_hash_del_key(task->external_ip_hash, (const void *)&ip); + plogx_dbg("ip "IPv4_BYTES_FMT" removed from external_ip_hash\n", IP4(ip)); + return; + } + int ret = rte_hash_add_key(task->external_ip_hash, (const void *)&ip); + if (unlikely(ret < 0)) { + plogx_dbg("IP "IPv4_BYTES_FMT" not found in external_ip_hash and unable to add it\n", IP4(ip)); + return; + } + memcpy(&task->external_ip_table[ret].mac, &mac, sizeof(prox_rte_ether_addr)); + plogx_dbg("ip "IPv4_BYTES_FMT" added in external_ip_hash with mac "MAC_BYTES_FMT"\n", IP4(ip), MAC_BYTES(mac.addr_bytes)); + return; + } + + // entry found for this IP + uint16_t nb_requests = task->external_ip_table[ret].nb_requests; + if (nb_requests == 0) { + return; + } + + memcpy(&task->external_ip_table[ret].mac, &mac, sizeof(prox_rte_ether_addr)); + + // If we receive a request from multiple task for the same IP, then we update all tasks + int ret1 = rte_mempool_get(tbase->l3.arp_pool, (void **)mbufs); + if (unlikely(ret1 != 0)) { + plog_err("Unable to allocate a mbuf for master to core communication\n"); + return; + } + rte_mbuf_refcnt_set(mbufs[0], nb_requests); + for (int i = 0; i < nb_requests; i++) { + struct rte_ring *ring = task->external_ip_table[ret].rings[i]; + struct ether_hdr_arp *hdr = rte_pktmbuf_mtod(mbufs[0], struct ether_hdr_arp *); + memcpy(&hdr->arp.data.sha, &mac, sizeof(prox_rte_ether_addr)); + tx_ring_ip(tbase, ring, UPDATE_FROM_CTRL, mbufs[0], ip); + plog_dbg("UPDATE_FROM_CTRL ip "IPv4_BYTES_FMT" with mac "MAC_BYTES_FMT"\n", IP4(ip), 
MAC_BYTES(mac.addr_bytes)); + } + task->external_ip_table[ret].nb_requests = 0; + return; +} + static int handle_ctrl_plane_f(struct task_base *tbase, __attribute__((unused)) struct rte_mbuf **mbuf, uint16_t n_pkts) { int ring_id = 0, j, ret = 0, n = 0; @@ -583,86 +767,10 @@ static int handle_ctrl_plane_f(struct task_base *tbase, __attribute__((unused)) ret +=n; } if ((task->max_vdev_id) && (poll(&task->arp_fds, 1, prox_cfg.poll_timeout) == POLL_IN)) { - struct nlmsghdr * nl_hdr; - int fd = task->arp_fds.fd; - int len; - uint32_t ip = 0; - prox_rte_ether_addr mac; - memset(&mac, 0, sizeof(mac)); - len = recv(fd, netlink_buf, sizeof(netlink_buf), 0); - if (len < 0) { - plog_err("Failed to recv from netlink: %d\n", errno); - return ret; - } - nl_hdr = (struct nlmsghdr *)netlink_buf; - if (nl_hdr->nlmsg_flags & NLM_F_MULTI) { - plog_err("Unexpected multipart netlink message\n"); - return ret; - } - if ((nl_hdr->nlmsg_type != RTM_NEWNEIGH) && (nl_hdr->nlmsg_type != RTM_DELNEIGH)) - return ret; - - struct ndmsg *ndmsg = (struct ndmsg *)NLMSG_DATA(nl_hdr); - int ndm_family = ndmsg->ndm_family; - struct rtattr *rta = (struct rtattr *)RTM_RTA(ndmsg); - int rtl = RTM_PAYLOAD(nl_hdr); - for (; RTA_OK(rta, rtl); rta = RTA_NEXT(rta, rtl)) { - switch (rta->rta_type) { - case NDA_DST: - ip = *((uint32_t *)RTA_DATA(rta)); - break; - case NDA_LLADDR: - mac = *((prox_rte_ether_addr *)(uint64_t *)RTA_DATA(rta)); - break; - default: - break; - } - } - int idx = rte_hash_lookup(task->external_ip_hash, (const void *)&ip); - if (unlikely(idx < 0)) { - // entry not found for this IP: we did not ask a request. 
- // This can happen if the kernel updated the ARP table when receiving an ARP_REQUEST - // We must record this, as the ARP entry is now in the kernel table - if (prox_rte_is_zero_ether_addr(&mac)) { - // Timeout or MAC deleted from kernel MAC table - idx = rte_hash_del_key(task->external_ip_hash, (const void *)&ip); - plogx_dbg("ip "IPv4_BYTES_FMT" removed from external_ip_hash\n", IP4(ip)); - return ret; - } - idx = rte_hash_add_key(task->external_ip_hash, (const void *)&ip); - if (unlikely(idx < 0)) { - // entry not found for this IP: Ignore the reply. This can happen for instance for - // an IP used by management plane. - plogx_dbg("IP "IPv4_BYTES_FMT" not found in external_ip_hash and unable to add it\n", IP4(ip)); - return ret; - } - memcpy(&task->external_ip_table[idx].mac, &mac, sizeof(prox_rte_ether_addr)); - plogx_dbg("ip "IPv4_BYTES_FMT" added in external_ip_hash with mac "MAC_BYTES_FMT"\n", IP4(ip), MAC_BYTES(mac.addr_bytes)); - return ret; - } - - // entry found for this IP - uint16_t nb_requests = task->external_ip_table[idx].nb_requests; - if (nb_requests == 0) { - return ret; - } - - memcpy(&task->external_ip_table[idx].mac, &mac, sizeof(prox_rte_ether_addr)); - - // If we receive a request from multiple task for the same IP, then we update all tasks - if (unlikely(rte_mempool_get(tbase->l3.arp_pool, (void **)mbufs) != 0)) { - plog_err("Unable to allocate a mbuf for master to core communication\n"); - return ret; - } - rte_mbuf_refcnt_set(mbufs[0], nb_requests); - for (int i = 0; i < nb_requests; i++) { - struct rte_ring *ring = task->external_ip_table[idx].rings[i]; - struct ether_hdr_arp *hdr = rte_pktmbuf_mtod(mbufs[0], struct ether_hdr_arp *); - memcpy(&hdr->arp.data.sha, &mac, sizeof(prox_rte_ether_addr)); - tx_ring_ip(tbase, ring, UPDATE_FROM_CTRL, mbufs[0], ip); - plog_dbg("UPDATE_FROM_CTRL ip "IPv4_BYTES_FMT" with mac "MAC_BYTES_FMT"\n", IP4(ip), MAC_BYTES(mac.addr_bytes)); - } - task->external_ip_table[idx].nb_requests = 0; + 
handle_arp_event(tbase); + } + if (poll(&task->route_fds, 1, prox_cfg.poll_timeout) == POLL_IN) { + handle_route_event(tbase); } return ret; } diff --git a/VNFs/DPPD-PROX/handle_master.h b/VNFs/DPPD-PROX/handle_master.h index 6ce51854..79154458 100644 --- a/VNFs/DPPD-PROX/handle_master.h +++ b/VNFs/DPPD-PROX/handle_master.h @@ -19,11 +19,15 @@ enum arp_actions { UPDATE_FROM_CTRL, + ROUTE_ADD_FROM_CTRL, + ROUTE_DEL_FROM_CTRL, ARP_REQ_FROM_CTRL, ARP_REPLY_FROM_CTRL, ICMP_FROM_CTRL, + BGP_FROM_CTRL, ARP_TO_CTRL, ICMP_TO_CTRL, + BGP_TO_CTRL, REQ_MAC_TO_CTRL, PKT_FROM_TAP, MAX_ACTIONS diff --git a/VNFs/DPPD-PROX/packet_utils.c b/VNFs/DPPD-PROX/packet_utils.c index e06529c4..04746130 100644 --- a/VNFs/DPPD-PROX/packet_utils.c +++ b/VNFs/DPPD-PROX/packet_utils.c @@ -17,6 +17,8 @@ #include #include #include +#include + #include "task_base.h" #include "lconf.h" #include "prefetch.h" @@ -25,6 +27,11 @@ #include "handle_master.h" #include "prox_port_cfg.h" #include "packet_utils.h" +#include "prox_shared.h" +#include "prox_lua.h" +#include "hash_entry_types.h" +#include "prox_compat.h" +#include "tx_pkt.h" static inline int find_ip(struct ether_hdr_arp *pkt, uint16_t len, uint32_t *ip_dst) { @@ -74,17 +81,92 @@ static inline int find_ip(struct ether_hdr_arp *pkt, uint16_t len, uint32_t *ip_ We should check arp_update_time in the master process. This would also require the generating task to clear its arp ring to avoid sending many ARP while starting after a long stop. We could also check for arp_timeout in the master so that dataplane has only to check whether MAC is available - but this would require either thread safety, or the the exchange of information between master and generating core. + but this would require either thread safety, or the exchange of information between master and generating core. 
*/ +static inline int add_key_and_send_arp(struct rte_hash *ip_hash, uint32_t *ip_dst, struct arp_table *entries, uint64_t tsc, uint64_t hz, uint32_t arp_update_time, prox_next_hop_index_type nh, uint64_t **time) +{ + int ret = rte_hash_add_key(ip_hash, (const void *)ip_dst); + if (unlikely(ret < 0)) { + // No reason to send ARP, as reply would be anyhow ignored + plogx_err("Unable to add ip "IPv4_BYTES_FMT" in mac_hash\n", IP4(*ip_dst)); + return DROP_MBUF; + } else { + entries[ret].ip = *ip_dst; + entries[ret].nh = nh; + *time = &entries[ret].arp_update_time; + } + return SEND_ARP; +} + +static inline int update_mac_and_send_mbuf(struct arp_table *entry, prox_rte_ether_addr *mac, uint64_t tsc, uint64_t hz, uint32_t arp_update_time, uint64_t **time) +{ + if (likely((tsc < entry->arp_update_time) && (tsc < entry->arp_timeout))) { + memcpy(mac, &entry->mac, sizeof(prox_rte_ether_addr)); + return SEND_MBUF; + } else if (tsc > entry->arp_update_time) { + // long time since we have sent an arp, send arp + *time = &entry->arp_update_time; + if (tsc < entry->arp_timeout){ + // MAC is valid in the table => send also the mbuf + memcpy(mac, &entry->mac, sizeof(prox_rte_ether_addr)); + return SEND_MBUF_AND_ARP; + } else { + // MAC still unknown, or timed out => only send ARP + return SEND_ARP; + } + } + // MAC is unknown and we already sent an ARP recently, drop mbuf and wait for ARP reply + return DROP_MBUF; +} + int write_dst_mac(struct task_base *tbase, struct rte_mbuf *mbuf, uint32_t *ip_dst, uint64_t **time) { const uint64_t hz = rte_get_tsc_hz(); struct ether_hdr_arp *packet = rte_pktmbuf_mtod(mbuf, struct ether_hdr_arp *); prox_rte_ether_addr *mac = &packet->ether_hdr.d_addr; + prox_next_hop_index_type next_hop_index; uint64_t tsc = rte_rdtsc(); struct l3_base *l3 = &(tbase->l3); + + // First find the next hop + if (l3->ipv4_lpm) { + // A routing table was configured + // If a gw (gateway_ipv4) is also specified, it is used as default gw only i.e. 
lowest priority (shortest prefix) + // This is implemented automatically through lpm + uint16_t len = rte_pktmbuf_pkt_len(mbuf); + if (find_ip(packet, len, ip_dst) != 0) { + // Unable to find IP address => non IP packet => send it as is + return SEND_MBUF; + } + if (unlikely(rte_lpm_lookup(l3->ipv4_lpm, rte_bswap32(*ip_dst), &next_hop_index) != 0)) { + plog_err("No route to IP "IPv4_BYTES_FMT"\n", IP4(*ip_dst)); + return DROP_MBUF; + } + struct arp_table *entry = &l3->next_hops[next_hop_index]; + + if (entry->ip) { + *ip_dst = entry->ip; + } else { + // no next ip: this is a local route + next_hop_index = MAX_HOP_INDEX; + } + // Find IP in lookup table. Send ARP if not found + int ret = rte_hash_lookup(l3->ip_hash, (const void *)ip_dst); + if (unlikely(ret < 0)) { + // IP not found, try to send an ARP + return add_key_and_send_arp(l3->ip_hash, ip_dst, l3->arp_table, tsc, hz, l3->arp_update_time, next_hop_index, time); + } else { + if (entry->ip) + return update_mac_and_send_mbuf(entry, mac, tsc, hz, l3->arp_update_time, time); + else + return update_mac_and_send_mbuf(&l3->arp_table[ret], mac, tsc, hz, l3->arp_update_time, time); + } + return 0; + } + // No Routing table specified: only a local ip and maybe a gateway + // Old default behavior: if a gw is specified, ALL packets go to this gateway (even those we could send w/o the gw) if (l3->gw.ip) { if (likely((l3->flags & FLAG_DST_MAC_KNOWN) && (tsc < l3->gw.arp_update_time) && (tsc < l3->gw.arp_timeout))) { memcpy(mac, &l3->gw.mac, sizeof(prox_rte_ether_addr)); @@ -117,25 +199,7 @@ int write_dst_mac(struct task_base *tbase, struct rte_mbuf *mbuf, uint32_t *ip_d for (unsigned int idx = 0; idx < l3->n_pkts; idx++) { if (*ip_dst == l3->optimized_arp_table[idx].ip) { // IP address already in table - if ((tsc < l3->optimized_arp_table[idx].arp_update_time) && (tsc < l3->optimized_arp_table[idx].arp_timeout)) { - // MAC address was recently updated in table, use it - memcpy(mac, &l3->optimized_arp_table[idx].mac, 
sizeof(prox_rte_ether_addr)); - return SEND_MBUF; - } else if (tsc > l3->optimized_arp_table[idx].arp_update_time) { - // ARP not sent since a long time, send ARP - *time = &l3->optimized_arp_table[idx].arp_update_time; - if (tsc < l3->optimized_arp_table[idx].arp_timeout) { - // MAC still valid => also send mbuf - memcpy(mac, &l3->optimized_arp_table[idx].mac, sizeof(prox_rte_ether_addr)); - return SEND_MBUF_AND_ARP; - } else { - // MAC unvalid => only send ARP - return SEND_ARP; - } - } else { - // ARP timeout elapsed, MAC not valid anymore but waiting for ARP reply - return DROP_MBUF; - } + return update_mac_and_send_mbuf(&l3->optimized_arp_table[idx], mac, tsc, hz, l3->arp_update_time, time); } } // IP address not found in table @@ -156,7 +220,7 @@ int write_dst_mac(struct task_base *tbase, struct rte_mbuf *mbuf, uint32_t *ip_d // If it happens, we still send the ARP as easier: // If the ARP corresponds to this error, the ARP reply will be ignored // If ARP does not correspond to this error/ip, then ARP reply will be handled. 
- plogx_err("Unable add ip %d.%d.%d.%d in mac_hash (already %d entries)\n", IP4(ip), idx); + plogx_err("Unable add ip "IPv4_BYTES_FMT" in mac_hash (already %d entries)\n", IP4(ip), idx); } else { memcpy(&l3->arp_table[ret], &l3->optimized_arp_table[idx], sizeof(struct arp_table)); } @@ -167,35 +231,10 @@ int write_dst_mac(struct task_base *tbase, struct rte_mbuf *mbuf, uint32_t *ip_d int ret = rte_hash_lookup(l3->ip_hash, (const void *)ip_dst); if (unlikely(ret < 0)) { // IP not found, try to send an ARP - int ret = rte_hash_add_key(l3->ip_hash, (const void *)ip_dst); - if (ret < 0) { - // No reason to send ARP, as reply would be anyhow ignored - plogx_err("Unable to add ip %d.%d.%d.%d in mac_hash\n", IP4(*ip_dst)); - return DROP_MBUF; - } else { - l3->arp_table[ret].ip = *ip_dst; - *time = &l3->arp_table[ret].arp_update_time; - } - return SEND_ARP; + return add_key_and_send_arp(l3->ip_hash, ip_dst, &l3->arp_table[ret], tsc, hz, l3->arp_update_time, MAX_HOP_INDEX, time); } else { // IP has been found - if (likely((tsc < l3->arp_table[ret].arp_update_time) && (tsc < l3->arp_table[ret].arp_timeout))) { - // MAC still valid and ARP sent recently - memcpy(mac, &l3->arp_table[ret].mac, sizeof(prox_rte_ether_addr)); - return SEND_MBUF; - } else if (tsc > l3->arp_table[ret].arp_update_time) { - // ARP not sent since a long time, send ARP - *time = &l3->arp_table[ret].arp_update_time; - if (tsc < l3->arp_table[ret].arp_timeout) { - // MAC still valid => send also MBUF - memcpy(mac, &l3->arp_table[ret].mac, sizeof(prox_rte_ether_addr)); - return SEND_MBUF_AND_ARP; - } else { - return SEND_ARP; - } - } else { - return DROP_MBUF; - } + return update_mac_and_send_mbuf(&l3->arp_table[ret], mac, tsc, hz, l3->arp_update_time, time); } } // Should not happen @@ -248,6 +287,7 @@ void task_start_l3(struct task_base *tbase, struct task_args *targ) const int NB_ARP_MBUF = 1024; const int ARP_MBUF_SIZE = 2048; const int NB_CACHE_ARP_MBUF = 256; + const int socket_id = 
rte_lcore_to_socket_id(targ->lconf->id); struct prox_port_cfg *port = find_reachable_port(targ); if (port && (tbase->l3.arp_pool == NULL)) { @@ -257,6 +297,45 @@ void task_start_l3(struct task_base *tbase, struct task_args *targ) tbase->local_ipv4 = rte_be_to_cpu_32(targ->local_ipv4); register_ip_to_ctrl_plane(tbase->l3.tmaster, tbase->local_ipv4, tbase->l3.reachable_port_id, targ->lconf->id, targ->id); } + if (strcmp(targ->route_table, "") != 0) { + struct lpm4 *lpm; + int ret; + + PROX_PANIC(tbase->local_ipv4 == 0, "missing local_ipv4 will route table is specified in L3 mode\n"); + + // LPM might be modified runtime => do not share with other cores + ret = lua_to_lpm4(prox_lua(), GLOBAL, targ->route_table, socket_id, &lpm); + PROX_PANIC(ret, "Failed to load IPv4 LPM:\n%s\n", get_lua_to_errors()); + + tbase->l3.ipv4_lpm = lpm->rte_lpm; + tbase->l3.next_hops = prox_zmalloc(sizeof(*tbase->l3.next_hops) * MAX_HOP_INDEX, socket_id); + PROX_PANIC(tbase->l3.next_hops == NULL, "Could not allocate memory for next hop\n"); + + for (uint32_t i = 0; i < MAX_HOP_INDEX; i++) { + if (!lpm->next_hops[i].ip_dst) + continue; + tbase->l3.nb_gws++; + tbase->l3.next_hops[i].ip = rte_bswap32(lpm->next_hops[i].ip_dst); + int tx_port = lpm->next_hops[i].mac_port.out_idx; + // gen only supports one port right now .... 
hence port = 0 + if ((tx_port > targ->nb_txports - 1) && (tx_port > targ->nb_txrings - 1)) { + PROX_PANIC(1, "Routing Table contains port %d but only %d tx port/ %d ring:\n", tx_port, targ->nb_txports, targ->nb_txrings); + } + } + plog_info("Using routing table %s in l3 mode, with %d gateways\n", targ->route_table, tbase->l3.nb_gws); + + // Last but one "next_hop_index" is not a gateway but direct routes + tbase->l3.next_hops[tbase->l3.nb_gws].ip = 0; + ret = rte_lpm_add(tbase->l3.ipv4_lpm, targ->local_ipv4, targ->local_prefix, tbase->l3.nb_gws++); + PROX_PANIC(ret, "Failed to add local_ipv4 "IPv4_BYTES_FMT"/%d to lpm\n", IP4(tbase->local_ipv4), targ->local_prefix); + // Last "next_hop_index" is default gw + tbase->l3.next_hops[tbase->l3.nb_gws].ip = rte_bswap32(targ->gateway_ipv4); + if (targ->gateway_ipv4) { + ret = rte_lpm_add(tbase->l3.ipv4_lpm, targ->gateway_ipv4, 0, tbase->l3.nb_gws++); + PROX_PANIC(ret, "Failed to add gateway_ipv4 "IPv4_BYTES_FMT"/%d to lpm\n", IP4(tbase->l3.gw.ip), 0); + } + } + master_init_vdev(tbase->l3.tmaster, tbase->l3.reachable_port_id, targ->lconf->id, targ->id); name[3]++; struct rte_mempool *ret = rte_mempool_create(name, NB_ARP_MBUF, ARP_MBUF_SIZE, NB_CACHE_ARP_MBUF, @@ -284,8 +363,13 @@ void task_set_local_ip(struct task_base *tbase, uint32_t ip) static void reset_arp_update_time(struct l3_base *l3, uint32_t ip) { uint32_t idx; - plogx_info("\tMAC entry for IP "IPv4_BYTES_FMT" timeout in kernel\n", IP4(ip)); - if (ip == l3->gw.ip) { + plogx_dbg("MAC entry for IP "IPv4_BYTES_FMT" timeout in kernel\n", IP4(ip)); + + if (l3->ipv4_lpm) { + int ret = rte_hash_lookup(l3->ip_hash, (const void *)&ip); + if (ret >= 0) + l3->arp_table[ret].arp_update_time = 0; + } else if (ip == l3->gw.ip) { l3->gw.arp_update_time = 0; } else if (l3->n_pkts < 4) { for (idx = 0; idx < l3->n_pkts; idx++) { @@ -304,17 +388,34 @@ static void reset_arp_update_time(struct l3_base *l3, uint32_t ip) return; } +static prox_next_hop_index_type get_nh_index(struct 
task_base *tbase, uint32_t gw_ip) +{ + // Check if gateway already exists + for (prox_next_hop_index_type i = 0; i < tbase->l3.nb_gws; i++) { + if (tbase->l3.next_hops[i].ip == gw_ip) { + return i; + } + } + if (tbase->l3.nb_gws < MAX_HOP_INDEX) { + tbase->l3.next_hops[tbase->l3.nb_gws].ip = gw_ip; + tbase->l3.nb_gws++; + return tbase->l3.nb_gws - 1; + } else + return MAX_HOP_INDEX; +} void handle_ctrl_plane_pkts(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts) { uint8_t out[1]; const uint64_t hz = rte_get_tsc_hz(); - uint32_t ip, ip_dst, idx; - int j; + uint32_t ip, ip_dst, idx, gateway_ip, prefix; + prox_next_hop_index_type gateway_index; + int j, ret, modified_route; uint16_t command; struct ether_hdr_arp *hdr; struct l3_base *l3 = &tbase->l3; uint64_t tsc= rte_rdtsc(); - uint64_t update_time = l3->arp_timeout * hz / 1000; + uint64_t arp_timeout = l3->arp_timeout * hz / 1000; + uint32_t nh; for (j = 0; j < n_pkts; ++j) { PREFETCH0(mbufs[j]); @@ -328,6 +429,38 @@ void handle_ctrl_plane_pkts(struct task_base *tbase, struct rte_mbuf **mbufs, ui command = mbufs[j]->udata64 & 0xFFFF; plogx_dbg("\tReceived %s mbuf %p\n", actions_string[command], mbufs[j]); switch(command) { + case ROUTE_ADD_FROM_CTRL: + ip = ctrl_ring_get_ip(mbufs[j]); + gateway_ip = ctrl_ring_get_gateway_ip(mbufs[j]); + prefix = ctrl_ring_get_prefix(mbufs[j]); + gateway_index = get_nh_index(tbase, gateway_ip); + if (gateway_index >= MAX_HOP_INDEX) { + plog_err("Unable to find or define gateway index - too many\n"); + return; + } + modified_route = rte_lpm_is_rule_present(tbase->l3.ipv4_lpm, rte_bswap32(ip), prefix, &nh); + ret = rte_lpm_add(tbase->l3.ipv4_lpm, rte_bswap32(ip), prefix, gateway_index); + if (ret < 0) { + plog_err("Failed to add route to "IPv4_BYTES_FMT"/%d using "IPv4_BYTES_FMT"(index = %d)\n", IP4(ip), prefix, IP4(gateway_ip), gateway_index); + } else if (modified_route) + plogx_dbg("Modified route to "IPv4_BYTES_FMT"/%d using "IPv4_BYTES_FMT"(index = %d) (was using 
"IPv4_BYTES_FMT"(index = %d)\n", IP4(ip), prefix, IP4(gateway_ip), gateway_index, IP4(tbase->l3.next_hops[nh].ip), nh); + else { + plogx_dbg("Added new route to "IPv4_BYTES_FMT"/%d using "IPv4_BYTES_FMT"(index = %d)\n", IP4(ip), prefix, IP4(gateway_ip), gateway_index); + } + break; + case ROUTE_DEL_FROM_CTRL: + ip = ctrl_ring_get_ip(mbufs[j]); + prefix = ctrl_ring_get_prefix(mbufs[j]); + + ret = rte_lpm_is_rule_present(tbase->l3.ipv4_lpm, rte_bswap32(ip), prefix, &nh); + if (ret > 0) { + ret = rte_lpm_delete(tbase->l3.ipv4_lpm, rte_bswap32(ip), prefix); + if (ret < 0) { + plog_err("Failed to add rule\n"); + } + plog_info("Deleting route to "IPv4_BYTES_FMT"/%d\n", IP4(ip), prefix); + } + break; case UPDATE_FROM_CTRL: hdr = rte_pktmbuf_mtod(mbufs[j], struct ether_hdr_arp *); ip = (mbufs[j]->udata64 >> 32) & 0xFFFFFFFF; @@ -337,16 +470,33 @@ void handle_ctrl_plane_pkts(struct task_base *tbase, struct rte_mbuf **mbufs, ui // This will cause us to send new ARP request // However, as arp_timeout not touched, we should continue sending our regular IP packets reset_arp_update_time(l3, ip); - plogx_info("\tTimeout for MAC entry for IP "IPv4_BYTES_FMT"\n", IP4(ip)); return; } else plogx_dbg("\tUpdating MAC entry for IP "IPv4_BYTES_FMT" with MAC "MAC_BYTES_FMT"\n", IP4(ip), MAC_BYTES(hdr->arp.data.sha.addr_bytes)); - if (ip == l3->gw.ip) { + + if (l3->ipv4_lpm) { + uint32_t nh; + struct arp_table *entry; + ret = rte_hash_add_key(l3->ip_hash, (const void *)&ip); + if (ret < 0) { + plogx_info("Unable add ip "IPv4_BYTES_FMT" in mac_hash\n", IP4(ip)); + } else if ((nh = l3->arp_table[ret].nh) != MAX_HOP_INDEX) { + entry = &l3->next_hops[nh]; + memcpy(&entry->mac, &(hdr->arp.data.sha), sizeof(prox_rte_ether_addr)); + entry->arp_timeout = tsc + arp_timeout; + update_arp_update_time(l3, &entry->arp_update_time, l3->arp_update_time); + } else { + memcpy(&l3->arp_table[ret].mac, &(hdr->arp.data.sha), sizeof(prox_rte_ether_addr)); + l3->arp_table[ret].arp_timeout = tsc + arp_timeout; + 
update_arp_update_time(l3, &l3->arp_table[ret].arp_update_time, l3->arp_update_time); + } + } + else if (ip == l3->gw.ip) { // MAC address of the gateway memcpy(&l3->gw.mac, &hdr->arp.data.sha, 6); l3->flags |= FLAG_DST_MAC_KNOWN; - l3->gw.arp_timeout = tsc + update_time; + l3->gw.arp_timeout = tsc + arp_timeout; update_arp_update_time(l3, &l3->gw.arp_update_time, l3->arp_update_time); } else if (l3->n_pkts < 4) { // Few packets tracked - should be faster to loop through them thean using a hash table @@ -357,28 +507,40 @@ void handle_ctrl_plane_pkts(struct task_base *tbase, struct rte_mbuf **mbufs, ui } if (idx < l3->n_pkts) { memcpy(&l3->optimized_arp_table[idx].mac, &(hdr->arp.data.sha), sizeof(prox_rte_ether_addr)); - l3->optimized_arp_table[idx].arp_timeout = tsc + update_time; + l3->optimized_arp_table[idx].arp_timeout = tsc + arp_timeout; update_arp_update_time(l3, &l3->optimized_arp_table[idx].arp_update_time, l3->arp_update_time); } } else { - int ret = rte_hash_add_key(l3->ip_hash, (const void *)&ip); + ret = rte_hash_add_key(l3->ip_hash, (const void *)&ip); if (ret < 0) { - plogx_info("Unable add ip %d.%d.%d.%d in mac_hash\n", IP4(ip)); + plogx_info("Unable add ip "IPv4_BYTES_FMT" in mac_hash\n", IP4(ip)); } else { memcpy(&l3->arp_table[ret].mac, &(hdr->arp.data.sha), sizeof(prox_rte_ether_addr)); - l3->arp_table[ret].arp_timeout = tsc + update_time; + l3->arp_table[ret].arp_timeout = tsc + arp_timeout; update_arp_update_time(l3, &l3->arp_table[ret].arp_update_time, l3->arp_update_time); } } tx_drop(mbufs[j]); break; case ARP_REPLY_FROM_CTRL: - case ICMP_FROM_CTRL: case ARP_REQ_FROM_CTRL: + out[0] = 0; + // tx_ctrlplane_pkt does not drop packets + plogx_dbg("\tForwarding (ARP) packet from master\n"); + tbase->aux->tx_ctrlplane_pkt(tbase, &mbufs[j], 1, out); + TASK_STATS_ADD_TX_NON_DP(&tbase->aux->stats, 1); + break; + case ICMP_FROM_CTRL: + out[0] = 0; + // tx_ctrlplane_pkt does not drop packets + plogx_dbg("\tForwarding (PING) packet from master\n"); + 
tbase->aux->tx_ctrlplane_pkt(tbase, &mbufs[j], 1, out); + TASK_STATS_ADD_TX_NON_DP(&tbase->aux->stats, 1); + break; case PKT_FROM_TAP: out[0] = 0; // tx_ctrlplane_pkt does not drop packets - plogx_dbg("\tForwarding (ARP/PING) packet from master\n"); + plogx_dbg("\tForwarding TAP packet from master\n"); tbase->aux->tx_ctrlplane_pkt(tbase, &mbufs[j], 1, out); TASK_STATS_ADD_TX_NON_DP(&tbase->aux->stats, 1); break; diff --git a/VNFs/DPPD-PROX/packet_utils.h b/VNFs/DPPD-PROX/packet_utils.h index a111b944..021528de 100644 --- a/VNFs/DPPD-PROX/packet_utils.h +++ b/VNFs/DPPD-PROX/packet_utils.h @@ -27,7 +27,7 @@ #define FLAG_DST_MAC_KNOWN 1 #define MAX_ARP_ENTRIES 65536 -#define IP4(x) x & 0xff, (x >> 8) & 0xff, (x >> 16) & 0xff, x >> 24 +#define IP4(x) x & 0xff, (x >> 8) & 0xff, (x >> 16) & 0xff, x >> 24 // From Network (BE) enum { SEND_MBUF_AND_ARP, SEND_MBUF, @@ -43,6 +43,7 @@ struct arp_table { uint64_t arp_update_time; uint64_t arp_timeout; uint32_t ip; + uint32_t nh; prox_rte_ether_addr mac; }; struct l3_base { @@ -55,12 +56,15 @@ struct l3_base { uint8_t task_id; uint32_t arp_timeout; uint32_t arp_update_time; + uint seed; + prox_next_hop_index_type nb_gws; struct arp_table gw; struct arp_table optimized_arp_table[4]; struct rte_hash *ip_hash; struct arp_table *arp_table; struct rte_mempool *arp_pool; - uint seed; + struct rte_lpm *ipv4_lpm; + struct arp_table *next_hops; }; void task_init_l3(struct task_base *tbase, struct task_args *targ); @@ -69,6 +73,7 @@ int write_dst_mac(struct task_base *tbase, struct rte_mbuf *mbuf, uint32_t *ip_d void task_set_gateway_ip(struct task_base *tbase, uint32_t ip); void task_set_local_ip(struct task_base *tbase, uint32_t ip); void handle_ctrl_plane_pkts(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts); + static inline void update_arp_update_time(struct l3_base *l3, uint64_t *ptr, uint32_t base) { // randomize timers - from 0.5 to 1.5 * configured time @@ -77,5 +82,4 @@ static inline void 
update_arp_update_time(struct l3_base *l3, uint64_t *ptr, uin uint64_t rand = 500 + (1000L * rand_r(&l3->seed)) / RAND_MAX; *ptr = tsc + (base * rand / 1000) * hz / 1000; } - #endif /* _PACKET_UTILS_H_ */ diff --git a/VNFs/DPPD-PROX/prox_args.c b/VNFs/DPPD-PROX/prox_args.c index 41082209..30b4cbd7 100644 --- a/VNFs/DPPD-PROX/prox_args.c +++ b/VNFs/DPPD-PROX/prox_args.c @@ -1389,10 +1389,23 @@ static int get_core_cfg(unsigned sindex, char *str, void *data) if (STR_EQ(str, "gateway ipv4")) { /* Gateway IP address used when generating */ if ((targ->flags & TASK_ARG_L3) == 0) plog_warn("gateway ipv4 configured but L3 sub mode not enabled\n"); + if (targ->local_ipv4) + targ->local_prefix = 32; return parse_ip(&targ->gateway_ipv4, pkey); } if (STR_EQ(str, "local ipv4")) { /* source IP address to be used for packets */ - return parse_ip(&targ->local_ipv4, pkey); + struct ip4_subnet cidr; + if (parse_ip4_cidr(&cidr, pkey) != 0) { + if (targ->gateway_ipv4) + targ->local_prefix = 32; + else + targ->local_prefix = 0; + return parse_ip(&targ->local_ipv4, pkey); + } else { + targ->local_ipv4 = cidr.ip; + targ->local_prefix = cidr.prefix; + return 0; + } } if (STR_EQ(str, "remote ipv4")) { /* source IP address to be used for packets */ return parse_ip(&targ->remote_ipv4, pkey); diff --git a/VNFs/DPPD-PROX/prox_compat.h b/VNFs/DPPD-PROX/prox_compat.h index e181cd8e..bd059a6c 100644 --- a/VNFs/DPPD-PROX/prox_compat.h +++ b/VNFs/DPPD-PROX/prox_compat.h @@ -37,6 +37,12 @@ struct prox_rte_table_params { uint64_t seed; }; +#if RTE_VERSION < RTE_VERSION_NUM(16,4,0,1) +typedef uint8_t prox_next_hop_index_type; +#else +typedef uint32_t prox_next_hop_index_type; +#endif + #if RTE_VERSION < RTE_VERSION_NUM(17,11,0,0) static void *prox_rte_table_create(struct prox_rte_table_params *params, int socket_id, uint32_t entry_size) diff --git a/VNFs/DPPD-PROX/prox_lua_types.c b/VNFs/DPPD-PROX/prox_lua_types.c index 3ef3d472..bc1671d4 100644 --- a/VNFs/DPPD-PROX/prox_lua_types.c +++ 
b/VNFs/DPPD-PROX/prox_lua_types.c @@ -437,11 +437,11 @@ int lua_to_next_hop(struct lua_State *L, enum lua_place from, const char *name, while (lua_next(L, -2)) { if (lua_to_int(L, TABLE, "id", &next_hop_index) || lua_to_int(L, TABLE, "port_id", &port_id) || - lua_to_ip(L, TABLE, "ip", &ip) || - lua_to_mac(L, TABLE, "mac", &mac) || - lua_to_int(L, TABLE, "mpls", &mpls)) + lua_to_ip(L, TABLE, "ip", &ip)) return -1; + lua_to_mac(L, TABLE, "mac", &mac); + lua_to_int(L, TABLE, "mpls", &mpls); PROX_PANIC(port_id >= PROX_MAX_PORTS, "Port id too high (only supporting %d ports)\n", PROX_MAX_PORTS); PROX_PANIC(next_hop_index >= MAX_HOP_INDEX, "Next-hop to high (only supporting %d next hops)\n", MAX_HOP_INDEX); @@ -504,6 +504,7 @@ int lua_to_next_hop6(struct lua_State *L, enum lua_place from, const char *name, return 0; } +#define MAX_NEW_RULES 128 int lua_to_routes4(struct lua_State *L, enum lua_place from, const char *name, uint8_t socket, struct lpm4 *lpm) { struct ip4_subnet dst; @@ -514,11 +515,12 @@ int lua_to_routes4(struct lua_State *L, enum lua_place from, const char *name, u char lpm_name[64]; int ret; int pop; + static int count = 1; if ((pop = lua_getfrom(L, from, name)) < 0) return -1; - snprintf(lpm_name, sizeof(lpm_name), "IPv4_lpm_s%u", socket); + snprintf(lpm_name, sizeof(lpm_name), "IPv4_lpm_s%u_%d", socket, count++); if (!lua_istable(L, -1)) { set_err("Data is not a table\n"); @@ -531,12 +533,12 @@ int lua_to_routes4(struct lua_State *L, enum lua_place from, const char *name, u lua_pop(L, 1); #if RTE_VERSION >= RTE_VERSION_NUM(16,4,0,1) struct rte_lpm_config conf; - conf.max_rules = 2 * n_tot_rules; + conf.max_rules = 2 * n_tot_rules + MAX_NEW_RULES; conf.number_tbl8s = 256; conf.flags = 0; new_lpm = rte_lpm_create(lpm_name, socket, &conf); #else - new_lpm = rte_lpm_create(lpm_name, socket, 2 * n_tot_rules, 0); + new_lpm = rte_lpm_create(lpm_name, socket, 2 * n_tot_rules + MAX_NEW_RULES, 0); #endif PROX_PANIC(NULL == new_lpm, "Failed to allocate lpm\n"); 
diff --git a/VNFs/DPPD-PROX/rx_pkt.c b/VNFs/DPPD-PROX/rx_pkt.c index 4832066a..6a6112b5 100644 --- a/VNFs/DPPD-PROX/rx_pkt.c +++ b/VNFs/DPPD-PROX/rx_pkt.c @@ -30,6 +30,8 @@ #include "handle_master.h" #include "input.h" /* Needed for callback on dump */ +#define TCP_PORT_BGP rte_cpu_to_be_16(179) /* BGP port 179, converted to big-endian so it can be compared directly with header fields */ + /* _param version of the rx_pkt_hw functions are used to create two instances of very similar variations of these functions. The variations are specified by the "multi" parameter which significies @@ -138,10 +140,15 @@ static uint16_t rx_pkt_hw_param(struct task_base *tbase, struct rte_mbuf ***mbuf if (likely(hdr_arp[i]->ether_hdr.ether_type == ETYPE_IPv4)) { hdr = (prox_rte_ether_hdr *)hdr_arp[i]; prox_rte_ipv4_hdr *pip = (prox_rte_ipv4_hdr *)(hdr + 1); + prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)(pip + 1); /* L4 header located right after one fixed-size IPv4 header (assumes no IPv4 options) */ if (pip->next_proto_id == IPPROTO_ICMP) { dump_l3(tbase, mbufs[i]); tx_ring(tbase, tbase->l3.ctrl_plane_ring, ICMP_TO_CTRL, mbufs[i]); skip++; + } else if ((pip->next_proto_id == IPPROTO_TCP) && ((tcp->src_port == TCP_PORT_BGP) || (tcp->dst_port == TCP_PORT_BGP))) { /* only interpret the ports as TCP ports when the payload really is TCP, so non-TCP traffic is never diverted as BGP */ + dump_l3(tbase, mbufs[i]); + tx_ring(tbase, tbase->l3.ctrl_plane_ring, BGP_TO_CTRL, mbufs[i]); + skip++; } else if (unlikely(skip)) { mbufs[i - skip] = mbufs[i]; } @@ -202,13 +209,19 @@ static inline uint16_t rx_pkt_hw1_param(struct task_base *tbase, struct rte_mbuf PREFETCH0(hdr_arp[i]); } for (i = 0; i < nb_rx; i++) { + // plog_info("ether_type = %x\n", hdr_arp[i]->ether_hdr.ether_type); if (likely(hdr_arp[i]->ether_hdr.ether_type == ETYPE_IPv4)) { hdr = (prox_rte_ether_hdr *)hdr_arp[i]; prox_rte_ipv4_hdr *pip = (prox_rte_ipv4_hdr *)(hdr + 1); + prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)(pip + 1); /* L4 header located right after one fixed-size IPv4 header (assumes no IPv4 options) */ if (pip->next_proto_id == IPPROTO_ICMP) { dump_l3(tbase, mbufs[i]); tx_ring(tbase, tbase->l3.ctrl_plane_ring, ICMP_TO_CTRL, mbufs[i]); skip++; + } else if ((pip->next_proto_id == IPPROTO_TCP) && ((tcp->src_port == TCP_PORT_BGP) || (tcp->dst_port == TCP_PORT_BGP))) { /* only interpret the ports as TCP ports when the payload really is TCP, so non-TCP traffic is never diverted as BGP */ + dump_l3(tbase, mbufs[i]); + tx_ring(tbase, tbase->l3.ctrl_plane_ring, BGP_TO_CTRL, mbufs[i]); + skip++; } else if
(unlikely(skip)) { mbufs[i - skip] = mbufs[i]; } diff --git a/VNFs/DPPD-PROX/task_init.h b/VNFs/DPPD-PROX/task_init.h index 4108f54d..98c0a8dc 100644 --- a/VNFs/DPPD-PROX/task_init.h +++ b/VNFs/DPPD-PROX/task_init.h @@ -129,6 +129,7 @@ struct task_args { uint32_t gateway_ipv4; uint32_t local_ipv4; uint32_t remote_ipv4; + uint32_t local_prefix; uint32_t arp_timeout; uint32_t arp_update_time; struct ipv6_addr local_ipv6; /* For IPv6 Tunnel, it's the local tunnel endpoint address */ diff --git a/VNFs/DPPD-PROX/tx_pkt.c b/VNFs/DPPD-PROX/tx_pkt.c index 8bf501f6..2a4f53b2 100644 --- a/VNFs/DPPD-PROX/tx_pkt.c +++ b/VNFs/DPPD-PROX/tx_pkt.c @@ -845,3 +845,73 @@ void tx_ring(struct task_base *tbase, struct rte_ring *ring, uint16_t command, rte_pktmbuf_free(mbuf); } } + +void tx_ring_route(struct task_base *tbase, struct rte_ring *ring, int add, struct rte_mbuf *mbuf, uint32_t ip, uint32_t gateway_ip, uint32_t prefix) +{ + uint8_t command; + if (add) + command = ROUTE_ADD_FROM_CTRL; + else + command = ROUTE_DEL_FROM_CTRL; + + plogx_dbg("\tSending command %s to ring %p using mbuf %p - ring size now %d\n", actions_string[command], ring, mbuf, rte_ring_free_count(ring)); + ctrl_ring_set_command(mbuf, command); + ctrl_ring_set_ip(mbuf, ip); + ctrl_ring_set_gateway_ip(mbuf, gateway_ip); + ctrl_ring_set_prefix(mbuf, prefix); + if (tbase->aux->task_rt_dump.cur_trace) { + trace_one_rx_pkt(tbase, mbuf); + } + int ret = rte_ring_enqueue(ring, mbuf); + if (unlikely(ret != 0)) { + plogx_dbg("\tFail to send command %s to ring %p using mbuf %p - ring size now %d\n", actions_string[command], ring, mbuf, rte_ring_free_count(ring)); + TASK_STATS_ADD_DROP_DISCARD(&tbase->aux->stats, 1); + rte_pktmbuf_free(mbuf); /* enqueue failed, so the mbuf was not handed over: count the drop and release it here */ + } +} + +void ctrl_ring_set_command(struct rte_mbuf *mbuf, uint64_t udata64) +{ + mbuf->udata64 = udata64; +} + +uint64_t ctrl_ring_get_command(struct rte_mbuf *mbuf) +{ + return mbuf->udata64; +} + +void ctrl_ring_set_ip(struct rte_mbuf *mbuf, uint32_t udata32) +{ + struct
prox_headroom *prox_headroom = (struct prox_headroom *)(rte_pktmbuf_mtod(mbuf, uint8_t*) - sizeof(struct prox_headroom)); + prox_headroom->ip = udata32; +} + +uint32_t ctrl_ring_get_ip(struct rte_mbuf *mbuf) +{ + struct prox_headroom *prox_headroom = (struct prox_headroom *)(rte_pktmbuf_mtod(mbuf, uint8_t*) - sizeof(struct prox_headroom)); + return prox_headroom->ip; +} + +void ctrl_ring_set_gateway_ip(struct rte_mbuf *mbuf, uint32_t udata32) +{ + struct prox_headroom *prox_headroom = (struct prox_headroom *)(rte_pktmbuf_mtod(mbuf, uint8_t*) - sizeof(struct prox_headroom)); + prox_headroom->gateway_ip = udata32; +} + +uint32_t ctrl_ring_get_gateway_ip(struct rte_mbuf *mbuf) +{ + struct prox_headroom *prox_headroom = (struct prox_headroom *)(rte_pktmbuf_mtod(mbuf, uint8_t*) - sizeof(struct prox_headroom)); + return prox_headroom->gateway_ip; +} + +void ctrl_ring_set_prefix(struct rte_mbuf *mbuf, uint32_t udata32) +{ + struct prox_headroom *prox_headroom = (struct prox_headroom *)(rte_pktmbuf_mtod(mbuf, uint8_t*) - sizeof(struct prox_headroom)); + prox_headroom->prefix = udata32; +} + +uint32_t ctrl_ring_get_prefix(struct rte_mbuf *mbuf) +{ + struct prox_headroom *prox_headroom = (struct prox_headroom *)(rte_pktmbuf_mtod(mbuf, uint8_t*) - sizeof(struct prox_headroom)); + return prox_headroom->prefix; +} diff --git a/VNFs/DPPD-PROX/tx_pkt.h b/VNFs/DPPD-PROX/tx_pkt.h index 708a9837..f7443cf4 100644 --- a/VNFs/DPPD-PROX/tx_pkt.h +++ b/VNFs/DPPD-PROX/tx_pkt.h @@ -22,6 +22,13 @@ struct task_base; struct rte_mbuf; +struct prox_headroom { /* route metadata; the ctrl_ring_{set,get}_{ip,gateway_ip,prefix} accessors in tx_pkt.c address it at (packet data start - sizeof(struct prox_headroom)) */ + uint64_t command; + uint32_t ip; + uint32_t prefix; + uint32_t gateway_ip; +} __attribute__((packed)); + void flush_queues_hw(struct task_base *tbase); void flush_queues_sw(struct task_base *tbase); @@ -86,4 +93,14 @@ int tx_ring_cti(struct task_base *tbase, struct rte_ring *ring, uint16_t command void tx_ring_ip(struct task_base *tbase, struct rte_ring *ring, uint16_t command, struct rte_mbuf *mbuf, uint32_t ip); void tx_ring(struct
task_base *tbase, struct rte_ring *ring, uint16_t command, struct rte_mbuf *mbuf); +void ctrl_ring_set_command(struct rte_mbuf *mbuf, uint64_t udata64); +uint64_t ctrl_ring_get_command(struct rte_mbuf *mbuf); +void ctrl_ring_set_ip(struct rte_mbuf *mbuf, uint32_t udata32); +uint32_t ctrl_ring_get_ip(struct rte_mbuf *mbuf); +void ctrl_ring_set_gateway_ip(struct rte_mbuf *mbuf, uint32_t udata32); +uint32_t ctrl_ring_get_gateway_ip(struct rte_mbuf *mbuf); +void ctrl_ring_set_prefix(struct rte_mbuf *mbuf, uint32_t udata32); +uint32_t ctrl_ring_get_prefix(struct rte_mbuf *mbuf); +void tx_ring_route(struct task_base *tbase, struct rte_ring *ring, int add, struct rte_mbuf *mbuf, uint32_t ip, uint32_t gateway_ip, uint32_t prefix); + #endif /* _TX_PKT_H_ */ -- cgit 1.2.3-korg