summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Xavier Simonart <xavier.simonart@intel.com> 2020-05-02 21:51:24 +0200
committer Xavier Simonart <xavier.simonart@intel.com> 2020-05-29 23:31:54 +0200
commit 08fee9c5d2e1d1f3fe14d00683c2a4b7a17e7876 (patch)
tree 5b1f2a9aaab23b654c9504f83eaa1cb8ac2a0b01
parent 354bfce1e946ec796516b3ae7f3fc677960867ef (diff)
Added initial support for BGP
Through this commit, BGP messages are forwarded to the tap device.
Netlink messages are enabled to receive route updates.
In addition, generating tasks can also specify a routing table, which will be used when sending packets.
The routes initialized by the routing table can be changed through the reception of BGP messages.

Change-Id: I187ba9a921885cbc9b209aae5fb654309e3388b8
Signed-off-by: Xavier Simonart <xavier.simonart@intel.com>
-rw-r--r-- VNFs/DPPD-PROX/gen/gen_tap.cfg (renamed from VNFs/DPPD-PROX/gen_tap.cfg) 19
-rw-r--r-- VNFs/DPPD-PROX/gen/l3-ipv4.lua 29
-rw-r--r-- VNFs/DPPD-PROX/handle_master.c 270
-rw-r--r-- VNFs/DPPD-PROX/handle_master.h 4
-rw-r--r-- VNFs/DPPD-PROX/packet_utils.c 286
-rw-r--r-- VNFs/DPPD-PROX/packet_utils.h 10
-rw-r--r-- VNFs/DPPD-PROX/prox_args.c 15
-rw-r--r-- VNFs/DPPD-PROX/prox_compat.h 6
-rw-r--r-- VNFs/DPPD-PROX/prox_lua_types.c 14
-rw-r--r-- VNFs/DPPD-PROX/rx_pkt.c 13
-rw-r--r-- VNFs/DPPD-PROX/task_init.h 1
-rw-r--r-- VNFs/DPPD-PROX/tx_pkt.c 70
-rw-r--r-- VNFs/DPPD-PROX/tx_pkt.h 17
13 files changed, 594 insertions, 160 deletions
diff --git a/VNFs/DPPD-PROX/gen_tap.cfg b/VNFs/DPPD-PROX/gen/gen_tap.cfg
index fd74672e..60239681 100644
--- a/VNFs/DPPD-PROX/gen_tap.cfg
+++ b/VNFs/DPPD-PROX/gen/gen_tap.cfg
@@ -18,6 +18,9 @@
-n=4 ; force number of memory channels
no-output=no ; disable DPDK debug output
+[lua]
+lpm4 = dofile("l3-ipv4.lua")
+
[port 0]
name=p0
vdev=gen_tap
@@ -31,28 +34,30 @@ start time=5
name=Basic Gen
[variables]
-$hex_ip1=0a 0a 0a 01
-$hex_ip2=0a 0a 0a 02
-$ip1=10.10.10.1
-$ip2=10.10.10.2
+$hex_ip1=c0 a8 7a 7e
+$hex_ip2=c0 a8 7b 7f
+$ip1=192.168.122.126
+$ip2=192.168.123.127
[core 0s0]
mode=master
-[core 1s0]
+[core 1]
name=p0
task=0
mode=gen
sub mode=l3
tx port=p0
+route table=lpm4
bps=1250000000
pkt inline=00 00 01 00 00 01 00 00 02 00 00 02 08 00 45 00 00 1c 00 01 00 00 40 11 f7 7d ${hex_ip1} ${hex_ip2} 13 88 13 88 00 08 55 7b
pkt size=60
lat pos=42
packet id pos=46
min bulk size=8
+local ipv4=${ip1}/24
-[core 2s0]
+[core 2]
name=nop
task=0
mode=lat
@@ -61,4 +66,4 @@ rx port=p0
drop=no
lat pos=42
packet id pos=46
-local ipv4=${ip1}
+local ipv4=${ip1}/24
diff --git a/VNFs/DPPD-PROX/gen/l3-ipv4.lua b/VNFs/DPPD-PROX/gen/l3-ipv4.lua
new file mode 100644
index 00000000..1c988341
--- /dev/null
+++ b/VNFs/DPPD-PROX/gen/l3-ipv4.lua
@@ -0,0 +1,29 @@
+--
+-- Copyright (c) 2010-2017 Intel Corporation
+--
+-- Licensed under the Apache License, Version 2.0 (the "License");
+-- you may not use this file except in compliance with the License.
+-- You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+
+local lpm4 = {}
+lpm4.next_hops = {
+ {id = 0, port_id = 0, ip = ip("192.168.122.240")},
+ {id = 1, port_id = 0, ip = ip("192.168.122.246")},
+ {id = 2, port_id = 0, ip = ip("192.168.122.247")}
+}
+
+lpm4.routes = {
+ {cidr = {ip = ip("192.168.123.0"), depth = 24}, next_hop_id = 0},
+ {cidr = {ip = ip("192.168.124.0"), depth = 24}, next_hop_id = 1},
+ {cidr = {ip = ip("192.168.125.0"), depth = 24}, next_hop_id = 2},
+}
+return lpm4
diff --git a/VNFs/DPPD-PROX/handle_master.c b/VNFs/DPPD-PROX/handle_master.c
index b6b123ce..263f0c8f 100644
--- a/VNFs/DPPD-PROX/handle_master.c
+++ b/VNFs/DPPD-PROX/handle_master.c
@@ -20,6 +20,7 @@
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <poll.h>
+#include <net/if.h>
#include <rte_hash.h>
#include <rte_hash_crc.h>
@@ -51,11 +52,15 @@ static char netlink_buf[NETLINK_BUF_SIZE];
const char *actions_string[] = {
"UPDATE_FROM_CTRL", // Controlplane sending a MAC update to dataplane
+ "ROUTE_ADD_FROM_CTRL", // Controlplane sending a new route to dataplane
+ "ROUTE_DEL_FROM_CTRL", // Controlplane deleting a new route from dataplane
"SEND_ARP_REQUEST_FROM_CTRL", // Controlplane requesting dataplane to send ARP request
"SEND_ARP_REPLY_FROM_CTRL", // Controlplane requesting dataplane to send ARP reply
"SEND_ICMP_FROM_CTRL", // Controlplane requesting dataplane to send ICMP message
+ "SEND_BGP_FROM_CTRL", // Controlplane requesting dataplane to send BGP message
"ARP_TO_CTRL", // ARP sent by datplane to Controlpane for handling
"ICMP_TO_CTRL", // ICMP sent by datplane to Controlpane for handling
+ "BGP_TO_CTRL", // BGP sent by datplane to Controlpane for handling
"REQ_MAC_TO_CTRL", // Dataplane requesting MAC resolution to Controlplane
"PKT_FROM_TAP" // Packet received by Controlplane from kernel and forwarded to dataplane for sending
};
@@ -110,6 +115,7 @@ struct task_master {
struct vdev all_vdev[PROX_MAX_PORTS];
int max_vdev_id;
struct pollfd arp_fds;
+ struct pollfd route_fds;
};
struct ip_port {
@@ -278,7 +284,6 @@ static inline int record_request(struct task_base *tbase, uint32_t ip_dst, uint8
int i;
if (unlikely(ret < 0)) {
- // entry not found for this IP: delete the reply
plogx_dbg("Unable to add IP "IPv4_BYTES_FMT" in external_ip_hash\n", IP4(ip_dst));
return -1;
}
@@ -417,6 +422,16 @@ static inline void handle_message(struct task_base *tbase, struct rte_mbuf *mbuf
plogx_dbg("\tMaster received %s (%x) from mbuf %p\n", actions_string[command], command, mbuf);
switch(command) {
+ case BGP_TO_CTRL:
+ if (vdev_port != NO_VDEV_PORT) {
+ // If a virtual (net_tap) device is attached, send the (BGP) packet to this device
+ // The kernel will receive and handle it.
+ plogx_dbg("\tMaster forwarding BGP packet to TAP\n");
+ int n = rte_eth_tx_burst(prox_port_cfg[port].dpdk_mapping, 0, &mbuf, 1);
+ return;
+ }
+ tx_drop(mbuf);
+ break;
case ICMP_TO_CTRL:
if (vdev_port != NO_VDEV_PORT) {
// If a virtual (net_tap) device is attached, send the (PING) packet to this device
@@ -545,6 +560,20 @@ void init_ctrl_plane(struct task_base *tbase)
task->arp_fds.fd = fd;
task->arp_fds.events = POLL_IN;
plog_info("\tRTMGRP_NEIGH netlink group bound; fd = %d\n", fd);
+
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+ PROX_PANIC(fd < 0, "Failed to open netlink socket: %d\n", errno);
+ fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_NONBLOCK);
+ struct sockaddr_nl sockaddr2;
+ memset(&sockaddr2, 0, sizeof(struct sockaddr_nl));
+ sockaddr2.nl_family = AF_NETLINK;
+ sockaddr2.nl_groups = RTMGRP_IPV6_ROUTE | RTMGRP_IPV4_ROUTE | RTMGRP_NOTIFY;
+ rc = bind(fd, (struct sockaddr *)&sockaddr2, sizeof(struct sockaddr_nl));
+ PROX_PANIC(rc < 0, "Failed to bind to RTMGRP_NEIGH netlink group\n");
+ task->route_fds.fd = fd;
+ task->route_fds.events = POLL_IN;
+ plog_info("\tRTMGRP_IPV4_ROUTE netlink group bound; fd = %d\n", fd);
+
static char name[] = "master_arp_pool";
const int NB_ARP_MBUF = 1024;
const int ARP_MBUF_SIZE = 2048;
@@ -559,6 +588,161 @@ void init_ctrl_plane(struct task_base *tbase)
tbase->l3.arp_pool = ret;
}
+static void handle_route_event(struct task_base *tbase)
+{
+ struct task_master *task = (struct task_master *)tbase;
+ struct rte_mbuf *mbufs[MAX_RING_BURST];
+ int fd = task->route_fds.fd, interface_index, mask = -1;
+ char interface_name[IF_NAMESIZE] = {0};
+ int len = recv(fd, netlink_buf, sizeof(netlink_buf), 0);
+ uint32_t ip = 0, gw_ip = 0;
+ if (len < 0) {
+ plog_err("Failed to recv from netlink: %d\n", errno);
+ return;
+ }
+ struct nlmsghdr * nl_hdr = (struct nlmsghdr *)netlink_buf;
+ if (nl_hdr->nlmsg_flags & NLM_F_MULTI) {
+ plog_err("Unexpected multipart netlink message\n");
+ return;
+ }
+ if ((nl_hdr->nlmsg_type != RTM_NEWROUTE) && (nl_hdr->nlmsg_type != RTM_DELROUTE))
+ return;
+
+ struct rtmsg *rtmsg = (struct rtmsg *)NLMSG_DATA(nl_hdr);
+ int rtm_family = rtmsg->rtm_family;
+ if ((rtm_family == AF_INET) && (rtmsg->rtm_table != RT_TABLE_MAIN) &&(rtmsg->rtm_table != RT_TABLE_LOCAL))
+ return;
+ int dst_len = rtmsg->rtm_dst_len;
+
+ struct rtattr *rta = (struct rtattr *)RTM_RTA(rtmsg);
+ int rtl = RTM_PAYLOAD(nl_hdr);
+ for (; RTA_OK(rta, rtl); rta = RTA_NEXT(rta, rtl)) {
+ switch (rta->rta_type) {
+ case RTA_DST:
+ ip = *((uint32_t *)RTA_DATA(rta));
+ break;
+ case RTA_OIF:
+ interface_index = *((int *)RTA_DATA(rta));
+ if (if_indextoname(interface_index, interface_name) == NULL) {
+ plog_info("Unknown Interface Index %d\n", interface_index);
+ }
+ break;
+ case RTA_METRICS:
+ mask = *((int *)RTA_DATA(rta));
+ break;
+ case RTA_GATEWAY:
+ gw_ip = *((uint32_t *)RTA_DATA(rta));
+ break;
+ default:
+ break;
+ }
+ }
+ int dpdk_vdev_port = -1;
+ for (int i = 0; i< rte_eth_dev_count(); i++) {
+ if (strcmp(prox_port_cfg[i].name, interface_name) == 0)
+ dpdk_vdev_port = i;
+ }
+ if (dpdk_vdev_port != -1) {
+ plogx_info("Received netlink message on tap interface %s for IP "IPv4_BYTES_FMT"/%d, Gateway "IPv4_BYTES_FMT"\n", interface_name, IP4(ip), dst_len, IP4(gw_ip));
+ int ret1 = rte_mempool_get(tbase->l3.arp_pool, (void **)mbufs);
+ if (unlikely(ret1 != 0)) {
+ plog_err("Unable to allocate a mbuf for master to core communication\n");
+ return;
+ }
+ int dpdk_port = prox_port_cfg[dpdk_vdev_port].dpdk_mapping;
+ tx_ring_route(tbase, task->internal_port_table[dpdk_port].ring, (nl_hdr->nlmsg_type == RTM_NEWROUTE), mbufs[0], ip, gw_ip, dst_len);
+ } else
+ plog_info("Received netlink message on unknown interface %s for IP "IPv4_BYTES_FMT"/%d, Gateway "IPv4_BYTES_FMT"\n", interface_name[0] ? interface_name:"", IP4(ip), dst_len, IP4(gw_ip));
+ return;
+}
+
+static void handle_arp_event(struct task_base *tbase)
+{
+ struct task_master *task = (struct task_master *)tbase;
+ struct rte_mbuf *mbufs[MAX_RING_BURST];
+ struct nlmsghdr * nl_hdr;
+ int fd = task->arp_fds.fd;
+ int len, ret;
+ uint32_t ip = 0;
+ prox_rte_ether_addr mac;
+ memset(&mac, 0, sizeof(mac));
+ len = recv(fd, netlink_buf, sizeof(netlink_buf), 0);
+ if (len < 0) {
+ plog_err("Failed to recv from netlink: %d\n", errno);
+ return;
+ }
+ nl_hdr = (struct nlmsghdr *)netlink_buf;
+ if (nl_hdr->nlmsg_flags & NLM_F_MULTI) {
+ plog_err("Unexpected multipart netlink message\n");
+ return;
+ }
+ if ((nl_hdr->nlmsg_type != RTM_NEWNEIGH) && (nl_hdr->nlmsg_type != RTM_DELNEIGH))
+ return;
+
+ struct ndmsg *ndmsg = (struct ndmsg *)NLMSG_DATA(nl_hdr);
+ int ndm_family = ndmsg->ndm_family;
+ struct rtattr *rta = (struct rtattr *)RTM_RTA(ndmsg);
+ int rtl = RTM_PAYLOAD(nl_hdr);
+ for (; RTA_OK(rta, rtl); rta = RTA_NEXT(rta, rtl)) {
+ switch (rta->rta_type) {
+ case NDA_DST:
+ ip = *((uint32_t *)RTA_DATA(rta));
+ break;
+ case NDA_LLADDR:
+ mac = *((prox_rte_ether_addr *)(uint64_t *)RTA_DATA(rta));
+ break;
+ default:
+ break;
+ }
+ }
+ plogx_info("Received netlink ip "IPv4_BYTES_FMT" with mac "MAC_BYTES_FMT"\n", IP4(ip), MAC_BYTES(mac.addr_bytes));
+ ret = rte_hash_lookup(task->external_ip_hash, (const void *)&ip);
+ if (unlikely(ret < 0)) {
+ // entry not found for this IP: we did not ask a request.
+ // This can happen if the kernel updated the ARP table when receiving an ARP_REQUEST
+ // We must record this, as the ARP entry is now in the kernel table
+ if (prox_rte_is_zero_ether_addr(&mac)) {
+ // Timeout or MAC deleted from kernel MAC table
+ int ret = rte_hash_del_key(task->external_ip_hash, (const void *)&ip);
+ plogx_dbg("ip "IPv4_BYTES_FMT" removed from external_ip_hash\n", IP4(ip));
+ return;
+ }
+ int ret = rte_hash_add_key(task->external_ip_hash, (const void *)&ip);
+ if (unlikely(ret < 0)) {
+ plogx_dbg("IP "IPv4_BYTES_FMT" not found in external_ip_hash and unable to add it\n", IP4(ip));
+ return;
+ }
+ memcpy(&task->external_ip_table[ret].mac, &mac, sizeof(prox_rte_ether_addr));
+ plogx_dbg("ip "IPv4_BYTES_FMT" added in external_ip_hash with mac "MAC_BYTES_FMT"\n", IP4(ip), MAC_BYTES(mac.addr_bytes));
+ return;
+ }
+
+ // entry found for this IP
+ uint16_t nb_requests = task->external_ip_table[ret].nb_requests;
+ if (nb_requests == 0) {
+ return;
+ }
+
+ memcpy(&task->external_ip_table[ret].mac, &mac, sizeof(prox_rte_ether_addr));
+
+ // If we receive a request from multiple task for the same IP, then we update all tasks
+ int ret1 = rte_mempool_get(tbase->l3.arp_pool, (void **)mbufs);
+ if (unlikely(ret1 != 0)) {
+ plog_err("Unable to allocate a mbuf for master to core communication\n");
+ return;
+ }
+ rte_mbuf_refcnt_set(mbufs[0], nb_requests);
+ for (int i = 0; i < nb_requests; i++) {
+ struct rte_ring *ring = task->external_ip_table[ret].rings[i];
+ struct ether_hdr_arp *hdr = rte_pktmbuf_mtod(mbufs[0], struct ether_hdr_arp *);
+ memcpy(&hdr->arp.data.sha, &mac, sizeof(prox_rte_ether_addr));
+ tx_ring_ip(tbase, ring, UPDATE_FROM_CTRL, mbufs[0], ip);
+ plog_dbg("UPDATE_FROM_CTRL ip "IPv4_BYTES_FMT" with mac "MAC_BYTES_FMT"\n", IP4(ip), MAC_BYTES(mac.addr_bytes));
+ }
+ task->external_ip_table[ret].nb_requests = 0;
+ return;
+}
+
static int handle_ctrl_plane_f(struct task_base *tbase, __attribute__((unused)) struct rte_mbuf **mbuf, uint16_t n_pkts)
{
int ring_id = 0, j, ret = 0, n = 0;
@@ -583,86 +767,10 @@ static int handle_ctrl_plane_f(struct task_base *tbase, __attribute__((unused))
ret +=n;
}
if ((task->max_vdev_id) && (poll(&task->arp_fds, 1, prox_cfg.poll_timeout) == POLL_IN)) {
- struct nlmsghdr * nl_hdr;
- int fd = task->arp_fds.fd;
- int len;
- uint32_t ip = 0;
- prox_rte_ether_addr mac;
- memset(&mac, 0, sizeof(mac));
- len = recv(fd, netlink_buf, sizeof(netlink_buf), 0);
- if (len < 0) {
- plog_err("Failed to recv from netlink: %d\n", errno);
- return ret;
- }
- nl_hdr = (struct nlmsghdr *)netlink_buf;
- if (nl_hdr->nlmsg_flags & NLM_F_MULTI) {
- plog_err("Unexpected multipart netlink message\n");
- return ret;
- }
- if ((nl_hdr->nlmsg_type != RTM_NEWNEIGH) && (nl_hdr->nlmsg_type != RTM_DELNEIGH))
- return ret;
-
- struct ndmsg *ndmsg = (struct ndmsg *)NLMSG_DATA(nl_hdr);
- int ndm_family = ndmsg->ndm_family;
- struct rtattr *rta = (struct rtattr *)RTM_RTA(ndmsg);
- int rtl = RTM_PAYLOAD(nl_hdr);
- for (; RTA_OK(rta, rtl); rta = RTA_NEXT(rta, rtl)) {
- switch (rta->rta_type) {
- case NDA_DST:
- ip = *((uint32_t *)RTA_DATA(rta));
- break;
- case NDA_LLADDR:
- mac = *((prox_rte_ether_addr *)(uint64_t *)RTA_DATA(rta));
- break;
- default:
- break;
- }
- }
- int idx = rte_hash_lookup(task->external_ip_hash, (const void *)&ip);
- if (unlikely(idx < 0)) {
- // entry not found for this IP: we did not ask a request.
- // This can happen if the kernel updated the ARP table when receiving an ARP_REQUEST
- // We must record this, as the ARP entry is now in the kernel table
- if (prox_rte_is_zero_ether_addr(&mac)) {
- // Timeout or MAC deleted from kernel MAC table
- idx = rte_hash_del_key(task->external_ip_hash, (const void *)&ip);
- plogx_dbg("ip "IPv4_BYTES_FMT" removed from external_ip_hash\n", IP4(ip));
- return ret;
- }
- idx = rte_hash_add_key(task->external_ip_hash, (const void *)&ip);
- if (unlikely(idx < 0)) {
- // entry not found for this IP: Ignore the reply. This can happen for instance for
- // an IP used by management plane.
- plogx_dbg("IP "IPv4_BYTES_FMT" not found in external_ip_hash and unable to add it\n", IP4(ip));
- return ret;
- }
- memcpy(&task->external_ip_table[idx].mac, &mac, sizeof(prox_rte_ether_addr));
- plogx_dbg("ip "IPv4_BYTES_FMT" added in external_ip_hash with mac "MAC_BYTES_FMT"\n", IP4(ip), MAC_BYTES(mac.addr_bytes));
- return ret;
- }
-
- // entry found for this IP
- uint16_t nb_requests = task->external_ip_table[idx].nb_requests;
- if (nb_requests == 0) {
- return ret;
- }
-
- memcpy(&task->external_ip_table[idx].mac, &mac, sizeof(prox_rte_ether_addr));
-
- // If we receive a request from multiple task for the same IP, then we update all tasks
- if (unlikely(rte_mempool_get(tbase->l3.arp_pool, (void **)mbufs) != 0)) {
- plog_err("Unable to allocate a mbuf for master to core communication\n");
- return ret;
- }
- rte_mbuf_refcnt_set(mbufs[0], nb_requests);
- for (int i = 0; i < nb_requests; i++) {
- struct rte_ring *ring = task->external_ip_table[idx].rings[i];
- struct ether_hdr_arp *hdr = rte_pktmbuf_mtod(mbufs[0], struct ether_hdr_arp *);
- memcpy(&hdr->arp.data.sha, &mac, sizeof(prox_rte_ether_addr));
- tx_ring_ip(tbase, ring, UPDATE_FROM_CTRL, mbufs[0], ip);
- plog_dbg("UPDATE_FROM_CTRL ip "IPv4_BYTES_FMT" with mac "MAC_BYTES_FMT"\n", IP4(ip), MAC_BYTES(mac.addr_bytes));
- }
- task->external_ip_table[idx].nb_requests = 0;
+ handle_arp_event(tbase);
+ }
+ if (poll(&task->route_fds, 1, prox_cfg.poll_timeout) == POLL_IN) {
+ handle_route_event(tbase);
}
return ret;
}
diff --git a/VNFs/DPPD-PROX/handle_master.h b/VNFs/DPPD-PROX/handle_master.h
index 6ce51854..79154458 100644
--- a/VNFs/DPPD-PROX/handle_master.h
+++ b/VNFs/DPPD-PROX/handle_master.h
@@ -19,11 +19,15 @@
enum arp_actions {
UPDATE_FROM_CTRL,
+ ROUTE_ADD_FROM_CTRL,
+ ROUTE_DEL_FROM_CTRL,
ARP_REQ_FROM_CTRL,
ARP_REPLY_FROM_CTRL,
ICMP_FROM_CTRL,
+ BGP_FROM_CTRL,
ARP_TO_CTRL,
ICMP_TO_CTRL,
+ BGP_TO_CTRL,
REQ_MAC_TO_CTRL,
PKT_FROM_TAP,
MAX_ACTIONS
diff --git a/VNFs/DPPD-PROX/packet_utils.c b/VNFs/DPPD-PROX/packet_utils.c
index e06529c4..04746130 100644
--- a/VNFs/DPPD-PROX/packet_utils.c
+++ b/VNFs/DPPD-PROX/packet_utils.c
@@ -17,6 +17,8 @@
#include <rte_lcore.h>
#include <rte_hash.h>
#include <rte_hash_crc.h>
+#include <rte_lpm.h>
+
#include "task_base.h"
#include "lconf.h"
#include "prefetch.h"
@@ -25,6 +27,11 @@
#include "handle_master.h"
#include "prox_port_cfg.h"
#include "packet_utils.h"
+#include "prox_shared.h"
+#include "prox_lua.h"
+#include "hash_entry_types.h"
+#include "prox_compat.h"
+#include "tx_pkt.h"
static inline int find_ip(struct ether_hdr_arp *pkt, uint16_t len, uint32_t *ip_dst)
{
@@ -74,17 +81,92 @@ static inline int find_ip(struct ether_hdr_arp *pkt, uint16_t len, uint32_t *ip_
We should check arp_update_time in the master process. This would also require the generating task to clear its arp ring
to avoid sending many ARP while starting after a long stop.
We could also check for arp_timeout in the master so that dataplane has only to check whether MAC is available
- but this would require either thread safety, or the the exchange of information between master and generating core.
+ but this would require either thread safety, or the exchange of information between master and generating core.
*/
+static inline int add_key_and_send_arp(struct rte_hash *ip_hash, uint32_t *ip_dst, struct arp_table *entries, uint64_t tsc, uint64_t hz, uint32_t arp_update_time, prox_next_hop_index_type nh, uint64_t **time)
+{
+ int ret = rte_hash_add_key(ip_hash, (const void *)ip_dst);
+ if (unlikely(ret < 0)) {
+ // No reason to send ARP, as reply would be anyhow ignored
+ plogx_err("Unable to add ip "IPv4_BYTES_FMT" in mac_hash\n", IP4(*ip_dst));
+ return DROP_MBUF;
+ } else {
+ entries[ret].ip = *ip_dst;
+ entries[ret].nh = nh;
+ *time = &entries[ret].arp_update_time;
+ }
+ return SEND_ARP;
+}
+
+static inline int update_mac_and_send_mbuf(struct arp_table *entry, prox_rte_ether_addr *mac, uint64_t tsc, uint64_t hz, uint32_t arp_update_time, uint64_t **time)
+{
+ if (likely((tsc < entry->arp_update_time) && (tsc < entry->arp_timeout))) {
+ memcpy(mac, &entry->mac, sizeof(prox_rte_ether_addr));
+ return SEND_MBUF;
+ } else if (tsc > entry->arp_update_time) {
+ // long time since we have sent an arp, send arp
+ *time = &entry->arp_update_time;
+ if (tsc < entry->arp_timeout){
+ // MAC is valid in the table => send also the mbuf
+ memcpy(mac, &entry->mac, sizeof(prox_rte_ether_addr));
+ return SEND_MBUF_AND_ARP;
+ } else {
+ // MAC still unknown, or timed out => only send ARP
+ return SEND_ARP;
+ }
+ }
+ // MAC is unknown and we already sent an ARP recently, drop mbuf and wait for ARP reply
+ return DROP_MBUF;
+}
+
int write_dst_mac(struct task_base *tbase, struct rte_mbuf *mbuf, uint32_t *ip_dst, uint64_t **time)
{
const uint64_t hz = rte_get_tsc_hz();
struct ether_hdr_arp *packet = rte_pktmbuf_mtod(mbuf, struct ether_hdr_arp *);
prox_rte_ether_addr *mac = &packet->ether_hdr.d_addr;
+ prox_next_hop_index_type next_hop_index;
uint64_t tsc = rte_rdtsc();
struct l3_base *l3 = &(tbase->l3);
+
+ // First find the next hop
+ if (l3->ipv4_lpm) {
+ // A routing table was configured
+ // If a gw (gateway_ipv4) is also specified, it is used as default gw only i.e. lowest priority (shortest prefix)
+ // This is implemented automatically through lpm
+ uint16_t len = rte_pktmbuf_pkt_len(mbuf);
+ if (find_ip(packet, len, ip_dst) != 0) {
+ // Unable to find IP address => non IP packet => send it as it
+ return SEND_MBUF;
+ }
+ if (unlikely(rte_lpm_lookup(l3->ipv4_lpm, rte_bswap32(*ip_dst), &next_hop_index) != 0)) {
+ plog_err("No route to IP "IPv4_BYTES_FMT"\n", IP4(*ip_dst));
+ return DROP_MBUF;
+ }
+ struct arp_table *entry = &l3->next_hops[next_hop_index];
+
+ if (entry->ip) {
+ *ip_dst = entry->ip;
+ } else {
+ // no next ip: this is a local route
+ next_hop_index = MAX_HOP_INDEX;
+ }
+ // Find IP in lookup table. Send ARP if not found
+ int ret = rte_hash_lookup(l3->ip_hash, (const void *)ip_dst);
+ if (unlikely(ret < 0)) {
+ // IP not found, try to send an ARP
+ return add_key_and_send_arp(l3->ip_hash, ip_dst, l3->arp_table, tsc, hz, l3->arp_update_time, next_hop_index, time);
+ } else {
+ if (entry->ip)
+ return update_mac_and_send_mbuf(entry, mac, tsc, hz, l3->arp_update_time, time);
+ else
+ return update_mac_and_send_mbuf(&l3->arp_table[ret], mac, tsc, hz, l3->arp_update_time, time);
+ }
+ return 0;
+ }
+ // No Routing table specified: only a local ip and maybe a gateway
+ // Old default behavior: if a gw is specified, ALL packets go to this gateway (even those we could send w/o the gw
if (l3->gw.ip) {
if (likely((l3->flags & FLAG_DST_MAC_KNOWN) && (tsc < l3->gw.arp_update_time) && (tsc < l3->gw.arp_timeout))) {
memcpy(mac, &l3->gw.mac, sizeof(prox_rte_ether_addr));
@@ -117,25 +199,7 @@ int write_dst_mac(struct task_base *tbase, struct rte_mbuf *mbuf, uint32_t *ip_d
for (unsigned int idx = 0; idx < l3->n_pkts; idx++) {
if (*ip_dst == l3->optimized_arp_table[idx].ip) {
// IP address already in table
- if ((tsc < l3->optimized_arp_table[idx].arp_update_time) && (tsc < l3->optimized_arp_table[idx].arp_timeout)) {
- // MAC address was recently updated in table, use it
- memcpy(mac, &l3->optimized_arp_table[idx].mac, sizeof(prox_rte_ether_addr));
- return SEND_MBUF;
- } else if (tsc > l3->optimized_arp_table[idx].arp_update_time) {
- // ARP not sent since a long time, send ARP
- *time = &l3->optimized_arp_table[idx].arp_update_time;
- if (tsc < l3->optimized_arp_table[idx].arp_timeout) {
- // MAC still valid => also send mbuf
- memcpy(mac, &l3->optimized_arp_table[idx].mac, sizeof(prox_rte_ether_addr));
- return SEND_MBUF_AND_ARP;
- } else {
- // MAC unvalid => only send ARP
- return SEND_ARP;
- }
- } else {
- // ARP timeout elapsed, MAC not valid anymore but waiting for ARP reply
- return DROP_MBUF;
- }
+ return update_mac_and_send_mbuf(&l3->optimized_arp_table[idx], mac, tsc, hz, l3->arp_update_time, time);
}
}
// IP address not found in table
@@ -156,7 +220,7 @@ int write_dst_mac(struct task_base *tbase, struct rte_mbuf *mbuf, uint32_t *ip_d
// If it happens, we still send the ARP as easier:
// If the ARP corresponds to this error, the ARP reply will be ignored
// If ARP does not correspond to this error/ip, then ARP reply will be handled.
- plogx_err("Unable add ip %d.%d.%d.%d in mac_hash (already %d entries)\n", IP4(ip), idx);
+ plogx_err("Unable add ip "IPv4_BYTES_FMT" in mac_hash (already %d entries)\n", IP4(ip), idx);
} else {
memcpy(&l3->arp_table[ret], &l3->optimized_arp_table[idx], sizeof(struct arp_table));
}
@@ -167,35 +231,10 @@ int write_dst_mac(struct task_base *tbase, struct rte_mbuf *mbuf, uint32_t *ip_d
int ret = rte_hash_lookup(l3->ip_hash, (const void *)ip_dst);
if (unlikely(ret < 0)) {
// IP not found, try to send an ARP
- int ret = rte_hash_add_key(l3->ip_hash, (const void *)ip_dst);
- if (ret < 0) {
- // No reason to send ARP, as reply would be anyhow ignored
- plogx_err("Unable to add ip %d.%d.%d.%d in mac_hash\n", IP4(*ip_dst));
- return DROP_MBUF;
- } else {
- l3->arp_table[ret].ip = *ip_dst;
- *time = &l3->arp_table[ret].arp_update_time;
- }
- return SEND_ARP;
+ return add_key_and_send_arp(l3->ip_hash, ip_dst, &l3->arp_table[ret], tsc, hz, l3->arp_update_time, MAX_HOP_INDEX, time);
} else {
// IP has been found
- if (likely((tsc < l3->arp_table[ret].arp_update_time) && (tsc < l3->arp_table[ret].arp_timeout))) {
- // MAC still valid and ARP sent recently
- memcpy(mac, &l3->arp_table[ret].mac, sizeof(prox_rte_ether_addr));
- return SEND_MBUF;
- } else if (tsc > l3->arp_table[ret].arp_update_time) {
- // ARP not sent since a long time, send ARP
- *time = &l3->arp_table[ret].arp_update_time;
- if (tsc < l3->arp_table[ret].arp_timeout) {
- // MAC still valid => send also MBUF
- memcpy(mac, &l3->arp_table[ret].mac, sizeof(prox_rte_ether_addr));
- return SEND_MBUF_AND_ARP;
- } else {
- return SEND_ARP;
- }
- } else {
- return DROP_MBUF;
- }
+ return update_mac_and_send_mbuf(&l3->arp_table[ret], mac, tsc, hz, l3->arp_update_time, time);
}
}
// Should not happen
@@ -248,6 +287,7 @@ void task_start_l3(struct task_base *tbase, struct task_args *targ)
const int NB_ARP_MBUF = 1024;
const int ARP_MBUF_SIZE = 2048;
const int NB_CACHE_ARP_MBUF = 256;
+ const int socket_id = rte_lcore_to_socket_id(targ->lconf->id);
struct prox_port_cfg *port = find_reachable_port(targ);
if (port && (tbase->l3.arp_pool == NULL)) {
@@ -257,6 +297,45 @@ void task_start_l3(struct task_base *tbase, struct task_args *targ)
tbase->local_ipv4 = rte_be_to_cpu_32(targ->local_ipv4);
register_ip_to_ctrl_plane(tbase->l3.tmaster, tbase->local_ipv4, tbase->l3.reachable_port_id, targ->lconf->id, targ->id);
}
+ if (strcmp(targ->route_table, "") != 0) {
+ struct lpm4 *lpm;
+ int ret;
+
+ PROX_PANIC(tbase->local_ipv4 == 0, "missing local_ipv4 will route table is specified in L3 mode\n");
+
+ // LPM might be modified runtime => do not share with other cores
+ ret = lua_to_lpm4(prox_lua(), GLOBAL, targ->route_table, socket_id, &lpm);
+ PROX_PANIC(ret, "Failed to load IPv4 LPM:\n%s\n", get_lua_to_errors());
+
+ tbase->l3.ipv4_lpm = lpm->rte_lpm;
+ tbase->l3.next_hops = prox_zmalloc(sizeof(*tbase->l3.next_hops) * MAX_HOP_INDEX, socket_id);
+ PROX_PANIC(tbase->l3.next_hops == NULL, "Could not allocate memory for next hop\n");
+
+ for (uint32_t i = 0; i < MAX_HOP_INDEX; i++) {
+ if (!lpm->next_hops[i].ip_dst)
+ continue;
+ tbase->l3.nb_gws++;
+ tbase->l3.next_hops[i].ip = rte_bswap32(lpm->next_hops[i].ip_dst);
+ int tx_port = lpm->next_hops[i].mac_port.out_idx;
+ // gen only supports one port right now .... hence port = 0
+ if ((tx_port > targ->nb_txports - 1) && (tx_port > targ->nb_txrings - 1)) {
+ PROX_PANIC(1, "Routing Table contains port %d but only %d tx port/ %d ring:\n", tx_port, targ->nb_txports, targ->nb_txrings);
+ }
+ }
+ plog_info("Using routing table %s in l3 mode, with %d gateways\n", targ->route_table, tbase->l3.nb_gws);
+
+ // Last but one "next_hop_index" is not a gateway but direct routes
+ tbase->l3.next_hops[tbase->l3.nb_gws].ip = 0;
+ ret = rte_lpm_add(tbase->l3.ipv4_lpm, targ->local_ipv4, targ->local_prefix, tbase->l3.nb_gws++);
+ PROX_PANIC(ret, "Failed to add local_ipv4 "IPv4_BYTES_FMT"/%d to lpm\n", IP4(tbase->local_ipv4), targ->local_prefix);
+ // Last "next_hop_index" is default gw
+ tbase->l3.next_hops[tbase->l3.nb_gws].ip = rte_bswap32(targ->gateway_ipv4);
+ if (targ->gateway_ipv4) {
+ ret = rte_lpm_add(tbase->l3.ipv4_lpm, targ->gateway_ipv4, 0, tbase->l3.nb_gws++);
+ PROX_PANIC(ret, "Failed to add gateway_ipv4 "IPv4_BYTES_FMT"/%d to lpm\n", IP4(tbase->l3.gw.ip), 0);
+ }
+ }
+
master_init_vdev(tbase->l3.tmaster, tbase->l3.reachable_port_id, targ->lconf->id, targ->id);
name[3]++;
struct rte_mempool *ret = rte_mempool_create(name, NB_ARP_MBUF, ARP_MBUF_SIZE, NB_CACHE_ARP_MBUF,
@@ -284,8 +363,13 @@ void task_set_local_ip(struct task_base *tbase, uint32_t ip)
static void reset_arp_update_time(struct l3_base *l3, uint32_t ip)
{
uint32_t idx;
- plogx_info("\tMAC entry for IP "IPv4_BYTES_FMT" timeout in kernel\n", IP4(ip));
- if (ip == l3->gw.ip) {
+ plogx_dbg("MAC entry for IP "IPv4_BYTES_FMT" timeout in kernel\n", IP4(ip));
+
+ if (l3->ipv4_lpm) {
+ int ret = rte_hash_lookup(l3->ip_hash, (const void *)&ip);
+ if (ret >= 0)
+ l3->arp_table[ret].arp_update_time = 0;
+ } else if (ip == l3->gw.ip) {
l3->gw.arp_update_time = 0;
} else if (l3->n_pkts < 4) {
for (idx = 0; idx < l3->n_pkts; idx++) {
@@ -304,17 +388,34 @@ static void reset_arp_update_time(struct l3_base *l3, uint32_t ip)
return;
}
+static prox_next_hop_index_type get_nh_index(struct task_base *tbase, uint32_t gw_ip)
+{
+ // Check if gateway already exists
+ for (prox_next_hop_index_type i = 0; i < tbase->l3.nb_gws; i++) {
+ if (tbase->l3.next_hops[i].ip == gw_ip) {
+ return i;
+ }
+ }
+ if (tbase->l3.nb_gws < MAX_HOP_INDEX) {
+ tbase->l3.next_hops[tbase->l3.nb_gws].ip = gw_ip;
+ tbase->l3.nb_gws++;
+ return tbase->l3.nb_gws - 1;
+ } else
+ return MAX_HOP_INDEX;
+}
void handle_ctrl_plane_pkts(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts)
{
uint8_t out[1];
const uint64_t hz = rte_get_tsc_hz();
- uint32_t ip, ip_dst, idx;
- int j;
+ uint32_t ip, ip_dst, idx, gateway_ip, prefix;
+ prox_next_hop_index_type gateway_index;
+ int j, ret, modified_route;
uint16_t command;
struct ether_hdr_arp *hdr;
struct l3_base *l3 = &tbase->l3;
uint64_t tsc= rte_rdtsc();
- uint64_t update_time = l3->arp_timeout * hz / 1000;
+ uint64_t arp_timeout = l3->arp_timeout * hz / 1000;
+ uint32_t nh;
for (j = 0; j < n_pkts; ++j) {
PREFETCH0(mbufs[j]);
@@ -328,6 +429,38 @@ void handle_ctrl_plane_pkts(struct task_base *tbase, struct rte_mbuf **mbufs, ui
command = mbufs[j]->udata64 & 0xFFFF;
plogx_dbg("\tReceived %s mbuf %p\n", actions_string[command], mbufs[j]);
switch(command) {
+ case ROUTE_ADD_FROM_CTRL:
+ ip = ctrl_ring_get_ip(mbufs[j]);
+ gateway_ip = ctrl_ring_get_gateway_ip(mbufs[j]);
+ prefix = ctrl_ring_get_prefix(mbufs[j]);
+ gateway_index = get_nh_index(tbase, gateway_ip);
+ if (gateway_index >= MAX_HOP_INDEX) {
+ plog_err("Unable to find or define gateway index - too many\n");
+ return;
+ }
+ modified_route = rte_lpm_is_rule_present(tbase->l3.ipv4_lpm, rte_bswap32(ip), prefix, &nh);
+ ret = rte_lpm_add(tbase->l3.ipv4_lpm, rte_bswap32(ip), prefix, gateway_index);
+ if (ret < 0) {
+ plog_err("Failed to add route to "IPv4_BYTES_FMT"/%d using "IPv4_BYTES_FMT"(index = %d)\n", IP4(ip), prefix, IP4(gateway_ip), gateway_index);
+ } else if (modified_route)
+ plogx_dbg("Modified route to "IPv4_BYTES_FMT"/%d using "IPv4_BYTES_FMT"(index = %d) (was using "IPv4_BYTES_FMT"(index = %d)\n", IP4(ip), prefix, IP4(gateway_ip), gateway_index, IP4(tbase->l3.next_hops[nh].ip), nh);
+ else {
+ plogx_dbg("Added new route to "IPv4_BYTES_FMT"/%d using "IPv4_BYTES_FMT"(index = %d)\n", IP4(ip), prefix, IP4(gateway_ip), gateway_index);
+ }
+ break;
+ case ROUTE_DEL_FROM_CTRL:
+ ip = ctrl_ring_get_ip(mbufs[j]);
+ prefix = ctrl_ring_get_prefix(mbufs[j]);
+
+ ret = rte_lpm_is_rule_present(tbase->l3.ipv4_lpm, rte_bswap32(ip), prefix, &nh);
+ if (ret > 0) {
+ ret = rte_lpm_delete(tbase->l3.ipv4_lpm, rte_bswap32(ip), prefix);
+ if (ret < 0) {
+ plog_err("Failed to add rule\n");
+ }
+ plog_info("Deleting route to "IPv4_BYTES_FMT"/%d\n", IP4(ip), prefix);
+ }
+ break;
case UPDATE_FROM_CTRL:
hdr = rte_pktmbuf_mtod(mbufs[j], struct ether_hdr_arp *);
ip = (mbufs[j]->udata64 >> 32) & 0xFFFFFFFF;
@@ -337,16 +470,33 @@ void handle_ctrl_plane_pkts(struct task_base *tbase, struct rte_mbuf **mbufs, ui
// This will cause us to send new ARP request
// However, as arp_timeout not touched, we should continue sending our regular IP packets
reset_arp_update_time(l3, ip);
- plogx_info("\tTimeout for MAC entry for IP "IPv4_BYTES_FMT"\n", IP4(ip));
return;
} else
plogx_dbg("\tUpdating MAC entry for IP "IPv4_BYTES_FMT" with MAC "MAC_BYTES_FMT"\n",
IP4(ip), MAC_BYTES(hdr->arp.data.sha.addr_bytes));
- if (ip == l3->gw.ip) {
+
+ if (l3->ipv4_lpm) {
+ uint32_t nh;
+ struct arp_table *entry;
+ ret = rte_hash_add_key(l3->ip_hash, (const void *)&ip);
+ if (ret < 0) {
+ plogx_info("Unable add ip "IPv4_BYTES_FMT" in mac_hash\n", IP4(ip));
+ } else if ((nh = l3->arp_table[ret].nh) != MAX_HOP_INDEX) {
+ entry = &l3->next_hops[nh];
+ memcpy(&entry->mac, &(hdr->arp.data.sha), sizeof(prox_rte_ether_addr));
+ entry->arp_timeout = tsc + arp_timeout;
+ update_arp_update_time(l3, &entry->arp_update_time, l3->arp_update_time);
+ } else {
+ memcpy(&l3->arp_table[ret].mac, &(hdr->arp.data.sha), sizeof(prox_rte_ether_addr));
+ l3->arp_table[ret].arp_timeout = tsc + arp_timeout;
+ update_arp_update_time(l3, &l3->arp_table[ret].arp_update_time, l3->arp_update_time);
+ }
+ }
+ else if (ip == l3->gw.ip) {
// MAC address of the gateway
memcpy(&l3->gw.mac, &hdr->arp.data.sha, 6);
l3->flags |= FLAG_DST_MAC_KNOWN;
- l3->gw.arp_timeout = tsc + update_time;
+ l3->gw.arp_timeout = tsc + arp_timeout;
update_arp_update_time(l3, &l3->gw.arp_update_time, l3->arp_update_time);
} else if (l3->n_pkts < 4) {
// Few packets tracked - should be faster to loop through them thean using a hash table
@@ -357,28 +507,40 @@ void handle_ctrl_plane_pkts(struct task_base *tbase, struct rte_mbuf **mbufs, ui
}
if (idx < l3->n_pkts) {
memcpy(&l3->optimized_arp_table[idx].mac, &(hdr->arp.data.sha), sizeof(prox_rte_ether_addr));
- l3->optimized_arp_table[idx].arp_timeout = tsc + update_time;
+ l3->optimized_arp_table[idx].arp_timeout = tsc + arp_timeout;
update_arp_update_time(l3, &l3->optimized_arp_table[idx].arp_update_time, l3->arp_update_time);
}
} else {
- int ret = rte_hash_add_key(l3->ip_hash, (const void *)&ip);
+ ret = rte_hash_add_key(l3->ip_hash, (const void *)&ip);
if (ret < 0) {
- plogx_info("Unable add ip %d.%d.%d.%d in mac_hash\n", IP4(ip));
+ plogx_info("Unable add ip "IPv4_BYTES_FMT" in mac_hash\n", IP4(ip));
} else {
memcpy(&l3->arp_table[ret].mac, &(hdr->arp.data.sha), sizeof(prox_rte_ether_addr));
- l3->arp_table[ret].arp_timeout = tsc + update_time;
+ l3->arp_table[ret].arp_timeout = tsc + arp_timeout;
update_arp_update_time(l3, &l3->arp_table[ret].arp_update_time, l3->arp_update_time);
}
}
tx_drop(mbufs[j]);
break;
case ARP_REPLY_FROM_CTRL:
- case ICMP_FROM_CTRL:
case ARP_REQ_FROM_CTRL:
+ out[0] = 0;
+ // tx_ctrlplane_pkt does not drop packets
+ plogx_dbg("\tForwarding (ARP) packet from master\n");
+ tbase->aux->tx_ctrlplane_pkt(tbase, &mbufs[j], 1, out);
+ TASK_STATS_ADD_TX_NON_DP(&tbase->aux->stats, 1);
+ break;
+ case ICMP_FROM_CTRL:
+ out[0] = 0;
+ // tx_ctrlplane_pkt does not drop packets
+ plogx_dbg("\tForwarding (PING) packet from master\n");
+ tbase->aux->tx_ctrlplane_pkt(tbase, &mbufs[j], 1, out);
+ TASK_STATS_ADD_TX_NON_DP(&tbase->aux->stats, 1);
+ break;
case PKT_FROM_TAP:
out[0] = 0;
// tx_ctrlplane_pkt does not drop packets
- plogx_dbg("\tForwarding (ARP/PING) packet from master\n");
+ plogx_dbg("\tForwarding TAP packet from master\n");
tbase->aux->tx_ctrlplane_pkt(tbase, &mbufs[j], 1, out);
TASK_STATS_ADD_TX_NON_DP(&tbase->aux->stats, 1);
break;
diff --git a/VNFs/DPPD-PROX/packet_utils.h b/VNFs/DPPD-PROX/packet_utils.h
index a111b944..021528de 100644
--- a/VNFs/DPPD-PROX/packet_utils.h
+++ b/VNFs/DPPD-PROX/packet_utils.h
@@ -27,7 +27,7 @@
#define FLAG_DST_MAC_KNOWN 1
#define MAX_ARP_ENTRIES 65536
-#define IP4(x) x & 0xff, (x >> 8) & 0xff, (x >> 16) & 0xff, x >> 24
+#define IP4(x) x & 0xff, (x >> 8) & 0xff, (x >> 16) & 0xff, x >> 24 // From Network (BE)
enum {
SEND_MBUF_AND_ARP,
SEND_MBUF,
@@ -43,6 +43,7 @@ struct arp_table {
uint64_t arp_update_time;
uint64_t arp_timeout;
uint32_t ip;
+ uint32_t nh;
prox_rte_ether_addr mac;
};
struct l3_base {
@@ -55,12 +56,15 @@ struct l3_base {
uint8_t task_id;
uint32_t arp_timeout;
uint32_t arp_update_time;
+ uint seed;
+ prox_next_hop_index_type nb_gws;
struct arp_table gw;
struct arp_table optimized_arp_table[4];
struct rte_hash *ip_hash;
struct arp_table *arp_table;
struct rte_mempool *arp_pool;
- uint seed;
+ struct rte_lpm *ipv4_lpm;
+ struct arp_table *next_hops;
};
void task_init_l3(struct task_base *tbase, struct task_args *targ);
@@ -69,6 +73,7 @@ int write_dst_mac(struct task_base *tbase, struct rte_mbuf *mbuf, uint32_t *ip_d
void task_set_gateway_ip(struct task_base *tbase, uint32_t ip);
void task_set_local_ip(struct task_base *tbase, uint32_t ip);
void handle_ctrl_plane_pkts(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts);
+
static inline void update_arp_update_time(struct l3_base *l3, uint64_t *ptr, uint32_t base)
{
// randomize timers - from 0.5 to 1.5 * configured time
@@ -77,5 +82,4 @@ static inline void update_arp_update_time(struct l3_base *l3, uint64_t *ptr, uin
uint64_t rand = 500 + (1000L * rand_r(&l3->seed)) / RAND_MAX;
*ptr = tsc + (base * rand / 1000) * hz / 1000;
}
-
#endif /* _PACKET_UTILS_H_ */
diff --git a/VNFs/DPPD-PROX/prox_args.c b/VNFs/DPPD-PROX/prox_args.c
index 41082209..30b4cbd7 100644
--- a/VNFs/DPPD-PROX/prox_args.c
+++ b/VNFs/DPPD-PROX/prox_args.c
@@ -1389,10 +1389,23 @@ static int get_core_cfg(unsigned sindex, char *str, void *data)
if (STR_EQ(str, "gateway ipv4")) { /* Gateway IP address used when generating */
if ((targ->flags & TASK_ARG_L3) == 0)
plog_warn("gateway ipv4 configured but L3 sub mode not enabled\n");
+ if (targ->local_ipv4)
+ targ->local_prefix = 32;
return parse_ip(&targ->gateway_ipv4, pkey);
}
if (STR_EQ(str, "local ipv4")) { /* source IP address to be used for packets */
- return parse_ip(&targ->local_ipv4, pkey);
+ struct ip4_subnet cidr;
+ if (parse_ip4_cidr(&cidr, pkey) != 0) {
+ if (targ->gateway_ipv4)
+ targ->local_prefix = 32;
+ else
+ targ->local_prefix = 0;
+ return parse_ip(&targ->local_ipv4, pkey);
+ } else {
+ targ->local_ipv4 = cidr.ip;
+ targ->local_prefix = cidr.prefix;
+ return 0;
+ }
}
if (STR_EQ(str, "remote ipv4")) { /* source IP address to be used for packets */
return parse_ip(&targ->remote_ipv4, pkey);
diff --git a/VNFs/DPPD-PROX/prox_compat.h b/VNFs/DPPD-PROX/prox_compat.h
index e181cd8e..bd059a6c 100644
--- a/VNFs/DPPD-PROX/prox_compat.h
+++ b/VNFs/DPPD-PROX/prox_compat.h
@@ -37,6 +37,12 @@ struct prox_rte_table_params {
uint64_t seed;
};
+#if RTE_VERSION < RTE_VERSION_NUM(16,4,0,1) /* Integer type used for next-hop indexes; its width depends on the DPDK release. */
+typedef uint8_t prox_next_hop_index_type; /* Older DPDK: 8-bit index. NOTE(review): presumably mirrors the rte_lpm next-hop width - confirm. */
+#else
+typedef uint32_t prox_next_hop_index_type; /* DPDK at or above version (16,4,0,1): 32-bit index. */
+#endif
+
#if RTE_VERSION < RTE_VERSION_NUM(17,11,0,0)
static void *prox_rte_table_create(struct prox_rte_table_params *params, int socket_id, uint32_t entry_size)
diff --git a/VNFs/DPPD-PROX/prox_lua_types.c b/VNFs/DPPD-PROX/prox_lua_types.c
index 3ef3d472..bc1671d4 100644
--- a/VNFs/DPPD-PROX/prox_lua_types.c
+++ b/VNFs/DPPD-PROX/prox_lua_types.c
@@ -437,11 +437,11 @@ int lua_to_next_hop(struct lua_State *L, enum lua_place from, const char *name,
while (lua_next(L, -2)) {
if (lua_to_int(L, TABLE, "id", &next_hop_index) ||
lua_to_int(L, TABLE, "port_id", &port_id) ||
- lua_to_ip(L, TABLE, "ip", &ip) ||
- lua_to_mac(L, TABLE, "mac", &mac) ||
- lua_to_int(L, TABLE, "mpls", &mpls))
+ lua_to_ip(L, TABLE, "ip", &ip))
return -1;
+ lua_to_mac(L, TABLE, "mac", &mac);
+ lua_to_int(L, TABLE, "mpls", &mpls);
PROX_PANIC(port_id >= PROX_MAX_PORTS, "Port id too high (only supporting %d ports)\n", PROX_MAX_PORTS);
PROX_PANIC(next_hop_index >= MAX_HOP_INDEX, "Next-hop to high (only supporting %d next hops)\n", MAX_HOP_INDEX);
@@ -504,6 +504,7 @@ int lua_to_next_hop6(struct lua_State *L, enum lua_place from, const char *name,
return 0;
}
+#define MAX_NEW_RULES 128 /* Extra rule slots added on top of 2 * n_tot_rules when the IPv4 LPM is created; presumably room for routes added at run time - confirm. */
int lua_to_routes4(struct lua_State *L, enum lua_place from, const char *name, uint8_t socket, struct lpm4 *lpm)
{
struct ip4_subnet dst;
@@ -514,11 +515,12 @@ int lua_to_routes4(struct lua_State *L, enum lua_place from, const char *name, u
char lpm_name[64];
int ret;
int pop;
+ static int count = 1;
if ((pop = lua_getfrom(L, from, name)) < 0)
return -1;
- snprintf(lpm_name, sizeof(lpm_name), "IPv4_lpm_s%u", socket);
+ snprintf(lpm_name, sizeof(lpm_name), "IPv4_lpm_s%u_%d", socket, count++);
if (!lua_istable(L, -1)) {
set_err("Data is not a table\n");
@@ -531,12 +533,12 @@ int lua_to_routes4(struct lua_State *L, enum lua_place from, const char *name, u
lua_pop(L, 1);
#if RTE_VERSION >= RTE_VERSION_NUM(16,4,0,1)
struct rte_lpm_config conf;
- conf.max_rules = 2 * n_tot_rules;
+ conf.max_rules = 2 * n_tot_rules + MAX_NEW_RULES;
conf.number_tbl8s = 256;
conf.flags = 0;
new_lpm = rte_lpm_create(lpm_name, socket, &conf);
#else
- new_lpm = rte_lpm_create(lpm_name, socket, 2 * n_tot_rules, 0);
+ new_lpm = rte_lpm_create(lpm_name, socket, 2 * n_tot_rules + MAX_NEW_RULES, 0);
#endif
PROX_PANIC(NULL == new_lpm, "Failed to allocate lpm\n");
diff --git a/VNFs/DPPD-PROX/rx_pkt.c b/VNFs/DPPD-PROX/rx_pkt.c
index 4832066a..6a6112b5 100644
--- a/VNFs/DPPD-PROX/rx_pkt.c
+++ b/VNFs/DPPD-PROX/rx_pkt.c
@@ -30,6 +30,8 @@
#include "handle_master.h"
#include "input.h" /* Needed for callback on dump */
+#define TCP_PORT_BGP rte_cpu_to_be_16(179) /* Port 179 (BGP) converted to big-endian so it can be compared directly with the src_port/dst_port fields of a TCP header. */
+
/* _param version of the rx_pkt_hw functions are used to create two
instances of very similar variations of these functions. The
variations are specified by the "multi" parameter which significies
@@ -138,10 +140,15 @@ static uint16_t rx_pkt_hw_param(struct task_base *tbase, struct rte_mbuf ***mbuf
if (likely(hdr_arp[i]->ether_hdr.ether_type == ETYPE_IPv4)) {
hdr = (prox_rte_ether_hdr *)hdr_arp[i];
prox_rte_ipv4_hdr *pip = (prox_rte_ipv4_hdr *)(hdr + 1);
+ prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)(pip + 1);
if (pip->next_proto_id == IPPROTO_ICMP) {
dump_l3(tbase, mbufs[i]);
tx_ring(tbase, tbase->l3.ctrl_plane_ring, ICMP_TO_CTRL, mbufs[i]);
skip++;
+ } else if ((tcp->src_port == TCP_PORT_BGP) || (tcp->dst_port == TCP_PORT_BGP)) {
+ dump_l3(tbase, mbufs[i]);
+ tx_ring(tbase, tbase->l3.ctrl_plane_ring, BGP_TO_CTRL, mbufs[i]);
+ skip++;
} else if (unlikely(skip)) {
mbufs[i - skip] = mbufs[i];
}
@@ -202,13 +209,19 @@ static inline uint16_t rx_pkt_hw1_param(struct task_base *tbase, struct rte_mbuf
PREFETCH0(hdr_arp[i]);
}
for (i = 0; i < nb_rx; i++) {
+ // plog_info("ether_type = %x\n", hdr_arp[i]->ether_hdr.ether_type);
if (likely(hdr_arp[i]->ether_hdr.ether_type == ETYPE_IPv4)) {
hdr = (prox_rte_ether_hdr *)hdr_arp[i];
prox_rte_ipv4_hdr *pip = (prox_rte_ipv4_hdr *)(hdr + 1);
+ prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)(pip + 1);
if (pip->next_proto_id == IPPROTO_ICMP) {
dump_l3(tbase, mbufs[i]);
tx_ring(tbase, tbase->l3.ctrl_plane_ring, ICMP_TO_CTRL, mbufs[i]);
skip++;
+ } else if ((tcp->src_port == TCP_PORT_BGP) || (tcp->dst_port == TCP_PORT_BGP)) {
+ dump_l3(tbase, mbufs[i]);
+ tx_ring(tbase, tbase->l3.ctrl_plane_ring, BGP_TO_CTRL, mbufs[i]);
+ skip++;
} else if (unlikely(skip)) {
mbufs[i - skip] = mbufs[i];
}
diff --git a/VNFs/DPPD-PROX/task_init.h b/VNFs/DPPD-PROX/task_init.h
index 4108f54d..98c0a8dc 100644
--- a/VNFs/DPPD-PROX/task_init.h
+++ b/VNFs/DPPD-PROX/task_init.h
@@ -129,6 +129,7 @@ struct task_args {
uint32_t gateway_ipv4;
uint32_t local_ipv4;
uint32_t remote_ipv4;
+ uint32_t local_prefix;
uint32_t arp_timeout;
uint32_t arp_update_time;
struct ipv6_addr local_ipv6; /* For IPv6 Tunnel, it's the local tunnel endpoint address */
diff --git a/VNFs/DPPD-PROX/tx_pkt.c b/VNFs/DPPD-PROX/tx_pkt.c
index 8bf501f6..2a4f53b2 100644
--- a/VNFs/DPPD-PROX/tx_pkt.c
+++ b/VNFs/DPPD-PROX/tx_pkt.c
@@ -845,3 +845,73 @@ void tx_ring(struct task_base *tbase, struct rte_ring *ring, uint16_t command,
rte_pktmbuf_free(mbuf);
}
}
+
+/*
+ * Post a route request on 'ring', using 'mbuf' as the carrier.
+ *
+ * A non-zero 'add' selects ROUTE_ADD_FROM_CTRL, zero selects
+ * ROUTE_DEL_FROM_CTRL. The command and the route parameters (ip, gateway_ip,
+ * prefix) are attached to the mbuf through the ctrl_ring_set_*() helpers
+ * before it is enqueued. When the enqueue fails, the drop is accounted in the
+ * task statistics and the mbuf is freed here.
+ */
+void tx_ring_route(struct task_base *tbase, struct rte_ring *ring, int add, struct rte_mbuf *mbuf, uint32_t ip, uint32_t gateway_ip, uint32_t prefix)
+{
+	uint8_t command = add ? ROUTE_ADD_FROM_CTRL : ROUTE_DEL_FROM_CTRL;
+
+	plogx_dbg("\tSending command %s to ring %p using mbuf %p - ring size now %d\n", actions_string[command], ring, mbuf, rte_ring_free_count(ring));
+
+	/* Attach the complete request to the mbuf before handing it over. */
+	ctrl_ring_set_command(mbuf, command);
+	ctrl_ring_set_ip(mbuf, ip);
+	ctrl_ring_set_gateway_ip(mbuf, gateway_ip);
+	ctrl_ring_set_prefix(mbuf, prefix);
+
+	/* Only trace when a dump has been requested for this task. */
+	if (tbase->aux->task_rt_dump.cur_trace) {
+		trace_one_rx_pkt(tbase, mbuf);
+	}
+
+	int rc = rte_ring_enqueue(ring, mbuf);
+	if (unlikely(rc != 0)) {
+		/* The mbuf was not accepted by the ring, so it is still ours to release. */
+		plogx_dbg("\tFail to send command %s to ring %p using mbuf %p - ring size now %d\n", actions_string[command], ring, mbuf, rte_ring_free_count(ring));
+		TASK_STATS_ADD_DROP_DISCARD(&tbase->aux->stats, 1);
+		rte_pktmbuf_free(mbuf);
+	}
+}
+
+/*
+ * Store the control-ring command word in the mbuf.
+ * The whole 64-bit udata64 field is overwritten with 'udata64'.
+ */
+void ctrl_ring_set_command(struct rte_mbuf *mbuf, uint64_t udata64)
+{
+	mbuf->udata64 = udata64;
+}
+
+/*
+ * Read back the control-ring command word of the mbuf.
+ * Returns the raw 64-bit udata64 field; no masking is applied here.
+ */
+uint64_t ctrl_ring_get_command(struct rte_mbuf *mbuf)
+{
+	return mbuf->udata64;
+}
+
+/*
+ * Store 'udata32' in the 'ip' field of the prox_headroom record located in
+ * the sizeof(struct prox_headroom) bytes directly before the mbuf's data start.
+ * NOTE(review): nothing here checks that the mbuf has that much headroom.
+ */
+void ctrl_ring_set_ip(struct rte_mbuf *mbuf, uint32_t udata32)
+{
+	uint8_t *data_start = rte_pktmbuf_mtod(mbuf, uint8_t *);
+	struct prox_headroom *meta = (struct prox_headroom *)(data_start - sizeof(struct prox_headroom));
+
+	meta->ip = udata32;
+}
+
+/*
+ * Return the 'ip' field of the prox_headroom record located in the
+ * sizeof(struct prox_headroom) bytes directly before the mbuf's data start.
+ */
+uint32_t ctrl_ring_get_ip(struct rte_mbuf *mbuf)
+{
+	uint8_t *data_start = rte_pktmbuf_mtod(mbuf, uint8_t *);
+	struct prox_headroom *meta = (struct prox_headroom *)(data_start - sizeof(struct prox_headroom));
+
+	return meta->ip;
+}
+
+/*
+ * Store 'udata32' in the 'gateway_ip' field of the prox_headroom record located
+ * in the sizeof(struct prox_headroom) bytes directly before the mbuf's data start.
+ * NOTE(review): nothing here checks that the mbuf has that much headroom.
+ */
+void ctrl_ring_set_gateway_ip(struct rte_mbuf *mbuf, uint32_t udata32)
+{
+	uint8_t *data_start = rte_pktmbuf_mtod(mbuf, uint8_t *);
+	struct prox_headroom *meta = (struct prox_headroom *)(data_start - sizeof(struct prox_headroom));
+
+	meta->gateway_ip = udata32;
+}
+
+/*
+ * Return the 'gateway_ip' field of the prox_headroom record located in the
+ * sizeof(struct prox_headroom) bytes directly before the mbuf's data start.
+ */
+uint32_t ctrl_ring_get_gateway_ip(struct rte_mbuf *mbuf)
+{
+	uint8_t *data_start = rte_pktmbuf_mtod(mbuf, uint8_t *);
+	struct prox_headroom *meta = (struct prox_headroom *)(data_start - sizeof(struct prox_headroom));
+
+	return meta->gateway_ip;
+}
+
+/*
+ * Store 'udata32' in the 'prefix' field of the prox_headroom record located in
+ * the sizeof(struct prox_headroom) bytes directly before the mbuf's data start.
+ * NOTE(review): nothing here checks that the mbuf has that much headroom.
+ */
+void ctrl_ring_set_prefix(struct rte_mbuf *mbuf, uint32_t udata32)
+{
+	uint8_t *data_start = rte_pktmbuf_mtod(mbuf, uint8_t *);
+	struct prox_headroom *meta = (struct prox_headroom *)(data_start - sizeof(struct prox_headroom));
+
+	meta->prefix = udata32;
+}
+
+/*
+ * Return the 'prefix' field of the prox_headroom record located in the
+ * sizeof(struct prox_headroom) bytes directly before the mbuf's data start.
+ */
+uint32_t ctrl_ring_get_prefix(struct rte_mbuf *mbuf)
+{
+	uint8_t *data_start = rte_pktmbuf_mtod(mbuf, uint8_t *);
+	struct prox_headroom *meta = (struct prox_headroom *)(data_start - sizeof(struct prox_headroom));
+
+	return meta->prefix;
+}
diff --git a/VNFs/DPPD-PROX/tx_pkt.h b/VNFs/DPPD-PROX/tx_pkt.h
index 708a9837..f7443cf4 100644
--- a/VNFs/DPPD-PROX/tx_pkt.h
+++ b/VNFs/DPPD-PROX/tx_pkt.h
@@ -22,6 +22,13 @@
struct task_base;
struct rte_mbuf;
+struct prox_headroom { /* Route request metadata; the ctrl_ring_* accessors in tx_pkt.c place it in the bytes directly before the mbuf's data start. */
+ uint64_t command; /* NOTE(review): not touched by the ctrl_ring_* accessors, which keep the command in mbuf->udata64 - confirm whether this field is used. */
+ uint32_t ip; /* Route address, accessed through ctrl_ring_set_ip()/ctrl_ring_get_ip(). */
+ uint32_t prefix; /* Route prefix, accessed through ctrl_ring_set_prefix()/ctrl_ring_get_prefix(); presumably the prefix length in bits - confirm. */
+ uint32_t gateway_ip; /* Gateway address, accessed through ctrl_ring_set_gateway_ip()/ctrl_ring_get_gateway_ip(). */
+} __attribute__((packed)); /* packed: no padding is inserted between the fields. */
+
void flush_queues_hw(struct task_base *tbase);
void flush_queues_sw(struct task_base *tbase);
@@ -86,4 +93,14 @@ int tx_ring_cti(struct task_base *tbase, struct rte_ring *ring, uint16_t command
void tx_ring_ip(struct task_base *tbase, struct rte_ring *ring, uint16_t command, struct rte_mbuf *mbuf, uint32_t ip);
void tx_ring(struct task_base *tbase, struct rte_ring *ring, uint16_t command, struct rte_mbuf *mbuf);
+void ctrl_ring_set_command(struct rte_mbuf *mbuf, uint64_t udata64); /* Store the command word in mbuf->udata64 (the whole field is overwritten). */
+uint64_t ctrl_ring_get_command(struct rte_mbuf *mbuf); /* Return the raw mbuf->udata64 value. */
+void ctrl_ring_set_ip(struct rte_mbuf *mbuf, uint32_t udata32); /* Write the 'ip' field of the struct prox_headroom placed just before the mbuf data. */
+uint32_t ctrl_ring_get_ip(struct rte_mbuf *mbuf); /* Read the 'ip' field of that struct prox_headroom. */
+void ctrl_ring_set_gateway_ip(struct rte_mbuf *mbuf, uint32_t udata32); /* Write the 'gateway_ip' field of that struct prox_headroom. */
+uint32_t ctrl_ring_get_gateway_ip(struct rte_mbuf *mbuf); /* Read the 'gateway_ip' field of that struct prox_headroom. */
+void ctrl_ring_set_prefix(struct rte_mbuf *mbuf, uint32_t udata32); /* Write the 'prefix' field of that struct prox_headroom. */
+uint32_t ctrl_ring_get_prefix(struct rte_mbuf *mbuf); /* Read the 'prefix' field of that struct prox_headroom. */
+void tx_ring_route(struct task_base *tbase, struct rte_ring *ring, int add, struct rte_mbuf *mbuf, uint32_t ip, uint32_t gateway_ip, uint32_t prefix); /* Tag mbuf with a route add (add != 0) or delete (add == 0) command plus ip/gateway_ip/prefix and enqueue it on ring; the mbuf is freed if the enqueue fails. */
+
#endif /* _TX_PKT_H_ */