From c871c361f9d69a93429ae385e7dbf21a46aa6857 Mon Sep 17 00:00:00 2001 From: Xavier Simonart Date: Mon, 11 May 2020 21:00:33 +0200 Subject: Added initial support for NDP (IPv6) The following messages are now handled by PROX: - router_solicitation - neighbour_solicitation - router_advertisement - neighbour_advertisement The following parameters are supported (through the PROX config file): - sub mode=ndp This will enable handling of router and neighbour solicitation and advertisement. - local ipv6=xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx This will configure the local IPv6 address of the port. This parameter is optional. If not specified, the local IPv6 will be calculated from the EUI. - global ipv6=xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx This will configure the global IPv6 address of the port. This parameter is optional. If not specified, the global IPv6 will be calculated from the EUI and the router prefix received from the router. - ipv6 router=yes This will cause the core to behave as an IPv6 router, i.e. it will generate Router Advertisement messages. This is only useful in back-to-back cases, when no real IPv6 router is present in the setup. - router prefix=xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx The router prefix used in the router advertisement. The prefix will be used by the node to build an IPv6 global address in case none was configured. The "Unsollicited NA" parameter has been added within the core/task section. If set to yes (Unsollicited NA=yes), then an unsolicited Neighbour Advertisement is sent at startup. A given core/task cannot support both l3 and ndp mode. These messages will be generated or handled when submode is set to "ndp": - neighbour solicitation - neighbour advertisement - router solicitation - router advertisement An example configuration is provided: config/ipv6.cfg in which port 0 / core 1 plays the role of the generator and port 1 / core 2 plays the role of the swap. 
Change-Id: Id0ab32d384448b4cf767fb4a1c486fc023f4f395 Signed-off-by: Xavier Simonart --- VNFs/DPPD-PROX/Makefile | 2 +- VNFs/DPPD-PROX/cmd_parser.c | 26 +- VNFs/DPPD-PROX/commands.c | 4 +- VNFs/DPPD-PROX/config/ipv6.cfg | 85 ++++++ VNFs/DPPD-PROX/defaults.c | 12 +- VNFs/DPPD-PROX/defines.h | 9 +- VNFs/DPPD-PROX/handle_gen.c | 158 ++++++++--- VNFs/DPPD-PROX/handle_master.c | 590 +++++++++++++++++++++++++++++++++------- VNFs/DPPD-PROX/handle_master.h | 91 +++++-- VNFs/DPPD-PROX/handle_routing.c | 1 - VNFs/DPPD-PROX/handle_swap.c | 70 ++++- VNFs/DPPD-PROX/ip6_addr.h | 4 +- VNFs/DPPD-PROX/lconf.c | 16 +- VNFs/DPPD-PROX/main.c | 52 ++-- VNFs/DPPD-PROX/packet_utils.c | 461 +++++++++++++++++++++++++------ VNFs/DPPD-PROX/packet_utils.h | 31 ++- VNFs/DPPD-PROX/parse_utils.c | 6 +- VNFs/DPPD-PROX/prox_args.c | 59 +++- VNFs/DPPD-PROX/prox_cfg.h | 14 +- VNFs/DPPD-PROX/prox_cksum.h | 10 +- VNFs/DPPD-PROX/prox_compat.c | 30 ++ VNFs/DPPD-PROX/prox_compat.h | 13 +- VNFs/DPPD-PROX/prox_ipv6.c | 302 ++++++++++++++++++++ VNFs/DPPD-PROX/prox_ipv6.h | 140 ++++++++++ VNFs/DPPD-PROX/prox_port_cfg.c | 2 +- VNFs/DPPD-PROX/prox_shared.c | 6 +- VNFs/DPPD-PROX/rx_pkt.c | 97 +++++-- VNFs/DPPD-PROX/rx_pkt.h | 8 +- VNFs/DPPD-PROX/task_base.h | 4 +- VNFs/DPPD-PROX/task_init.c | 23 +- VNFs/DPPD-PROX/task_init.h | 11 +- VNFs/DPPD-PROX/tx_pkt.c | 177 ++++++++---- VNFs/DPPD-PROX/tx_pkt.h | 121 +++++++- 33 files changed, 2215 insertions(+), 420 deletions(-) create mode 100644 VNFs/DPPD-PROX/config/ipv6.cfg create mode 100644 VNFs/DPPD-PROX/prox_compat.c create mode 100644 VNFs/DPPD-PROX/prox_ipv6.c create mode 100644 VNFs/DPPD-PROX/prox_ipv6.h (limited to 'VNFs/DPPD-PROX') diff --git a/VNFs/DPPD-PROX/Makefile b/VNFs/DPPD-PROX/Makefile index f8bde421..ff75c178 100644 --- a/VNFs/DPPD-PROX/Makefile +++ b/VNFs/DPPD-PROX/Makefile @@ -213,7 +213,7 @@ SRCS-y += stats_port.c stats_mempool.c stats_ring.c stats_l4gen.c SRCS-y += stats_latency.c stats_global.c stats_core.c stats_task.c stats_prio.c stats_irq.c 
SRCS-y += cmd_parser.c input.c prox_shared.c prox_lua_types.c SRCS-y += genl4_bundle.c heap.c genl4_stream_tcp.c genl4_stream_udp.c cdf.c -SRCS-y += stats.c stats_cons_log.c stats_cons_cli.c stats_parser.c hash_set.c prox_lua.c prox_malloc.c +SRCS-y += stats.c stats_cons_log.c stats_cons_cli.c stats_parser.c hash_set.c prox_lua.c prox_malloc.c prox_ipv6.c prox_compat.c ifeq ($(FIRST_PROX_MAKE),) MAKEFLAGS += --no-print-directory diff --git a/VNFs/DPPD-PROX/cmd_parser.c b/VNFs/DPPD-PROX/cmd_parser.c index 8c72f7bf..2d3b5704 100644 --- a/VNFs/DPPD-PROX/cmd_parser.c +++ b/VNFs/DPPD-PROX/cmd_parser.c @@ -1,5 +1,5 @@ /* -// Copyright (c) 2010-2019 Intel Corporation +// Copyright (c) 2010-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -384,7 +384,7 @@ static int parse_cmd_count(const char *str, struct input *input) if (cores_task_are_valid(lcores, task_id, nb_cores)) { for (unsigned int i = 0; i < nb_cores; i++) { lcore_id = lcores[i]; - if ((!task_is_mode_and_submode(lcore_id, task_id, "gen", "")) && (!task_is_mode_and_submode(lcore_id, task_id, "gen", "l3"))) { + if (!task_is_mode(lcore_id, task_id, "gen")) { plog_err("Core %u task %u is not generating packets\n", lcore_id, task_id); } else { @@ -413,7 +413,7 @@ static int parse_cmd_set_probability(const char *str, struct input *input) if (cores_task_are_valid(lcores, task_id, nb_cores)) { for (unsigned int i = 0; i < nb_cores; i++) { lcore_id = lcores[i]; - if ((!task_is_mode_and_submode(lcore_id, task_id, "impair", "")) && (!task_is_mode_and_submode(lcore_id, task_id, "impair", "l3"))){ + if (!task_is_mode(lcore_id, task_id, "impair")) { plog_err("Core %u task %u is not impairing packets\n", lcore_id, task_id); } else { struct task_base *tbase = lcore_cfg[lcore_id].tasks_all[task_id]; @@ -438,7 +438,7 @@ static int parse_cmd_delay_us(const char *str, struct input *input) if 
(cores_task_are_valid(lcores, task_id, nb_cores)) { for (unsigned int i = 0; i < nb_cores; i++) { lcore_id = lcores[i]; - if ((!task_is_mode_and_submode(lcore_id, task_id, "impair", "")) && (!task_is_mode_and_submode(lcore_id, task_id, "impair", "l3"))){ + if (!task_is_mode(lcore_id, task_id, "impair")) { plog_err("Core %u task %u is not impairing packets\n", lcore_id, task_id); } else { struct task_base *tbase = lcore_cfg[lcore_id].tasks_all[task_id]; @@ -463,7 +463,7 @@ static int parse_cmd_random_delay_us(const char *str, struct input *input) if (cores_task_are_valid(lcores, task_id, nb_cores)) { for (unsigned int i = 0; i < nb_cores; i++) { lcore_id = lcores[i]; - if ((!task_is_mode_and_submode(lcore_id, task_id, "impair", "")) && (!task_is_mode_and_submode(lcore_id, task_id, "impair", "l3"))){ + if (!task_is_mode(lcore_id, task_id, "impair")) { plog_err("Core %u task %u is not impairing packets\n", lcore_id, task_id); } else { struct task_base *tbase = lcore_cfg[lcore_id].tasks_all[task_id]; @@ -525,7 +525,7 @@ static int parse_cmd_pkt_size(const char *str, struct input *input) if (cores_task_are_valid(lcores, task_id, nb_cores)) { for (unsigned int i = 0; i < nb_cores; i++) { lcore_id = lcores[i]; - if ((!task_is_mode_and_submode(lcore_id, task_id, "gen", "")) && (!task_is_mode_and_submode(lcore_id, task_id, "gen", "l3"))) { + if (!task_is_mode(lcore_id, task_id, "gen")) { plog_err("Core %u task %u is not generating packets\n", lcore_id, task_id); } else { struct task_base *tbase = lcore_cfg[lcore_id].tasks_all[task_id]; @@ -597,7 +597,7 @@ static int parse_cmd_speed(const char *str, struct input *input) for (i = 0; i < nb_cores; i++) { lcore_id = lcores[i]; - if ((!task_is_mode_and_submode(lcore_id, task_id, "gen", "")) && (!task_is_mode_and_submode(lcore_id, task_id, "gen", "l3"))) { + if (!task_is_mode(lcore_id, task_id, "gen")) { plog_err("Core %u task %u is not generating packets\n", lcore_id, task_id); } else if (speed > 1000.0f || speed < 0.0f) { // Up 
to 100 Gbps @@ -631,7 +631,7 @@ static int parse_cmd_speed_byte(const char *str, struct input *input) for (unsigned int i = 0; i < nb_cores; i++) { lcore_id = lcores[i]; - if ((!task_is_mode_and_submode(lcore_id, task_id, "gen", "")) && (!task_is_mode_and_submode(lcore_id, task_id, "gen", "l3"))) { + if (!task_is_mode(lcore_id, task_id, "gen")) { plog_err("Core %u task %u is not generating packets\n", lcore_id, task_id); } else if (bps > 12500000000) { // Up to 100Gbps @@ -657,7 +657,7 @@ static int parse_cmd_reset_randoms_all(const char *str, struct input *input) unsigned task_id, lcore_id = -1; while (prox_core_next(&lcore_id, 0) == 0) { for (task_id = 0; task_id < lcore_cfg[lcore_id].n_tasks_all; task_id++) { - if ((task_is_mode_and_submode(lcore_id, task_id, "gen", "")) || (task_is_mode_and_submode(lcore_id, task_id, "gen", "l3"))) { + if (!task_is_mode(lcore_id, task_id, "gen")) { struct task_base *tbase = lcore_cfg[lcore_id].tasks_all[task_id]; uint32_t n_rands = task_gen_get_n_randoms(tbase); @@ -678,7 +678,7 @@ static int parse_cmd_reset_values_all(const char *str, struct input *input) unsigned task_id, lcore_id = -1; while (prox_core_next(&lcore_id, 0) == 0) { for (task_id = 0; task_id < lcore_cfg[lcore_id].n_tasks_all; task_id++) { - if ((task_is_mode_and_submode(lcore_id, task_id, "gen", "")) || (task_is_mode_and_submode(lcore_id, task_id, "gen", "l3"))) { + if (!task_is_mode(lcore_id, task_id, "gen")) { struct task_base *tbase = lcore_cfg[lcore_id].tasks_all[task_id]; plog_info("Resetting values on core %d task %d\n", lcore_id, task_id); @@ -699,7 +699,7 @@ static int parse_cmd_reset_values(const char *str, struct input *input) if (cores_task_are_valid(lcores, task_id, nb_cores)) { for (unsigned int i = 0; i < nb_cores; i++) { lcore_id = lcores[i]; - if ((!task_is_mode_and_submode(lcore_id, task_id, "gen", "")) && (!task_is_mode_and_submode(lcore_id, task_id, "gen", "l3"))) { + if (!task_is_mode(lcore_id, task_id, "gen")) { plog_err("Core %u task %u is 
not generating packets\n", lcore_id, task_id); } else { @@ -730,7 +730,7 @@ static int parse_cmd_set_value(const char *str, struct input *input) if (cores_task_are_valid(lcores, task_id, nb_cores)) { for (unsigned int i = 0; i < nb_cores; i++) { lcore_id = lcores[i]; - if ((!task_is_mode_and_submode(lcore_id, task_id, "gen", "")) && (!task_is_mode_and_submode(lcore_id, task_id, "gen", "l3"))) { + if (!task_is_mode(lcore_id, task_id, "gen")) { plog_err("Core %u task %u is not generating packets\n", lcore_id, task_id); } // do not check offset here - gen knows better than us the maximum frame size @@ -769,7 +769,7 @@ static int parse_cmd_set_random(const char *str, struct input *input) if (cores_task_are_valid(lcores, task_id, nb_cores)) { for (unsigned int i = 0; i < nb_cores; i++) { lcore_id = lcores[i]; - if ((!task_is_mode_and_submode(lcore_id, task_id, "gen", "")) && (!task_is_mode_and_submode(lcore_id, task_id, "gen", "l3"))) { + if (!task_is_mode(lcore_id, task_id, "gen")) { plog_err("Core %u task %u is not generating packets\n", lcore_id, task_id); } else if (offset > PROX_RTE_ETHER_MAX_LEN) { diff --git a/VNFs/DPPD-PROX/commands.c b/VNFs/DPPD-PROX/commands.c index df5e69cd..32b974cb 100644 --- a/VNFs/DPPD-PROX/commands.c +++ b/VNFs/DPPD-PROX/commands.c @@ -1,5 +1,5 @@ /* -// Copyright (c) 2010-2017 Intel Corporation +// Copyright (c) 2010-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -114,7 +114,7 @@ static inline void start_l3(struct task_args *targ) { if (!task_is_master(targ)) { if ((targ->nb_txrings != 0) || (targ->nb_txports != 0)) { - if (targ->flags & TASK_ARG_L3) + if (targ->flags & (TASK_ARG_L3|TASK_ARG_NDP)) task_start_l3(targ->tbase, targ); } } diff --git a/VNFs/DPPD-PROX/config/ipv6.cfg b/VNFs/DPPD-PROX/config/ipv6.cfg new file mode 100644 index 00000000..6ad4725a --- /dev/null +++ b/VNFs/DPPD-PROX/config/ipv6.cfg @@ -0,0 +1,85 @@ +;; +;; Copyright (c) 2020 Intel Corporation +;; +;; Licensed under the Apache License, Version 2.0 (the "License"); +;; you may not use this file except in compliance with the License. +;; You may obtain a copy of the License at +;; +;; http://www.apache.org/licenses/LICENSE-2.0 +;; +;; Unless required by applicable law or agreed to in writing, software +;; distributed under the License is distributed on an "AS IS" BASIS, +;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +;; See the License for the specific language governing permissions and +;; limitations under the License. +;; + +[eal options] +-n=6 ; force number of memory channels +no-output=no ; disable DPDK debug output + +[port 0] +name=p0 + +[port 2] +name=p1 + +[defaults] +mempool size=8K + +[global] +start time=5 +name=Basic IPv6 + +[variables] +$loc_gen_hex_ip1=fe 80 00 00 00 00 00 00 00 00 00 00 00 00 00 01 +$loc_swp_hex_ip1=fe 80 00 00 00 00 00 00 00 00 00 00 00 00 00 02 +$glob_gen_hex_ip1=20 01 db 80 00 00 00 00 00 00 00 00 00 00 00 01 +$glob_swp_hex_ip1=20 01 db 80 00 00 00 00 00 00 00 00 00 00 00 02 +$loc_gen_ip1=fe80::0001 +$glob_gen_ip1=2001:db80::0001 +$loc_swp_ip1=fe80::0002 +$glob_swp_ip1=2001:db80::0002 + +[core 0s0] +mode=master + +; 84 bytes packet to include latency related data. 
+; for 64 bytes packets, comment out lat pos, packet id pos, signature pos, accuracy pos; set pkt size to 60 and +; set payload & udp length to 8 (bytes 19 and 59 changed from 1a to 08) +[core 1s0] +name=TX0 +task=0 +mode=gen +sub mode=ndp +local ipv6=${loc_gen_ip1} +global ipv6=${glob_gen_ip1} +tx port=p0 +bps=1000 +pkt inline=00 00 01 00 00 01 00 00 02 00 00 02 86 dd 60 00 00 00 00 1a 11 40 ${glob_gen_hex_ip1} ${glob_swp_hex_ip1} 13 88 13 88 00 1a 55 7b 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +lat pos=62 +packet id pos=66 +signature pos=72 +accuracy pos=76 +pkt size=80 + +task=1 +mode=lat +sub mode=ndp +rx port=p0 +local ipv6=${loc_gen_ip1} +global ipv6=${glob_gen_ip1} +lat pos=62 +packet id pos=66 +signature pos=72 +accuracy pos=76 + +[core 2s0] +name=SWAP +task=0 +mode=swap +sub mode=ndp +rx port=p1 +tx port=p1 +local ipv6=${loc_swp_ip1} +global ipv6=${glob_swp_ip1} diff --git a/VNFs/DPPD-PROX/defaults.c b/VNFs/DPPD-PROX/defaults.c index bb359cfb..ac611d0c 100644 --- a/VNFs/DPPD-PROX/defaults.c +++ b/VNFs/DPPD-PROX/defaults.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include "lconf.h" @@ -28,6 +29,7 @@ #include "toeplitz.h" #include "handle_master.h" #include "prox_compat.h" +#include "prox_ipv6.h" #define TEN_GIGABIT 1250000000 #define QUEUE_SIZES 128 @@ -115,8 +117,16 @@ static struct rte_sched_subport_params subport_params_default = { #endif }; -void set_global_defaults(__attribute__((unused)) struct prox_cfg *prox_cfg) +void set_global_defaults(struct prox_cfg *prox_cfg) { + if (parse_ip6(&prox_cfg->all_routers_ipv6_mcast_addr, ALL_ROUTERS_IPV6_MCAST_ADDR) != 0) + plog_err("Failed to parse %s\n", ALL_ROUTERS_IPV6_MCAST_ADDR); + if (parse_ip6(&prox_cfg->all_nodes_ipv6_mcast_addr, ALL_NODES_IPV6_MCAST_ADDR) != 0) + plog_err("Failed to parse %s\n", ALL_NODES_IPV6_MCAST_ADDR); + if (parse_ip6(&prox_cfg->random_ip, RANDOM_IPV6) != 0) + plog_err("Failed to parse %s\n", RANDOM_IPV6); + 
set_mcast_mac_from_ipv6(&prox_cfg->all_routers_mac_addr, &prox_cfg->all_routers_ipv6_mcast_addr); + set_mcast_mac_from_ipv6(&prox_cfg->all_nodes_mac_addr, &prox_cfg->all_nodes_ipv6_mcast_addr); } void set_task_defaults(struct prox_cfg* prox_cfg, struct lcore_cfg* lcore_cfg_init) diff --git a/VNFs/DPPD-PROX/defines.h b/VNFs/DPPD-PROX/defines.h index c2309be1..3be1a963 100644 --- a/VNFs/DPPD-PROX/defines.h +++ b/VNFs/DPPD-PROX/defines.h @@ -1,5 +1,5 @@ /* -// Copyright (c) 2010-2017 Intel Corporation +// Copyright (c) 2010-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -42,6 +42,13 @@ addr[12], addr[13], addr[14], addr[15] #endif +#ifndef IPv6_PREFIX +#define IPv6_PREFIX_FMT "%02x%02x:%02x%02x:%02x%02x:%02x%02x" +#define IPv6_PREFIX(addr) \ + addr[0], addr[1], addr[2], addr[3], \ + addr[4], addr[5], addr[6], addr[7] +#endif + #ifndef MAC_BYTES #define MAC_BYTES_FMT "%02x:%02x:%02x:%02x:%02x:%02x" diff --git a/VNFs/DPPD-PROX/handle_gen.c b/VNFs/DPPD-PROX/handle_gen.c index 1546dce7..ac75f221 100644 --- a/VNFs/DPPD-PROX/handle_gen.c +++ b/VNFs/DPPD-PROX/handle_gen.c @@ -1,5 +1,5 @@ /* -// Copyright (c) 2010-2017 Intel Corporation +// Copyright (c) 2010-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -47,6 +47,8 @@ #include "arp.h" #include "tx_pkt.h" #include "handle_master.h" +#include "defines.h" +#include "prox_ipv6.h" struct pkt_template { uint16_t len; @@ -172,11 +174,11 @@ static void parse_l2_l3_len(uint8_t *pkt, uint16_t *l2_len, uint16_t *l3_len, ui case ETYPE_MPLSM: *l2_len +=4; break; + case ETYPE_IPv6: case ETYPE_IPv4: break; case ETYPE_EoGRE: case ETYPE_ARP: - case ETYPE_IPv6: *l2_len = 0; break; default: @@ -187,7 +189,8 @@ static void parse_l2_l3_len(uint8_t *pkt, uint16_t *l2_len, uint16_t *l3_len, ui if (*l2_len) { prox_rte_ipv4_hdr *ip = (prox_rte_ipv4_hdr *)(pkt + *l2_len); - *l3_len = ipv4_get_hdr_len(ip); + if (ip->version_ihl >> 4 == 4) + *l3_len = ipv4_get_hdr_len(ip); } } @@ -196,9 +199,20 @@ static void checksum_packet(uint8_t *hdr, struct rte_mbuf *mbuf, struct pkt_temp uint16_t l2_len = pkt_template->l2_len; uint16_t l3_len = pkt_template->l3_len; - if (l2_len) { - prox_rte_ipv4_hdr *ip = (prox_rte_ipv4_hdr*)(hdr + l2_len); + prox_rte_ipv4_hdr *ip = (prox_rte_ipv4_hdr*)(hdr + l2_len); + if (l3_len) { prox_ip_udp_cksum(mbuf, ip, l2_len, l3_len, cksum_offload); + } else if (ip->version_ihl >> 4 == 6) { + prox_rte_ipv6_hdr *ip6 = (prox_rte_ipv6_hdr *)(hdr + l2_len); + if (ip6->proto == IPPROTO_UDP) { + prox_rte_udp_hdr *udp = (prox_rte_udp_hdr *)(ip6 + 1); + udp->dgram_cksum = 0; + udp->dgram_cksum = rte_ipv6_udptcp_cksum(ip6, udp); + } else if (ip6->proto == IPPROTO_TCP) { + prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)(ip6 + 1); + tcp->cksum = 0; + tcp->cksum = rte_ipv6_udptcp_cksum(ip6, tcp); + } } } @@ -758,23 +772,45 @@ static inline void build_value(struct task_gen *task, uint32_t mask, int bit_pos register_ip_to_ctrl_plane(tbase->l3.tmaster, rte_cpu_to_be_32(val | fixed_bits), tbase->l3.reachable_port_id, tbase->l3.core_id, tbase->l3.task_id); } } + +static inline void build_value_ipv6(struct task_gen *task, uint32_t mask, int var_bit_pos, int init_var_bit_pos, struct ipv6_addr val, struct ipv6_addr fixed_bits) +{ + struct 
task_base *tbase = (struct task_base *)task; + if (var_bit_pos < 32) { + build_value_ipv6(task, mask >> 1, var_bit_pos + 1, init_var_bit_pos, val, fixed_bits); + if (mask & 1) { + int byte_pos = (var_bit_pos + init_var_bit_pos) / 8; + int bit_pos = (var_bit_pos + init_var_bit_pos) % 8; + val.bytes[byte_pos] = val.bytes[byte_pos] | (1 << bit_pos); + build_value_ipv6(task, mask >> 1, var_bit_pos + 1, init_var_bit_pos, val, fixed_bits); + } + } else { + for (uint i = 0; i < sizeof(struct ipv6_addr) / 8; i++) + val.bytes[i] = val.bytes[i] | fixed_bits.bytes[i]; + register_node_to_ctrl_plane(tbase->l3.tmaster, &null_addr, &val, tbase->l3.reachable_port_id, tbase->l3.core_id, tbase->l3.task_id); + } +} + static inline void register_all_ip_to_ctrl_plane(struct task_gen *task) { struct task_base *tbase = (struct task_base *)task; int i, len, fixed; unsigned int offset; - uint32_t mask; + uint32_t mask, ip_len; + struct ipv6_addr *ip6_src = NULL; + uint32_t *ip_src; for (uint32_t i = 0; i < task->n_pkts; ++i) { struct pkt_template *pktpl = &task->pkt_template[i]; unsigned int ip_src_pos = 0; - int maybe_ipv4 = 0; + int ipv4 = 0; unsigned int l2_len = sizeof(prox_rte_ether_hdr); uint8_t *pkt = pktpl->buf; prox_rte_ether_hdr *eth_hdr = (prox_rte_ether_hdr*)pkt; uint16_t ether_type = eth_hdr->ether_type; prox_rte_vlan_hdr *vlan_hdr; + prox_rte_ipv4_hdr *ip; // Unstack VLAN tags while (((ether_type == ETYPE_8021ad) || (ether_type == ETYPE_VLAN)) && (l2_len + sizeof(prox_rte_vlan_hdr) < pktpl->len)) { @@ -784,19 +820,38 @@ static inline void register_all_ip_to_ctrl_plane(struct task_gen *task) } if ((ether_type == ETYPE_MPLSU) || (ether_type == ETYPE_MPLSM)) { l2_len +=4; - maybe_ipv4 = 1; - } - if ((ether_type != ETYPE_IPv4) && !maybe_ipv4) + ip = (prox_rte_ipv4_hdr *)(pkt + l2_len); + if (ip->version_ihl >> 4 == 4) + ipv4 = 1; + else if (ip->version_ihl >> 4 != 6) // Version field at same location for IPv4 and IPv6 + continue; + } else if (ether_type == ETYPE_IPv4) { + ip = 
(prox_rte_ipv4_hdr *)(pkt + l2_len); + PROX_PANIC(ip->version_ihl >> 4 != 4, "IPv4 ether_type but IP version = %d != 4", ip->version_ihl >> 4); // Invalid Packet + ipv4 = 1; + } else if (ether_type == ETYPE_IPv6) { + ip = (prox_rte_ipv4_hdr *)(pkt + l2_len); + PROX_PANIC(ip->version_ihl >> 4 != 6, "IPv6 ether_type but IP version = %d != 6", ip->version_ihl >> 4); // Invalid Packet + } else { continue; + } - prox_rte_ipv4_hdr *ip = (prox_rte_ipv4_hdr *)(pkt + l2_len); - PROX_PANIC(ip->version_ihl >> 4 != 4, "IPv4 ether_type but IP version = %d != 4", ip->version_ihl >> 4); - - // Even if IPv4 header contains options, options are after ip src and dst - ip_src_pos = l2_len + sizeof(prox_rte_ipv4_hdr) - 2 * sizeof(uint32_t); - uint32_t *ip_src = ((uint32_t *)(pktpl->buf + ip_src_pos)); - plog_info("\tip_src_pos = %d, ip_src = %x\n", ip_src_pos, *ip_src); - register_ip_to_ctrl_plane(tbase->l3.tmaster, *ip_src, tbase->l3.reachable_port_id, tbase->l3.core_id, tbase->l3.task_id); + PROX_PANIC(ipv4 && ((prox_cfg.flags & DSF_L3_ENABLED) == 0), "Trying to generate an IPv4 packet in NDP mode => not supported\n"); + PROX_PANIC((ipv4 == 0) && ((prox_cfg.flags & DSF_NDP_ENABLED) == 0), "Trying to generate an IPv6 packet in L3 (IPv4) mode => not supported\n"); + if (ipv4) { + // Even if IPv4 header contains options, options are after ip src and dst + ip_src_pos = l2_len + sizeof(prox_rte_ipv4_hdr) - 2 * sizeof(uint32_t); + ip_src = ((uint32_t *)(pktpl->buf + ip_src_pos)); + plog_info("\tip_src_pos = %d, ip_src = %x\n", ip_src_pos, *ip_src); + register_ip_to_ctrl_plane(tbase->l3.tmaster, *ip_src, tbase->l3.reachable_port_id, tbase->l3.core_id, tbase->l3.task_id); + ip_len = sizeof(uint32_t); + } else { + ip_src_pos = l2_len + sizeof(prox_rte_ipv6_hdr) - 2 * sizeof(struct ipv6_addr); + ip6_src = ((struct ipv6_addr *)(pktpl->buf + ip_src_pos)); + plog_info("\tip_src_pos = %d, ip6_src = "IPv6_BYTES_FMT"\n", ip_src_pos, IPv6_BYTES(ip6_src->bytes)); + 
register_node_to_ctrl_plane(tbase->l3.tmaster, ip6_src, &null_addr, tbase->l3.reachable_port_id, tbase->l3.core_id, tbase->l3.task_id); + ip_len = sizeof(struct ipv6_addr); + } for (int j = 0; j < task->n_rands; j++) { offset = task->rand[j].rand_offset; @@ -804,7 +859,12 @@ static inline void register_all_ip_to_ctrl_plane(struct task_gen *task) mask = task->rand[j].rand_mask; fixed = task->rand[j].fixed_bits; plog_info("offset = %d, len = %d, mask = %x, fixed = %x\n", offset, len, mask, fixed); - if ((offset < ip_src_pos + 4) && (offset + len >= ip_src_pos)) { + if (offset >= ip_src_pos + ip_len) // First random bit after IP + continue; + if (offset + len < ip_src_pos) // Last random bit before IP + continue; + + if (ipv4) { if (offset >= ip_src_pos) { int32_t ip_src_mask = (1 << (4 + ip_src_pos - offset) * 8) - 1; mask = mask & ip_src_mask; @@ -816,6 +876,28 @@ static inline void register_all_ip_to_ctrl_plane(struct task_gen *task) fixed = (fixed << bits) | (rte_be_to_cpu_32(*ip_src) & ((1 << bits) - 1)); build_value(task, mask, 0, 0, fixed); } + } else { + // We do not support when random partially covers IP - either starting before or finishing after + if (offset + len >= ip_src_pos + ip_len) { // len over the ip + plog_err("Not supported: random_offset = %d, random_len = %d, ip_src_pos = %d, ip_len = %d\n", offset, len, ip_src_pos, ip_len); + continue; + } + if (offset < ip_src_pos) { + plog_err("Not supported: random_offset = %d, random_len = %d, ip_src_pos = %d, ip_len = %d\n", offset, len, ip_src_pos, ip_len); + continue; + } + // Even for IPv6 the random mask supported by PROX are 32 bits only + struct ipv6_addr fixed_ipv6; + uint init_var_byte_pos = (offset - ip_src_pos); + for (uint i = 0; i < sizeof(struct ipv6_addr); i++) { + if (i < init_var_byte_pos) + fixed_ipv6.bytes[i] = ip6_src->bytes[i]; + else if (i < init_var_byte_pos + len) + fixed_ipv6.bytes[i] = (fixed >> (i - init_var_byte_pos)) & 0xFF; + else + fixed_ipv6.bytes[i] = ip6_src->bytes[i]; + } 
+ build_value_ipv6(task, mask, 0, init_var_byte_pos * 8, null_addr, fixed_ipv6); } } } @@ -999,17 +1081,29 @@ static void task_gen_pkt_template_recalc_checksum(struct task_gen *task) if (template->l2_len == 0) continue; ip = (prox_rte_ipv4_hdr *)(template->buf + template->l2_len); - - ip->hdr_checksum = 0; - prox_ip_cksum_sw(ip); - uint32_t l4_len = rte_bswap16(ip->total_length) - template->l3_len; - - if (ip->next_proto_id == IPPROTO_UDP) { - prox_rte_udp_hdr *udp = (prox_rte_udp_hdr *)(((uint8_t *)ip) + template->l3_len); - prox_udp_cksum_sw(udp, l4_len, ip->src_addr, ip->dst_addr); - } else if (ip->next_proto_id == IPPROTO_TCP) { - prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)(((uint8_t *)ip) + template->l3_len); - prox_tcp_cksum_sw(tcp, l4_len, ip->src_addr, ip->dst_addr); + if (ip->version_ihl >> 4 == 4) { + ip->hdr_checksum = 0; + prox_ip_cksum_sw(ip); + uint32_t l4_len = rte_bswap16(ip->total_length) - template->l3_len; + if (ip->next_proto_id == IPPROTO_UDP) { + prox_rte_udp_hdr *udp = (prox_rte_udp_hdr *)(((uint8_t *)ip) + template->l3_len); + prox_udp_cksum_sw(udp, l4_len, ip->src_addr, ip->dst_addr); + } else if (ip->next_proto_id == IPPROTO_TCP) { + prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)(((uint8_t *)ip) + template->l3_len); + prox_tcp_cksum_sw(tcp, l4_len, ip->src_addr, ip->dst_addr); + } + } else if (ip->version_ihl >> 4 == 6) { + prox_rte_ipv6_hdr *ip6; + ip6 = (prox_rte_ipv6_hdr *)(template->buf + template->l2_len); + if (ip6->proto == IPPROTO_UDP) { + prox_rte_udp_hdr *udp = (prox_rte_udp_hdr *)(ip6 + 1); + udp->dgram_cksum = 0; + udp->dgram_cksum = rte_ipv6_udptcp_cksum(ip6, udp); + } else if (ip6->proto == IPPROTO_TCP) { + prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)(ip6 + 1); + tcp->cksum = 0; + tcp->cksum = rte_ipv6_udptcp_cksum(ip6, tcp); + } } /* The current implementation avoids checksum @@ -1130,7 +1224,7 @@ static struct rte_mempool *task_gen_create_mempool(struct task_args *targ, uint1 uint32_t mbuf_size = TX_MBUF_SIZE; if 
(max_frame_size + (unsigned)sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM > mbuf_size) mbuf_size = max_frame_size + (unsigned)sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM; - plog_info("\t\tCreating mempool with name '%s'\n", name); + plog_info("\tCreating mempool with name '%s'\n", name); ret = rte_mempool_create(name, targ->nb_mbuf - 1, mbuf_size, targ->nb_cache_mbuf, sizeof(struct rte_pktmbuf_pool_private), rte_pktmbuf_pool_init, NULL, rte_pktmbuf_init, 0, @@ -1138,7 +1232,7 @@ static struct rte_mempool *task_gen_create_mempool(struct task_args *targ, uint1 PROX_PANIC(ret == NULL, "Failed to allocate dummy memory pool on socket %u with %u elements\n", sock_id, targ->nb_mbuf - 1); - plog_info("\t\tMempool %p size = %u * %u cache %u, socket %d\n", ret, + plog_info("\tMempool %p size = %u * %u cache %u, socket %d\n", ret, targ->nb_mbuf - 1, mbuf_size, targ->nb_cache_mbuf, sock_id); return ret; diff --git a/VNFs/DPPD-PROX/handle_master.c b/VNFs/DPPD-PROX/handle_master.c index ce5c0bc5..a528a681 100644 --- a/VNFs/DPPD-PROX/handle_master.c +++ b/VNFs/DPPD-PROX/handle_master.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include @@ -44,6 +43,8 @@ #include "input.h" #include "tx_pkt.h" #include "defines.h" +#include "prox_ipv6.h" +#include "packet_utils.h" #define PROX_MAX_ARP_REQUESTS 32 // Maximum number of tasks requesting the same MAC address #define NETLINK_BUF_SIZE 16384 @@ -51,18 +52,24 @@ static char netlink_buf[NETLINK_BUF_SIZE]; const char *actions_string[] = { - "UPDATE_FROM_CTRL", // Controlplane sending a MAC update to dataplane - "ROUTE_ADD_FROM_CTRL", // Controlplane sending a new route to dataplane - "ROUTE_DEL_FROM_CTRL", // Controlplane deleting a new route from dataplane - "SEND_ARP_REQUEST_FROM_CTRL", // Controlplane requesting dataplane to send ARP request - "SEND_ARP_REPLY_FROM_CTRL", // Controlplane requesting dataplane to send ARP reply - "SEND_ICMP_FROM_CTRL", // Controlplane requesting dataplane to send ICMP message - 
"SEND_BGP_FROM_CTRL", // Controlplane requesting dataplane to send BGP message - "ARP_TO_CTRL", // ARP sent by datplane to Controlpane for handling - "ICMP_TO_CTRL", // ICMP sent by datplane to Controlpane for handling - "BGP_TO_CTRL", // BGP sent by datplane to Controlpane for handling - "REQ_MAC_TO_CTRL", // Dataplane requesting MAC resolution to Controlplane + "MAC_INFO_FROM_MASTER", // Controlplane sending a MAC update to dataplane + "MAC_INFO_FROM_MASTER_FOR_IPV6",// Controlplane sending a MAC update to dataplane + "IPV6_INFO_FROM_MASTER", // Controlplane IPv6 Global IP info to dataplane + "ROUTE_ADD_FROM_MASTER", // Controlplane sending a new route to dataplane + "ROUTE_DEL_FROM_MASTER", // Controlplane deleting a new route from dataplane + "SEND_ARP_REQUEST_FROM_MASTER", // Controlplane requesting dataplane to send ARP request + "SEND_ARP_REPLY_FROM_MASTER", // Controlplane requesting dataplane to send ARP reply + "SEND_NDP_FROM_MASTER", // Controlplane requesting dataplane to send NDP + "SEND_ICMP_FROM_MASTER", // Controlplane requesting dataplane to send ICMP message + "SEND_BGP_FROM_MASTER", // Controlplane requesting dataplane to send BGP message + "ARP_PKT_FROM_NET_TO_MASTER", // ARP sent by datplane to Controlpane for handling + "NDP_PKT_FROM_NET_TO_MASTER," // NDP sent by datplane to Controlpane for handling + "ICMP_TO_MASTER", // ICMP sent by datplane to Controlpane for handling + "BGP_TO_MASTER" // BGP sent by datplane to Controlpane for handling + "IP4_REQ_MAC_TO_MASTER", // Dataplane requesting MAC resolution to Controlplane + "IP6_REQ_MAC_TO_MASTER", // Dataplane requesting MAC resolution to Controlplane "PKT_FROM_TAP" // Packet received by Controlplane from kernel and forwarded to dataplane for sending + }; static struct my_arp_t arp_reply = { @@ -80,68 +87,64 @@ static struct my_arp_t arp_request = { .oper = 0x100 }; -struct ip_table { - prox_rte_ether_addr mac; - struct rte_ring *ring; -}; - -struct external_ip_table { - prox_rte_ether_addr 
mac; - struct rte_ring *rings[PROX_MAX_ARP_REQUESTS]; - uint16_t nb_requests; -}; - -struct port_table { - prox_rte_ether_addr mac; - struct rte_ring *ring; - uint32_t ip; - uint8_t port; - uint8_t flags; - uint64_t last_echo_req_rcvd_tsc; - uint64_t last_echo_rep_rcvd_tsc; - uint32_t n_echo_req; - uint32_t n_echo_rep; -}; - -struct task_master { - struct task_base base; - struct rte_ring *ctrl_rx_ring; - struct rte_ring **ctrl_tx_rings; - struct ip_table *internal_ip_table; - struct external_ip_table *external_ip_table; - struct rte_hash *external_ip_hash; - struct rte_hash *internal_ip_hash; - struct port_table internal_port_table[PROX_MAX_PORTS]; - struct vdev all_vdev[PROX_MAX_PORTS]; - int max_vdev_id; - struct pollfd arp_fds; - struct pollfd route_fds; -}; - struct ip_port { uint32_t ip; uint8_t port; } __attribute__((packed)); -static inline uint8_t get_command(struct rte_mbuf *mbuf) -{ - return mbuf->udata64 & 0xFF; -} -static inline uint8_t get_task(struct rte_mbuf *mbuf) -{ - return (mbuf->udata64 >> 8) & 0xFF; -} -static inline uint8_t get_core(struct rte_mbuf *mbuf) -{ - return (mbuf->udata64 >> 16) & 0xFF; -} -static inline uint8_t get_port(struct rte_mbuf *mbuf) +struct ip6_port { + struct ipv6_addr ip6; + uint8_t port; +} __attribute__((packed)); + +void register_router_to_ctrl_plane(struct task_base *tbase, uint8_t port_id, uint8_t core_id, uint8_t task_id, struct ipv6_addr *local_ipv6_addr, struct ipv6_addr *global_ipv6_addr, struct ipv6_addr *router_prefix) { - return mbuf->port; + struct task_master *task = (struct task_master *)tbase; + task->internal_port_table[port_id].flags |= IPV6_ROUTER; + memcpy(&task->internal_port_table[port_id].router_prefix, router_prefix, sizeof(struct ipv6_addr)); + register_node_to_ctrl_plane(tbase, local_ipv6_addr, global_ipv6_addr, port_id, core_id, task_id); } -static inline uint32_t get_ip(struct rte_mbuf *mbuf) + +void register_node_to_ctrl_plane(struct task_base *tbase, struct ipv6_addr *local_ipv6_addr, 
struct ipv6_addr *global_ipv6_addr, uint8_t port_id, uint8_t core_id, uint8_t task_id) { - return (mbuf->udata64 >> 32) & 0xFFFFFFFF; + struct task_master *task = (struct task_master *)tbase; + if (task->internal_port_table[port_id].flags & IPV6_ROUTER) + plogx_dbg("\tregistering router with port %d core %d and task %d\n", port_id, core_id, task_id); + else + plogx_dbg("\tregistering node with port %d core %d and task %d\n", port_id, core_id, task_id); + + if (port_id >= PROX_MAX_PORTS) { + plog_err("Unable to register router, port %d\n", port_id); + return; + } + task->internal_port_table[port_id].ring = task->ctrl_tx_rings[core_id * MAX_TASKS_PER_CORE + task_id]; + memcpy(&task->internal_port_table[port_id].mac, &prox_port_cfg[port_id].eth_addr, sizeof(prox_rte_ether_addr)); + memcpy(&task->internal_port_table[port_id].local_ipv6_addr, local_ipv6_addr, sizeof(struct ipv6_addr)); + if (memcmp(local_ipv6_addr, &prox_cfg.random_ip, sizeof(struct ipv6_addr)) == 0) { + task->internal_port_table[port_id].flags |= HANDLE_RANDOM_LOCAL_IP_FLAG; + return; + } + memcpy(&task->internal_port_table[port_id].global_ipv6_addr, global_ipv6_addr, sizeof(struct ipv6_addr)); + if (memcmp(global_ipv6_addr, &prox_cfg.random_ip, sizeof(struct ipv6_addr)) == 0) { + task->internal_port_table[port_id].flags |= HANDLE_RANDOM_GLOBAL_IP_FLAG; + return; + } + struct ip6_port key; + memcpy(&key.ip6, local_ipv6_addr, sizeof(struct ipv6_addr)); + key.port = port_id; + int ret = rte_hash_add_key(task->internal_ip6_hash, (const void *)&key); + if (unlikely(ret < 0)) { + plog_err("Unable to register ip "IPv6_BYTES_FMT"\n", IPv6_BYTES(local_ipv6_addr->bytes)); + return; + } + memcpy(&key.ip6, global_ipv6_addr, sizeof(struct ipv6_addr)); + ret = rte_hash_add_key(task->internal_ip6_hash, (const void *)&key); + if (unlikely(ret < 0)) { + plog_err("Unable to register ip "IPv6_BYTES_FMT"\n", IPv6_BYTES(global_ipv6_addr->bytes)); + return; + } + memcpy(&task->internal_ip6_table[ret].mac, 
&prox_port_cfg[port_id].eth_addr, sizeof(prox_rte_ether_addr)); + task->internal_ip6_table[ret].ring = task->ctrl_tx_rings[core_id * MAX_TASKS_PER_CORE + task_id]; } void master_init_vdev(struct task_base *tbase, uint8_t port_id, uint8_t core_id, uint8_t task_id) @@ -188,7 +191,7 @@ void register_ip_to_ctrl_plane(struct task_base *tbase, uint32_t ip, uint8_t por return; } - /* TODO - stoe multiple rings if multiple cores able to handle IP + /* TODO - store multiple rings if multiple cores able to handle IP Remove them when such cores are stopped and de-register IP */ task->internal_port_table[port_id].ring = task->ctrl_tx_rings[core_id * MAX_TASKS_PER_CORE + task_id]; @@ -209,7 +212,6 @@ void register_ip_to_ctrl_plane(struct task_base *tbase, uint32_t ip, uint8_t por } memcpy(&task->internal_ip_table[ret].mac, &prox_port_cfg[port_id].eth_addr, sizeof(prox_rte_ether_addr)); task->internal_ip_table[ret].ring = task->ctrl_tx_rings[core_id * MAX_TASKS_PER_CORE + task_id]; - } static inline void handle_arp_reply(struct task_base *tbase, struct rte_mbuf *mbuf) @@ -232,7 +234,7 @@ static inline void handle_arp_reply(struct task_base *tbase, struct rte_mbuf *mb rte_mbuf_refcnt_set(mbuf, nb_requests); for (int i = 0; i < nb_requests; i++) { struct rte_ring *ring = task->external_ip_table[ret].rings[i]; - tx_ring_ip(tbase, ring, UPDATE_FROM_CTRL, mbuf, key); + tx_ring_ip(tbase, ring, MAC_INFO_FROM_MASTER, mbuf, key); } task->external_ip_table[ret].nb_requests = 0; } else { @@ -258,7 +260,7 @@ static inline void handle_arp_request(struct task_base *tbase, struct rte_mbuf * create_mac(hdr_arp, &mac); mbuf->ol_flags &= ~(PKT_TX_IP_CKSUM|PKT_TX_UDP_CKSUM); build_arp_reply(hdr_arp, &mac); - tx_ring(tbase, ring, ARP_REPLY_FROM_CTRL, mbuf); + tx_ring(tbase, ring, SEND_ARP_REPLY_FROM_MASTER, mbuf); return; } @@ -273,7 +275,7 @@ static inline void handle_arp_request(struct task_base *tbase, struct rte_mbuf * struct rte_ring *ring = task->internal_ip_table[ret].ring; mbuf->ol_flags &= 
~(PKT_TX_IP_CKSUM|PKT_TX_UDP_CKSUM); build_arp_reply(hdr_arp, &task->internal_ip_table[ret].mac); - tx_ring(tbase, ring, ARP_REPLY_FROM_CTRL, mbuf); + tx_ring(tbase, ring, SEND_ARP_REPLY_FROM_MASTER, mbuf); } } @@ -337,7 +339,7 @@ static inline void handle_unknown_ip(struct task_base *tbase, struct rte_mbuf *m // We send an ARP request even if one was just sent (and not yet answered) by another task mbuf->ol_flags &= ~(PKT_TX_IP_CKSUM|PKT_TX_UDP_CKSUM); build_arp_request(mbuf, &task->internal_port_table[port].mac, ip_dst, ip_src); - tx_ring(tbase, ring, ARP_REQ_FROM_CTRL, mbuf); + tx_ring(tbase, ring, SEND_ARP_REQUEST_FROM_MASTER, mbuf); } static inline void build_icmp_reply_message(struct task_base *tbase, struct rte_mbuf *mbuf) @@ -365,7 +367,7 @@ static inline void build_icmp_reply_message(struct task_base *tbase, struct rte_ } else { struct rte_ring *ring = task->internal_ip_table[ret].ring; mbuf->ol_flags &= ~(PKT_TX_IP_CKSUM|PKT_TX_UDP_CKSUM); - tx_ring(tbase, ring, ICMP_FROM_CTRL, mbuf); + tx_ring(tbase, ring, SEND_ICMP_FROM_MASTER, mbuf); } } @@ -411,10 +413,306 @@ static inline void handle_icmp(struct task_base *tbase, struct rte_mbuf *mbuf) return; } -static inline void handle_message(struct task_base *tbase, struct rte_mbuf *mbuf, int ring_id) +static inline void handle_unknown_ip6(struct task_base *tbase, struct rte_mbuf *mbuf) { struct task_master *task = (struct task_master *)tbase; struct ether_hdr_arp *hdr_arp = rte_pktmbuf_mtod(mbuf, struct ether_hdr_arp *); + uint8_t port = get_port(mbuf); + struct ipv6_addr *ip_dst = ctrl_ring_get_ipv6_addr(mbuf); + int ret1, ret2, i; + + plogx_dbg("\tMaster trying to find MAC of external IP "IPv6_BYTES_FMT" for port %d\n", IPv6_BYTES(ip_dst->bytes), port); + if (unlikely(port >= PROX_MAX_PORTS)) { + plogx_dbg("Port %d not found", port); + tx_drop(mbuf); + return; + } + struct ipv6_addr *local_ip_src = &task->internal_port_table[port].local_ipv6_addr; + struct ipv6_addr *global_ip_src = 
&task->internal_port_table[port].global_ipv6_addr; + struct ipv6_addr *ip_src; + if (memcmp(local_ip_src, ip_dst, 8) == 0) + ip_src = local_ip_src; + else if (memcmp(global_ip_src, &null_addr, 16)) + ip_src = global_ip_src; + else { + plogx_dbg("Unable to find a src ip for dst ip "IPv6_BYTES_FMT"\n", IPv6_BYTES(ip_dst->bytes)); + tx_drop(mbuf); + return; + } + struct rte_ring *ring = task->ctrl_tx_rings[get_core(mbuf) * MAX_TASKS_PER_CORE + get_task(mbuf)]; + + if (ring == NULL) { + plogx_dbg("Port %d not registered", port); + tx_drop(mbuf); + return; + } + + ret2 = rte_hash_add_key(task->external_ip6_hash, (const void *)ip_dst); + if (unlikely(ret2 < 0)) { + plogx_dbg("Unable to add IP "IPv6_BYTES_FMT" in external_ip6_hash\n", IPv6_BYTES(ip_dst->bytes)); + tx_drop(mbuf); + return; + } + + // If multiple tasks requesting the same info, we will need to send a reply to all of them + // However if one task sends multiple requests to the same IP (e.g. because it is not answering) + // then we should not send multiple replies to the same task + if (task->external_ip6_table[ret2].nb_requests >= PROX_MAX_ARP_REQUESTS) { + // This can only happen if really many tasks requests the same IP + plogx_dbg("Unable to add request for IP "IPv6_BYTES_FMT" in external_ip6_table\n", IPv6_BYTES(ip_dst->bytes)); + tx_drop(mbuf); + return; + } + for (i = 0; i < task->external_ip6_table[ret2].nb_requests; i++) { + if (task->external_ip6_table[ret2].rings[i] == ring) + break; + } + if (i >= task->external_ip6_table[ret2].nb_requests) { + // If this is a new request i.e. a new task requesting a new IP + task->external_ip6_table[ret2].rings[task->external_ip6_table[ret2].nb_requests] = ring; + task->external_ip6_table[ret2].nb_requests++; + // Only needed for first request - but avoid test and copy the same 6 bytes + // In most cases we will only have one request per IP. 
+ //memcpy(&task->external_ip6_table[ret2].mac, &task->internal_port_table[port].mac, sizeof(prox_rte_ether_addr)); + } + + // As timers are not handled by master, we might send an NS request even if one was just sent + // (and not yet answered) by another task + build_neighbour_sollicitation(mbuf, &task->internal_port_table[port].mac, ip_dst, ip_src); + tx_ring(tbase, ring, SEND_NDP_FROM_MASTER, mbuf); +} + +static inline void handle_rs(struct task_base *tbase, struct rte_mbuf *mbuf) +{ + struct task_master *task = (struct task_master *)tbase; + prox_rte_ether_hdr *hdr = rte_pktmbuf_mtod(mbuf, prox_rte_ether_hdr *); + prox_rte_ipv6_hdr *ipv6_hdr = (prox_rte_ipv6_hdr *)(hdr + 1); + int i, ret; + uint8_t port = get_port(mbuf); + + if (task->internal_port_table[port].flags & IPV6_ROUTER) { + plogx_dbg("\tMaster handling Router Solicitation from ip "IPv6_BYTES_FMT" on port %d\n", IPv6_BYTES(ipv6_hdr->src_addr), port); + struct rte_ring *ring = task->internal_port_table[port].ring; + build_router_advertisement(mbuf, &prox_port_cfg[port].eth_addr, &task->internal_port_table[port].local_ipv6_addr, &task->internal_port_table[port].router_prefix); + tx_ring(tbase, ring, SEND_NDP_FROM_MASTER, mbuf); + return; + } +} + +static inline void handle_ra(struct task_base *tbase, struct rte_mbuf *mbuf) +{ + struct task_master *task = (struct task_master *)tbase; + prox_rte_ether_hdr *hdr = rte_pktmbuf_mtod(mbuf, prox_rte_ether_hdr *); + prox_rte_ipv6_hdr *ipv6_hdr = (prox_rte_ipv6_hdr *)(hdr + 1); + int i, ret, send = 0; + uint8_t port = get_port(mbuf); + struct rte_ring *ring = task->internal_port_table[port].ring; + + plog_dbg("Master handling Router Advertisement from ip "IPv6_BYTES_FMT" on port %d - len = %d; payload_len = %d\n", IPv6_BYTES(ipv6_hdr->src_addr), port, rte_pktmbuf_pkt_len(mbuf), rte_be_to_cpu_16(ipv6_hdr->payload_len)); + if (rte_be_to_cpu_16(ipv6_hdr->payload_len) + sizeof(prox_rte_ipv6_hdr) + sizeof(prox_rte_ether_hdr) > rte_pktmbuf_pkt_len(mbuf)) { + 
plog_err("Unexpected length received: pkt_len = %d, ipv6 hdr length = %ld, ipv6 payload len = %d\n", rte_pktmbuf_pkt_len(mbuf), sizeof(prox_rte_ipv6_hdr), rte_be_to_cpu_16(ipv6_hdr->payload_len)); + tx_drop(mbuf); + return; + } + if (ring == NULL) { + plog_info("TX side not initialized yet => dropping\n"); + tx_drop(mbuf); + return; + } + int16_t option_len = rte_be_to_cpu_16(ipv6_hdr->payload_len) - sizeof(struct icmpv6_RA) + sizeof(struct icmpv6_option); + struct icmpv6_RA *router_advertisement = (struct icmpv6_RA *)(ipv6_hdr + 1); + struct icmpv6_option *option = (struct icmpv6_option *)&router_advertisement->options; + struct icmpv6_prefix_option *prefix_option; + while(option_len > 0) { + uint8_t type = option->type; + switch(type) { + case ICMPv6_source_link_layer_address: + plog_dbg("\tOption %d = Source Link Layer Address\n", type); + break; + case ICMPv6_prefix_information: + prefix_option = (struct icmpv6_prefix_option *)option; + plog_dbg("\tOption %d = Prefix Information = %s\n", type, IP6_Canonical(&prefix_option->prefix)); + send = 1; + break; + case ICMPv6_mtu: + plog_dbg("\tOption %d = MTU\n", type); + break; + default: + plog_dbg("\tOption %d = Unknown Option\n", type); + break; + } + if ((option->length == 0) || (option->length *8 > option_len)) { + plog_err("Unexpected option length (%d) received in option %d: %d\n", option->length, option->type, option->length); + send = 0; + break; + } + option_len -=option->length * 8; + option = (struct icmpv6_option *)(((uint8_t *)option) + option->length * 8); + } + if (send) { + struct ipv6_addr global_ipv6; + memcpy(&global_ipv6, &prefix_option->prefix, sizeof(struct ipv6_addr)); + set_EUI(&global_ipv6, &task->internal_port_table[port].mac); + tx_ring_ip6(tbase, ring, IPV6_INFO_FROM_MASTER, mbuf, &global_ipv6); + } else + tx_drop(mbuf); +} + +static inline void handle_ns(struct task_base *tbase, struct rte_mbuf *mbuf) +{ + struct task_master *task = (struct task_master *)tbase; + prox_rte_ether_hdr *hdr = 
rte_pktmbuf_mtod(mbuf, prox_rte_ether_hdr *); + prox_rte_ipv6_hdr *ipv6_hdr = (prox_rte_ipv6_hdr *)(hdr + 1); + struct icmpv6_NS *neighbour_sollicitation = (struct icmpv6_NS *)(ipv6_hdr + 1); + int i, ret; + uint8_t port = get_port(mbuf); + struct rte_ring *ring = task->internal_port_table[port].ring; + + plog_dbg("Master handling Neighbour Sollicitation for ip "IPv6_BYTES_FMT" on port %d - len = %d; payload_len = %d\n", IPv6_BYTES(neighbour_sollicitation->target_address.bytes), port, rte_pktmbuf_pkt_len(mbuf), rte_be_to_cpu_16(ipv6_hdr->payload_len)); + if (rte_be_to_cpu_16(ipv6_hdr->payload_len) + sizeof(prox_rte_ipv6_hdr) + sizeof(prox_rte_ether_hdr) > rte_pktmbuf_pkt_len(mbuf)) { + plog_err("Unexpected length received: pkt_len = %d, ipv6 hdr length = %ld, ipv6 payload len = %d\n", rte_pktmbuf_pkt_len(mbuf), sizeof(prox_rte_ipv6_hdr), rte_be_to_cpu_16(ipv6_hdr->payload_len)); + tx_drop(mbuf); + return; + } + int16_t option_len = rte_be_to_cpu_16(ipv6_hdr->payload_len) - sizeof(struct icmpv6_NS) + sizeof(struct icmpv6_option); + struct icmpv6_option *option = (struct icmpv6_option *)&neighbour_sollicitation->options; + while(option_len > 0) { + uint8_t type = option->type; + switch(type) { + case ICMPv6_source_link_layer_address: + plog_dbg("Option %d = Source Link Layer Address\n", type); + break; + default: + plog_dbg("Option %d = Unknown Option\n", type); + break; + } + if ((option->length == 0) || (option->length *8 > option_len)) { + plog_err("Unexpected option length (%d) received in option %d: %d\n", option->length, option->type, option->length); + tx_drop(mbuf); + return; + } + option_len -=option->length * 8; + option = (struct icmpv6_option *)(((uint8_t *)option) + option->length * 8); + } + struct ip6_port key; + memcpy(&key.ip6, &neighbour_sollicitation->target_address, sizeof(struct ipv6_addr)); + key.port = port; + + if (memcmp(&neighbour_sollicitation->target_address, &task->internal_port_table[port].local_ipv6_addr, 8) == 0) { + // Local IP + if 
(task->internal_port_table[port].flags & HANDLE_RANDOM_LOCAL_IP_FLAG) { + prox_rte_ether_addr mac; + plogx_dbg("\tMaster handling NS request for ip "IPv6_BYTES_FMT" on port %d which supports random ip\n", IPv6_BYTES(key.ip6.bytes), key.port); + struct rte_ring *ring = task->internal_port_table[port].ring; + create_mac_from_EUI(&key.ip6, &mac); + build_neighbour_advertisement(tbase, mbuf, &mac, &task->internal_port_table[port].local_ipv6_addr, PROX_SOLLICITED); + tx_ring(tbase, ring, SEND_NDP_FROM_MASTER, mbuf); + return; + } + } else { + if (task->internal_port_table[port].flags & HANDLE_RANDOM_GLOBAL_IP_FLAG) { + prox_rte_ether_addr mac; + plogx_dbg("\tMaster handling NS request for ip "IPv6_BYTES_FMT" on port %d which supports random ip\n", IPv6_BYTES(key.ip6.bytes), key.port); + struct rte_ring *ring = task->internal_port_table[port].ring; + create_mac_from_EUI(&key.ip6, &mac); + build_neighbour_advertisement(tbase, mbuf, &mac, &task->internal_port_table[port].global_ipv6_addr, PROX_SOLLICITED); + tx_ring(tbase, ring, SEND_NDP_FROM_MASTER, mbuf); + return; + } + } + + ret = rte_hash_lookup(task->internal_ip6_hash, (const void *)&key); + if (unlikely(ret < 0)) { + // entry not found for this IP. 
+ plogx_dbg("Master ignoring Neighbour Sollicitation received on un-registered IP "IPv6_BYTES_FMT" on port %d\n", IPv6_BYTES(key.ip6.bytes), port); + tx_drop(mbuf); + } else { + struct rte_ring *ring = task->internal_ip6_table[ret].ring; + build_neighbour_advertisement(tbase, mbuf, &task->internal_ip6_table[ret].mac, &key.ip6, PROX_SOLLICITED); + tx_ring(tbase, ring, SEND_NDP_FROM_MASTER, mbuf); + } +} + +static inline void handle_na(struct task_base *tbase, struct rte_mbuf *mbuf) +{ + struct task_master *task = (struct task_master *)tbase; + prox_rte_ether_hdr *hdr = rte_pktmbuf_mtod(mbuf, prox_rte_ether_hdr *); + prox_rte_ipv6_hdr *ipv6_hdr = (prox_rte_ipv6_hdr *)(hdr + 1); + struct icmpv6_NA *neighbour_advertisement = (struct icmpv6_NA *)(ipv6_hdr + 1); + int i, ret; + uint8_t port = get_port(mbuf); + struct rte_ring *ring = task->internal_port_table[port].ring; + + plog_dbg("Master handling Neighbour Advertisement for ip "IPv6_BYTES_FMT" on port %d - len = %d; payload_len = %d\n", IPv6_BYTES(neighbour_advertisement->destination_address.bytes), port, rte_pktmbuf_pkt_len(mbuf), rte_be_to_cpu_16(ipv6_hdr->payload_len)); + if (rte_be_to_cpu_16(ipv6_hdr->payload_len) + sizeof(prox_rte_ipv6_hdr) + sizeof(prox_rte_ether_hdr) > rte_pktmbuf_pkt_len(mbuf)) { + plog_err("Unexpected length received: pkt_len = %d, ipv6 hdr length = %ld, ipv6 payload len = %d\n", rte_pktmbuf_pkt_len(mbuf), sizeof(prox_rte_ipv6_hdr), rte_be_to_cpu_16(ipv6_hdr->payload_len)); + tx_drop(mbuf); + return; + } + int16_t option_len = rte_be_to_cpu_16(ipv6_hdr->payload_len) - sizeof(struct icmpv6_NA) + sizeof(struct icmpv6_option); + struct icmpv6_option *option = (struct icmpv6_option *)&neighbour_advertisement->options; + uint8_t *target_address = NULL; + while(option_len > 0) { + uint8_t type = option->type; + switch(type) { + case ICMPv6_source_link_layer_address: + plog_dbg("Option %d = Source Link Layer Address\n", type); + break; + case ICMPv6_target_link_layer_address: + if (option->length 
!= 1) { + plog_err("Unexpected option length = %u for Target Link Layer Address\n", option->length); + break; + } + target_address = option->data; + plog_dbg("Option %d = Target Link Layer Address = "MAC_BYTES_FMT"\n", type, MAC_BYTES(target_address)); + break; + default: + plog_dbg("Option %d = Unknown Option\n", type); + break; + } + if ((option->length == 0) || (option->length *8 > option_len)) { + plog_err("Unexpected option length (%d) received in option %d: %d\n", option->length, option->type, option->length); + tx_drop(mbuf); + return; + } + option_len -=option->length * 8; + option = (struct icmpv6_option *)(((uint8_t *)option) + option->length * 8); + } + + if (target_address == NULL) { + tx_drop(mbuf); + } + struct ether_hdr_arp *hdr_arp = rte_pktmbuf_mtod(mbuf, struct ether_hdr_arp *); + struct ipv6_addr *key = &neighbour_advertisement->destination_address; + + ret = rte_hash_lookup(task->external_ip6_hash, (const void *)key); + if (unlikely(ret < 0)) { + // entry not found for this IP: we did not ask a request, delete the reply + tx_drop(mbuf); + } else { + // entry found for this IP + uint16_t nb_requests = task->external_ip6_table[ret].nb_requests; + //memcpy(&hdr->d_addr.addr_bytes, &task->external_ip6_table[ret].mac, sizeof(prox_rte_ether_addr)); + // If we receive a request from multiple task for the same IP, then we update all tasks + if (task->external_ip6_table[ret].nb_requests) { + rte_mbuf_refcnt_set(mbuf, nb_requests); + for (int i = 0; i < nb_requests; i++) { + struct rte_ring *ring = task->external_ip6_table[ret].rings[i]; + tx_ring_ip6_data(tbase, ring, MAC_INFO_FROM_MASTER_FOR_IPV6, mbuf, &neighbour_advertisement->destination_address, *(uint64_t *)target_address); + } + task->external_ip6_table[ret].nb_requests = 0; + } else { + tx_drop(mbuf); + } + } +} + +static inline void handle_message(struct task_base *tbase, struct rte_mbuf *mbuf, int ring_id) +{ + struct task_master *task = (struct task_master *)tbase; + struct ether_hdr_arp 
*hdr_arp; + prox_rte_ether_hdr *hdr; + struct icmpv6 *icmpv6; int command = get_command(mbuf); uint8_t port = get_port(mbuf); uint32_t ip; @@ -422,7 +720,7 @@ static inline void handle_message(struct task_base *tbase, struct rte_mbuf *mbuf plogx_dbg("\tMaster received %s (%x) from mbuf %p\n", actions_string[command], command, mbuf); switch(command) { - case BGP_TO_CTRL: + case BGP_TO_MASTER: if (vdev_port != NO_VDEV_PORT) { // If a virtual (net_tap) device is attached, send the (BGP) packet to this device // The kernel will receive and handle it. @@ -432,7 +730,7 @@ static inline void handle_message(struct task_base *tbase, struct rte_mbuf *mbuf } tx_drop(mbuf); break; - case ICMP_TO_CTRL: + case ICMP_TO_MASTER: if (vdev_port != NO_VDEV_PORT) { // If a virtual (net_tap) device is attached, send the (PING) packet to this device // The kernel will receive and handle it. @@ -442,7 +740,7 @@ static inline void handle_message(struct task_base *tbase, struct rte_mbuf *mbuf } handle_icmp(tbase, mbuf); break; - case ARP_TO_CTRL: + case ARP_PKT_FROM_NET_TO_MASTER: if (vdev_port != NO_VDEV_PORT) { // If a virtual (net_tap) device is attached, send the (ARP) packet to this device // The kernel will receive and handle it. 
@@ -450,8 +748,9 @@ static inline void handle_message(struct task_base *tbase, struct rte_mbuf *mbuf int n = rte_eth_tx_burst(prox_port_cfg[port].dpdk_mapping, 0, &mbuf, 1); return; } + hdr_arp = rte_pktmbuf_mtod(mbuf, struct ether_hdr_arp *); if (hdr_arp->ether_hdr.ether_type != ETYPE_ARP) { - plog_err("\tUnexpected message received: ARP_TO_CTRL with ether_type %x\n", hdr_arp->ether_hdr.ether_type); + plog_err("\tUnexpected message received: ARP_PKT_FROM_NET_TO_MASTER with ether_type %x\n", hdr_arp->ether_hdr.ether_type); tx_drop(mbuf); return; } else if (arp_is_gratuitous(hdr_arp)) { @@ -469,7 +768,7 @@ static inline void handle_message(struct task_base *tbase, struct rte_mbuf *mbuf return; } break; - case REQ_MAC_TO_CTRL: + case IP4_REQ_MAC_TO_MASTER: if (vdev_port != NO_VDEV_PORT) { // We send a packet to the kernel with the proper destnation IP address and our src IP address // This means that if a generator sends packets from many sources all ARP will still @@ -489,9 +788,9 @@ static inline void handle_message(struct task_base *tbase, struct rte_mbuf *mbuf int ret = rte_hash_lookup(task->external_ip_hash, (const void *)&ip); if ((ret >= 0) && (!prox_rte_is_zero_ether_addr(&task->external_ip_table[ret].mac))) { memcpy(&hdr_arp->arp.data.sha, &task->external_ip_table[ret].mac, sizeof(prox_rte_ether_addr)); - plogx_dbg("\tMaster ready to send UPDATE_FROM_CTRL ip "IPv4_BYTES_FMT" with mac "MAC_BYTES_FMT"\n", + plogx_dbg("\tMaster ready to send MAC_INFO_FROM_MASTER ip "IPv4_BYTES_FMT" with mac "MAC_BYTES_FMT"\n", IP4(ip), MAC_BYTES(hdr_arp->arp.data.sha.addr_bytes)); - tx_ring_ip(tbase, ring, UPDATE_FROM_CTRL, mbuf, ip); + tx_ring_ip(tbase, ring, MAC_INFO_FROM_MASTER, mbuf, ip); return; } @@ -508,6 +807,61 @@ static inline void handle_message(struct task_base *tbase, struct rte_mbuf *mbuf } handle_unknown_ip(tbase, mbuf); break; + case IP6_REQ_MAC_TO_MASTER: + handle_unknown_ip6(tbase, mbuf); + break; + case NDP_PKT_FROM_NET_TO_MASTER: + hdr = 
rte_pktmbuf_mtod(mbuf, prox_rte_ether_hdr *); + prox_rte_ipv6_hdr *ipv6_hdr = (prox_rte_ipv6_hdr *)(hdr + 1); + if (unlikely((hdr->ether_type != ETYPE_IPv6) || (ipv6_hdr->proto != ICMPv6))) { + // Should not happen + if (hdr->ether_type != ETYPE_IPv6) + plog_err("\tUnexpected message received: NDP_PKT_FROM_NET_TO_MASTER with ether_type %x\n", hdr->ether_type); + else + plog_err("\tUnexpected message received: NDP_PKT_FROM_NET_TO_MASTER with ether_type %x and proto %x\n", hdr->ether_type, ipv6_hdr->proto); + tx_drop(mbuf); + return; + } + icmpv6 = (struct icmpv6 *)(ipv6_hdr + 1); + switch (icmpv6->type) { + case ICMPv6_DU: + plog_err("IPV6 ICMPV6 Destination Unreachable\n"); + tx_drop(mbuf); + break; + case ICMPv6_PTB: + plog_err("IPV6 ICMPV6 packet too big\n"); + tx_drop(mbuf); + break; + case ICMPv6_TE: + plog_err("IPV6 ICMPV6 Time Exceeded\n"); + tx_drop(mbuf); + break; + case ICMPv6_PaPr: + plog_err("IPV6 ICMPV6 Parameter Problem\n"); + tx_drop(mbuf); + break; + case ICMPv6_RS: + handle_rs(tbase, mbuf); + break; + case ICMPv6_RA: + handle_ra(tbase, mbuf); + break; + case ICMPv6_NS: + handle_ns(tbase, mbuf); + break; + case ICMPv6_NA: + handle_na(tbase, mbuf); + break; + case ICMPv6_RE: + plog_err("IPV6 ICMPV6 Redirect not handled\n"); + tx_drop(mbuf); + break; + default: + plog_err("Unexpected type %d in IPV6 ICMPV6\n", icmpv6->type); + tx_drop(mbuf); + break; + } + break; default: plogx_dbg("\tMaster received unexpected message\n"); tx_drop(mbuf); @@ -517,7 +871,6 @@ static inline void handle_message(struct task_base *tbase, struct rte_mbuf *mbuf void init_ctrl_plane(struct task_base *tbase) { - prox_cfg.flags |= DSF_CTRL_PLANE_ENABLED; struct task_master *task = (struct task_master *)tbase; int socket_id = rte_lcore_to_socket_id(prox_cfg.master); uint32_t n_entries = MAX_ARP_ENTRIES * 4; @@ -527,25 +880,52 @@ void init_ctrl_plane(struct task_base *tbase) struct rte_hash_parameters hash_params = { .name = hash_name, .entries = n_entries, - .key_len = 
sizeof(uint32_t), .hash_func = rte_hash_crc, .hash_func_init_val = 0, }; - task->external_ip_hash = rte_hash_create(&hash_params); - PROX_PANIC(task->external_ip_hash == NULL, "Failed to set up external ip hash\n"); - plog_info("\texternal ip hash table allocated, with %d entries of size %d\n", hash_params.entries, hash_params.key_len); - task->external_ip_table = (struct external_ip_table *)prox_zmalloc(n_entries * sizeof(struct external_ip_table), socket_id); - PROX_PANIC(task->external_ip_table == NULL, "Failed to allocate memory for %u entries in external ip table\n", n_entries); - plog_info("\texternal ip table, with %d entries of size %ld\n", n_entries, sizeof(struct external_ip_table)); - - hash_name[0]++; - hash_params.key_len = sizeof(struct ip_port); - task->internal_ip_hash = rte_hash_create(&hash_params); - PROX_PANIC(task->internal_ip_hash == NULL, "Failed to set up internal ip hash\n"); - plog_info("\tinternal ip hash table allocated, with %d entries of size %d\n", hash_params.entries, hash_params.key_len); - task->internal_ip_table = (struct ip_table *)prox_zmalloc(n_entries * sizeof(struct ip_table), socket_id); - PROX_PANIC(task->internal_ip_table == NULL, "Failed to allocate memory for %u entries in internal ip table\n", n_entries); - plog_info("\tinternal ip table, with %d entries of size %ld\n", n_entries, sizeof(struct ip_table)); + if (prox_cfg.flags & DSF_L3_ENABLED) { + hash_params.key_len = sizeof(uint32_t); + task->external_ip_hash = rte_hash_create(&hash_params); + PROX_PANIC(task->external_ip_hash == NULL, "Failed to set up external ip hash\n"); + plog_info("\texternal ip hash table allocated, with %d entries of size %d\n", hash_params.entries, hash_params.key_len); + hash_name[0]++; + + task->external_ip_table = (struct external_ip_table *)prox_zmalloc(n_entries * sizeof(struct external_ip_table), socket_id); + PROX_PANIC(task->external_ip_table == NULL, "Failed to allocate memory for %u entries in external ip table\n", n_entries); + 
plog_info("\texternal ip table, with %d entries of size %ld\n", n_entries, sizeof(struct external_ip_table)); + + hash_params.key_len = sizeof(struct ip_port); + task->internal_ip_hash = rte_hash_create(&hash_params); + PROX_PANIC(task->internal_ip_hash == NULL, "Failed to set up internal ip hash\n"); + plog_info("\tinternal ip hash table allocated, with %d entries of size %d\n", hash_params.entries, hash_params.key_len); + hash_name[0]++; + + task->internal_ip_table = (struct ip_table *)prox_zmalloc(n_entries * sizeof(struct ip_table), socket_id); + PROX_PANIC(task->internal_ip_table == NULL, "Failed to allocate memory for %u entries in internal ip table\n", n_entries); + plog_info("\tinternal ip table, with %d entries of size %ld\n", n_entries, sizeof(struct ip_table)); + } + + if (prox_cfg.flags & DSF_NDP_ENABLED) { + hash_params.key_len = sizeof(struct ipv6_addr); + task->external_ip6_hash = rte_hash_create(&hash_params); + PROX_PANIC(task->external_ip6_hash == NULL, "Failed to set up external ip6 hash\n"); + plog_info("\texternal ip6 hash table allocated, with %d entries of size %d\n", hash_params.entries, hash_params.key_len); + hash_name[0]++; + + task->external_ip6_table = (struct external_ip_table *)prox_zmalloc(n_entries * sizeof(struct external_ip_table), socket_id); + PROX_PANIC(task->external_ip6_table == NULL, "Failed to allocate memory for %u entries in external ip6 table\n", n_entries); + plog_info("\texternal ip6_table, with %d entries of size %ld\n", n_entries, sizeof(struct external_ip_table)); + + hash_params.key_len = sizeof(struct ip6_port); + task->internal_ip6_hash = rte_hash_create(&hash_params); + PROX_PANIC(task->internal_ip6_hash == NULL, "Failed to set up internal ip6 hash\n"); + plog_info("\tinternal ip6 hash table allocated, with %d entries of size %d\n", hash_params.entries, hash_params.key_len); + hash_name[0]++; + + task->internal_ip6_table = (struct ip_table *)prox_zmalloc(n_entries * sizeof(struct ip_table), socket_id); + 
PROX_PANIC(task->internal_ip6_table == NULL, "Failed to allocate memory for %u entries in internal ip6 table\n", n_entries); + plog_info("\tinternal ip6 table, with %d entries of size %ld\n", n_entries, sizeof(struct ip_table)); + } int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); PROX_PANIC(fd < 0, "Failed to open netlink socket: %d\n", errno); @@ -574,7 +954,7 @@ void init_ctrl_plane(struct task_base *tbase) task->route_fds.events = POLL_IN; plog_info("\tRTMGRP_IPV4_ROUTE netlink group bound; fd = %d\n", fd); - static char name[] = "master_arp_pool"; + static char name[] = "master_arp_nd_pool"; const int NB_ARP_MBUF = 1024; const int ARP_MBUF_SIZE = 2048; const int NB_CACHE_ARP_MBUF = 256; @@ -585,7 +965,7 @@ void init_ctrl_plane(struct task_base *tbase) rte_socket_id(), NB_ARP_MBUF); plog_info("\t\tMempool %p (%s) size = %u * %u cache %u, socket %d\n", ret, name, NB_ARP_MBUF, ARP_MBUF_SIZE, NB_CACHE_ARP_MBUF, rte_socket_id()); - tbase->l3.arp_pool = ret; + tbase->l3.arp_nd_pool = ret; } static void handle_route_event(struct task_base *tbase) @@ -638,13 +1018,13 @@ static void handle_route_event(struct task_base *tbase) } } int dpdk_vdev_port = -1; - for (int i = 0; i< rte_eth_dev_count(); i++) { + for (int i = 0; i< prox_rte_eth_dev_count_avail(); i++) { if (strcmp(prox_port_cfg[i].name, interface_name) == 0) dpdk_vdev_port = i; } if (dpdk_vdev_port != -1) { plogx_info("Received netlink message on tap interface %s for IP "IPv4_BYTES_FMT"/%d, Gateway "IPv4_BYTES_FMT"\n", interface_name, IP4(ip), dst_len, IP4(gw_ip)); - int ret1 = rte_mempool_get(tbase->l3.arp_pool, (void **)mbufs); + int ret1 = rte_mempool_get(tbase->l3.arp_nd_pool, (void **)mbufs); if (unlikely(ret1 != 0)) { plog_err("Unable to allocate a mbuf for master to core communication\n"); return; @@ -726,7 +1106,7 @@ static void handle_arp_event(struct task_base *tbase) memcpy(&task->external_ip_table[ret].mac, &mac, sizeof(prox_rte_ether_addr)); // If we receive a request from multiple task for 
the same IP, then we update all tasks - int ret1 = rte_mempool_get(tbase->l3.arp_pool, (void **)mbufs); + int ret1 = rte_mempool_get(tbase->l3.arp_nd_pool, (void **)mbufs); if (unlikely(ret1 != 0)) { plog_err("Unable to allocate a mbuf for master to core communication\n"); return; @@ -736,8 +1116,8 @@ static void handle_arp_event(struct task_base *tbase) struct rte_ring *ring = task->external_ip_table[ret].rings[i]; struct ether_hdr_arp *hdr = rte_pktmbuf_mtod(mbufs[0], struct ether_hdr_arp *); memcpy(&hdr->arp.data.sha, &mac, sizeof(prox_rte_ether_addr)); - tx_ring_ip(tbase, ring, UPDATE_FROM_CTRL, mbufs[0], ip); - plog_dbg("UPDATE_FROM_CTRL ip "IPv4_BYTES_FMT" with mac "MAC_BYTES_FMT"\n", IP4(ip), MAC_BYTES(mac.addr_bytes)); + tx_ring_ip(tbase, ring, MAC_INFO_FROM_MASTER, mbufs[0], ip); + plog_dbg("MAC_INFO_FROM_MASTER ip "IPv4_BYTES_FMT" with mac "MAC_BYTES_FMT"\n", IP4(ip), MAC_BYTES(mac.addr_bytes)); } task->external_ip_table[ret].nb_requests = 0; return; diff --git a/VNFs/DPPD-PROX/handle_master.h b/VNFs/DPPD-PROX/handle_master.h index fc8706a8..518906ed 100644 --- a/VNFs/DPPD-PROX/handle_master.h +++ b/VNFs/DPPD-PROX/handle_master.h @@ -14,30 +14,91 @@ // limitations under the License. 
*/ +#include #include "task_base.h" #include "task_init.h" enum arp_actions { - UPDATE_FROM_CTRL, - ROUTE_ADD_FROM_CTRL, - ROUTE_DEL_FROM_CTRL, - ARP_REQ_FROM_CTRL, - ARP_REPLY_FROM_CTRL, - ICMP_FROM_CTRL, - BGP_FROM_CTRL, - ARP_TO_CTRL, - ICMP_TO_CTRL, - BGP_TO_CTRL, - REQ_MAC_TO_CTRL, + MAC_INFO_FROM_MASTER, + MAC_INFO_FROM_MASTER_FOR_IPV6, + IPV6_INFO_FROM_MASTER, + ROUTE_ADD_FROM_MASTER, + ROUTE_DEL_FROM_MASTER, + SEND_ARP_REQUEST_FROM_MASTER, + SEND_ARP_REPLY_FROM_MASTER, + SEND_NDP_FROM_MASTER, + SEND_ICMP_FROM_MASTER, + SEND_BGP_FROM_MASTER, + ARP_PKT_FROM_NET_TO_MASTER, + NDP_PKT_FROM_NET_TO_MASTER, + ICMP_TO_MASTER, + BGP_TO_MASTER, + IP4_REQ_MAC_TO_MASTER, + IP6_REQ_MAC_TO_MASTER, PKT_FROM_TAP, MAX_ACTIONS }; -#define HANDLE_RANDOM_IP_FLAG 1 +#define PROX_MAX_ARP_REQUESTS 32 // Maximum number of tasks requesting the same MAC address + +#define HANDLE_RANDOM_IP_FLAG 1 +#define HANDLE_RANDOM_LOCAL_IP_FLAG 2 +#define HANDLE_RANDOM_GLOBAL_IP_FLAG 4 +#define IPV6_ROUTER 8 #define RANDOM_IP 0xffffffff #define PROX_PSEUDO_PKT_PORT 0xdead +struct port_table { + prox_rte_ether_addr mac; + struct rte_ring *ring; + uint32_t ip; + uint8_t port; + uint8_t flags; + struct ipv6_addr local_ipv6_addr; + struct ipv6_addr global_ipv6_addr; + struct ipv6_addr router_prefix; + uint64_t last_echo_req_rcvd_tsc; + uint64_t last_echo_rep_rcvd_tsc; + uint32_t n_echo_req; + uint32_t n_echo_rep; +}; + +struct ip_table { + prox_rte_ether_addr mac; + struct rte_ring *ring; +}; + +struct external_ip_table { + prox_rte_ether_addr mac; + struct rte_ring *rings[PROX_MAX_ARP_REQUESTS]; + uint16_t nb_requests; +}; + +struct vdev { + int port_id; + struct rte_ring *ring; +}; + +struct task_master { + struct task_base base; + struct rte_ring *ctrl_rx_ring; + struct rte_ring **ctrl_tx_rings; + struct ip_table *internal_ip_table; // Store mac address from our IP + struct external_ip_table *external_ip_table; // Store mac address from external systems + struct ip_table *internal_ip6_table; // 
Store mac address from our IP + struct external_ip_table *external_ip6_table; // Store mac address from external systems + struct rte_hash *external_ip_hash; + struct rte_hash *external_ip6_hash; + struct rte_hash *internal_ip_hash; + struct rte_hash *internal_ip6_hash; + struct port_table internal_port_table[PROX_MAX_PORTS]; + struct vdev all_vdev[PROX_MAX_PORTS]; + int max_vdev_id; + struct pollfd arp_fds; + struct pollfd route_fds; +}; + const char *actions_string[MAX_ACTIONS]; void init_ctrl_plane(struct task_base *tbase); @@ -49,9 +110,7 @@ static inline void tx_drop(struct rte_mbuf *mbuf) rte_pktmbuf_free(mbuf); } -struct vdev { - int port_id; - struct rte_ring *ring; -}; void register_ip_to_ctrl_plane(struct task_base *task, uint32_t ip, uint8_t port_id, uint8_t core_id, uint8_t task_id); void master_init_vdev(struct task_base *task, uint8_t port_id, uint8_t core_id, uint8_t task_id); +void register_router_to_ctrl_plane(struct task_base *tbase, uint8_t port_id, uint8_t core_id, uint8_t task_id, struct ipv6_addr *local_ipv6_addr, struct ipv6_addr *global_ipv6_addr, struct ipv6_addr *router_prefix); +void register_node_to_ctrl_plane(struct task_base *tbase, struct ipv6_addr *local_ipv6_addr, struct ipv6_addr *global_ipv6_addr, uint8_t port_id, uint8_t core_id, uint8_t task_id); diff --git a/VNFs/DPPD-PROX/handle_routing.c b/VNFs/DPPD-PROX/handle_routing.c index 29b84382..4683ede7 100644 --- a/VNFs/DPPD-PROX/handle_routing.c +++ b/VNFs/DPPD-PROX/handle_routing.c @@ -37,7 +37,6 @@ #include "mpls.h" #include "qinq.h" #include "prox_cfg.h" -#include "ip6_addr.h" #include "prox_shared.h" #include "prox_cksum.h" #include "mbuf_utils.h" diff --git a/VNFs/DPPD-PROX/handle_swap.c b/VNFs/DPPD-PROX/handle_swap.c index b9029b6b..a5abd891 100644 --- a/VNFs/DPPD-PROX/handle_swap.c +++ b/VNFs/DPPD-PROX/handle_swap.c @@ -142,6 +142,38 @@ static void stop_swap(struct task_base *tbase) } } +static void handle_ipv6(struct task_swap *task, struct rte_mbuf *mbufs, 
prox_rte_ipv6_hdr *ipv6_hdr, uint8_t *out) +{ + __m128i ip = _mm_loadu_si128((__m128i*)&(ipv6_hdr->src_addr)); + uint16_t port; + uint16_t payload_len; + prox_rte_udp_hdr *udp_hdr; + + rte_mov16((uint8_t *)&(ipv6_hdr->src_addr), (uint8_t *)&(ipv6_hdr->dst_addr)); // Copy dst into src + rte_mov16((uint8_t *)&(ipv6_hdr->dst_addr), (uint8_t *)&ip); // Copy src into dst + switch(ipv6_hdr->proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + payload_len = ipv6_hdr->payload_len; + udp_hdr = (prox_rte_udp_hdr *)(ipv6_hdr + 1); + if (unlikely(udp_hdr->dgram_len < payload_len)) { + plog_warn("Unexpected L4 len (%u) versus L3 payload len (%u) in IPv6 packet\n", udp_hdr->dgram_len, payload_len); + *out = OUT_DISCARD; + break; + } + port = udp_hdr->dst_port; + udp_hdr->dst_port = udp_hdr->src_port; + udp_hdr->src_port = port; + write_src_and_dst_mac(task, mbufs); + *out = 0; + break; + default: + plog_warn("Unsupported next hop %u in IPv6 packet\n", ipv6_hdr->proto); + *out = OUT_DISCARD; + break; + } +} + static int handle_swap_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts) { struct task_swap *task = (struct task_swap *)tbase; @@ -149,6 +181,7 @@ static int handle_swap_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, ui prox_rte_ether_addr mac; prox_rte_ipv4_hdr *ip_hdr; prox_rte_udp_hdr *udp_hdr; + prox_rte_ipv6_hdr *ipv6_hdr; struct gre_hdr *pgre; prox_rte_ipv4_hdr *inner_ip_hdr; uint32_t ip; @@ -183,6 +216,11 @@ static int handle_swap_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, ui } mpls_len += sizeof(struct mpls_hdr); ip_hdr = (prox_rte_ipv4_hdr *)(mpls + 1); + if (unlikely((ip_hdr->version_ihl >> 4) == 6)) { + ipv6_hdr = (prox_rte_ipv6_hdr *)(ip_hdr); + handle_ipv6(task, mbufs[j], ipv6_hdr, &out[j]); + continue; + } break; case ETYPE_8021ad: qinq = (struct qinq_hdr *)hdr; @@ -191,20 +229,34 @@ static int handle_swap_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, ui out[j] = OUT_DISCARD; continue; } - ip_hdr = 
(prox_rte_ipv4_hdr *)(qinq + 1); + if (qinq->ether_type == ETYPE_IPv4) { + ip_hdr = (prox_rte_ipv4_hdr *)(qinq + 1); + } else if (qinq->ether_type == ETYPE_IPv6) { + ipv6_hdr = (prox_rte_ipv6_hdr *)(qinq + 1); + handle_ipv6(task, mbufs[j], ipv6_hdr, &out[j]); + continue; + } else { + plog_warn("Unsupported packet type\n"); + out[j] = OUT_DISCARD; + continue; + } break; case ETYPE_VLAN: vlan = (prox_rte_vlan_hdr *)(hdr + 1); if (vlan->eth_proto == ETYPE_IPv4) { ip_hdr = (prox_rte_ipv4_hdr *)(vlan + 1); + } else if (vlan->eth_proto == ETYPE_IPv6) { + ipv6_hdr = (prox_rte_ipv6_hdr *)(vlan + 1); + handle_ipv6(task, mbufs[j], ipv6_hdr, &out[j]); + continue; } else if (vlan->eth_proto == ETYPE_VLAN) { vlan = (prox_rte_vlan_hdr *)(vlan + 1); if (vlan->eth_proto == ETYPE_IPv4) { ip_hdr = (prox_rte_ipv4_hdr *)(vlan + 1); } else if (vlan->eth_proto == ETYPE_IPv6) { - plog_warn("Unsupported IPv6\n"); - out[j] = OUT_DISCARD; + ipv6_hdr = (prox_rte_ipv6_hdr *)(vlan + 1); + handle_ipv6(task, mbufs[j], ipv6_hdr, &out[j]); continue; } else { @@ -222,8 +274,8 @@ static int handle_swap_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, ui ip_hdr = (prox_rte_ipv4_hdr *)(hdr + 1); break; case ETYPE_IPv6: - plog_warn("Unsupported IPv6\n"); - out[j] = OUT_DISCARD; + ipv6_hdr = (prox_rte_ipv6_hdr *)(hdr + 1); + handle_ipv6(task, mbufs[j], ipv6_hdr, &out[j]); continue; case ETYPE_LLDP: out[j] = OUT_DISCARD; @@ -234,7 +286,13 @@ static int handle_swap_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, ui continue; } // TODO 2 : check packet is long enough for Ethernet + IP + UDP + extra header (VLAN, MPLS, ...) 
+ // IPv4 packet + ip = ip_hdr->dst_addr; + if (unlikely((ip_hdr->version_ihl >> 4) != 4)) { + out[j] = OUT_DISCARD; + continue; + } switch (ip_hdr->next_proto_id) { case IPPROTO_GRE: @@ -256,7 +314,7 @@ static int handle_swap_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, ui break; case IPPROTO_UDP: case IPPROTO_TCP: - if (task->igmp_address && PROX_RTE_IS_IPV4_MCAST(rte_be_to_cpu_32(ip))) { + if (unlikely(task->igmp_address && PROX_RTE_IS_IPV4_MCAST(rte_be_to_cpu_32(ip)))) { out[j] = OUT_DISCARD; continue; } diff --git a/VNFs/DPPD-PROX/ip6_addr.h b/VNFs/DPPD-PROX/ip6_addr.h index f9b56c19..3279ded1 100644 --- a/VNFs/DPPD-PROX/ip6_addr.h +++ b/VNFs/DPPD-PROX/ip6_addr.h @@ -1,5 +1,5 @@ /* -// Copyright (c) 2010-2017 Intel Corporation +// Copyright (c) 2010-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -17,8 +17,6 @@ #ifndef _IP6_ADDR_H_ #define _IP6_ADDR_H_ -#include <inttypes.h> - struct ipv6_addr { uint8_t bytes[16]; }; diff --git a/VNFs/DPPD-PROX/lconf.c b/VNFs/DPPD-PROX/lconf.c index 63e4763e..be2486e7 100644 --- a/VNFs/DPPD-PROX/lconf.c +++ b/VNFs/DPPD-PROX/lconf.c @@ -1,5 +1,5 @@ /* -// Copyright (c) 2010-2017 Intel Corporation +// Copyright (c) 2010-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License.
@@ -246,7 +246,7 @@ int lconf_do_flags(struct lcore_cfg *lconf) if (lconf->msg.type == LCONF_MSG_DUMP || lconf->msg.type == LCONF_MSG_DUMP_TX) { t->aux->task_rt_dump.n_print_tx = lconf->msg.val; - if (t->tx_pkt == tx_pkt_l3) { + if ((t->tx_pkt == tx_pkt_l3) || (t->tx_pkt == tx_pkt_ndp)) { if (t->aux->tx_pkt_orig) t->aux->tx_pkt_l2 = t->aux->tx_pkt_orig; t->aux->tx_pkt_orig = t->aux->tx_pkt_l2; @@ -267,7 +267,7 @@ int lconf_do_flags(struct lcore_cfg *lconf) if (task_base_get_original_rx_pkt_function(t) != rx_pkt_dummy) { t->aux->task_rt_dump.n_trace = lconf->msg.val; task_base_add_rx_pkt_function(t, rx_pkt_trace); - if (t->tx_pkt == tx_pkt_l3) { + if ((t->tx_pkt == tx_pkt_l3) || (t->tx_pkt == tx_pkt_ndp)) { if (t->aux->tx_pkt_orig) t->aux->tx_pkt_l2 = t->aux->tx_pkt_orig; t->aux->tx_pkt_orig = t->aux->tx_pkt_l2; @@ -280,7 +280,7 @@ int lconf_do_flags(struct lcore_cfg *lconf) } } else { t->aux->task_rt_dump.n_print_tx = lconf->msg.val; - if (t->tx_pkt == tx_pkt_l3) { + if ((t->tx_pkt == tx_pkt_l3) || (t->tx_pkt == tx_pkt_ndp)) { if (t->aux->tx_pkt_orig) t->aux->tx_pkt_l2 = t->aux->tx_pkt_orig; t->aux->tx_pkt_orig = t->aux->tx_pkt_l2; @@ -306,7 +306,7 @@ int lconf_do_flags(struct lcore_cfg *lconf) for (uint8_t task_id = 0; task_id < lconf->n_tasks_all; ++task_id) { t = lconf->tasks_all[task_id]; - if (t->tx_pkt == tx_pkt_l3) { + if ((t->tx_pkt == tx_pkt_l3) || (t->tx_pkt == tx_pkt_ndp)) { t->aux->tx_pkt_orig = t->aux->tx_pkt_l2; t->aux->tx_pkt_l2 = tx_pkt_distr; } else { @@ -328,7 +328,7 @@ int lconf_do_flags(struct lcore_cfg *lconf) for (uint8_t task_id = 0; task_id < lconf->n_tasks_all; ++task_id) { t = lconf->tasks_all[task_id]; if (t->aux->tx_pkt_orig) { - if (t->tx_pkt == tx_pkt_l3) { + if ((t->tx_pkt == tx_pkt_l3) || (t->tx_pkt == tx_pkt_ndp)) { t->aux->tx_pkt_l2 = t->aux->tx_pkt_orig; t->aux->tx_pkt_orig = NULL; } else { @@ -371,7 +371,7 @@ int lconf_do_flags(struct lcore_cfg *lconf) for (uint8_t task_id = 0; task_id < lconf->n_tasks_all; ++task_id) { t = 
lconf->tasks_all[task_id]; - if (t->tx_pkt == tx_pkt_l3) { + if ((t->tx_pkt == tx_pkt_l3) || (t->tx_pkt == tx_pkt_ndp)) { t->aux->tx_pkt_orig = t->aux->tx_pkt_l2; t->aux->tx_pkt_l2 = tx_pkt_bw; } else { @@ -385,7 +385,7 @@ int lconf_do_flags(struct lcore_cfg *lconf) for (uint8_t task_id = 0; task_id < lconf->n_tasks_all; ++task_id) { t = lconf->tasks_all[task_id]; if (t->aux->tx_pkt_orig) { - if (t->tx_pkt == tx_pkt_l3) { + if ((t->tx_pkt == tx_pkt_l3) || (t->tx_pkt == tx_pkt_ndp)) { t->aux->tx_pkt_l2 = t->aux->tx_pkt_orig; t->aux->tx_pkt_orig = NULL; } else { diff --git a/VNFs/DPPD-PROX/main.c b/VNFs/DPPD-PROX/main.c index 1af49b7d..f6fa3e80 100644 --- a/VNFs/DPPD-PROX/main.c +++ b/VNFs/DPPD-PROX/main.c @@ -1,5 +1,5 @@ /* -// Copyright (c) 2010-2017 Intel Corporation +// Copyright (c) 2010-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -155,7 +155,7 @@ static void check_missing_rx(void) { struct lcore_cfg *lconf = NULL, *rx_lconf = NULL, *tx_lconf = NULL; struct task_args *targ, *rx_targ = NULL, *tx_targ = NULL; - uint8_t port_id, rx_port_id, ok; + uint8_t port_id, rx_port_id, ok, l3, ndp; while (core_targ_next(&lconf, &targ, 0) == 0) { PROX_PANIC((targ->flags & TASK_ARG_RX_RING) && targ->rx_rings[0] == 0 && !targ->tx_opt_ring_task, @@ -168,12 +168,17 @@ static void check_missing_rx(void) lconf = NULL; while (core_targ_next(&lconf, &targ, 0) == 0) { - if (strcmp(targ->sub_mode_str, "l3") != 0) + l3 = ndp = 0; + if (strcmp(targ->sub_mode_str, "l3") == 0) + l3 = 1; + else if (strcmp(targ->sub_mode_str, "ndp") == 0) + ndp = 1; + else continue; - PROX_PANIC((targ->nb_rxports == 0) && (targ->nb_txports == 0), "L3 task must have a RX or a TX port\n"); - // If the L3 sub_mode receives from a port, check that there is at least one core/task - // transmitting to this port in L3 sub_mode + PROX_PANIC((targ->nb_rxports == 0) && (targ->nb_txports == 0), "L3/NDP 
task must have a RX or a TX port\n"); + // If the L3/NDP sub_mode receives from a port, check that there is at least one core/task + // transmitting to this port in L3/NDP sub_mode for (uint8_t i = 0; i < targ->nb_rxports; ++i) { rx_port_id = targ->rx_port_queue[i].port; ok = 0; @@ -181,35 +186,40 @@ static void check_missing_rx(void) while (core_targ_next(&tx_lconf, &tx_targ, 0) == 0) { if ((port_id = tx_targ->tx_port_queue[0].port) == OUT_DISCARD) continue; - if ((rx_port_id == port_id) && (tx_targ->flags & TASK_ARG_L3)){ + if ((rx_port_id == port_id) && + ( ((tx_targ->flags & TASK_ARG_L3) && l3) || + ((tx_targ->flags & TASK_ARG_NDP) && ndp) ) ) { ok = 1; break; } } - PROX_PANIC(ok == 0, "RX L3 sub mode for port %d on core %d task %d, but no core/task transmitting on that port\n", rx_port_id, lconf->id, targ->id); + PROX_PANIC(ok == 0, "RX %s sub mode for port %d on core %d task %d, but no core/task transmitting on that port\n", l3 ? "l3":"ndp", rx_port_id, lconf->id, targ->id); } - // If the L3 sub_mode transmits to a port, check that there is at least one core/task - // receiving from that port in L3 sub_mode. + // If the L3/NDP sub_mode transmits to a port, check that there is at least one core/task + // receiving from that port in L3/NDP sub_mode. if ((port_id = targ->tx_port_queue[0].port) == OUT_DISCARD) continue; rx_lconf = NULL; ok = 0; - plog_info("\tCore %d task %d transmitting to port %d in L3 mode\n", lconf->id, targ->id, port_id); + plog_info("\tCore %d task %d transmitting to port %d in %s submode\n", lconf->id, targ->id, port_id, l3 ? 
"l3":"ndp"); while (core_targ_next(&rx_lconf, &rx_targ, 0) == 0) { for (uint8_t i = 0; i < rx_targ->nb_rxports; ++i) { rx_port_id = rx_targ->rx_port_queue[i].port; - if ((rx_port_id == port_id) && (rx_targ->flags & TASK_ARG_L3)){ + if ((rx_port_id == port_id) && + ( ((rx_targ->flags & TASK_ARG_L3) && l3) || + ((rx_targ->flags & TASK_ARG_NDP) && ndp) ) ){ ok = 1; break; } } if (ok == 1) { - plog_info("\tCore %d task %d has found core %d task %d receiving from port %d\n", lconf->id, targ->id, rx_lconf->id, rx_targ->id, port_id); + plog_info("\tCore %d task %d has found core %d task %d receiving from port %d in %s submode\n", lconf->id, targ->id, rx_lconf->id, rx_targ->id, port_id, + ((rx_targ->flags & TASK_ARG_L3) && l3) ? "l3":"ndp"); break; } } - PROX_PANIC(ok == 0, "L3 sub mode for port %d on core %d task %d, but no core/task receiving on that port\n", port_id, lconf->id, targ->id); + PROX_PANIC(ok == 0, "%s sub mode for port %d on core %d task %d, but no core/task receiving on that port\n", l3 ? 
"l3":"ndp", port_id, lconf->id, targ->id); } } @@ -629,7 +639,7 @@ static void init_rings(void) lconf = NULL; struct prox_port_cfg *port; while (core_targ_next(&lconf, &starg, 1) == 0) { - if ((starg->task_init) && (starg->flags & TASK_ARG_L3)) { + if ((starg->task_init) && (starg->flags & (TASK_ARG_L3|TASK_ARG_NDP))) { struct core_task ct; ct.core = prox_cfg.master; ct.task = 0; @@ -750,12 +760,12 @@ static void setup_mempools_unique_per_socket(void) sprintf(name, "socket_%u_pool", i); if ((pool[i] = rte_mempool_lookup(name)) == NULL) { pool[i] = rte_mempool_create(name, - mbuf_count[i] - 1, mbuf_size[i], - nb_cache_mbuf[i], - sizeof(struct rte_pktmbuf_pool_private), - rte_pktmbuf_pool_init, NULL, - prox_pktmbuf_init, NULL, - i, flags); + mbuf_count[i] - 1, mbuf_size[i], + nb_cache_mbuf[i], + sizeof(struct rte_pktmbuf_pool_private), + rte_pktmbuf_pool_init, NULL, + prox_pktmbuf_init, NULL, + i, flags); PROX_PANIC(pool[i] == NULL, "\t\tError: cannot create mempool for socket %u\n", i); plog_info("\tMempool %p size = %u * %u cache %u, socket %d\n", pool[i], mbuf_count[i], mbuf_size[i], nb_cache_mbuf[i], i); diff --git a/VNFs/DPPD-PROX/packet_utils.c b/VNFs/DPPD-PROX/packet_utils.c index 08178d82..466dd481 100644 --- a/VNFs/DPPD-PROX/packet_utils.c +++ b/VNFs/DPPD-PROX/packet_utils.c @@ -31,6 +31,9 @@ #include "prox_lua.h" #include "hash_entry_types.h" #include "prox_compat.h" +#include "prox_cfg.h" +#include "defines.h" +#include "prox_ipv6.h" #include "tx_pkt.h" static inline int find_ip(struct ether_hdr_arp *pkt, uint16_t len, uint32_t *ip_dst) @@ -76,15 +79,91 @@ static inline int find_ip(struct ether_hdr_arp *pkt, uint16_t len, uint32_t *ip_ return -1; } +static inline struct ipv6_addr *find_ip6(prox_rte_ether_hdr *pkt, uint16_t len, struct ipv6_addr *ip_dst) +{ + prox_rte_vlan_hdr *vlan_hdr; + prox_rte_ipv6_hdr *ip; + uint16_t ether_type = pkt->ether_type; + uint16_t l2_len = sizeof(prox_rte_ether_hdr); + + // Unstack VLAN tags + while (((ether_type == 
ETYPE_8021ad) || (ether_type == ETYPE_VLAN)) && (l2_len + sizeof(prox_rte_vlan_hdr) < len)) { + vlan_hdr = (prox_rte_vlan_hdr *)((uint8_t *)pkt + l2_len); + l2_len +=4; + ether_type = vlan_hdr->eth_proto; + } + + switch (ether_type) { + case ETYPE_MPLSU: + case ETYPE_MPLSM: + // In case of MPLS, next hop MAC is based on MPLS, not destination IP + l2_len = 0; + break; + case ETYPE_IPv4: + case ETYPE_EoGRE: + case ETYPE_ARP: + l2_len = 0; + break; + case ETYPE_IPv6: + break; + default: + l2_len = 0; + plog_warn("Unsupported packet type %x - CRC might be wrong\n", ether_type); + break; + } + + if (l2_len && (l2_len + sizeof(prox_rte_ipv6_hdr) <= len)) { + prox_rte_ipv6_hdr *ip = (prox_rte_ipv6_hdr *)((uint8_t *)pkt + l2_len); + // TODO: implement LPM => replace ip_dst by next hop IP DST + memcpy(ip_dst, &ip->dst_addr, sizeof(struct ipv6_addr)); + return (struct ipv6_addr *)&ip->src_addr; + } + return NULL; +} + +static void send_unsollicited_neighbour_advertisement(struct task_base *tbase, struct task_args *targ) +{ + int ret; + uint8_t out = 0, port_id = tbase->l3.reachable_port_id; + struct rte_mbuf *mbuf; + + ret = rte_mempool_get(tbase->l3.arp_nd_pool, (void **)&mbuf); + if (likely(ret == 0)) { + mbuf->port = port_id; + build_neighbour_advertisement(tbase->l3.tmaster, mbuf, &prox_port_cfg[port_id].eth_addr, &targ->local_ipv6, PROX_UNSOLLICITED); + tbase->aux->tx_ctrlplane_pkt(tbase, &mbuf, 1, &out); + TASK_STATS_ADD_TX_NON_DP(&tbase->aux->stats, 1); + } else { + plog_err("Failed to get a mbuf from arp/ndp mempool\n"); + } +} + +static void send_router_sollicitation(struct task_base *tbase, struct task_args *targ) +{ + int ret; + uint8_t out = 0, port_id = tbase->l3.reachable_port_id; + struct rte_mbuf *mbuf; + + ret = rte_mempool_get(tbase->l3.arp_nd_pool, (void **)&mbuf); + if (likely(ret == 0)) { + mbuf->port = port_id; + build_router_sollicitation(mbuf, &prox_port_cfg[port_id].eth_addr, &targ->local_ipv6); + tbase->aux->tx_ctrlplane_pkt(tbase, &mbuf, 1, &out); 
+ TASK_STATS_ADD_TX_NON_DP(&tbase->aux->stats, 1); + } else { + plog_err("Failed to get a mbuf from arp/ndp mempool\n"); + } +} + /* This implementation could be improved: instead of checking each time we send a packet whether we need also to send an ARP, we should only check whether the MAC is valid. - We should check arp_update_time in the master process. This would also require the generating task to clear its arp ring + We should check arp_ndp_retransmit_timeout in the master process. This would also require the generating task to clear its arp ring to avoid sending many ARP while starting after a long stop. - We could also check for arp_timeout in the master so that dataplane has only to check whether MAC is available - but this would require either thread safety, or the exchange of information between master and generating core. -*/ + We could also check for reachable_timeout in the master so that dataplane has only to check whether MAC is available + but this would require either thread safety, or the exchange of information between master and generating core.
+ */ -static inline int add_key_and_send_arp(struct rte_hash *ip_hash, uint32_t *ip_dst, struct arp_table *entries, uint64_t tsc, uint64_t hz, uint32_t arp_update_time, prox_next_hop_index_type nh, uint64_t **time) +static inline int add_key_and_send_arp(struct rte_hash *ip_hash, uint32_t *ip_dst, struct arp_table *entries, uint64_t tsc, uint64_t hz, uint32_t arp_ndp_retransmit_timeout, prox_next_hop_index_type nh, uint64_t **time) { int ret = rte_hash_add_key(ip_hash, (const void *)ip_dst); if (unlikely(ret < 0)) { @@ -94,26 +173,26 @@ static inline int add_key_and_send_arp(struct rte_hash *ip_hash, uint32_t *ip_ds } else { entries[ret].ip = *ip_dst; entries[ret].nh = nh; - *time = &entries[ret].arp_update_time; + *time = &entries[ret].arp_ndp_retransmit_timeout; } - return SEND_ARP; + return SEND_ARP_ND; } -static inline int update_mac_and_send_mbuf(struct arp_table *entry, prox_rte_ether_addr *mac, uint64_t tsc, uint64_t hz, uint32_t arp_update_time, uint64_t **time) +static inline int update_mac_and_send_mbuf(struct arp_table *entry, prox_rte_ether_addr *mac, uint64_t tsc, uint64_t hz, uint32_t arp_ndp_retransmit_timeout, uint64_t **time) { - if (likely((tsc < entry->arp_update_time) && (tsc < entry->arp_timeout))) { + if (likely((tsc < entry->arp_ndp_retransmit_timeout) && (tsc < entry->reachable_timeout))) { memcpy(mac, &entry->mac, sizeof(prox_rte_ether_addr)); return SEND_MBUF; - } else if (tsc > entry->arp_update_time) { + } else if (tsc > entry->arp_ndp_retransmit_timeout) { // long time since we have sent an arp, send arp - *time = &entry->arp_update_time; - if (tsc < entry->arp_timeout){ + *time = &entry->arp_ndp_retransmit_timeout; + if (tsc < entry->reachable_timeout){ // MAC is valid in the table => send also the mbuf memcpy(mac, &entry->mac, sizeof(prox_rte_ether_addr)); - return SEND_MBUF_AND_ARP; + return SEND_MBUF_AND_ARP_ND; } else { // MAC still unknown, or timed out => only send ARP - return SEND_ARP; + return SEND_ARP_ND; } } // MAC is 
unknown and we already sent an ARP recently, drop mbuf and wait for ARP reply @@ -154,7 +233,7 @@ int write_dst_mac(struct task_base *tbase, struct rte_mbuf *mbuf, uint32_t *ip_d if (entry->ip) { *ip_dst = entry->ip; - return update_mac_and_send_mbuf(entry, mac, tsc, hz, l3->arp_update_time, time); + return update_mac_and_send_mbuf(entry, mac, tsc, hz, l3->arp_ndp_retransmit_timeout, time); } // no next ip: this is a local route @@ -162,30 +241,32 @@ int write_dst_mac(struct task_base *tbase, struct rte_mbuf *mbuf, uint32_t *ip_d int ret = rte_hash_lookup(l3->ip_hash, (const void *)ip_dst); if (unlikely(ret < 0)) { // IP not found, try to send an ARP - return add_key_and_send_arp(l3->ip_hash, ip_dst, l3->arp_table, tsc, hz, l3->arp_update_time, MAX_HOP_INDEX, time); + return add_key_and_send_arp(l3->ip_hash, ip_dst, l3->arp_table, tsc, hz, l3->arp_ndp_retransmit_timeout, MAX_HOP_INDEX, time); } else { - return update_mac_and_send_mbuf(&l3->arp_table[ret], mac, tsc, hz, l3->arp_update_time, time); + return update_mac_and_send_mbuf(&l3->arp_table[ret], mac, tsc, hz, l3->arp_ndp_retransmit_timeout, time); } return 0; } // No Routing table specified: only a local ip and maybe a gateway // Old default behavior: if a gw is specified, ALL packets go to this gateway (even those we could send w/o the gw if (l3->gw.ip) { - if (likely((l3->flags & FLAG_DST_MAC_KNOWN) && (tsc < l3->gw.arp_update_time) && (tsc < l3->gw.arp_timeout))) { + if (likely((l3->flags & FLAG_DST_MAC_KNOWN) && (tsc < l3->gw.arp_ndp_retransmit_timeout) && (tsc < l3->gw.reachable_timeout))) { memcpy(mac, &l3->gw.mac, sizeof(prox_rte_ether_addr)); return SEND_MBUF; - } else if (tsc > l3->gw.arp_update_time) { + } else if (tsc > l3->gw.arp_ndp_retransmit_timeout) { // long time since we have successfully sent an arp, send arp - // If sending ARP failed (ring full) then arp_update_time is not updated to avoid having to wait 1 sec to send ARP REQ again - *time = &l3->gw.arp_update_time; + // If sending ARP 
failed (ring full) then arp_ndp_retransmit_timeout is not updated to avoid having to wait 1 sec to send ARP REQ again + *time = &l3->gw.arp_ndp_retransmit_timeout; + l3->gw.arp_ndp_retransmit_timeout = tsc + l3->arp_ndp_retransmit_timeout * hz / 1000; + *ip_dst = l3->gw.ip; - if ((l3->flags & FLAG_DST_MAC_KNOWN) && (tsc < l3->gw.arp_timeout)){ + if ((l3->flags & FLAG_DST_MAC_KNOWN) && (tsc < l3->gw.reachable_timeout)){ // MAC is valid in the table => send also the mbuf memcpy(mac, &l3->gw.mac, sizeof(prox_rte_ether_addr)); - return SEND_MBUF_AND_ARP; + return SEND_MBUF_AND_ARP_ND; } else { // MAC still unknown, or timed out => only send ARP - return SEND_ARP; + return SEND_ARP_ND; } } else { // MAC is unknown and we already sent an ARP recently, drop mbuf and wait for ARP reply @@ -201,17 +282,16 @@ int write_dst_mac(struct task_base *tbase, struct rte_mbuf *mbuf, uint32_t *ip_d if (likely(l3->n_pkts < 4)) { for (unsigned int idx = 0; idx < l3->n_pkts; idx++) { if (*ip_dst == l3->optimized_arp_table[idx].ip) { - // IP address already in table - return update_mac_and_send_mbuf(&l3->optimized_arp_table[idx], mac, tsc, hz, l3->arp_update_time, time); + return update_mac_and_send_mbuf(&l3->optimized_arp_table[idx], mac, tsc, hz, l3->arp_ndp_retransmit_timeout, time); } } // IP address not found in table l3->optimized_arp_table[l3->n_pkts].ip = *ip_dst; - *time = &l3->optimized_arp_table[l3->n_pkts].arp_update_time; + *time = &l3->optimized_arp_table[l3->n_pkts].arp_ndp_retransmit_timeout; l3->n_pkts++; if (l3->n_pkts < 4) { - return SEND_ARP; + return SEND_ARP_ND; } // We have too many IP addresses to search linearly; lets use hash table instead => copy all entries in hash table @@ -228,16 +308,137 @@ int write_dst_mac(struct task_base *tbase, struct rte_mbuf *mbuf, uint32_t *ip_d memcpy(&l3->arp_table[ret], &l3->optimized_arp_table[idx], sizeof(struct arp_table)); } } - return SEND_ARP; + return SEND_ARP_ND; } else { // Find IP in lookup table. 
Send ARP if not found int ret = rte_hash_lookup(l3->ip_hash, (const void *)ip_dst); if (unlikely(ret < 0)) { // IP not found, try to send an ARP - return add_key_and_send_arp(l3->ip_hash, ip_dst, &l3->arp_table[ret], tsc, hz, l3->arp_update_time, MAX_HOP_INDEX, time); + return add_key_and_send_arp(l3->ip_hash, ip_dst, &l3->arp_table[ret], tsc, hz, l3->arp_ndp_retransmit_timeout, MAX_HOP_INDEX, time); + } else { + // IP has been found + return update_mac_and_send_mbuf(&l3->arp_table[ret], mac, tsc, hz, l3->arp_ndp_retransmit_timeout, time); + } + } + // Should not happen + return DROP_MBUF; +} + +int write_ip6_dst_mac(struct task_base *tbase, struct rte_mbuf *mbuf, struct ipv6_addr *ip_dst) +{ + const uint64_t hz = rte_get_tsc_hz(); + prox_rte_ether_hdr *packet = rte_pktmbuf_mtod(mbuf, prox_rte_ether_hdr *); + prox_rte_ether_addr *mac = &packet->d_addr; + struct ipv6_addr *used_ip_src; + + uint64_t tsc = rte_rdtsc(); + uint16_t len = rte_pktmbuf_pkt_len(mbuf); + + struct ipv6_addr *pkt_src_ip6; + if ((pkt_src_ip6 = find_ip6(packet, len, ip_dst)) == NULL) { + // Unable to find IP address => non IP packet => send it as it + return SEND_MBUF; + } + struct l3_base *l3 = &(tbase->l3); + if (memcmp(&l3->local_ipv6, ip_dst, 8) == 0) { + // Same prefix as local -> use local + used_ip_src = &l3->local_ipv6; + } else if (memcmp(&l3->global_ipv6 , &null_addr, 16) != 0) { + // Global IP is defined -> use it + used_ip_src = &l3->global_ipv6; + } else { + plog_info("Error as trying to send a packet to "IPv6_BYTES_FMT" using "IPv6_BYTES_FMT" (local)\n", IPv6_BYTES(ip_dst->bytes), IPv6_BYTES(l3->local_ipv6.bytes)); + return DROP_MBUF; + } + + memcpy(pkt_src_ip6, used_ip_src, sizeof(struct ipv6_addr)); + if (likely(l3->n_pkts < 4)) { + for (unsigned int idx = 0; idx < l3->n_pkts; idx++) { + if (memcmp(ip_dst, &l3->optimized_arp_table[idx].ip6, sizeof(struct ipv6_addr)) == 0) { + // IP address already in table + if ((tsc < l3->optimized_arp_table[idx].arp_ndp_retransmit_timeout) && 
(tsc < l3->optimized_arp_table[idx].reachable_timeout)) { + // MAC address was recently updated in table, use it + // plog_dbg("Valid MAC address found => send packet\n"); + memcpy(mac, &l3->optimized_arp_table[idx].mac, sizeof(prox_rte_ether_addr)); + return SEND_MBUF; + } else if (tsc > l3->optimized_arp_table[idx].arp_ndp_retransmit_timeout) { + // NDP not sent since a long time, send NDP + l3->optimized_arp_table[idx].arp_ndp_retransmit_timeout = tsc + l3->arp_ndp_retransmit_timeout * hz / 1000; + if (tsc < l3->optimized_arp_table[idx].reachable_timeout) { + // MAC still valid => also send mbuf + plog_dbg("Valid MAC found but NDP retransmit timeout => send packet and NDP\n"); + memcpy(mac, &l3->optimized_arp_table[idx].mac, sizeof(prox_rte_ether_addr)); + return SEND_MBUF_AND_ARP_ND; + } else { + plog_dbg("Unknown MAC => send NDP but cannot send packet\n"); + // MAC unvalid => only send NDP + return SEND_ARP_ND; + } + } else { + // NDP timeout elapsed, MAC not valid anymore but waiting for NDP reply + // plog_dbg("NDP reachable timeout elapsed - waiting for NDP reply\n"); + return DROP_MBUF; + } + } + } + // IP address not found in table + memcpy(&l3->optimized_arp_table[l3->n_pkts].ip6, ip_dst, sizeof(struct ipv6_addr)); + l3->optimized_arp_table[l3->n_pkts].arp_ndp_retransmit_timeout = tsc + l3->arp_ndp_retransmit_timeout * hz / 1000; + l3->n_pkts++; + + if (l3->n_pkts < 4) { + return SEND_ARP_ND; + } + + // We have too many IP addresses to search linearly; lets use hash table instead => copy all entries in hash table + for (uint32_t idx = 0; idx < l3->n_pkts; idx++) { + struct ipv6_addr *ip6 = &l3->optimized_arp_table[idx].ip6; + int ret = rte_hash_add_key(l3->ip6_hash, (const void *)ip6); + if (ret < 0) { + // This should not happen as few entries so far. 
+ // If it happens, we still send the NDP as easier: + // If the NDP corresponds to this error, the NDP reply will be ignored + // If NDP does not correspond to this error/ip, then NDP reply will be handled. + plogx_err("Unable add ip "IPv6_BYTES_FMT" in mac_hash (already %d entries)\n", IPv6_BYTES(ip6->bytes), idx); + } else { + memcpy(&l3->arp_table[ret], &l3->optimized_arp_table[idx], sizeof(struct arp_table)); + } + } + return SEND_ARP_ND; + } else { + // Find IP in lookup table. Send ND if not found + int ret = rte_hash_lookup(l3->ip6_hash, (const void *)ip_dst); + if (unlikely(ret < 0)) { + // IP not found, try to send an ND + int ret = rte_hash_add_key(l3->ip6_hash, (const void *)ip_dst); + if (ret < 0) { + // No reason to send NDP, as reply would be anyhow ignored + plogx_err("Unable to add ip "IPv6_BYTES_FMT" in mac_hash\n", IPv6_BYTES(ip_dst->bytes)); + return DROP_MBUF; + } else { + memcpy(&l3->arp_table[ret].ip6, ip_dst, sizeof(struct ipv6_addr)); + l3->arp_table[ret].arp_ndp_retransmit_timeout = tsc + l3->arp_ndp_retransmit_timeout * hz / 1000; + } + return SEND_ARP_ND; } else { // IP has been found - return update_mac_and_send_mbuf(&l3->arp_table[ret], mac, tsc, hz, l3->arp_update_time, time); + if (likely((tsc < l3->arp_table[ret].arp_ndp_retransmit_timeout) && (tsc < l3->arp_table[ret].reachable_timeout))) { + // MAC still valid and NDP sent recently + memcpy(mac, &l3->arp_table[ret].mac, sizeof(prox_rte_ether_addr)); + return SEND_MBUF; + } else if (tsc > l3->arp_table[ret].arp_ndp_retransmit_timeout) { + // NDP not sent since a long time, send NDP + l3->arp_table[ret].arp_ndp_retransmit_timeout = tsc + l3->arp_ndp_retransmit_timeout * hz / 1000; + if (tsc < l3->arp_table[ret].reachable_timeout) { + // MAC still valid => send also MBUF + memcpy(mac, &l3->arp_table[ret].mac, sizeof(prox_rte_ether_addr)); + return SEND_MBUF_AND_ARP_ND; + } else { + return SEND_ARP_ND; + } + } else { + return DROP_MBUF; + } } } // Should not happen @@ -260,12 +461,22 
@@ void task_init_l3(struct task_base *tbase, struct task_args *targ) .hash_func = rte_hash_crc, .hash_func_init_val = 0, }; - tbase->l3.ip_hash = rte_hash_create(&hash_params); - PROX_PANIC(tbase->l3.ip_hash == NULL, "Failed to set up ip hash table\n"); + if (targ->flags & TASK_ARG_L3) { + plog_info("\tInitializing L3 (IPv4)\n"); + tbase->l3.ip_hash = rte_hash_create(&hash_params); + PROX_PANIC(tbase->l3.ip_hash == NULL, "Failed to set up ip hash table\n"); + hash_name[0]++; + } + if (targ->flags & TASK_ARG_NDP) { + plog_info("\tInitializing NDP (IPv6)\n"); + hash_params.key_len = sizeof(struct ipv6_addr); + tbase->l3.ip6_hash = rte_hash_create(&hash_params); + PROX_PANIC(tbase->l3.ip6_hash == NULL, "Failed to set up ip hash table\n"); + } tbase->l3.arp_table = (struct arp_table *)prox_zmalloc(n_entries * sizeof(struct arp_table), socket_id); - PROX_PANIC(tbase->l3.arp_table == NULL, "Failed to allocate memory for %u entries in arp table\n", n_entries); - plog_info("\tarp table, with %d entries of size %ld\n", n_entries, sizeof(struct l3_base)); + PROX_PANIC(tbase->l3.arp_table == NULL, "Failed to allocate memory for %u entries in arp/ndp table\n", n_entries); + plog_info("\tarp/ndp table, with %d entries of size %ld\n", n_entries, sizeof(struct l3_base)); targ->lconf->ctrl_func_p[targ->task] = handle_ctrl_plane_pkts; targ->lconf->ctrl_timeout = freq_to_tsc(targ->ctrl_freq); @@ -275,36 +486,36 @@ void task_init_l3(struct task_base *tbase, struct task_args *targ) tbase->l3.task_id = targ->id; tbase->l3.tmaster = targ->tmaster; tbase->l3.seed = (uint)rte_rdtsc(); - if (targ->arp_timeout != 0) - tbase->l3.arp_timeout = targ->arp_timeout; + if (targ->reachable_timeout != 0) + tbase->l3.reachable_timeout = targ->reachable_timeout; else - tbase->l3.arp_timeout = DEFAULT_ARP_TIMEOUT; - if (targ->arp_update_time != 0) - tbase->l3.arp_update_time = targ->arp_update_time; + tbase->l3.reachable_timeout = DEFAULT_ARP_TIMEOUT; + if (targ->arp_ndp_retransmit_timeout != 0) + 
tbase->l3.arp_ndp_retransmit_timeout = targ->arp_ndp_retransmit_timeout; else - tbase->l3.arp_update_time = DEFAULT_ARP_UPDATE_TIME; + tbase->l3.arp_ndp_retransmit_timeout = DEFAULT_ARP_UPDATE_TIME; } void task_start_l3(struct task_base *tbase, struct task_args *targ) { - const int NB_ARP_MBUF = 1024; - const int ARP_MBUF_SIZE = 2048; - const int NB_CACHE_ARP_MBUF = 256; const int socket_id = rte_lcore_to_socket_id(targ->lconf->id); + const int NB_ARP_ND_MBUF = 1024; + const int ARP_ND_MBUF_SIZE = 2048; + const int NB_CACHE_ARP_ND_MBUF = 256; struct prox_port_cfg *port = find_reachable_port(targ); - if (port && (tbase->l3.arp_pool == NULL)) { + if (port && (tbase->l3.arp_nd_pool == NULL)) { static char name[] = "arp0_pool"; tbase->l3.reachable_port_id = port - prox_port_cfg; if (targ->local_ipv4) { - tbase->local_ipv4 = rte_be_to_cpu_32(targ->local_ipv4); - register_ip_to_ctrl_plane(tbase->l3.tmaster, tbase->local_ipv4, tbase->l3.reachable_port_id, targ->lconf->id, targ->id); + tbase->l3.local_ipv4 = rte_be_to_cpu_32(targ->local_ipv4); + register_ip_to_ctrl_plane(tbase->l3.tmaster, tbase->l3.local_ipv4, tbase->l3.reachable_port_id, targ->lconf->id, targ->id); } if (strcmp(targ->route_table, "") != 0) { struct lpm4 *lpm; int ret; - PROX_PANIC(tbase->local_ipv4 == 0, "missing local_ipv4 will route table is specified in L3 mode\n"); + PROX_PANIC(tbase->l3.local_ipv4 == 0, "missing local_ipv4 will route table is specified in L3 mode\n"); // LPM might be modified runtime => do not share with other cores ret = lua_to_lpm4(prox_lua(), GLOBAL, targ->route_table, socket_id, &lpm); @@ -330,7 +541,7 @@ void task_start_l3(struct task_base *tbase, struct task_args *targ) // Last but one "next_hop_index" is not a gateway but direct routes tbase->l3.next_hops[tbase->l3.nb_gws].ip = 0; ret = rte_lpm_add(tbase->l3.ipv4_lpm, targ->local_ipv4, targ->local_prefix, tbase->l3.nb_gws++); - PROX_PANIC(ret, "Failed to add local_ipv4 "IPv4_BYTES_FMT"/%d to lpm\n", IP4(tbase->local_ipv4), 
targ->local_prefix); + PROX_PANIC(ret, "Failed to add local_ipv4 "IPv4_BYTES_FMT"/%d to lpm\n", IP4(tbase->l3.local_ipv4), targ->local_prefix); // Last "next_hop_index" is default gw tbase->l3.next_hops[tbase->l3.nb_gws].ip = rte_bswap32(targ->gateway_ipv4); if (targ->gateway_ipv4) { @@ -340,15 +551,50 @@ void task_start_l3(struct task_base *tbase, struct task_args *targ) } master_init_vdev(tbase->l3.tmaster, tbase->l3.reachable_port_id, targ->lconf->id, targ->id); + + // Create IPv6 addr if none were configured + if (targ->flags & TASK_ARG_NDP) { + if (!memcmp(&targ->local_ipv6, &null_addr, 16)) { + set_link_local(&targ->local_ipv6); + set_EUI(&targ->local_ipv6, &port->eth_addr); + } + plog_info("\tCore %d, task %d, local IPv6 addr is "IPv6_BYTES_FMT" (%s)\n", + targ->lconf->id, targ->id, + IPv6_BYTES(targ->local_ipv6.bytes), + IP6_Canonical(&targ->local_ipv6)); + memcpy(&tbase->l3.local_ipv6, &targ->local_ipv6, sizeof(struct ipv6_addr)); + + if (memcmp(&targ->global_ipv6, &null_addr, sizeof(struct ipv6_addr))) { + memcpy(&tbase->l3.global_ipv6, &targ->global_ipv6, sizeof(struct ipv6_addr)); + plog_info("\tCore %d, task %d, global IPv6 addr is "IPv6_BYTES_FMT" (%s)\n", + targ->lconf->id, targ->id, + IPv6_BYTES(targ->global_ipv6.bytes), + IP6_Canonical(&targ->global_ipv6)); + } + if (targ->ipv6_router) + register_router_to_ctrl_plane(tbase->l3.tmaster, tbase->l3.reachable_port_id, targ->lconf->id, targ->id, &targ->local_ipv6, &targ->global_ipv6, &targ->router_prefix); + else + register_node_to_ctrl_plane(tbase->l3.tmaster, &targ->local_ipv6, &targ->global_ipv6, tbase->l3.reachable_port_id, targ->lconf->id, targ->id); + } + name[3]++; - struct rte_mempool *ret = rte_mempool_create(name, NB_ARP_MBUF, ARP_MBUF_SIZE, NB_CACHE_ARP_MBUF, + struct rte_mempool *ret = rte_mempool_create(name, NB_ARP_ND_MBUF, ARP_ND_MBUF_SIZE, NB_CACHE_ARP_ND_MBUF, sizeof(struct rte_pktmbuf_pool_private), rte_pktmbuf_pool_init, NULL, rte_pktmbuf_init, 0, rte_socket_id(), 0); - PROX_PANIC(ret 
== NULL, "Failed to allocate ARP memory pool on socket %u with %u elements\n", - rte_socket_id(), NB_ARP_MBUF); - plog_info("\t\tMempool %p (%s) size = %u * %u cache %u, socket %d\n", ret, name, NB_ARP_MBUF, - ARP_MBUF_SIZE, NB_CACHE_ARP_MBUF, rte_socket_id()); - tbase->l3.arp_pool = ret; + PROX_PANIC(ret == NULL, "Failed to allocate ARP/ND memory pool on socket %u with %u elements\n", + rte_socket_id(), NB_ARP_ND_MBUF); + plog_info("\tMempool %p (%s) size = %u * %u cache %u, socket %d (for ARP/ND)\n", ret, name, NB_ARP_ND_MBUF, + ARP_ND_MBUF_SIZE, NB_CACHE_ARP_ND_MBUF, rte_socket_id()); + tbase->l3.arp_nd_pool = ret; + if ((targ->flags & TASK_ARG_NDP) && (!targ->ipv6_router)) { + plog_info("Sending Router Sollicitation\n"); + send_router_sollicitation(tbase, targ); + } + if ((targ->flags & TASK_ARG_NDP) && (targ->flags & TASK_ARG_SEND_NA_AT_STARTUP)) { + plog_info("Sending unsollicited Neighbour Advertisement\n"); + send_unsollicited_neighbour_advertisement(tbase, targ); + + } } } @@ -360,10 +606,10 @@ void task_set_gateway_ip(struct task_base *tbase, uint32_t ip) void task_set_local_ip(struct task_base *tbase, uint32_t ip) { - tbase->local_ipv4 = ip; + tbase->l3.local_ipv4 = ip; } -static void reset_arp_update_time(struct l3_base *l3, uint32_t ip) +static void reset_arp_ndp_retransmit_timeout(struct l3_base *l3, uint32_t ip) { uint32_t idx; plogx_dbg("MAC entry for IP "IPv4_BYTES_FMT" timeout in kernel\n", IP4(ip)); @@ -371,9 +617,9 @@ static void reset_arp_update_time(struct l3_base *l3, uint32_t ip) if (l3->ipv4_lpm) { int ret = rte_hash_lookup(l3->ip_hash, (const void *)&ip); if (ret >= 0) - l3->arp_table[ret].arp_update_time = 0; + l3->arp_table[ret].arp_ndp_retransmit_timeout = 0; } else if (ip == l3->gw.ip) { - l3->gw.arp_update_time = 0; + l3->gw.arp_ndp_retransmit_timeout = 0; } else if (l3->n_pkts < 4) { for (idx = 0; idx < l3->n_pkts; idx++) { uint32_t ip_dst = l3->optimized_arp_table[idx].ip; @@ -381,12 +627,12 @@ static void 
reset_arp_update_time(struct l3_base *l3, uint32_t ip) break; } if (idx < l3->n_pkts) { - l3->optimized_arp_table[idx].arp_update_time = 0; + l3->optimized_arp_table[idx].arp_ndp_retransmit_timeout = 0; } } else { int ret = rte_hash_lookup(l3->ip_hash, (const void *)&ip); if (ret >= 0) - l3->arp_table[ret].arp_update_time = 0; + l3->arp_table[ret].arp_ndp_retransmit_timeout = 0; } return; } @@ -413,15 +659,18 @@ void handle_ctrl_plane_pkts(struct task_base *tbase, struct rte_mbuf **mbufs, ui uint32_t ip, ip_dst, idx, gateway_ip, prefix; prox_next_hop_index_type gateway_index; int j, ret, modified_route; + uint64_t addr; + struct ipv6_addr *ip6, *ip6_dst; uint16_t command; prox_rte_ether_hdr *hdr; struct ether_hdr_arp *hdr_arp; struct l3_base *l3 = &tbase->l3; uint64_t tsc= rte_rdtsc(); - uint64_t arp_timeout = l3->arp_timeout * hz / 1000; + uint64_t reachable_timeout = l3->reachable_timeout * hz / 1000; uint32_t nh; prox_rte_ipv4_hdr *pip; prox_rte_udp_hdr *udp_hdr; + uint8_t port = tbase->l3.reachable_port_id; for (j = 0; j < n_pkts; ++j) { PREFETCH0(mbufs[j]); @@ -434,10 +683,10 @@ void handle_ctrl_plane_pkts(struct task_base *tbase, struct rte_mbuf **mbufs, ui pip = NULL; udp_hdr = NULL; out[0] = OUT_HANDLED; - command = mbufs[j]->udata64 & 0xFFFF; + command = get_command(mbufs[j]); plogx_dbg("\tReceived %s mbuf %p\n", actions_string[command], mbufs[j]); switch(command) { - case ROUTE_ADD_FROM_CTRL: + case ROUTE_ADD_FROM_MASTER: ip = ctrl_ring_get_ip(mbufs[j]); gateway_ip = ctrl_ring_get_gateway_ip(mbufs[j]); prefix = ctrl_ring_get_prefix(mbufs[j]); @@ -457,7 +706,7 @@ void handle_ctrl_plane_pkts(struct task_base *tbase, struct rte_mbuf **mbufs, ui } tx_drop(mbufs[j]); break; - case ROUTE_DEL_FROM_CTRL: + case ROUTE_DEL_FROM_MASTER: ip = ctrl_ring_get_ip(mbufs[j]); prefix = ctrl_ring_get_prefix(mbufs[j]); @@ -471,15 +720,15 @@ void handle_ctrl_plane_pkts(struct task_base *tbase, struct rte_mbuf **mbufs, ui } tx_drop(mbufs[j]); break; - case UPDATE_FROM_CTRL: + 
case MAC_INFO_FROM_MASTER: hdr_arp = rte_pktmbuf_mtod(mbufs[j], struct ether_hdr_arp *); - ip = (mbufs[j]->udata64 >> 32) & 0xFFFFFFFF; + ip = get_ip(mbufs[j]); if (prox_rte_is_zero_ether_addr(&hdr_arp->arp.data.sha)) { // MAC timeout or deleted from kernel table => reset update_time // This will cause us to send new ARP request - // However, as arp_timeout not touched, we should continue sending our regular IP packets - reset_arp_update_time(l3, ip); + // However, as reachable_timeout not touched, we should continue sending our regular IP packets + reset_arp_ndp_retransmit_timeout(l3, ip); return; } else plogx_dbg("\tUpdating MAC entry for IP "IPv4_BYTES_FMT" with MAC "MAC_BYTES_FMT"\n", @@ -494,20 +743,20 @@ void handle_ctrl_plane_pkts(struct task_base *tbase, struct rte_mbuf **mbufs, ui } else if ((nh = l3->arp_table[ret].nh) != MAX_HOP_INDEX) { entry = &l3->next_hops[nh]; memcpy(&entry->mac, &(hdr_arp->arp.data.sha), sizeof(prox_rte_ether_addr)); - entry->arp_timeout = tsc + arp_timeout; - update_arp_update_time(l3, &entry->arp_update_time, l3->arp_update_time); + entry->reachable_timeout = tsc + reachable_timeout; + update_arp_ndp_retransmit_timeout(l3, &entry->arp_ndp_retransmit_timeout, l3->arp_ndp_retransmit_timeout); } else { memcpy(&l3->arp_table[ret].mac, &(hdr_arp->arp.data.sha), sizeof(prox_rte_ether_addr)); - l3->arp_table[ret].arp_timeout = tsc + arp_timeout; - update_arp_update_time(l3, &l3->arp_table[ret].arp_update_time, l3->arp_update_time); + l3->arp_table[ret].reachable_timeout = tsc + reachable_timeout; + update_arp_ndp_retransmit_timeout(l3, &l3->arp_table[ret].arp_ndp_retransmit_timeout, l3->arp_ndp_retransmit_timeout); } } else if (ip == l3->gw.ip) { // MAC address of the gateway memcpy(&l3->gw.mac, &hdr_arp->arp.data.sha, 6); l3->flags |= FLAG_DST_MAC_KNOWN; - l3->gw.arp_timeout = tsc + arp_timeout; - update_arp_update_time(l3, &l3->gw.arp_update_time, l3->arp_update_time); + l3->gw.reachable_timeout = tsc + reachable_timeout; + 
update_arp_ndp_retransmit_timeout(l3, &l3->gw.arp_ndp_retransmit_timeout, l3->arp_ndp_retransmit_timeout); } else if (l3->n_pkts < 4) { // Few packets tracked - should be faster to loop through them thean using a hash table for (idx = 0; idx < l3->n_pkts; idx++) { @@ -517,8 +766,8 @@ void handle_ctrl_plane_pkts(struct task_base *tbase, struct rte_mbuf **mbufs, ui } if (idx < l3->n_pkts) { memcpy(&l3->optimized_arp_table[idx].mac, &(hdr_arp->arp.data.sha), sizeof(prox_rte_ether_addr)); - l3->optimized_arp_table[idx].arp_timeout = tsc + arp_timeout; - update_arp_update_time(l3, &l3->optimized_arp_table[idx].arp_update_time, l3->arp_update_time); + l3->optimized_arp_table[idx].reachable_timeout = tsc + reachable_timeout; + update_arp_ndp_retransmit_timeout(l3, &l3->optimized_arp_table[idx].arp_ndp_retransmit_timeout, l3->arp_ndp_retransmit_timeout); } } else { ret = rte_hash_add_key(l3->ip_hash, (const void *)&ip); @@ -526,21 +775,49 @@ void handle_ctrl_plane_pkts(struct task_base *tbase, struct rte_mbuf **mbufs, ui plogx_info("Unable add ip "IPv4_BYTES_FMT" in mac_hash\n", IP4(ip)); } else { memcpy(&l3->arp_table[ret].mac, &(hdr_arp->arp.data.sha), sizeof(prox_rte_ether_addr)); - l3->arp_table[ret].arp_timeout = tsc + arp_timeout; - update_arp_update_time(l3, &l3->arp_table[ret].arp_update_time, l3->arp_update_time); + l3->arp_table[ret].reachable_timeout = tsc + reachable_timeout; + update_arp_ndp_retransmit_timeout(l3, &l3->arp_table[ret].arp_ndp_retransmit_timeout, l3->arp_ndp_retransmit_timeout); + } + } + tx_drop(mbufs[j]); + break; + case MAC_INFO_FROM_MASTER_FOR_IPV6: + ip6 = ctrl_ring_get_ipv6_addr(mbufs[j]); + uint64_t data = ctrl_ring_get_data(mbufs[j]); + + if (l3->n_pkts < 4) { + // Few packets tracked - should be faster to loop through them thean using a hash table + for (idx = 0; idx < l3->n_pkts; idx++) { + ip6_dst = &l3->optimized_arp_table[idx].ip6; + if (memcmp(ip6_dst, ip6, sizeof(struct ipv6_addr)) == 0) + break; + } + if (idx < l3->n_pkts) { + // 
IP found; this is a reply for one of our requests! + memcpy(&l3->optimized_arp_table[idx].mac, &data, sizeof(prox_rte_ether_addr)); + l3->optimized_arp_table[idx].reachable_timeout = tsc + l3->reachable_timeout * hz / 1000; + } + } else { + int ret = rte_hash_add_key(l3->ip6_hash, (const void *)ip6); + if (ret < 0) { + plogx_info("Unable add ip "IPv6_BYTES_FMT" in mac_hash\n", IPv6_BYTES(ip6->bytes)); + } else { + memcpy(&l3->arp_table[ret].mac, &data, sizeof(prox_rte_ether_addr)); + l3->arp_table[ret].reachable_timeout = tsc + l3->reachable_timeout * hz / 1000; } } tx_drop(mbufs[j]); break; - case ARP_REPLY_FROM_CTRL: - case ARP_REQ_FROM_CTRL: + case SEND_NDP_FROM_MASTER: + case SEND_ARP_REQUEST_FROM_MASTER: + case SEND_ARP_REPLY_FROM_MASTER: out[0] = 0; // tx_ctrlplane_pkt does not drop packets plogx_dbg("\tForwarding (ARP) packet from master\n"); tbase->aux->tx_ctrlplane_pkt(tbase, &mbufs[j], 1, out); TASK_STATS_ADD_TX_NON_DP(&tbase->aux->stats, 1); break; - case ICMP_FROM_CTRL: + case SEND_ICMP_FROM_MASTER: out[0] = 0; // tx_ctrlplane_pkt does not drop packets plogx_dbg("\tForwarding (PING) packet from master\n"); @@ -584,6 +861,26 @@ void handle_ctrl_plane_pkts(struct task_base *tbase, struct rte_mbuf **mbufs, ui tbase->aux->tx_ctrlplane_pkt(tbase, &mbufs[j], 1, out); TASK_STATS_ADD_TX_NON_DP(&tbase->aux->stats, 1); break; + case IPV6_INFO_FROM_MASTER: + // addr = ctrl_ring_get_data(mbufs[j]); + ip6 = ctrl_ring_get_ipv6_addr(mbufs[j]); + if (memcmp(&l3->global_ipv6 , &null_addr, 16) == 0) { + memcpy(&l3->global_ipv6, ip6, sizeof(struct ipv6_addr)); + plog_info("Core %d task %d received global IP "IPv6_BYTES_FMT"\n", l3->core_id, l3->task_id, IPv6_BYTES(ip6->bytes)); + } else if (memcmp(&l3->global_ipv6, ip6, 8) == 0) { + if (l3->prefix_printed == 0) { + plog_info("Core %d task %d received expected prefix "IPv6_PREFIX_FMT"\n", l3->core_id, l3->task_id, IPv6_PREFIX(ip6->bytes)); + l3->prefix_printed = 1; + } + } else { + plog_warn("Core %d task %d received 
unexpected prefix "IPv6_PREFIX_FMT", IP = "IPv6_PREFIX_FMT"\n", l3->core_id, l3->task_id, IPv6_PREFIX(ip6->bytes), IPv6_PREFIX(l3->global_ipv6.bytes)); + } + tx_drop(mbufs[j]); + break; + default: + plog_err("Unexpected message received: %d\n", command); + tx_drop(mbufs[j]); + break; } } } diff --git a/VNFs/DPPD-PROX/packet_utils.h b/VNFs/DPPD-PROX/packet_utils.h index 0a1ef9d8..ca4d449c 100644 --- a/VNFs/DPPD-PROX/packet_utils.h +++ b/VNFs/DPPD-PROX/packet_utils.h @@ -1,5 +1,5 @@ /* -// Copyright (c) 2010-2017 Intel Corporation +// Copyright (c) 2010-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -29,9 +29,9 @@ #define IP4(x) x & 0xff, (x >> 8) & 0xff, (x >> 16) & 0xff, x >> 24 // From Network (BE) enum { - SEND_MBUF_AND_ARP, + SEND_MBUF_AND_ARP_ND, SEND_MBUF, - SEND_ARP, + SEND_ARP_ND, DROP_MBUF }; #define DEFAULT_ARP_TIMEOUT (1000 * 3600 * 24 * 15) // ~15 days = disabled by default @@ -39,42 +39,50 @@ enum { struct task_base; struct task_args; +struct task_master; struct arp_table { - uint64_t arp_update_time; - uint64_t arp_timeout; + uint64_t arp_ndp_retransmit_timeout; + uint64_t reachable_timeout; uint32_t ip; uint32_t nh; prox_rte_ether_addr mac; + struct ipv6_addr ip6; }; struct l3_base { struct rte_ring *ctrl_plane_ring; struct task_base *tmaster; uint32_t flags; uint32_t n_pkts; + uint32_t local_ipv4; uint8_t reachable_port_id; uint8_t core_id; uint8_t task_id; - uint32_t arp_timeout; - uint32_t arp_update_time; uint seed; prox_next_hop_index_type nb_gws; + uint32_t arp_ndp_retransmit_timeout; + uint32_t reachable_timeout; struct arp_table gw; struct arp_table optimized_arp_table[4]; struct rte_hash *ip_hash; + struct rte_hash *ip6_hash; struct arp_table *arp_table; - struct rte_mempool *arp_pool; struct rte_lpm *ipv4_lpm; struct arp_table *next_hops; + struct rte_mempool *arp_nd_pool; + struct ipv6_addr local_ipv6; + struct ipv6_addr 
global_ipv6; + uint8_t prefix_printed; }; void task_init_l3(struct task_base *tbase, struct task_args *targ); void task_start_l3(struct task_base *tbase, struct task_args *targ); int write_dst_mac(struct task_base *tbase, struct rte_mbuf *mbuf, uint32_t *ip_dst, uint64_t **time, uint64_t tsc); +int write_ip6_dst_mac(struct task_base *tbase, struct rte_mbuf *mbuf, struct ipv6_addr *ip_dst); void task_set_gateway_ip(struct task_base *tbase, uint32_t ip); void task_set_local_ip(struct task_base *tbase, uint32_t ip); void handle_ctrl_plane_pkts(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts); -static inline void update_arp_update_time(struct l3_base *l3, uint64_t *ptr, uint32_t base) +static inline void update_arp_ndp_retransmit_timeout(struct l3_base *l3, uint64_t *ptr, uint32_t base) { // randomize timers - from 0.5 to 1.5 * configured time const uint64_t hz = rte_get_tsc_hz(); @@ -82,4 +90,9 @@ static inline void update_arp_update_time(struct l3_base *l3, uint64_t *ptr, uin uint64_t rand = 500 + (1000L * rand_r(&l3->seed)) / RAND_MAX; *ptr = tsc + (base * rand / 1000) * hz / 1000; } +static inline uint8_t get_port(struct rte_mbuf *mbuf) +{ + return mbuf->port; +} + #endif /* _PACKET_UTILS_H_ */ diff --git a/VNFs/DPPD-PROX/parse_utils.c b/VNFs/DPPD-PROX/parse_utils.c index 887de803..ab0e03b7 100644 --- a/VNFs/DPPD-PROX/parse_utils.c +++ b/VNFs/DPPD-PROX/parse_utils.c @@ -27,13 +27,13 @@ #include "quit.h" #include "cfgfile.h" -#include "ip6_addr.h" #include "parse_utils.h" #include "prox_globals.h" #include "prox_cfg.h" #include "log.h" #include "prox_lua.h" #include "prox_lua_types.h" +#include "prox_ipv6.h" #include "prox_compat.h" #define MAX_NB_PORT_NAMES PROX_MAX_PORTS @@ -406,12 +406,12 @@ int parse_ip6(struct ipv6_addr *addr, const char *str2) for (uint8_t i = 0, j = 0; i < ret; ++i, ++j) { if (*addr_parts[i] == 0) { - if (omitted == 0) { + if (omitted) { set_errf("Can only omit zeros once"); return -1; } omitted = 1; - j += 8 - ret; + j += 2 
* (8 - ret) + 1; } else { uint16_t w = strtoll(addr_parts[i], NULL, 16); diff --git a/VNFs/DPPD-PROX/prox_args.c b/VNFs/DPPD-PROX/prox_args.c index cc8b3b25..25599cb7 100644 --- a/VNFs/DPPD-PROX/prox_args.c +++ b/VNFs/DPPD-PROX/prox_args.c @@ -35,6 +35,8 @@ #include "defaults.h" #include "prox_lua.h" #include "cqm.h" +#include "defines.h" +#include "prox_ipv6.h" #include "prox_compat.h" #define MAX_RTE_ARGV 64 @@ -976,6 +978,9 @@ static int get_core_cfg(unsigned sindex, char *str, void *data) if (STR_EQ(str, "streams")) { return parse_str(targ->streams, pkey, sizeof(targ->streams)); } + if (STR_EQ(str, "Unsollicited NA")) { + return parse_flag(&targ->flags, TASK_ARG_SEND_NA_AT_STARTUP, pkey); + } if (STR_EQ(str, "local lpm")) { return parse_flag(&targ->flags, TASK_ARG_LOCAL_LPM, pkey); } @@ -1381,7 +1386,7 @@ static int get_core_cfg(unsigned sindex, char *str, void *data) targ->task_init = to_task_init(mode_str, sub_mode_str); if (!targ->task_init) { - if (strcmp(sub_mode_str, "l3") != 0) { + if ((strcmp(sub_mode_str, "l3") != 0) && (strcmp(sub_mode_str, "ndp") != 0)) { set_errf("sub mode %s not supported for mode %s", sub_mode_str, mode_str); return -1; } @@ -1392,9 +1397,13 @@ static int get_core_cfg(unsigned sindex, char *str, void *data) } } if (strcmp(sub_mode_str, "l3") == 0) { - prox_cfg.flags |= DSF_CTRL_PLANE_ENABLED; + prox_cfg.flags |= DSF_L3_ENABLED; targ->flags |= TASK_ARG_L3; strcpy(targ->sub_mode_str, "l3"); + } else if (strcmp(sub_mode_str, "ndp") == 0) { + prox_cfg.flags |= DSF_NDP_ENABLED; + targ->flags |= TASK_ARG_NDP; + strcpy(targ->sub_mode_str, "ndp"); } else { strcpy(targ->sub_mode_str, targ->task_init->sub_mode_str); } @@ -1453,6 +1462,16 @@ static int get_core_cfg(unsigned sindex, char *str, void *data) targ->local_prefix = 32; return parse_ip(&targ->gateway_ipv4, pkey); } + if (STR_EQ(str, "ipv6 router")) { /* we simulate an IPV6 router */ + int rc = parse_flag(&targ->ipv6_router, 1, pkey); + if (!rc && targ->ipv6_router) { + 
plog_info("\tipv6 router configured => NDP enabled\n"); + prox_cfg.flags |= DSF_NDP_ENABLED; + targ->flags |= TASK_ARG_NDP; + strcpy(targ->sub_mode_str, "ndp"); + } + return rc; /* propagate a parse_flag() failure instead of silently accepting a bad value */ + } if (STR_EQ(str, "local ipv4")) { /* source IP address to be used for packets */ struct ip4_subnet cidr; if (parse_ip4_cidr(&cidr, pkey) != 0) { @@ -1470,13 +1489,43 @@ static int get_core_cfg(unsigned sindex, char *str, void *data) if (STR_EQ(str, "remote ipv4")) { /* source IP address to be used for packets */ return parse_ip(&targ->remote_ipv4, pkey); } + if (STR_EQ(str, "global ipv6")) { + if (parse_ip6(&targ->global_ipv6, pkey) == 0) { + plog_info("\tglobal ipv6 configured => NDP enabled\n"); + targ->flags |= TASK_ARG_NDP; + prox_cfg.flags |= DSF_NDP_ENABLED; + strcpy(targ->sub_mode_str, "ndp"); + } else { + plog_err("Unable to parse content of global ipv6: %s\n", pkey); + return -1; + } + return 0; + } if (STR_EQ(str, "local ipv6")) { /* source IPv6 address to be used for packets */ - return parse_ip6(&targ->local_ipv6, pkey); + if (parse_ip6(&targ->local_ipv6, pkey) == 0) { + plog_info("\tlocal ipv6 configured => NDP enabled\n"); + targ->flags |= TASK_ARG_NDP; + prox_cfg.flags |= DSF_NDP_ENABLED; + strcpy(targ->sub_mode_str, "ndp"); + } else { + plog_err("Unable to parse content of local ipv6: %s\n", pkey); + return -1; + } + return 0; } + if (STR_EQ(str, "router prefix")) { + if (parse_ip6(&targ->router_prefix, pkey) == 0) { + plog_info("\trouter prefix set to "IPv6_BYTES_FMT" (%s)\n", IPv6_BYTES(targ->router_prefix.bytes), IP6_Canonical(&targ->router_prefix)); + } else { + plog_err("Unable to parse content of router prefix: %s\n", pkey); + return -1; + } + return 0; + } if (STR_EQ(str, "arp timeout")) - return parse_int(&targ->arp_timeout, pkey); + return parse_int(&targ->reachable_timeout, pkey); if (STR_EQ(str, "arp update time")) - return parse_int(&targ->arp_update_time, pkey); + return parse_int(&targ->arp_ndp_retransmit_timeout, pkey); if (STR_EQ(str, "number of packets")) 
return parse_int(&targ->n_pkts, pkey); if (STR_EQ(str, "pipes")) { diff --git a/VNFs/DPPD-PROX/prox_cfg.h b/VNFs/DPPD-PROX/prox_cfg.h index 8c4bd6ca..e23c8ed9 100644 --- a/VNFs/DPPD-PROX/prox_cfg.h +++ b/VNFs/DPPD-PROX/prox_cfg.h @@ -1,5 +1,5 @@ /* -// Copyright (c) 2010-2017 Intel Corporation +// Copyright (c) 2010-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,8 +18,11 @@ #define _PROX_CFG_H #include +#include #include "prox_globals.h" +#include "ip6_addr.h" +#include "prox_compat.h" #define PROX_CM_STR_LEN (2 + 2 * sizeof(prox_cfg.core_mask) + 1) #define PROX_CM_DIM (RTE_MAX_LCORE/(sizeof(uint64_t) * 8)) @@ -40,7 +43,9 @@ #define DSF_DISABLE_CMT 0x00002000 /* CMT disabled */ #define DSF_LIST_TASK_MODES 0x00004000 /* list supported task modes and exit */ #define DSF_ENABLE_BYPASS 0x00008000 /* Use Multi Producer rings to enable ring bypass */ -#define DSF_CTRL_PLANE_ENABLED 0x00010000 /* ctrl plane enabled */ +#define DSF_L3_ENABLED 0x00010000 /* ctrl plane enabled for IPv4 */ +#define DSF_NDP_ENABLED 0x00020000 /* ctrl plane enabled for IPv6 */ +#define DSF_CTRL_PLANE_ENABLED (DSF_L3_ENABLED|DSF_NDP_ENABLED) /* ctrl plane enabled */ #define MAX_PATH_LEN 1024 @@ -69,6 +74,11 @@ struct prox_cfg { uint32_t heartbeat_timeout; uint32_t poll_timeout; uint64_t heartbeat_tsc; + struct ipv6_addr all_routers_ipv6_mcast_addr; + struct ipv6_addr all_nodes_ipv6_mcast_addr; + struct ipv6_addr random_ip; + prox_rte_ether_addr all_routers_mac_addr; + prox_rte_ether_addr all_nodes_mac_addr; }; extern struct prox_cfg prox_cfg; diff --git a/VNFs/DPPD-PROX/prox_cksum.h b/VNFs/DPPD-PROX/prox_cksum.h index 6ba50268..03be595a 100644 --- a/VNFs/DPPD-PROX/prox_cksum.h +++ b/VNFs/DPPD-PROX/prox_cksum.h @@ -1,5 +1,5 @@ /* -// Copyright (c) 2010-2017 Intel Corporation +// Copyright (c) 2010-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the 
"License"); // you may not use this file except in compliance with the License. @@ -27,6 +27,7 @@ #include #include "igmp.h" #include "prox_compat.h" +#include "prox_ipv6.h" #if RTE_VERSION >= RTE_VERSION_NUM(1,8,0,0) #define CALC_TX_OL(l2_len, l3_len) ((uint64_t)(l2_len) | (uint64_t)(l3_len) << 7) @@ -68,4 +69,11 @@ void prox_udp_cksum_sw(prox_rte_udp_hdr *udp, uint16_t len, uint32_t src_ip_addr void prox_tcp_cksum_sw(prox_rte_tcp_hdr *tcp, uint16_t len, uint32_t src_ip_addr, uint32_t dst_ip_addr); void prox_igmp_cksum_sw(struct igmpv2_hdr *igmp, uint16_t len); +struct ipv6_pseudo_hdr { + struct ipv6_addr src; + struct ipv6_addr dst; + uint32_t length; + uint32_t protocl:8; + uint32_t reserved:24; +} __attribute__((__packed__)); #endif /* _PROX_CKSUM_H_ */ diff --git a/VNFs/DPPD-PROX/prox_compat.c b/VNFs/DPPD-PROX/prox_compat.c new file mode 100644 index 00000000..572872ee --- /dev/null +++ b/VNFs/DPPD-PROX/prox_compat.c @@ -0,0 +1,30 @@ +/* +// Copyright (c) 2010-2017 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+*/ + +#include +#include "quit.h" +#include "prox_compat.h" + +char *prox_strncpy(char * dest, const char * src, size_t count) +{ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpragmas" +#pragma GCC diagnostic ignored "-Wstringop-truncation" + strncpy(dest, src, count); +#pragma GCC diagnostic pop + PROX_PANIC(dest[count - 1] != 0, "\t\tError in strncpy: buffer overrun (%lu bytes)", count); + return dest; +} diff --git a/VNFs/DPPD-PROX/prox_compat.h b/VNFs/DPPD-PROX/prox_compat.h index bd059a6c..091c2ccd 100644 --- a/VNFs/DPPD-PROX/prox_compat.h +++ b/VNFs/DPPD-PROX/prox_compat.h @@ -20,7 +20,6 @@ #include #include #include "hash_utils.h" -#include "quit.h" /* This is a copy of the rte_table_hash_params from DPDK 17.11 * * So if DPDK decides to change the structure the modifications * @@ -221,16 +220,8 @@ typedef struct rte_icmp_hdr prox_rte_icmp_hdr; #endif -static inline char *prox_strncpy(char * dest, const char * src, size_t count) -{ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wpragmas" -#pragma GCC diagnostic ignored "-Wstringop-truncation" - strncpy(dest, src, count); -#pragma GCC diagnostic pop - PROX_PANIC(dest[count - 1] != 0, "\t\tError in strncpy: buffer overrun (%lu bytes)", count); - return dest; -} +char *prox_strncpy(char * dest, const char * src, size_t count); + #ifdef RTE_LIBRTE_PMD_AESNI_MB #if RTE_VERSION < RTE_VERSION_NUM(19,5,0,0) //RFC4303 diff --git a/VNFs/DPPD-PROX/prox_ipv6.c b/VNFs/DPPD-PROX/prox_ipv6.c new file mode 100644 index 00000000..9425f4a0 --- /dev/null +++ b/VNFs/DPPD-PROX/prox_ipv6.c @@ -0,0 +1,302 @@ +/* +// Copyright (c) 2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#include "task_base.h" +#include "handle_master.h" +#include "prox_cfg.h" +#include "prox_ipv6.h" + +struct ipv6_addr null_addr = {{0}}; +char ip6_str[40]; // 8 blocks of 2 bytes (4 char) + 1x ":" between blocks + +void set_mcast_mac_from_ipv6(prox_rte_ether_addr *mac, struct ipv6_addr *ipv6_addr) +{ + mac->addr_bytes[0] = 0x33; + mac->addr_bytes[1] = 0x33; + memcpy(((uint32_t *)&mac->addr_bytes[2]), (uint32_t *)(&ipv6_addr->bytes[12]), sizeof(uint32_t)); +} + +// Note that this function is not Mthread safe and would result in garbage if called simultaneously from multiple threads +// This function is however only used for debugging, printing errors... +char *IP6_Canonical(struct ipv6_addr *addr) +{ + uint8_t *a = (uint8_t *)addr; + char *ptr = ip6_str; + int field = -1, len = 0, stored_field = 0, stored_len = 0; + + // Find longest run of consecutive 16-bit 0 fields + for (int i = 0; i < 8; i++) { + if (((int)a[i * 2] == 0) && ((int)a[i * 2 + 1] == 0)) { + len++; + if (field == -1) + field = i; // Store where the first 0 field started + } else { + if (len > stored_len) { + // the longest run of consecutive 16-bit 0 fields MUST be shortened + stored_len = len; + stored_field = field; + } + len = 0; + field = -1; + } + } + if (len > stored_len) { + // the longest run of consecutive 16-bit 0 fields MUST be shortened + stored_len = len; + stored_field = field; + } + if (stored_len <= 1) { + // The symbol "::" MUST NOT be used to shorten just one 16-bit 0 field. 
+ stored_len = 0; + stored_field = -1; + } + for (int i = 0; i < 8; i++) { + if (i == stored_field) { + sprintf(ptr, ":"); + ptr++; + if (i == 0) { + sprintf(ptr, ":"); + ptr++; + } + i +=stored_len - 1; // ++ done in for loop + continue; + } + if ((int)a[i * 2] & 0xF0) { + sprintf(ptr, "%02x%02x", (int)a[i * 2], (int)a[i * 2 + 1]); + ptr+=4; + } else if ((int)a[i * 2] & 0x0F) { + sprintf(ptr, "%x%02x", (int)a[i * 2] & 0x0F, (int)a[i * 2 + 1]); // high byte < 0x10: one hex digit, then the low byte as two digits + ptr+=3; + } else if ((int)a[i * 2 + 1] & 0xF0) { + sprintf(ptr, "%02x", (int)a[i * 2 + 1]); + ptr+=2; + } else { + sprintf(ptr, "%x", ((int)a[i * 2 + 1]) & 0xF); + ptr++; + } + if (i != 7) { + sprintf(ptr, ":"); + ptr++; + } + } + return ip6_str; +} + +void set_link_local(struct ipv6_addr *ipv6_addr) +{ + ipv6_addr->bytes[0] = 0xfe; + ipv6_addr->bytes[1] = 0x80; +} + +// Create Extended Unique Identifier (RFC 2373) +// Store it in LSB of IPv6 address +void set_EUI(struct ipv6_addr *ipv6_addr, prox_rte_ether_addr *mac) +{ + memcpy(&ipv6_addr->bytes[8], mac, 3); // Copy first 3 bytes of MAC + ipv6_addr->bytes[8] = ipv6_addr->bytes[8] ^ 0x02; // Invert Universal/local bit + ipv6_addr->bytes[11] = 0xff; // Next 2 bytes are 0xfffe + ipv6_addr->bytes[12] = 0xfe; + memcpy(&ipv6_addr->bytes[13], &mac->addr_bytes[3], 3); // Copy last 3 bytes + // plog_info("mac = "MAC_BYTES_FMT", eui = "IPv6_BYTES_FMT"\n", MAC_BYTES(mac->addr_bytes), IPv6_BYTES(ipv6_addr->bytes)); +} + +void create_mac_from_EUI(struct ipv6_addr *ipv6_addr, prox_rte_ether_addr *mac) +{ + memcpy(mac, &ipv6_addr->bytes[8], 3); + mac->addr_bytes[0] = mac->addr_bytes[0] ^ 0x02; + memcpy(&mac->addr_bytes[3], &ipv6_addr->bytes[13], 3); +} +void build_router_advertisement(struct rte_mbuf *mbuf, prox_rte_ether_addr *s_addr, struct ipv6_addr *ipv6_s_addr, struct ipv6_addr *router_prefix) +{ + prox_rte_ether_hdr *peth = rte_pktmbuf_mtod(mbuf, prox_rte_ether_hdr *); + init_mbuf_seg(mbuf); + mbuf->ol_flags &= ~(PKT_TX_IP_CKSUM|PKT_TX_UDP_CKSUM); // Software calculates the 
checksum + + memcpy(peth->d_addr.addr_bytes, &prox_cfg.all_nodes_mac_addr, sizeof(prox_rte_ether_addr)); + memcpy(peth->s_addr.addr_bytes, s_addr, sizeof(prox_rte_ether_addr)); + peth->ether_type = ETYPE_IPv6; + + prox_rte_ipv6_hdr *ipv6_hdr = (prox_rte_ipv6_hdr *)(peth + 1); + ipv6_hdr->vtc_flow = 0x00000060; + ipv6_hdr->payload_len = rte_cpu_to_be_16(sizeof(struct icmpv6_RA) + sizeof(struct icmpv6_prefix_option)); + ipv6_hdr->proto = ICMPv6; + ipv6_hdr->hop_limits = 255; + memcpy(ipv6_hdr->src_addr, ipv6_s_addr, sizeof(struct ipv6_addr)); // 0 = "Unspecified address" if unknown + memcpy(ipv6_hdr->dst_addr, &prox_cfg.all_nodes_ipv6_mcast_addr, sizeof(struct ipv6_addr)); + + struct icmpv6_RA *router_advertisement = (struct icmpv6_RA *)(ipv6_hdr + 1); + router_advertisement->type = ICMPv6_RA; + router_advertisement->code = 0; + router_advertisement->hop_limit = 255; + router_advertisement->bits = 0; // M and O bits set to 0 => no dhcpv6 + router_advertisement->router_lifespan = rte_cpu_to_be_16(9000); // 9000 sec + router_advertisement->reachable_timeout = rte_cpu_to_be_32(30000); // 1 sec + router_advertisement->retrans_timeout = rte_cpu_to_be_32(1000); // 30 sec + + struct icmpv6_option *option = &router_advertisement->options; + option->type = ICMPv6_source_link_layer_address; + option->length = 1; // 8 bytes + memcpy(&option->data, s_addr, sizeof(prox_rte_ether_addr)); + + struct icmpv6_prefix_option *prefix_option = (struct icmpv6_prefix_option *)(option + 1); + prefix_option->type = ICMPv6_prefix_information; + prefix_option->length = 4; // 32 bytes + prefix_option->prefix_length = 64; // 64 bits in prefix + prefix_option->flag = 0xc0; // on-link flag & autonamous address-configuration flag are set + prefix_option->valid_lifetime = rte_cpu_to_be_32(86400); // 1 day + prefix_option->preferred_lifetime = rte_cpu_to_be_32(43200); // 12 hours + prefix_option->reserved = 0; + memcpy(&prefix_option->prefix, router_prefix, sizeof(struct ipv6_addr)); + // Could Add 
MTU Option + router_advertisement->checksum = 0; + router_advertisement->checksum = rte_ipv6_udptcp_cksum(ipv6_hdr, router_advertisement); + + uint16_t pktlen = rte_be_to_cpu_16(ipv6_hdr->payload_len) + sizeof(prox_rte_ipv6_hdr) + sizeof(prox_rte_ether_hdr); + rte_pktmbuf_pkt_len(mbuf) = pktlen; + rte_pktmbuf_data_len(mbuf) = pktlen; +} + +void build_router_sollicitation(struct rte_mbuf *mbuf, prox_rte_ether_addr *s_addr, struct ipv6_addr *ipv6_s_addr) +{ + prox_rte_ether_hdr *peth = rte_pktmbuf_mtod(mbuf, prox_rte_ether_hdr *); + + init_mbuf_seg(mbuf); + mbuf->ol_flags &= ~(PKT_TX_IP_CKSUM|PKT_TX_UDP_CKSUM); // Software calculates the checksum + + memcpy(peth->d_addr.addr_bytes, &prox_cfg.all_routers_mac_addr, sizeof(prox_rte_ether_addr)); + memcpy(peth->s_addr.addr_bytes, s_addr, sizeof(prox_rte_ether_addr)); + peth->ether_type = ETYPE_IPv6; + + prox_rte_ipv6_hdr *ipv6_hdr = (prox_rte_ipv6_hdr *)(peth + 1); + ipv6_hdr->vtc_flow = 0x00000060; + ipv6_hdr->payload_len = rte_cpu_to_be_16(sizeof(struct icmpv6_RS)); + ipv6_hdr->proto = ICMPv6; + ipv6_hdr->hop_limits = 255; + memcpy(ipv6_hdr->src_addr, ipv6_s_addr, sizeof(struct ipv6_addr)); // 0 = "Unspecified address" if unknown + memcpy(ipv6_hdr->dst_addr, &prox_cfg.all_routers_ipv6_mcast_addr, sizeof(struct ipv6_addr)); + + struct icmpv6_RS *router_sollicitation = (struct icmpv6_RS *)(ipv6_hdr + 1); + router_sollicitation->type = ICMPv6_RS; + router_sollicitation->code = 0; + router_sollicitation->options.type = ICMPv6_source_link_layer_address; + router_sollicitation->options.length = 1; // 8 bytes + memcpy(&router_sollicitation->options.data, s_addr, sizeof(prox_rte_ether_addr)); + + router_sollicitation->checksum = 0; + router_sollicitation->checksum = rte_ipv6_udptcp_cksum(ipv6_hdr, router_sollicitation); + uint16_t pktlen = rte_be_to_cpu_16(ipv6_hdr->payload_len) + sizeof(prox_rte_ipv6_hdr) + sizeof(prox_rte_ether_hdr); + rte_pktmbuf_pkt_len(mbuf) = pktlen; + rte_pktmbuf_data_len(mbuf) = pktlen; +} + +void 
build_neighbour_sollicitation(struct rte_mbuf *mbuf, prox_rte_ether_addr *s_addr, struct ipv6_addr *dst, struct ipv6_addr *src) +{ + prox_rte_ether_hdr *peth = rte_pktmbuf_mtod(mbuf, prox_rte_ether_hdr *); + prox_rte_ether_addr mac_dst; + set_mcast_mac_from_ipv6(&mac_dst, dst); + + init_mbuf_seg(mbuf); + mbuf->ol_flags &= ~(PKT_TX_IP_CKSUM|PKT_TX_UDP_CKSUM); // Software calculates the checksum + + memcpy(peth->d_addr.addr_bytes, &mac_dst, sizeof(prox_rte_ether_addr)); + memcpy(peth->s_addr.addr_bytes, s_addr, sizeof(prox_rte_ether_addr)); + peth->ether_type = ETYPE_IPv6; + + prox_rte_ipv6_hdr *ipv6_hdr = (prox_rte_ipv6_hdr *)(peth + 1); + ipv6_hdr->vtc_flow = 0x00000060; + ipv6_hdr->payload_len = rte_cpu_to_be_16(sizeof(struct icmpv6_NS)); + ipv6_hdr->proto = ICMPv6; + ipv6_hdr->hop_limits = 255; + memcpy(ipv6_hdr->src_addr, src, 16); + memcpy(ipv6_hdr->dst_addr, dst, 16); + + struct icmpv6_NS *neighbour_sollicitation = (struct icmpv6_NS *)(ipv6_hdr + 1); + neighbour_sollicitation->type = ICMPv6_NS; + neighbour_sollicitation->code = 0; + neighbour_sollicitation->reserved = 0; + memcpy(&neighbour_sollicitation->target_address, dst, sizeof(struct ipv6_addr)); + neighbour_sollicitation->options.type = ICMPv6_source_link_layer_address; + neighbour_sollicitation->options.length = 1; // 8 bytes + memcpy(&neighbour_sollicitation->options.data, s_addr, sizeof(prox_rte_ether_addr)); + neighbour_sollicitation->checksum = 0; + neighbour_sollicitation->checksum = rte_ipv6_udptcp_cksum(ipv6_hdr, neighbour_sollicitation); + + uint16_t pktlen = rte_be_to_cpu_16(ipv6_hdr->payload_len) + sizeof(prox_rte_ipv6_hdr) + sizeof(prox_rte_ether_hdr); + rte_pktmbuf_pkt_len(mbuf) = pktlen; + rte_pktmbuf_data_len(mbuf) = pktlen; +} + +void build_neighbour_advertisement(struct task_base *tbase, struct rte_mbuf *mbuf, prox_rte_ether_addr *target, struct ipv6_addr *src_ipv6_addr, int sollicited) +{ + struct task_master *task = (struct task_master *)tbase; + prox_rte_ether_hdr *peth = 
rte_pktmbuf_mtod(mbuf, prox_rte_ether_hdr *); + prox_rte_ipv6_hdr *ipv6_hdr = (prox_rte_ipv6_hdr *)(peth + 1); + + uint8_t port_id = get_port(mbuf); + + init_mbuf_seg(mbuf); + mbuf->ol_flags &= ~(PKT_TX_IP_CKSUM|PKT_TX_UDP_CKSUM); // Software calculates the checksum + + // If source mac is null, use all_nodes_mac_addr. + if ((!sollicited) || (memcmp(peth->s_addr.addr_bytes, &null_addr, sizeof(struct ipv6_addr)) == 0)) { + memcpy(peth->d_addr.addr_bytes, &prox_cfg.all_nodes_mac_addr, sizeof(prox_rte_ether_addr)); + memcpy(ipv6_hdr->dst_addr, &prox_cfg.all_nodes_ipv6_mcast_addr, sizeof(struct ipv6_addr)); + } else { + memcpy(peth->d_addr.addr_bytes, peth->s_addr.addr_bytes, sizeof(prox_rte_ether_addr)); + memcpy(ipv6_hdr->dst_addr, ipv6_hdr->src_addr, sizeof(struct ipv6_addr)); + } + + memcpy(peth->s_addr.addr_bytes, &task->internal_port_table[port_id].mac, sizeof(prox_rte_ether_addr)); + peth->ether_type = ETYPE_IPv6; + + ipv6_hdr->vtc_flow = 0x00000060; + ipv6_hdr->payload_len = rte_cpu_to_be_16(sizeof(struct icmpv6_NA)); + ipv6_hdr->proto = ICMPv6; + ipv6_hdr->hop_limits = 255; + memcpy(ipv6_hdr->src_addr, src_ipv6_addr, sizeof(struct ipv6_addr)); + + struct icmpv6_NA *neighbour_advertisement = (struct icmpv6_NA *)(ipv6_hdr + 1); + neighbour_advertisement->type = ICMPv6_NA; + neighbour_advertisement->code = 0; + neighbour_advertisement->reserved = 0; + if (task->internal_port_table[port_id].flags & IPV6_ROUTER) + neighbour_advertisement->bits = 0xC0; // R+S bit set + else + neighbour_advertisement->bits = 0x40; // S bit set + if (!sollicited) { + memcpy(&neighbour_advertisement->destination_address, src_ipv6_addr, sizeof(struct ipv6_addr)); + neighbour_advertisement->bits &= 0xBF; // Clear S bit + neighbour_advertisement->bits |= 0x20; // Overide bit + } + // else neighbour_advertisement->destination_address is already set to neighbour_sollicitation->target_address + + struct icmpv6_option *option = &neighbour_advertisement->options; + // Do not think this is 
necessary + // option->type = ICMPv6_source_link_layer_address; + // option->length = 1; // 8 bytes + // memcpy(&option->data, &task->internal_port_table[port_id].mac, sizeof(prox_rte_ether_addr)); + + // option = option + 1; + option->type = ICMPv6_target_link_layer_address; + option->length = 1; // 8 bytes + memcpy(&option->data, target, sizeof(prox_rte_ether_addr)); + + neighbour_advertisement->checksum = 0; + neighbour_advertisement->checksum = rte_ipv6_udptcp_cksum(ipv6_hdr, neighbour_advertisement); + uint16_t pktlen = rte_be_to_cpu_16(ipv6_hdr->payload_len) + sizeof(prox_rte_ipv6_hdr) + sizeof(prox_rte_ether_hdr); + rte_pktmbuf_pkt_len(mbuf) = pktlen; + rte_pktmbuf_data_len(mbuf) = pktlen; +} diff --git a/VNFs/DPPD-PROX/prox_ipv6.h b/VNFs/DPPD-PROX/prox_ipv6.h new file mode 100644 index 00000000..48030054 --- /dev/null +++ b/VNFs/DPPD-PROX/prox_ipv6.h @@ -0,0 +1,140 @@ +/* +// Copyright (c) 2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+*/
+
+#ifndef _PROX_IP_V6_H_
+#define _PROX_IP_V6_H_
+
+#include <stdint.h>
+
+#include "ip6_addr.h"
+
+// Well-known multicast addresses, in the fully expanded text form
+#define ALL_NODES_IPV6_MCAST_ADDR		"ff02:0000:0000:0000:0000:0000:0000:0001"	// FF02::1
+#define ALL_ROUTERS_IPV6_MCAST_ADDR		"ff02:0000:0000:0000:0000:0000:0000:0002"	// FF02::2
+
+#define RANDOM_IPV6				"1234:1234:1234:1234:1234:1234:1234:1234"	// Used by PROX as a flag for random IP
+
+#define ALL_DHCP_RELAY_AGENTS_AND_SERVERS	"ff02:0000:0000:0000:0000:0000:0001:0002"	// FF02::1:2
+#define ALL_DHCP_SERVERS			"ff05:0000:0000:0000:0000:0000:0001:0003"	// FF05::1:3
+
+#define DHCP_CLIENT_UDP_PORT	546
+#define DHCP_SERVER_UDP_PORT	547
+
+// Values of the "sollicited" argument of build_neighbour_advertisement()
+#define PROX_UNSOLLICITED	0
+#define PROX_SOLLICITED		1
+
+// IPv6 next-header value for ICMPv6
+#define ICMPv6	0x3a
+
+// ICMPv6 message types
+#define ICMPv6_DU	0x01
+#define ICMPv6_PTB	0x02
+#define ICMPv6_TE	0x03
+#define ICMPv6_PaPr	0x04
+#define ICMPv6_RS	0x85
+#define ICMPv6_RA	0x86
+#define ICMPv6_NS	0x87
+#define ICMPv6_NA	0x88
+#define ICMPv6_RE	0x89
+
+// ICMPv6 option types
+#define ICMPv6_source_link_layer_address	1
+#define ICMPv6_target_link_layer_address	2
+#define ICMPv6_prefix_information		3
+#define ICMPv6_redirect_header			4
+#define ICMPv6_mtu				5
+
+extern struct ipv6_addr null_addr;
+
+// Prefix information option (32 bytes, option length field = 4).
+// Packed like the other layouts below since it is overlaid directly on packet data.
+struct icmpv6_prefix_option {
+	uint8_t type;
+	uint8_t length;
+	uint8_t prefix_length;
+	uint8_t flag;
+	uint32_t valid_lifetime;
+	uint32_t preferred_lifetime;
+	uint32_t reserved;
+	struct ipv6_addr prefix;
+} __attribute__((__packed__));
+
+// Link-layer address option (8 bytes, option length field = 1): type, length, MAC
+struct icmpv6_option {
+	uint8_t type;
+	uint8_t length;
+	uint8_t data[6];
+} __attribute__((__packed__));
+
+// Fields common to the start of every ICMPv6 message below
+struct icmpv6 {
+	uint8_t type;
+	uint8_t code;
+	uint16_t checksum;
+};
+
+// Router Advertisement, followed by one link-layer address option
+struct icmpv6_RA {
+	uint8_t type;
+	uint8_t code;
+	uint16_t checksum;
+	uint8_t hop_limit;
+	uint8_t bits;
+	uint16_t router_lifespan;
+	uint32_t reachable_timeout;
+	uint32_t retrans_timeout;
+	struct icmpv6_option options;
+} __attribute__((__packed__));
+
+// Router Solicitation, followed by one link-layer address option
+struct icmpv6_RS {
+	uint8_t type;
+	uint8_t code;
+	uint16_t checksum;
+	uint32_t reserved;
+	struct icmpv6_option options;
+} __attribute__((__packed__));
+
+// Neighbour Solicitation, followed by one link-layer address option
+struct icmpv6_NS {
+	uint8_t type;
+	uint8_t code;
+	uint16_t checksum;
+	uint32_t reserved;
+	struct ipv6_addr target_address;
+	struct icmpv6_option options;
+} __attribute__((__packed__));
+
+// Neighbour Advertisement, followed by one link-layer address option
+struct icmpv6_NA {
+	uint8_t type;
+	uint8_t code;
+	uint16_t checksum;
+	uint16_t bits;
+	uint16_t reserved;
+	struct ipv6_addr destination_address;
+	struct icmpv6_option options;
+} __attribute__((__packed__));
+
+// Redirect
+struct icmpv6_RE {
+	uint8_t type;
+	uint8_t code;
+	uint16_t checksum;
+	uint32_t reserved;
+	struct ipv6_addr destination_address_hop;
+	struct ipv6_addr destination_address;
+	uint32_t Options;
+} __attribute__((__packed__));
+
+// Address helpers
+void set_mcast_mac_from_ipv6(prox_rte_ether_addr *mac, struct ipv6_addr *ipv6_addr);
+char *IP6_Canonical(struct ipv6_addr *addr);	// Returns a static buffer, not thread safe
+void set_link_local(struct ipv6_addr *ipv6_addr);
+void set_EUI(struct ipv6_addr *ipv6_addr, prox_rte_ether_addr *mac);
+void create_mac_from_EUI(struct ipv6_addr *ipv6_addr, prox_rte_ether_addr *mac);
+
+// Packet builders: each one fills the given mbuf with a complete Ethernet/IPv6/ICMPv6 message
+struct task_base;
+struct rte_mbuf;
+void build_router_sollicitation(struct rte_mbuf *mbuf, prox_rte_ether_addr *s_addr, struct ipv6_addr *ipv6_s_addr);
+void build_router_advertisement(struct rte_mbuf *mbuf, prox_rte_ether_addr *s_addr, struct ipv6_addr *ipv6_s_addr, struct ipv6_addr *router_prefix);
+void build_neighbour_sollicitation(struct rte_mbuf *mbuf, prox_rte_ether_addr *s_addr, struct ipv6_addr *dst, struct ipv6_addr *src);
+void build_neighbour_advertisement(struct task_base *tbase, struct rte_mbuf *mbuf, prox_rte_ether_addr *target_mac, struct ipv6_addr *ipv6_addr, int sollicited);
+
+#endif /* _PROX_IP_V6_H_ */
diff --git a/VNFs/DPPD-PROX/prox_port_cfg.c b/VNFs/DPPD-PROX/prox_port_cfg.c index 098d973b..a4f35260 100644 --- a/VNFs/DPPD-PROX/prox_port_cfg.c +++ b/VNFs/DPPD-PROX/prox_port_cfg.c @@ -208,7 +208,7 @@ void init_rte_dev(int use_dummy_devices) rc = eth_dev_null_create(tap, name, PROX_RTE_ETHER_MIN_LEN, 0); #endif PROX_PANIC(rc != 0, "Unable to create device %s %s\n", "net tap", port_cfg->vdev); - int vdev_port_id =
rte_eth_dev_count() - 1; + int vdev_port_id = prox_rte_eth_dev_count_avail() - 1; PROX_PANIC(vdev_port_id >= PROX_MAX_PORTS, "Too many port defined %d >= %d\n", vdev_port_id, PROX_MAX_PORTS); plog_info("\tCreating device %s, port %d\n", port_cfg->vdev, vdev_port_id); prox_port_cfg[vdev_port_id].active = 1; diff --git a/VNFs/DPPD-PROX/prox_shared.c b/VNFs/DPPD-PROX/prox_shared.c index db381ff5..52f4eb18 100644 --- a/VNFs/DPPD-PROX/prox_shared.c +++ b/VNFs/DPPD-PROX/prox_shared.c @@ -1,5 +1,5 @@ /* -// Copyright (c) 2010-2017 Intel Corporation +// Copyright (c) 2010-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -59,9 +59,9 @@ static void prox_sh_create_hash(struct prox_shared *ps, size_t size) PROX_PANIC(ps->hash == NULL, "Failed to create hash table for shared data"); ps->size = size; if (ps->size == INIT_HASH_TABLE_SIZE) - plog_info("Shared data tracking hash table created with size %zu\n", ps->size); + plog_info("\tShared data tracking hash table created with size %zu\n", ps->size); else - plog_info("Shared data tracking hash table grew to %zu\n", ps->size); + plog_info("\tShared data tracking hash table grew to %zu\n", ps->size); } #if RTE_VERSION >= RTE_VERSION_NUM(2,1,0,0) diff --git a/VNFs/DPPD-PROX/rx_pkt.c b/VNFs/DPPD-PROX/rx_pkt.c index da59fda7..17e39646 100644 --- a/VNFs/DPPD-PROX/rx_pkt.c +++ b/VNFs/DPPD-PROX/rx_pkt.c @@ -28,7 +28,8 @@ #include "arp.h" #include "tx_pkt.h" #include "handle_master.h" -#include "input.h" /* Needed for callback on dump */ +#include "input.h" +#include "prox_ipv6.h" /* Needed for callback on dump */ #define TCP_PORT_BGP rte_cpu_to_be_16(179) @@ -44,7 +45,9 @@ packets are received if the dequeue step involves finding 32 packets. 
*/ -#define MIN_PMD_RX 32 +#define MIN_PMD_RX 32 +#define PROX_L3 1 +#define PROX_NDP 2 static uint16_t rx_pkt_hw_port_queue(struct port_queue *pq, struct rte_mbuf **mbufs, int multi) { @@ -112,11 +115,11 @@ static inline void handle_ipv4(struct task_base *tbase, struct rte_mbuf **mbufs, prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)(pip + 1); if (pip->next_proto_id == IPPROTO_ICMP) { dump_l3(tbase, mbufs[i]); - tx_ring(tbase, tbase->l3.ctrl_plane_ring, ICMP_TO_CTRL, mbufs[i]); + tx_ring(tbase, tbase->l3.ctrl_plane_ring, ICMP_TO_MASTER, mbufs[i]); (*skip)++; } else if ((tcp->src_port == TCP_PORT_BGP) || (tcp->dst_port == TCP_PORT_BGP)) { dump_l3(tbase, mbufs[i]); - tx_ring(tbase, tbase->l3.ctrl_plane_ring, BGP_TO_CTRL, mbufs[i]); + tx_ring(tbase, tbase->l3.ctrl_plane_ring, BGP_TO_MASTER, mbufs[i]); (*skip)++; } else if (unlikely(*skip)) { mbufs[i - *skip] = mbufs[i]; @@ -155,13 +158,13 @@ static inline int handle_l3(struct task_base *tbase, uint16_t nb_rx, struct rte_ handle_ipv4(tbase, mbufs, i, pip, &skip); } else if (vlan->eth_proto == ETYPE_ARP) { dump_l3(tbase, mbufs[i]); - tx_ring(tbase, tbase->l3.ctrl_plane_ring, ARP_TO_CTRL, mbufs[i]); + tx_ring(tbase, tbase->l3.ctrl_plane_ring, ARP_PKT_FROM_NET_TO_MASTER, mbufs[i]); skip++; } break; case ETYPE_ARP: dump_l3(tbase, mbufs[i]); - tx_ring(tbase, tbase->l3.ctrl_plane_ring, ARP_TO_CTRL, mbufs[i]); + tx_ring(tbase, tbase->l3.ctrl_plane_ring, ARP_PKT_FROM_NET_TO_MASTER, mbufs[i]); skip++; break; default: @@ -174,8 +177,35 @@ static inline int handle_l3(struct task_base *tbase, uint16_t nb_rx, struct rte_ return skip; } +static inline int handle_ndp(struct task_base *tbase, uint16_t nb_rx, struct rte_mbuf ***mbufs_ptr) +{ + struct rte_mbuf **mbufs = *mbufs_ptr; + int i; + prox_rte_ether_hdr *hdr[MAX_PKT_BURST]; + int skip = 0; + + for (i = 0; i < nb_rx; i++) { + PREFETCH0(mbufs[i]); + } + for (i = 0; i < nb_rx; i++) { + hdr[i] = rte_pktmbuf_mtod(mbufs[i], prox_rte_ether_hdr *); + PREFETCH0(hdr[i]); + } + for (i = 0; i 
< nb_rx; i++) { + prox_rte_ipv6_hdr *ipv6_hdr = (prox_rte_ipv6_hdr *)(hdr[i] + 1); + if (unlikely((hdr[i]->ether_type == ETYPE_IPv6) && (ipv6_hdr->proto == ICMPv6))) { + dump_l3(tbase, mbufs[i]); + tx_ring(tbase, tbase->l3.ctrl_plane_ring, NDP_PKT_FROM_NET_TO_MASTER, mbufs[i]); + skip++; + } else if (unlikely(skip)) { + mbufs[i - skip] = mbufs[i]; + } + } + return skip; +} + static uint16_t rx_pkt_hw_param(struct task_base *tbase, struct rte_mbuf ***mbufs_ptr, int multi, - void (*next)(struct rx_params_hw *rx_param_hw), int l3) + void (*next)(struct rx_params_hw *rx_param_hw), int l3_ndp) { uint8_t last_read_portid; uint16_t nb_rx, ret; @@ -191,8 +221,10 @@ static uint16_t rx_pkt_hw_param(struct task_base *tbase, struct rte_mbuf ***mbuf nb_rx = rx_pkt_hw_port_queue(pq, *mbufs_ptr, multi); next(&tbase->rx_params_hw); - if (l3) + if (l3_ndp == PROX_L3) skip = handle_l3(tbase, nb_rx, mbufs_ptr); + else if (l3_ndp == PROX_NDP) + skip = handle_ndp(tbase, nb_rx, mbufs_ptr); if (skip) TASK_STATS_ADD_RX_NON_DP(&tbase->aux->stats, skip); @@ -204,7 +236,7 @@ static uint16_t rx_pkt_hw_param(struct task_base *tbase, struct rte_mbuf ***mbuf return 0; } -static inline uint16_t rx_pkt_hw1_param(struct task_base *tbase, struct rte_mbuf ***mbufs_ptr, int multi, int l3) +static inline uint16_t rx_pkt_hw1_param(struct task_base *tbase, struct rte_mbuf ***mbufs_ptr, int multi, int l3_ndp) { uint16_t nb_rx, n; int skip = 0; @@ -230,8 +262,11 @@ static inline uint16_t rx_pkt_hw1_param(struct task_base *tbase, struct rte_mbuf if (nb_rx == 0) return 0; - if (l3) + + if (l3_ndp == PROX_L3) skip = handle_l3(tbase, nb_rx, mbufs_ptr); + else if (l3_ndp == PROX_NDP) + skip = handle_ndp(tbase, nb_rx, mbufs_ptr); if (skip) TASK_STATS_ADD_RX_NON_DP(&tbase->aux->stats, skip); @@ -275,32 +310,62 @@ uint16_t rx_pkt_hw1_multi(struct task_base *tbase, struct rte_mbuf ***mbufs) uint16_t rx_pkt_hw_l3(struct task_base *tbase, struct rte_mbuf ***mbufs) { - return rx_pkt_hw_param(tbase, mbufs, 0, 
next_port, 1); + return rx_pkt_hw_param(tbase, mbufs, 0, next_port, PROX_L3); +} + +uint16_t rx_pkt_hw_ndp(struct task_base *tbase, struct rte_mbuf ***mbufs) +{ + return rx_pkt_hw_param(tbase, mbufs, 0, next_port, PROX_NDP); } uint16_t rx_pkt_hw_pow2_l3(struct task_base *tbase, struct rte_mbuf ***mbufs) { - return rx_pkt_hw_param(tbase, mbufs, 0, next_port_pow2, 1); + return rx_pkt_hw_param(tbase, mbufs, 0, next_port_pow2, PROX_L3); +} + +uint16_t rx_pkt_hw_pow2_ndp(struct task_base *tbase, struct rte_mbuf ***mbufs) +{ + return rx_pkt_hw_param(tbase, mbufs, 0, next_port_pow2, PROX_NDP); } uint16_t rx_pkt_hw1_l3(struct task_base *tbase, struct rte_mbuf ***mbufs) { - return rx_pkt_hw1_param(tbase, mbufs, 0, 1); + return rx_pkt_hw1_param(tbase, mbufs, 0, PROX_L3); +} + +uint16_t rx_pkt_hw1_ndp(struct task_base *tbase, struct rte_mbuf ***mbufs) +{ + return rx_pkt_hw1_param(tbase, mbufs, 0, PROX_NDP); } uint16_t rx_pkt_hw_multi_l3(struct task_base *tbase, struct rte_mbuf ***mbufs) { - return rx_pkt_hw_param(tbase, mbufs, 1, next_port, 1); + return rx_pkt_hw_param(tbase, mbufs, 1, next_port, PROX_L3); +} + +uint16_t rx_pkt_hw_multi_ndp(struct task_base *tbase, struct rte_mbuf ***mbufs) +{ + return rx_pkt_hw_param(tbase, mbufs, 1, next_port, PROX_NDP); } uint16_t rx_pkt_hw_pow2_multi_l3(struct task_base *tbase, struct rte_mbuf ***mbufs) { - return rx_pkt_hw_param(tbase, mbufs, 1, next_port_pow2, 1); + return rx_pkt_hw_param(tbase, mbufs, 1, next_port_pow2, PROX_L3); +} + +uint16_t rx_pkt_hw_pow2_multi_ndp(struct task_base *tbase, struct rte_mbuf ***mbufs) +{ + return rx_pkt_hw_param(tbase, mbufs, 1, next_port_pow2, PROX_NDP); } uint16_t rx_pkt_hw1_multi_l3(struct task_base *tbase, struct rte_mbuf ***mbufs) { - return rx_pkt_hw1_param(tbase, mbufs, 1, 1); + return rx_pkt_hw1_param(tbase, mbufs, 1, PROX_L3); +} + +uint16_t rx_pkt_hw1_multi_ndp(struct task_base *tbase, struct rte_mbuf ***mbufs) +{ + return rx_pkt_hw1_param(tbase, mbufs, 1, PROX_NDP); } /* The following 
functions implement ring access */ diff --git a/VNFs/DPPD-PROX/rx_pkt.h b/VNFs/DPPD-PROX/rx_pkt.h index 6d8f412c..c610ed98 100644 --- a/VNFs/DPPD-PROX/rx_pkt.h +++ b/VNFs/DPPD-PROX/rx_pkt.h @@ -1,5 +1,5 @@ /* -// Copyright (c) 2010-2017 Intel Corporation +// Copyright (c) 2010-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -29,6 +29,9 @@ uint16_t rx_pkt_hw1(struct task_base *tbase, struct rte_mbuf ***mbufs); uint16_t rx_pkt_hw_l3(struct task_base *tbase, struct rte_mbuf ***mbufs); uint16_t rx_pkt_hw_pow2_l3(struct task_base *tbase, struct rte_mbuf ***mbufs); uint16_t rx_pkt_hw1_l3(struct task_base *tbase, struct rte_mbuf ***mbufs); +uint16_t rx_pkt_hw_ndp(struct task_base *tbase, struct rte_mbuf ***mbufs); +uint16_t rx_pkt_hw_pow2_ndp(struct task_base *tbase, struct rte_mbuf ***mbufs); +uint16_t rx_pkt_hw1_ndp(struct task_base *tbase, struct rte_mbuf ***mbufs); /* The _multi variation of the function is used to work-around the problem with QoS, multi-seg mbufs and vector PMD. 
When vector @@ -40,6 +43,9 @@ uint16_t rx_pkt_hw1_multi(struct task_base *tbase, struct rte_mbuf ***mbufs); uint16_t rx_pkt_hw_multi_l3(struct task_base *tbase, struct rte_mbuf ***mbufs); uint16_t rx_pkt_hw_pow2_multi_l3(struct task_base *tbase, struct rte_mbuf ***mbufs); uint16_t rx_pkt_hw1_multi_l3(struct task_base *tbase, struct rte_mbuf ***mbufs); +uint16_t rx_pkt_hw_multi_ndp(struct task_base *tbase, struct rte_mbuf ***mbufs); +uint16_t rx_pkt_hw_pow2_multi_ndp(struct task_base *tbase, struct rte_mbuf ***mbufs); +uint16_t rx_pkt_hw1_multi_ndp(struct task_base *tbase, struct rte_mbuf ***mbufs); uint16_t rx_pkt_sw(struct task_base *tbase, struct rte_mbuf ***mbufs); uint16_t rx_pkt_sw_pow2(struct task_base *tbase, struct rte_mbuf ***mbufs); diff --git a/VNFs/DPPD-PROX/task_base.h b/VNFs/DPPD-PROX/task_base.h index ce70aca2..df876e9a 100644 --- a/VNFs/DPPD-PROX/task_base.h +++ b/VNFs/DPPD-PROX/task_base.h @@ -1,5 +1,5 @@ /* -// Copyright (c) 2010-2017 Intel Corporation +// Copyright (c) 2010-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -178,6 +178,7 @@ struct task_base_aux { void (*stop_last)(struct task_base *tbase); void (*start_first)(struct task_base *tbase); struct task_rt_dump task_rt_dump; + struct rte_mbuf *mbuf; }; /* The task_base is accessed for _all_ task types. 
In case @@ -208,7 +209,6 @@ struct task_base { struct tx_params_hw_sw tx_params_hw_sw; }; struct l3_base l3; - uint32_t local_ipv4; } __attribute__((packed)) __rte_cache_aligned; static void task_base_add_rx_pkt_function(struct task_base *tbase, rx_pkt_func to_add) diff --git a/VNFs/DPPD-PROX/task_init.c b/VNFs/DPPD-PROX/task_init.c index 8441561f..fc12eae7 100644 --- a/VNFs/DPPD-PROX/task_init.c +++ b/VNFs/DPPD-PROX/task_init.c @@ -1,5 +1,5 @@ /* -// Copyright (c) 2010-2017 Intel Corporation +// Copyright (c) 2010-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -176,6 +176,8 @@ static size_t init_rx_tx_rings_ports(struct task_args *targ, struct task_base *t if (targ->nb_rxports == 1) { if (targ->flags & TASK_ARG_L3) tbase->rx_pkt = (targ->task_init->flag_features & TASK_FEATURE_MULTI_RX)? rx_pkt_hw1_multi_l3 : rx_pkt_hw1_l3; + else if (targ->flags & TASK_ARG_NDP) + tbase->rx_pkt = (targ->task_init->flag_features & TASK_FEATURE_MULTI_RX)? rx_pkt_hw1_multi_ndp : rx_pkt_hw1_ndp; else tbase->rx_pkt = (targ->task_init->flag_features & TASK_FEATURE_MULTI_RX)? rx_pkt_hw1_multi : rx_pkt_hw1; tbase->rx_params_hw1.rx_pq.port = targ->rx_port_queue[0].port; @@ -185,6 +187,8 @@ static size_t init_rx_tx_rings_ports(struct task_args *targ, struct task_base *t PROX_ASSERT((targ->nb_rxports != 0) || (targ->task_init->flag_features & TASK_FEATURE_NO_RX)); if (targ->flags & TASK_ARG_L3) tbase->rx_pkt = (targ->task_init->flag_features & TASK_FEATURE_MULTI_RX)? rx_pkt_hw_multi_l3 : rx_pkt_hw_l3; + else if (targ->flags & TASK_ARG_NDP) + tbase->rx_pkt = (targ->task_init->flag_features & TASK_FEATURE_MULTI_RX)? rx_pkt_hw_multi_ndp : rx_pkt_hw_ndp; else tbase->rx_pkt = (targ->task_init->flag_features & TASK_FEATURE_MULTI_RX)? 
rx_pkt_hw_multi : rx_pkt_hw; tbase->rx_params_hw.nb_rxports = targ->nb_rxports; @@ -198,6 +202,8 @@ static size_t init_rx_tx_rings_ports(struct task_args *targ, struct task_base *t if (rte_is_power_of_2(targ->nb_rxports)) { if (targ->flags & TASK_ARG_L3) tbase->rx_pkt = (targ->task_init->flag_features & TASK_FEATURE_MULTI_RX)? rx_pkt_hw_pow2_multi_l3 : rx_pkt_hw_pow2_l3; + else if (targ->flags & TASK_ARG_NDP) + tbase->rx_pkt = (targ->task_init->flag_features & TASK_FEATURE_MULTI_RX)? rx_pkt_hw_pow2_multi_ndp : rx_pkt_hw_pow2_ndp; else tbase->rx_pkt = (targ->task_init->flag_features & TASK_FEATURE_MULTI_RX)? rx_pkt_hw_pow2_multi : rx_pkt_hw_pow2; tbase->rx_params_hw.rxport_mask = targ->nb_rxports - 1; @@ -359,15 +365,20 @@ struct task_base *init_task_struct(struct task_args *targ) tbase->handle_bulk = t->handle; - if (targ->flags & TASK_ARG_L3) { - plog_info("\tTask configured in L3 mode\n"); + if (targ->flags & (TASK_ARG_L3|TASK_ARG_NDP)) { + plog_info("\tTask (%d,%d) configured in L3/NDP mode\n", targ->lconf->id, targ->id); tbase->l3.ctrl_plane_ring = targ->ctrl_plane_ring; if (targ->nb_txports != 0) { tbase->aux->tx_pkt_l2 = tbase->tx_pkt; - tbase->tx_pkt = tx_pkt_l3; - // Make sure control plane packets such as arp are not dropped tbase->aux->tx_ctrlplane_pkt = targ->nb_txrings ? 
tx_ctrlplane_sw : tx_ctrlplane_hw; - task_init_l3(tbase, targ); + if (targ->flags & TASK_ARG_L3) { + tbase->tx_pkt = tx_pkt_l3; + task_init_l3(tbase, targ); + } else if (targ->flags & TASK_ARG_NDP) { + tbase->tx_pkt = tx_pkt_ndp; + task_init_l3(tbase, targ); + } + // Make sure control plane packets such as arp are not dropped } } diff --git a/VNFs/DPPD-PROX/task_init.h b/VNFs/DPPD-PROX/task_init.h index e6261c2a..30a15151 100644 --- a/VNFs/DPPD-PROX/task_init.h +++ b/VNFs/DPPD-PROX/task_init.h @@ -1,5 +1,5 @@ /* -// Copyright (c) 2010-2017 Intel Corporation +// Copyright (c) 2010-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -49,6 +49,8 @@ struct lcore_cfg; #define TASK_ARG_DO_NOT_SET_DST_MAC 0x400 #define TASK_ARG_HW_SRC_MAC 0x800 #define TASK_ARG_L3 0x1000 +#define TASK_ARG_NDP 0x2000 +#define TASK_ARG_SEND_NA_AT_STARTUP 0x4000 #define PROX_MODE_LEN 32 @@ -126,13 +128,16 @@ struct task_args { uint8_t tot_rxrings; uint8_t nb_rxports; uint32_t byte_offset; + uint32_t ipv6_router; uint32_t gateway_ipv4; uint32_t local_ipv4; uint32_t remote_ipv4; uint32_t local_prefix; - uint32_t arp_timeout; - uint32_t arp_update_time; + uint32_t reachable_timeout; + uint32_t arp_ndp_retransmit_timeout; struct ipv6_addr local_ipv6; /* For IPv6 Tunnel, it's the local tunnel endpoint address */ + struct ipv6_addr global_ipv6; + struct ipv6_addr router_prefix; struct rte_ring *rx_rings[MAX_RINGS_PER_TASK]; struct rte_ring *tx_rings[MAX_RINGS_PER_TASK]; struct rte_ring *ctrl_plane_ring; diff --git a/VNFs/DPPD-PROX/tx_pkt.c b/VNFs/DPPD-PROX/tx_pkt.c index 51c1afa7..60d6b514 100644 --- a/VNFs/DPPD-PROX/tx_pkt.c +++ b/VNFs/DPPD-PROX/tx_pkt.c @@ -1,5 +1,5 @@ /* -// Copyright (c) 2010-2017 Intel Corporation +// Copyright (c) 2010-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with 
the License. @@ -26,6 +26,7 @@ #include "log.h" #include "mbuf_utils.h" #include "handle_master.h" +#include "defines.h" static void buf_pkt_single(struct task_base *tbase, struct rte_mbuf *mbuf, const uint8_t out) { @@ -50,6 +51,70 @@ static inline void buf_pkt_all(struct task_base *tbase, struct rte_mbuf **mbufs, } #define MAX_PMD_TX 32 +void store_packet(struct task_base *tbase, struct rte_mbuf *mbuf) +{ + // If buffer is full, drop the first mbuf + if (tbase->aux->mbuf) + tx_drop(tbase->aux->mbuf); + tbase->aux->mbuf = mbuf; +} + +int tx_pkt_ndp(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts, uint8_t *out) +{ + // TODO NDP + struct ipv6_addr ip_dst; + int first = 0, ret, ok = 0, rc; + const struct port_queue *port_queue = &tbase->tx_params_hw.tx_port_queue[0]; + struct rte_mbuf *mbuf = NULL; // used when one need to send both an ARP and a mbuf + + for (int j = 0; j < n_pkts; j++) { + if ((out) && (out[j] >= OUT_HANDLED)) + continue; + if (unlikely((rc = write_ip6_dst_mac(tbase, mbufs[j], &ip_dst)) != SEND_MBUF)) { + if (j - first) { + ret = tbase->aux->tx_pkt_l2(tbase, mbufs + first, j - first, out); + ok += ret; + } + first = j + 1; + switch(rc) { + case SEND_ARP_ND: + // Original mbuf (packet) is stored to be sent later -> need to allocate new mbuf + ret = rte_mempool_get(tbase->l3.arp_nd_pool, (void **)&mbuf); + if (likely(ret == 0)) { + store_packet(tbase, mbufs[j]); + mbuf->port = tbase->l3.reachable_port_id; + tx_ring_cti6(tbase, tbase->l3.ctrl_plane_ring, IP6_REQ_MAC_TO_MASTER, mbuf, tbase->l3.core_id, tbase->l3.task_id, &ip_dst); + } else { + plog_err("Failed to get a mbuf from arp/nd mempool\n"); + tx_drop(mbufs[j]); + TASK_STATS_ADD_DROP_DISCARD(&tbase->aux->stats, 1); + } + break; + case SEND_MBUF_AND_ARP_ND: + // We send the mbuf and an ND - we need to allocate another mbuf for ND + ret = rte_mempool_get(tbase->l3.arp_nd_pool, (void **)&mbuf); + if (likely(ret == 0)) { + mbuf->port = tbase->l3.reachable_port_id; + 
tx_ring_cti6(tbase, tbase->l3.ctrl_plane_ring, IP6_REQ_MAC_TO_MASTER, mbuf, tbase->l3.core_id, tbase->l3.task_id, &ip_dst); + } else { + plog_err("Failed to get a mbuf from arp/nd mempool\n"); + // We still send the initial mbuf + } + ret = tbase->aux->tx_pkt_l2(tbase, mbufs + j, 1, out); + break; + case DROP_MBUF: + tx_drop(mbufs[j]); + TASK_STATS_ADD_DROP_DISCARD(&tbase->aux->stats, 1); + break; + } + } + } + if (n_pkts - first) { + ret = tbase->aux->tx_pkt_l2(tbase, mbufs + first, n_pkts - first, out); + ok += ret; + } + return ok; +} int tx_pkt_l3(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts, uint8_t *out) { uint32_t ip_dst; @@ -69,23 +134,23 @@ int tx_pkt_l3(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts, } first = j + 1; switch(rc) { - case SEND_ARP: + case SEND_ARP_ND: // We re-use the mbuf - no need to create a arp_mbuf and delete the existing mbuf mbufs[j]->port = tbase->l3.reachable_port_id; - if (tx_ring_cti(tbase, tbase->l3.ctrl_plane_ring, REQ_MAC_TO_CTRL, mbufs[j], tbase->l3.core_id, tbase->l3.task_id, ip_dst) == 0) - update_arp_update_time(&tbase->l3, time, 1000); + if (tx_ring_cti(tbase, tbase->l3.ctrl_plane_ring, IP4_REQ_MAC_TO_MASTER, mbufs[j], tbase->l3.core_id, tbase->l3.task_id, ip_dst) == 0) + update_arp_ndp_retransmit_timeout(&tbase->l3, time, 1000); else - update_arp_update_time(&tbase->l3, time, 100); + update_arp_ndp_retransmit_timeout(&tbase->l3, time, 100); break; - case SEND_MBUF_AND_ARP: + case SEND_MBUF_AND_ARP_ND: // We send the mbuf and an ARP - we need to allocate another mbuf for ARP - ret = rte_mempool_get(tbase->l3.arp_pool, (void **)&arp_mbuf); + ret = rte_mempool_get(tbase->l3.arp_nd_pool, (void **)&arp_mbuf); if (likely(ret == 0)) { arp_mbuf->port = tbase->l3.reachable_port_id; - if (tx_ring_cti(tbase, tbase->l3.ctrl_plane_ring, REQ_MAC_TO_CTRL, arp_mbuf, tbase->l3.core_id, tbase->l3.task_id, ip_dst) == 0) - update_arp_update_time(&tbase->l3, time, 1000); + if (tx_ring_cti(tbase, 
tbase->l3.ctrl_plane_ring, IP4_REQ_MAC_TO_MASTER, arp_mbuf, tbase->l3.core_id, tbase->l3.task_id, ip_dst) == 0) + update_arp_ndp_retransmit_timeout(&tbase->l3, time, 1000); else - update_arp_update_time(&tbase->l3, time, 100); + update_arp_ndp_retransmit_timeout(&tbase->l3, time, 100); } else { plog_err("Failed to get a mbuf from arp mempool\n"); // We still send the initial mbuf @@ -644,7 +709,7 @@ static inline void trace_one_tx_pkt(struct task_base *tbase, struct rte_mbuf *mb static void unset_trace(struct task_base *tbase) { if (0 == tbase->aux->task_rt_dump.n_trace) { - if (tbase->tx_pkt == tx_pkt_l3) { + if ((tbase->tx_pkt == tx_pkt_l3) || (tbase->tx_pkt == tx_pkt_ndp)){ tbase->aux->tx_pkt_l2 = tbase->aux->tx_pkt_orig; tbase->aux->tx_pkt_orig = NULL; } else { @@ -713,7 +778,7 @@ int tx_pkt_dump(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkt ret = tbase->aux->tx_pkt_orig(tbase, mbufs, n_pkts, out); if (0 == tbase->aux->task_rt_dump.n_print_tx) { - if (tbase->tx_pkt == tx_pkt_l3) { + if ((tbase->tx_pkt == tx_pkt_l3) || (tbase->tx_pkt == tx_pkt_ndp)) { tbase->aux->tx_pkt_l2 = tbase->aux->tx_pkt_orig; tbase->aux->tx_pkt_orig = NULL; } else { @@ -804,16 +869,16 @@ int tx_ctrlplane_sw(struct task_base *tbase, struct rte_mbuf **mbufs, const uint return ring_enq_no_drop(tbase->tx_params_sw.tx_rings[0], mbufs, n_pkts, tbase); } -static inline int tx_ring_all(struct task_base *tbase, struct rte_ring *ring, uint16_t command, struct rte_mbuf *mbuf, uint8_t core_id, uint8_t task_id, uint32_t ip) +static inline int tx_ring_all(struct task_base *tbase, struct rte_ring *ring, uint8_t command, struct rte_mbuf *mbuf, uint8_t core_id, uint8_t task_id, uint32_t ip) { if (tbase->aux->task_rt_dump.cur_trace) { trace_one_rx_pkt(tbase, mbuf); } - mbuf->udata64 = ((uint64_t)ip << 32) | (core_id << 16) | (task_id << 8) | command; + ctrl_ring_set_command_core_task_ip(mbuf, ((uint64_t)ip << 32) | (core_id << 16) | (task_id << 8) | command); return 
rte_ring_enqueue(ring, mbuf); } -int tx_ring_cti(struct task_base *tbase, struct rte_ring *ring, uint16_t command, struct rte_mbuf *mbuf, uint8_t core_id, uint8_t task_id, uint32_t ip) +int tx_ring_cti(struct task_base *tbase, struct rte_ring *ring, uint8_t command, struct rte_mbuf *mbuf, uint8_t core_id, uint8_t task_id, uint32_t ip) { plogx_dbg("\tSending command %s with ip %d.%d.%d.%d to ring %p using mbuf %p, core %d and task %d - ring size now %d\n", actions_string[command], IP4(ip), ring, mbuf, core_id, task_id, rte_ring_free_count(ring)); int ret = tx_ring_all(tbase, ring, command, mbuf, core_id, task_id, ip); @@ -825,7 +890,7 @@ int tx_ring_cti(struct task_base *tbase, struct rte_ring *ring, uint16_t command return ret; } -void tx_ring_ip(struct task_base *tbase, struct rte_ring *ring, uint16_t command, struct rte_mbuf *mbuf, uint32_t ip) +void tx_ring_ip(struct task_base *tbase, struct rte_ring *ring, uint8_t command, struct rte_mbuf *mbuf, uint32_t ip) { plogx_dbg("\tSending command %s with ip %d.%d.%d.%d to ring %p using mbuf %p - ring size now %d\n", actions_string[command], IP4(ip), ring, mbuf, rte_ring_free_count(ring)); int ret = tx_ring_all(tbase, ring, command, mbuf, 0, 0, ip); @@ -851,9 +916,9 @@ void tx_ring_route(struct task_base *tbase, struct rte_ring *ring, int add, stru { uint8_t command; if (add) - command = ROUTE_ADD_FROM_CTRL; + command = ROUTE_ADD_FROM_MASTER; else - command = ROUTE_DEL_FROM_CTRL; + command = ROUTE_DEL_FROM_MASTER; plogx_dbg("\tSending command %s to ring %p using mbuf %p - ring size now %d\n", actions_string[command], ring, mbuf, rte_ring_free_count(ring)); ctrl_ring_set_command(mbuf, command); @@ -871,48 +936,58 @@ void tx_ring_route(struct task_base *tbase, struct rte_ring *ring, int add, stru } } -void ctrl_ring_set_command(struct rte_mbuf *mbuf, uint64_t udata64) +void tx_ring_cti6(struct task_base *tbase, struct rte_ring *ring, uint8_t command, struct rte_mbuf *mbuf, uint8_t core_id, uint8_t task_id, struct 
ipv6_addr *ip) { - mbuf->udata64 = udata64; -} - -uint64_t ctrl_ring_get_command(struct rte_mbuf *mbuf) -{ - return mbuf->udata64; -} + int ret; + plogx_dbg("\tSending command %s with ip "IPv6_BYTES_FMT" to ring %p using mbuf %p, core %d and task %d - ring size now %d\n", actions_string[command], IPv6_BYTES(ip->bytes), ring, mbuf, core_id, task_id, rte_ring_free_count(ring)); + if (tbase->aux->task_rt_dump.cur_trace) { + trace_one_rx_pkt(tbase, mbuf); + } + ctrl_ring_set_command_core_task_ip(mbuf, (core_id << 16) | (task_id << 8) | command); + ctrl_ring_set_ipv6_addr(mbuf, ip); + ret = rte_ring_enqueue(ring, mbuf); -void ctrl_ring_set_ip(struct rte_mbuf *mbuf, uint32_t udata32) -{ - struct prox_headroom *prox_headroom = (struct prox_headroom *)(rte_pktmbuf_mtod(mbuf, uint8_t*) - sizeof(struct prox_headroom)); - prox_headroom->ip = udata32; + if (unlikely(ret != 0)) { + plogx_dbg("\tFail to send command %s with ip "IPv6_BYTES_FMT" to ring %p using mbuf %p, core %d and task %d - ring size now %d\n", actions_string[command], IPv6_BYTES(ip->bytes), ring, mbuf, core_id, task_id, rte_ring_free_count(ring)); + TASK_STATS_ADD_DROP_DISCARD(&tbase->aux->stats, 1); + rte_pktmbuf_free(mbuf); + } } -uint32_t ctrl_ring_get_ip(struct rte_mbuf *mbuf) +void tx_ring_ip6(struct task_base *tbase, struct rte_ring *ring, uint8_t command, struct rte_mbuf *mbuf, struct ipv6_addr *ip) { - struct prox_headroom *prox_headroom = (struct prox_headroom *)(rte_pktmbuf_mtod(mbuf, uint8_t*) - sizeof(struct prox_headroom)); - return prox_headroom->ip; -} + int ret; + plogx_dbg("\tSending command %s with ip "IPv6_BYTES_FMT" to ring %p using mbuf %p - ring size now %d\n", actions_string[command], IPv6_BYTES(ip->bytes), ring, mbuf, rte_ring_free_count(ring)); + if (tbase->aux->task_rt_dump.cur_trace) { + trace_one_rx_pkt(tbase, mbuf); + } + ctrl_ring_set_command(mbuf, command); + ctrl_ring_set_ipv6_addr(mbuf, ip); + ret = rte_ring_enqueue(ring, mbuf); -void ctrl_ring_set_gateway_ip(struct rte_mbuf 
*mbuf, uint32_t udata32) -{ - struct prox_headroom *prox_headroom = (struct prox_headroom *)(rte_pktmbuf_mtod(mbuf, uint8_t*) - sizeof(struct prox_headroom)); - prox_headroom->gateway_ip = udata32; + if (unlikely(ret != 0)) { + plogx_dbg("\tFail to send command %s with ip "IPv6_BYTES_FMT" to ring %p using mbuf %p - ring size now %d\n", actions_string[command], IPv6_BYTES(ip->bytes), ring, mbuf, rte_ring_free_count(ring)); + TASK_STATS_ADD_DROP_DISCARD(&tbase->aux->stats, 1); + rte_pktmbuf_free(mbuf); + } } -uint32_t ctrl_ring_get_gateway_ip(struct rte_mbuf *mbuf) +void tx_ring_ip6_data(struct task_base *tbase, struct rte_ring *ring, uint8_t command, struct rte_mbuf *mbuf, struct ipv6_addr *ip, uint64_t data) { - struct prox_headroom *prox_headroom = (struct prox_headroom *)(rte_pktmbuf_mtod(mbuf, uint8_t*) - sizeof(struct prox_headroom)); - return prox_headroom->gateway_ip; -} + int ret; + plogx_dbg("\tSending command %s with ip "IPv6_BYTES_FMT" to ring %p using mbuf %p - ring size now %d\n", actions_string[command], IPv6_BYTES(ip->bytes), ring, mbuf, rte_ring_free_count(ring)); + if (tbase->aux->task_rt_dump.cur_trace) { + trace_one_rx_pkt(tbase, mbuf); + } + ctrl_ring_set_command(mbuf, command); + ctrl_ring_set_ipv6_addr(mbuf, ip); + ctrl_ring_set_data(mbuf, data); + ret = rte_ring_enqueue(ring, mbuf); -void ctrl_ring_set_prefix(struct rte_mbuf *mbuf, uint32_t udata32) -{ - struct prox_headroom *prox_headroom = (struct prox_headroom *)(rte_pktmbuf_mtod(mbuf, uint8_t*) - sizeof(struct prox_headroom)); - prox_headroom->prefix = udata32; -} + if (unlikely(ret != 0)) { + plogx_dbg("\tFail to send command %s with ip "IPv6_BYTES_FMT" to ring %p using mbuf %p - ring size now %d\n", actions_string[command], IPv6_BYTES(ip->bytes), ring, mbuf, rte_ring_free_count(ring)); + TASK_STATS_ADD_DROP_DISCARD(&tbase->aux->stats, 1); + rte_pktmbuf_free(mbuf); + } -uint32_t ctrl_ring_get_prefix(struct rte_mbuf *mbuf) -{ - struct prox_headroom *prox_headroom = (struct prox_headroom 
*)(rte_pktmbuf_mtod(mbuf, uint8_t*) - sizeof(struct prox_headroom)); - return prox_headroom->prefix; } diff --git a/VNFs/DPPD-PROX/tx_pkt.h b/VNFs/DPPD-PROX/tx_pkt.h index f7443cf4..b54a2bef 100644 --- a/VNFs/DPPD-PROX/tx_pkt.h +++ b/VNFs/DPPD-PROX/tx_pkt.h @@ -1,5 +1,5 @@ /* -// Copyright (c) 2010-2017 Intel Corporation +// Copyright (c) 2010-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,15 +18,19 @@ #define _TX_PKT_H_ #include +#include +#include +#include "ip6_addr.h" struct task_base; -struct rte_mbuf; struct prox_headroom { uint64_t command; uint32_t ip; uint32_t prefix; uint32_t gateway_ip; + uint64_t data64; + struct ipv6_addr ipv6_addr; } __attribute__((packed)); void flush_queues_hw(struct task_base *tbase); @@ -88,19 +92,108 @@ uint16_t tx_try_self(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t sink. */ int tx_pkt_drop_all(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts, uint8_t *out); int tx_pkt_l3(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts, uint8_t *out); - -int tx_ring_cti(struct task_base *tbase, struct rte_ring *ring, uint16_t command, struct rte_mbuf *mbuf, uint8_t core_id, uint8_t task_id, uint32_t ip); -void tx_ring_ip(struct task_base *tbase, struct rte_ring *ring, uint16_t command, struct rte_mbuf *mbuf, uint32_t ip); +int tx_pkt_ndp(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts, uint8_t *out); + +static inline uint8_t get_command(struct rte_mbuf *mbuf) +{ + struct prox_headroom *prox_headroom = (struct prox_headroom *)(rte_pktmbuf_mtod(mbuf, uint8_t*) - sizeof(struct prox_headroom)); + return prox_headroom->command & 0xFF; +} +static inline uint8_t get_task(struct rte_mbuf *mbuf) +{ + struct prox_headroom *prox_headroom = (struct prox_headroom *)(rte_pktmbuf_mtod(mbuf, uint8_t*) - sizeof(struct prox_headroom)); + return (prox_headroom->command >> 8) & 
0xFF; +} +static inline uint8_t get_core(struct rte_mbuf *mbuf) +{ + struct prox_headroom *prox_headroom = (struct prox_headroom *)(rte_pktmbuf_mtod(mbuf, uint8_t*) - sizeof(struct prox_headroom)); + return (prox_headroom->command >> 16) & 0xFF; +} +static inline uint32_t get_ip(struct rte_mbuf *mbuf) +{ + struct prox_headroom *prox_headroom = (struct prox_headroom *)(rte_pktmbuf_mtod(mbuf, uint8_t*) - sizeof(struct prox_headroom)); + return (prox_headroom->command >> 32) & 0xFFFFFFFF; +} + +static inline void ctrl_ring_set_command_core_task_ip(struct rte_mbuf *mbuf, uint64_t udata64) +{ + struct prox_headroom *prox_headroom = (struct prox_headroom *)(rte_pktmbuf_mtod(mbuf, uint8_t*) - sizeof(struct prox_headroom)); + prox_headroom->command = udata64; +} + +static inline void ctrl_ring_set_command(struct rte_mbuf *mbuf, uint8_t command) +{ + struct prox_headroom *prox_headroom = (struct prox_headroom *)(rte_pktmbuf_mtod(mbuf, uint8_t*) - sizeof(struct prox_headroom)); + prox_headroom->command = command; +} + +static inline void ctrl_ring_set_ip(struct rte_mbuf *mbuf, uint32_t udata32) +{ + struct prox_headroom *prox_headroom = (struct prox_headroom *)(rte_pktmbuf_mtod(mbuf, uint8_t*) - sizeof(struct prox_headroom)); + prox_headroom->ip = udata32; +} + +static inline uint32_t ctrl_ring_get_ip(struct rte_mbuf *mbuf) +{ + struct prox_headroom *prox_headroom = (struct prox_headroom *)(rte_pktmbuf_mtod(mbuf, uint8_t*) - sizeof(struct prox_headroom)); + return prox_headroom->ip; +} + +static inline void ctrl_ring_set_gateway_ip(struct rte_mbuf *mbuf, uint32_t udata32) +{ + struct prox_headroom *prox_headroom = (struct prox_headroom *)(rte_pktmbuf_mtod(mbuf, uint8_t*) - sizeof(struct prox_headroom)); + prox_headroom->gateway_ip = udata32; +} + +static inline uint32_t ctrl_ring_get_gateway_ip(struct rte_mbuf *mbuf) +{ + struct prox_headroom *prox_headroom = (struct prox_headroom *)(rte_pktmbuf_mtod(mbuf, uint8_t*) - sizeof(struct prox_headroom)); + return 
prox_headroom->gateway_ip; +} + +static inline void ctrl_ring_set_prefix(struct rte_mbuf *mbuf, uint32_t udata32) +{ + struct prox_headroom *prox_headroom = (struct prox_headroom *)(rte_pktmbuf_mtod(mbuf, uint8_t*) - sizeof(struct prox_headroom)); + prox_headroom->prefix = udata32; +} + +static inline uint32_t ctrl_ring_get_prefix(struct rte_mbuf *mbuf) +{ + struct prox_headroom *prox_headroom = (struct prox_headroom *)(rte_pktmbuf_mtod(mbuf, uint8_t*) - sizeof(struct prox_headroom)); + return prox_headroom->prefix; +} + +static inline void ctrl_ring_set_data(struct rte_mbuf *mbuf, uint64_t data) +{ + struct prox_headroom *prox_headroom = (struct prox_headroom *)(rte_pktmbuf_mtod(mbuf, uint8_t*) - sizeof(struct prox_headroom)); + prox_headroom->data64 = data; +} + +static inline uint64_t ctrl_ring_get_data(struct rte_mbuf *mbuf) +{ + struct prox_headroom *prox_headroom = (struct prox_headroom *)(rte_pktmbuf_mtod(mbuf, uint8_t*) - sizeof(struct prox_headroom)); + return prox_headroom->data64; +} + +static inline void ctrl_ring_set_ipv6_addr(struct rte_mbuf *mbuf, struct ipv6_addr *ip) +{ + struct prox_headroom *prox_headroom = (struct prox_headroom *)(rte_pktmbuf_mtod(mbuf, uint8_t*) - sizeof(struct prox_headroom)); + memcpy(&prox_headroom->ipv6_addr, ip, sizeof(struct ipv6_addr)); +} + +static inline struct ipv6_addr *ctrl_ring_get_ipv6_addr(struct rte_mbuf *mbuf) +{ + struct prox_headroom *prox_headroom = (struct prox_headroom *)(rte_pktmbuf_mtod(mbuf, uint8_t*) - sizeof(struct prox_headroom)); + return &prox_headroom->ipv6_addr; +} + +int tx_ring_cti(struct task_base *tbase, struct rte_ring *ring, uint8_t command, struct rte_mbuf *mbuf, uint8_t core_id, uint8_t task_id, uint32_t ip); +void tx_ring_cti6(struct task_base *tbase, struct rte_ring *ring, uint8_t command, struct rte_mbuf *mbuf, uint8_t core_id, uint8_t task_id, struct ipv6_addr *ip); +void tx_ring_ip(struct task_base *tbase, struct rte_ring *ring, uint8_t command, struct rte_mbuf *mbuf, uint32_t ip); 
+void tx_ring_ip6(struct task_base *tbase, struct rte_ring *ring, uint8_t command, struct rte_mbuf *mbuf, struct ipv6_addr *ip); +void tx_ring_ip6_data(struct task_base *tbase, struct rte_ring *ring, uint8_t command, struct rte_mbuf *mbuf, struct ipv6_addr *ip, uint64_t data); void tx_ring(struct task_base *tbase, struct rte_ring *ring, uint16_t command, struct rte_mbuf *mbuf); - -void ctrl_ring_set_command(struct rte_mbuf *mbuf, uint64_t udata64); -uint64_t ctrl_ring_get_command(struct rte_mbuf *mbuf); -void ctrl_ring_set_ip(struct rte_mbuf *mbuf, uint32_t udata32); -uint32_t ctrl_ring_get_ip(struct rte_mbuf *mbuf); -void ctrl_ring_set_gateway_ip(struct rte_mbuf *mbuf, uint32_t udata32); -uint32_t ctrl_ring_get_gateway_ip(struct rte_mbuf *mbuf); -void ctrl_ring_set_prefix(struct rte_mbuf *mbuf, uint32_t udata32); -uint32_t ctrl_ring_get_prefix(struct rte_mbuf *mbuf); void tx_ring_route(struct task_base *tbase, struct rte_ring *ring, int add, struct rte_mbuf *mbuf, uint32_t ip, uint32_t gateway_ip, uint32_t prefix); +static void store_packet(struct task_base *tbase, struct rte_mbuf *mbufs); #endif /* _TX_PKT_H_ */ -- cgit 1.2.3-korg