summaryrefslogtreecommitdiffstats
path: root/VNFs/DPPD-PROX/handle_lb_qinq.c
diff options
context:
space:
mode:
authorDeepak S <deepak.s@linux.intel.com>2017-07-13 21:26:50 -0700
committerDeepak S <deepak.s@linux.intel.com>2017-07-14 04:58:47 -0700
commit7286b2518ec8e4398b512ce95def9166a7af2e4a (patch)
treec93ef65d9e73e8893ccecb720152e16aae96a8b6 /VNFs/DPPD-PROX/handle_lb_qinq.c
parentadcb79da90176b27224eeb1d00aa0e611ef85a9b (diff)
Adding PROX(Packet pROcessing eXecution engine) VNF to sampleVNF
JIRA: SAMPLEVNF-55 PROX is a DPDK-based application implementing Telco use-cases such as a simplified BRAS/BNG, light-weight AFTR... It also allows configuring finer grained network functions like QoS, Routing, load-balancing... (We are moving PROX version v039 to sampleVNF https://01.org/intel-data-plane-performance-demonstrators/prox-overview) Change-Id: Ia3cb02cf0e49ac5596e922c197ff7e010293d033 Signed-off-by: Deepak S <deepak.s@linux.intel.com>
Diffstat (limited to 'VNFs/DPPD-PROX/handle_lb_qinq.c')
-rw-r--r--VNFs/DPPD-PROX/handle_lb_qinq.c377
1 files changed, 377 insertions, 0 deletions
diff --git a/VNFs/DPPD-PROX/handle_lb_qinq.c b/VNFs/DPPD-PROX/handle_lb_qinq.c
new file mode 100644
index 00000000..d58703c5
--- /dev/null
+++ b/VNFs/DPPD-PROX/handle_lb_qinq.c
@@ -0,0 +1,377 @@
+/*
+// Copyright (c) 2010-2017 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include <string.h>
+
+#include <rte_mbuf.h>
+#include <rte_ip.h>
+#include <rte_byteorder.h>
+#include <rte_version.h>
+
+#include "prox_malloc.h"
+#include "task_base.h"
+#include "tx_pkt.h"
+#include "rx_pkt.h"
+#include "etypes.h"
+#include "log.h"
+#include "quit.h"
+#include "qinq.h"
+#include "lconf.h"
+#include "prefetch.h"
+#include "defines.h"
+#include "prox_cfg.h"
+#include "hash_utils.h"
+#include "handle_lb_net.h"
+#include "toeplitz.h"
+
+#if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
+#define RTE_CACHE_LINE_SIZE CACHE_LINE_SIZE
+#endif
+
+/* Load balancing based on one byte, figures out what type of packet
+ is passed and depending on the type, pass the packet to the correct
+ worker thread. If an unsupported packet type is used, the packet is
+ simply dropped. This Load balancer can only handling QinQ packets
+ (i.e. packets comming from the vCPE). */
+int handle_lb_qinq_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts);
+int handle_lb_qinq_bulk_set_port(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts);
+
+struct task_lb_qinq {
+ struct task_base base;
+ uint8_t *worker_table;
+ uint8_t bit_mask;
+ uint8_t protocols_mask;
+ uint8_t nb_worker_threads;
+ uint16_t qinq_tag;
+};
+
+static void init_task_lb_qinq(struct task_base *tbase, struct task_args *targ)
+{
+ struct task_lb_qinq *task = (struct task_lb_qinq *)tbase;
+ const int socket_id = rte_lcore_to_socket_id(targ->lconf->id);
+
+ task->qinq_tag = targ->qinq_tag;
+ task->nb_worker_threads = targ->nb_worker_threads;
+ task->bit_mask = rte_is_power_of_2(targ->nb_worker_threads) ? targ->nb_worker_threads - 1 : 0xff;
+
+ /* The load distributor is sending to a set of cores. These
+ cores are responsible for handling a set of flows
+ identified by a qinq tag. The load distributor identifies
+ the flows and forwards them to the appropriate worker. The
+ mapping from flow to worker is stored within the
+ work_table. Build the worker_table by asking each worker
+ which flows are handled. */
+
+ task->worker_table = prox_zmalloc(0x1000000, socket_id);
+ for (int i = 0; i < targ->nb_worker_threads; ++i) {
+ struct core_task ct = targ->core_task_set[0].core_task[i];
+ struct task_args *t = core_targ_get(ct.core, ct.task);
+
+ PROX_PANIC(t->task_init->flow_iter.beg == NULL,
+ "Load distributor can't find flows owned by destination worker %d\n", i);
+
+ struct flow_iter *it = &t->task_init->flow_iter;
+
+ int cnt = 0;
+ for (it->beg(it, t); !it->is_end(it, t); it->next(it, t)) {
+ uint16_t svlan = it->get_svlan(it, t);
+ uint16_t cvlan = it->get_cvlan(it, t);
+
+ task->worker_table[PKT_TO_LUTQINQ(svlan, cvlan)] = i;
+ }
+
+ }
+
+ /* Check which protocols we are allowed to send to worker tasks */
+ for (int i = 0; i < MAX_PROTOCOLS; ++i) {
+ int is_active = !!targ->core_task_set[i].n_elems;
+ task->protocols_mask |= is_active << i;
+ }
+ plog_info("\t\ttask_lb_qinq protocols_mask = 0x%x\n", task->protocols_mask);
+
+ if (targ->task_init->flag_features & TASK_FEATURE_LUT_QINQ_RSS)
+ tbase->flags |= BASE_FLAG_LUT_QINQ_RSS;
+ if (targ->task_init->flag_features & TASK_FEATURE_LUT_QINQ_HASH)
+ tbase->flags |= BASE_FLAG_LUT_QINQ_HASH;
+ plog_info("\t\ttask_lb_qinq flags = 0x%x\n", tbase->flags);
+}
+
+static struct task_init task_init_lb_qinq = {
+ .mode_str = "lbqinq",
+ .init = init_task_lb_qinq,
+ .handle = handle_lb_qinq_bulk,
+ .size = sizeof(struct task_lb_qinq)
+};
+
+/*
+ Add correct port id to mbufs coming from a DPDK ring port in the loadbalancer.
+ For the split-bng using DPDK rings between the vSwitch and the VMs
+ we need to know the port from which a packet was received.
+ The ring PMD in dpdk does not update the port field in the mbuf
+ and thus we have no control over the port numbers that are being used.
+ This submode allows the loadbalancer to set the port number on which it
+ received the mbuf.
+*/
+static struct task_init task_init_lb_qinq_set_port = {
+ .mode_str = "lbqinq",
+ .sub_mode_str = "lut_qinq_set_port",
+ .init = init_task_lb_qinq,
+ .handle = handle_lb_qinq_bulk_set_port,
+ .size = sizeof(struct task_lb_qinq)
+};
+
+/*
+ Load Balance on Hash of combination of cvlan and svlan
+*/
+static struct task_init task_init_lb_qinq_hash_friend = {
+ .mode_str = "lbqinq",
+ .sub_mode_str ="lut_qinq_hash_friend",
+ .init = init_task_lb_qinq,
+ .handle = handle_lb_qinq_bulk,
+ .flag_features = TASK_FEATURE_LUT_QINQ_HASH,
+ .size = sizeof(struct task_lb_qinq)
+};
+
+/*
+ Load Balance on rss of combination of cvlan and svlan.
+ This could be used to compare with HW implementations.
+*/
+static struct task_init task_init_lb_qinq_rss_friend = {
+ .mode_str = "lbqinq",
+ .sub_mode_str ="lut_qinq_rss_friend",
+ .init = init_task_lb_qinq,
+ .handle = handle_lb_qinq_bulk,
+ .flag_features = TASK_FEATURE_LUT_QINQ_RSS,
+ .size = sizeof(struct task_lb_qinq)
+};
+
+__attribute__((constructor)) static void reg_task_lb_qinq(void)
+{
+ reg_task(&task_init_lb_qinq);
+ reg_task(&task_init_lb_qinq_hash_friend);
+ reg_task(&task_init_lb_qinq_rss_friend);
+ reg_task(&task_init_lb_qinq_set_port);
+}
+
+static inline uint8_t handle_lb_qinq(struct task_lb_qinq *task, struct rte_mbuf *mbuf);
+
+int handle_lb_qinq_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts)
+{
+ struct task_lb_qinq *task = (struct task_lb_qinq *)tbase;
+ uint8_t out[MAX_PKT_BURST];
+ uint16_t j;
+
+ prefetch_first(mbufs, n_pkts);
+
+ for (j = 0; j + PREFETCH_OFFSET < n_pkts; ++j) {
+#ifdef PROX_PREFETCH_OFFSET
+ PREFETCH0(mbufs[j + PREFETCH_OFFSET]);
+ PREFETCH0(rte_pktmbuf_mtod(mbufs[j + PREFETCH_OFFSET - 1], void *));
+#endif
+ out[j] = handle_lb_qinq(task, mbufs[j]);
+ }
+#ifdef PROX_PREFETCH_OFFSET
+ PREFETCH0(rte_pktmbuf_mtod(mbufs[n_pkts - 1], void *));
+ for (; j < n_pkts; ++j) {
+ out[j] = handle_lb_qinq(task, mbufs[j]);
+ }
+#endif
+
+ return task->base.tx_pkt(&task->base, mbufs, n_pkts, out);
+}
+
+int handle_lb_qinq_bulk_set_port(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts)
+{
+ struct task_lb_qinq *task = (struct task_lb_qinq *)tbase;
+ uint8_t out[MAX_PKT_BURST];
+ uint16_t j;
+#if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
+ uint32_t port_id = mbufs[0]->pkt.in_port;
+#else
+ uint32_t port_id = mbufs[0]->port;
+#endif
+
+ if (tbase->rx_pkt == rx_pkt_hw) {
+ port_id = tbase->rx_params_hw.last_read_portid + tbase->rx_params_hw.nb_rxports;
+ port_id = ( port_id - 1 ) % tbase->rx_params_hw.nb_rxports;
+ port_id = tbase->rx_params_hw.rx_pq[port_id].port;
+ } else if (tbase->rx_pkt == rx_pkt_hw1) {
+ port_id = tbase->rx_params_hw1.rx_pq.port;
+ }
+
+ prefetch_first(mbufs, n_pkts);
+
+ for (j = 0; j + PREFETCH_OFFSET < n_pkts; ++j) {
+#ifdef PROX_PREFETCH_OFFSET
+ PREFETCH0(mbufs[j + PREFETCH_OFFSET]);
+ PREFETCH0(rte_pktmbuf_mtod(mbufs[j + PREFETCH_OFFSET - 1], void *));
+#endif
+#if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
+ mbufs[j]->pkt.in_port = port_id;
+#else
+ mbufs[j]->port = port_id;
+#endif
+ out[j] = handle_lb_qinq(task, mbufs[j]);
+ }
+#ifdef PROX_PREFETCH_OFFSET
+ PREFETCH0(rte_pktmbuf_mtod(mbufs[n_pkts - 1], void *));
+ for (; j < n_pkts; ++j) {
+#if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
+ mbufs[j]->pkt.in_port = port_id;
+#else
+ mbufs[j]->port = port_id;
+#endif
+ out[j] = handle_lb_qinq(task, mbufs[j]);
+ }
+#endif
+
+ return task->base.tx_pkt(&task->base, mbufs, n_pkts, out);
+}
+
+struct qinq_packet {
+ struct qinq_hdr qinq_hdr;
+ union {
+ struct ipv4_hdr ipv4_hdr;
+ struct ipv6_hdr ipv6_hdr;
+ };
+} __attribute__((packed));
+
+struct qinq_packet_data {
+ struct ether_addr d_addr;
+ struct ether_addr s_addr;
+ uint64_t qinq;
+} __attribute__((packed));
+
+struct ether_packet {
+ struct ether_hdr ether_hdr;
+ union {
+ struct ipv4_hdr ipv4_hdr;
+ struct ipv6_hdr ipv6_hdr;
+ };
+} __attribute__((packed));
+
+struct cpe_packet {
+ union {
+ struct qinq_packet qp;
+ struct ether_packet ep;
+ struct qinq_packet_data qd;
+ };
+};
+
+static inline uint8_t get_worker(struct task_lb_qinq *task, struct cpe_packet *packet)
+{
+ uint8_t worker = 0;
+ if (((struct task_base *)task)->flags & BASE_FLAG_LUT_QINQ_HASH) {
+ // Load Balance on Hash of combination of cvlan and svlan
+ uint64_t qinq_net = packet->qd.qinq;
+ qinq_net = qinq_net & 0xFF0F0000FF0F0000; // Mask Proto and QoS bits
+ if (task->bit_mask != 0xff) {
+ worker = hash_crc32(&qinq_net,8,0) & task->bit_mask;
+ }
+ else {
+ worker = hash_crc32(&qinq_net,8,0) % task->nb_worker_threads;
+ }
+ plogx_dbg("Sending packet svlan=%x, cvlan=%x, pseudo_qinq=%lx to worker %d\n", rte_bswap16(0xFF0F & packet->qp.qinq_hdr.svlan.vlan_tci), rte_bswap16(0xFF0F & packet->qp.qinq_hdr.cvlan.vlan_tci), qinq_net, worker);
+ } else if (((struct task_base *)task)->flags & BASE_FLAG_LUT_QINQ_RSS){
+ // Load Balance on rss of combination of cvlan and svlan
+ uint32_t qinq = (packet->qp.qinq_hdr.cvlan.vlan_tci & 0xFF0F) << 16;
+ uint32_t rss = toeplitz_hash((uint8_t *)&qinq, 4);
+ if (task->bit_mask != 0xff) {
+ worker = rss & task->bit_mask;
+ } else {
+ worker = (0x1ff & rss) % task->nb_worker_threads;
+ }
+ plogx_dbg("Sending packet svlan=%x, cvlan=%x, rss_input=%x, rss=%x to worker %d\n", rte_bswap16(0xFF0F & packet->qp.qinq_hdr.svlan.vlan_tci), rte_bswap16(0xFF0F & packet->qp.qinq_hdr.cvlan.vlan_tci), qinq, rss, worker);
+ } else {
+ uint16_t svlan = packet->qp.qinq_hdr.svlan.vlan_tci;
+ uint16_t cvlan = packet->qp.qinq_hdr.cvlan.vlan_tci;
+ prefetch_nta(&task->worker_table[PKT_TO_LUTQINQ(svlan, cvlan)]);
+ worker = task->worker_table[PKT_TO_LUTQINQ(svlan, cvlan)];
+
+ const size_t pos = offsetof(struct cpe_packet, qp.qinq_hdr.cvlan.vlan_tci);
+ plogx_dbg("qinq = %u, worker = %u, pos = %lu\n", rte_be_to_cpu_16(cvlan), worker, pos);
+ }
+ return worker;
+}
+
+static inline uint8_t handle_lb_qinq(struct task_lb_qinq *task, struct rte_mbuf *mbuf)
+{
+ struct cpe_packet *packet = rte_pktmbuf_mtod(mbuf, struct cpe_packet*);
+ if (packet->ep.ether_hdr.ether_type == ETYPE_IPv4) {
+ if (unlikely((packet->ep.ipv4_hdr.version_ihl >> 4) != 4)) {
+ plogx_err("Invalid Version %u for ETYPE_IPv4\n", packet->ep.ipv4_hdr.version_ihl);
+ return OUT_DISCARD;
+ }
+ /* use 24 bits from the IP, clients are from the 10.0.0.0/8 network */
+ const uint32_t tmp = rte_bswap32(packet->ep.ipv4_hdr.src_addr) & 0x00FFFFFF;
+ const uint32_t svlan = rte_bswap16(tmp >> 12);
+ const uint32_t cvlan = rte_bswap16(tmp & 0x0FFF);
+ prefetch_nta(&task->worker_table[PKT_TO_LUTQINQ(svlan, cvlan)]);
+ uint8_t worker = task->worker_table[PKT_TO_LUTQINQ(svlan, cvlan)];
+ return worker + IPV4 * task->nb_worker_threads;
+ }
+ else if (unlikely(packet->qp.qinq_hdr.svlan.eth_proto != task->qinq_tag)) {
+ /* might receive LLDP from the L2 switch... */
+ if (packet->qp.qinq_hdr.svlan.eth_proto != ETYPE_LLDP) {
+ plogdx_err(mbuf, "Invalid packet for LB in QinQ mode\n");
+ }
+ return OUT_DISCARD;
+ }
+
+ uint8_t worker = 0;
+ uint8_t proto = 0xFF;
+ switch (packet->qp.qinq_hdr.ether_type) {
+ case ETYPE_IPv4: {
+ if (unlikely((packet->qp.ipv4_hdr.version_ihl >> 4) != 4)) {
+ plogx_err("Invalid Version %u for ETYPE_IPv4\n", packet->qp.ipv4_hdr.version_ihl);
+ return OUT_DISCARD;
+ }
+ worker = get_worker(task, packet);
+ proto = IPV4;
+ break;
+ }
+ case ETYPE_IPv6: {
+ if (unlikely((packet->qp.ipv4_hdr.version_ihl >> 4) != 6)) {
+ plogx_err("Invalid Version %u for ETYPE_IPv6\n", packet->qp.ipv4_hdr.version_ihl);
+ return OUT_DISCARD;
+ }
+ /* Use IP Destination when IPV6 QinQ */
+ if (task->bit_mask != 0xff) {
+ worker = ((uint8_t *)packet)[61] & task->bit_mask;
+ }
+ else {
+ worker = ((uint8_t *)packet)[61] % task->nb_worker_threads;
+ }
+ proto = IPV6;
+ break;
+ }
+ case ETYPE_ARP: {
+ // We can only send to ARP ring if it exists
+ if (0 != (task->protocols_mask & (1 << ARP))) {
+ proto = ARP;
+ } else {
+ proto = IPV4;
+ }
+ worker = get_worker(task, packet);
+ break;
+ }
+ default:
+ plogx_warn("Error in ETYPE_8021ad: ether_type = %#06x\n", packet->qp.qinq_hdr.ether_type);
+ return OUT_DISCARD;
+ }
+
+ return worker + proto * task->nb_worker_threads;
+}