author | Xavier Simonart <xavier.simonart@intel.com> | 2018-01-08 11:21:44 +0100 |
---|---|---|
committer | Xavier Simonart <xavier.simonart@intel.com> | 2018-01-24 14:14:18 +0100 |
commit | 442501d625b6d05f38267d442fd4e42f6cebef7d (patch) | |
tree | b3193066faf1255e1c9a34c4f0ebe5a05ec9adb4 /VNFs/DPPD-PROX | |
parent | deab1ee8197298bd7cf30d259a28206841d59383 (diff) |
Fix extrapolation used in latency measurements
When doing latency measurements, PROX takes into account that packets
are generated and received in bulk. For instance, if PROX receives 4
packets at time T, it knows that the first packet was received by the
NIC before T, at least by the time needed to receive the other 3
packets at maximum link speed. So the latency data is decreased by the
minimum time needed to receive those 3 packets.
For this, PROX was using a default link speed of 10 Gbps. This is wrong
for 1 Gbps and 40 Gbps networks; on 40 Gbps networks it extrapolated
too far back, resulting in either too-low latencies or negative numbers
(visible as very high latencies).
Change-Id: I4e0f02e8383dd8d168ac50ecae37a05510ad08bc
Signed-off-by: Xavier Simonart <xavier.simonart@intel.com>
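To make the correction concrete, here is a minimal standalone sketch of the extrapolation arithmetic described above. It is illustrative only and not the PROX source: the function name `first_pkt_rx_tsc`, the 2 GHz TSC, and the 84-byte wire size are assumptions made for the example.

```c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Estimate when the first packet of a bulk hit the NIC, given the TSC at
 * which the bulk was read and the wire bytes received after that packet.
 * link_speed_Bps is the link speed in bytes per second. */
static uint64_t first_pkt_rx_tsc(uint64_t rx_tsc, uint64_t bytes_after,
                                 uint64_t tsc_hz, uint64_t link_speed_Bps)
{
    /* Cycles needed to receive the trailing bytes at full link speed */
    uint64_t cycles = (tsc_hz * bytes_after) / link_speed_Bps;
    return cycles < rx_tsc ? rx_tsc - cycles : 0; /* avoid wrapping below 0 */
}

int main(void)
{
    uint64_t tsc_hz = 2000000000ULL;   /* assume a 2 GHz TSC */
    uint64_t rx_tsc = 1000000ULL;      /* arbitrary timestamp of the bulk */
    uint64_t bytes_after = 3 * 84;     /* 3 trailing 64-byte packets incl. framing */

    /* Old behaviour: always assume 10 Gbps (1.25e9 bytes/s) */
    printf("10G assumption: %" PRIu64 "\n",
           first_pkt_rx_tsc(rx_tsc, bytes_after, tsc_hz, 1250000000ULL));
    /* Actual 40 Gbps link (5e9 bytes/s): correction is 4x smaller */
    printf("40G actual:     %" PRIu64 "\n",
           first_pkt_rx_tsc(rx_tsc, bytes_after, tsc_hz, 5000000000ULL));
    return 0;
}
```

Running the sketch shows the hard-coded 10 Gbps assumption subtracting four times more cycles than a real 40 Gbps link warrants, which is exactly the over-correction this commit removes.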
Diffstat (limited to 'VNFs/DPPD-PROX')
-rw-r--r-- | VNFs/DPPD-PROX/cmd_parser.c | 8
-rw-r--r-- | VNFs/DPPD-PROX/handle_gen.c | 19
-rw-r--r-- | VNFs/DPPD-PROX/handle_lat.c | 32
3 files changed, 48 insertions, 11 deletions
diff --git a/VNFs/DPPD-PROX/cmd_parser.c b/VNFs/DPPD-PROX/cmd_parser.c
index 18a4f5fc..0506fc52 100644
--- a/VNFs/DPPD-PROX/cmd_parser.c
+++ b/VNFs/DPPD-PROX/cmd_parser.c
@@ -545,8 +545,8 @@ static int parse_cmd_speed(const char *str, struct input *input)
 	if ((!task_is_mode_and_submode(lcore_id, task_id, "gen", "")) && (!task_is_mode_and_submode(lcore_id, task_id, "gen", "l3"))) {
 		plog_err("Core %u task %u is not generating packets\n", lcore_id, task_id);
 	}
-	else if (speed > 400.0f || speed < 0.0f) {
-		plog_err("Speed out of range (must be betweeen 0%% and 100%%)\n");
+	else if (speed > 1000.0f || speed < 0.0f) {	// Up to 100 Gbps
+		plog_err("Speed out of range (must be betweeen 0%% and 1000%%)\n");
 	}
 	else {
 		struct task_base *tbase = lcore_cfg[lcore_id].tasks_all[task_id];
@@ -579,8 +579,8 @@ static int parse_cmd_speed_byte(const char *str, struct input *input)
 	if ((!task_is_mode_and_submode(lcore_id, task_id, "gen", "")) && (!task_is_mode_and_submode(lcore_id, task_id, "gen", "l3"))) {
 		plog_err("Core %u task %u is not generating packets\n", lcore_id, task_id);
 	}
-	else if (bps > 1250000000) {
-		plog_err("Speed out of range (must be <= 1250000000)\n");
+	else if (bps > 12500000000) {	// Up to 100Gbps
+		plog_err("Speed out of range (must be <= 12500000000)\n");
 	}
 	else {
 		struct task_base *tbase = lcore_cfg[lcore_id].tasks_all[task_id];
diff --git a/VNFs/DPPD-PROX/handle_gen.c b/VNFs/DPPD-PROX/handle_gen.c
index c48b4c13..0f70ee6b 100644
--- a/VNFs/DPPD-PROX/handle_gen.c
+++ b/VNFs/DPPD-PROX/handle_gen.c
@@ -122,6 +122,7 @@ struct task_gen {
 	struct ether_addr src_mac;
 	uint8_t flags;
 	uint8_t cksum_offload;
+	struct prox_port_cfg *port;
 } __rte_cache_aligned;
 
 static inline uint8_t ipv4_get_hdr_len(struct ipv4_hdr *ip)
@@ -1144,6 +1145,12 @@ static void start(struct task_base *tbase)
 	if (tbase->l3.tmaster) {
 		register_all_ip_to_ctrl_plane(task);
 	}
+	if (task->port) {
+		// task->port->link->speed reports the link speed in Mbps e.g. 40k for a 40 Gbps NIC
+		// task->link_speed reported link speed in Bytes per sec.
+		task->link_speed = task->port->link_speed * 125000L;
+		plog_info("\tGenerating at %ld Mbps\n", 8 * task->link_speed / 1000000);
+	}
 	/* TODO
 	   Handle the case when two tasks transmit to the same port
 	   and one of them is stopped. In that case ARP (requests or replies)
@@ -1190,9 +1197,16 @@ static void init_task_gen(struct task_base *tbase, struct task_args *targ)
 	task->sig = targ->sig;
 	task->new_rate_bps = targ->rate_bps;
 
+	/*
+	 * For tokens, use 10 Gbps as base rate
+	 * Scripts can then use speed command, with speed=100 as 10 Gbps and speed=400 as 40 Gbps
+	 * Script can query prox "port info" command to find out the port link speed to know
+	 * at which rate to start. Note that virtio running on OVS returns 10 Gbps, so a script has
+	 * probably also to check the driver (as returned by the same "port info" command.
+	 */
 	struct token_time_cfg tt_cfg = token_time_cfg_create(1250000000, rte_get_tsc_hz(), -1);
-
 	token_time_init(&task->token_time, &tt_cfg);
+
 	init_task_gen_seeds(task);
 
 	task->min_bulk_size = targ->min_bulk_size;
@@ -1211,8 +1225,6 @@ static void init_task_gen(struct task_base *tbase, struct task_args *targ)
 	task->generator_id = targ->generator_id;
 	task->link_speed = UINT64_MAX;
-	if (targ->nb_txrings == 0 && targ->nb_txports == 1)
-		task->link_speed = 1250000000;
 
 	if (!strcmp(targ->pcap_file, "")) {
 		plog_info("\tUsing inline definition of a packet\n");
@@ -1237,6 +1249,7 @@ static void init_task_gen(struct task_base *tbase, struct task_args *targ)
 	struct prox_port_cfg *port = find_reachable_port(targ);
 	if (port) {
 		task->cksum_offload = port->capabilities.tx_offload_cksum;
+		task->port = port;
 	}
 }
diff --git a/VNFs/DPPD-PROX/handle_lat.c b/VNFs/DPPD-PROX/handle_lat.c
index 95ebcc73..b50f9504 100644
--- a/VNFs/DPPD-PROX/handle_lat.c
+++ b/VNFs/DPPD-PROX/handle_lat.c
@@ -32,6 +32,7 @@
 #include "quit.h"
 #include "eld.h"
 #include "prox_shared.h"
+#include "prox_port_cfg.h"
 
 #define DEFAULT_BUCKET_SIZE	10
@@ -105,8 +106,10 @@ struct task_lat {
 	uint32_t generator_count;
 	struct early_loss_detect *eld;
 	struct rx_pkt_meta_data *rx_pkt_meta;
+	uint64_t link_speed;
 	FILE *fp_rx;
 	FILE *fp_tx;
+	struct prox_port_cfg *port;
 };
 
 static uint32_t abs_diff(uint32_t a, uint32_t b)
@@ -375,9 +378,9 @@ static uint32_t task_lat_early_loss_detect(struct task_lat *task, struct unique_
 	return early_loss_detect_add(eld, packet_index);
 }
 
-static uint64_t tsc_extrapolate_backward(uint64_t tsc_from, uint64_t bytes, uint64_t tsc_minimum)
+static uint64_t tsc_extrapolate_backward(uint64_t link_speed, uint64_t tsc_from, uint64_t bytes, uint64_t tsc_minimum)
 {
-	uint64_t tsc = tsc_from - rte_get_tsc_hz()*bytes/1250000000;
+	uint64_t tsc = tsc_from - (rte_get_tsc_hz()*bytes)/link_speed;
 	if (likely(tsc > tsc_minimum))
 		return tsc;
 	else
@@ -495,7 +498,7 @@ static int handle_lat_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uin
 		bytes_total_in_bulk += mbuf_wire_size(mbufs[flipped]);
 	}
 
-	pkt_rx_time = tsc_extrapolate_backward(rx_tsc, task->rx_pkt_meta[0].bytes_after_in_bulk, task->last_pkts_tsc) >> LATENCY_ACCURACY;
+	pkt_rx_time = tsc_extrapolate_backward(task->link_speed, rx_tsc, task->rx_pkt_meta[0].bytes_after_in_bulk, task->last_pkts_tsc) >> LATENCY_ACCURACY;
 	if ((uint32_t)((task->begin >> LATENCY_ACCURACY)) > pkt_rx_time) {
 		// Extrapolation went up to BEFORE begin => packets were stuck in the NIC but we were not seeing them
 		rx_time_err = pkt_rx_time - (uint32_t)(task->last_pkts_tsc >> LATENCY_ACCURACY);
@@ -510,7 +513,7 @@ static int handle_lat_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uin
 		struct rx_pkt_meta_data *rx_pkt_meta = &task->rx_pkt_meta[j];
 		uint8_t *hdr = rx_pkt_meta->hdr;
 
-		pkt_rx_time = tsc_extrapolate_backward(rx_tsc, rx_pkt_meta->bytes_after_in_bulk, task->last_pkts_tsc) >> LATENCY_ACCURACY;
+		pkt_rx_time = tsc_extrapolate_backward(task->link_speed, rx_tsc, rx_pkt_meta->bytes_after_in_bulk, task->last_pkts_tsc) >> LATENCY_ACCURACY;
 		pkt_tx_time = rx_pkt_meta->pkt_tx_time;
 
 		if (task->unique_id_pos) {
@@ -604,6 +607,18 @@ void task_lat_set_accuracy_limit(struct task_lat *task, uint32_t accuracy_limit_
 	task->limit = nsec_to_tsc(accuracy_limit_nsec);
 }
 
+static void lat_start(struct task_base *tbase)
+{
+	struct task_lat *task = (struct task_lat *)tbase;
+
+	if (task->port) {
+		// task->port->link->speed reports the link speed in Mbps e.g. 40k for a 40 Gbps NIC
+		// task->link_speed reported link speed in Bytes per sec.
+		task->link_speed = task->port->link_speed * 125000L;
+		plog_info("\tReceiving at %ld Mbps\n", 8 * task->link_speed / 1000000);
+	}
+}
+
 static void init_task_lat(struct task_base *tbase, struct task_args *targ)
 {
 	struct task_lat *task = (struct task_lat *)tbase;
@@ -636,12 +651,21 @@ static void init_task_lat(struct task_base *tbase, struct task_args *targ)
 	task_lat_set_accuracy_limit(task, targ->accuracy_limit_nsec);
 	task->rx_pkt_meta = prox_zmalloc(MAX_RX_PKT_ALL * sizeof(*task->rx_pkt_meta), socket_id);
 	PROX_PANIC(task->rx_pkt_meta == NULL, "unable to allocate memory to store RX packet meta data");
+
+	task->link_speed = UINT64_MAX;
+	if (targ->nb_rxports) {
+		// task->port structure is only used while starting handle_lat to get the link_speed.
+		// link_speed can not be quiried at init as the port has not been initialized yet.
+		struct prox_port_cfg *port = &prox_port_cfg[targ->rx_port_queue[0].port];
+		task->port = port;
+	}
 }
 
 static struct task_init task_init_lat = {
	.mode_str = "lat",
	.init = init_task_lat,
	.handle = handle_lat_bulk,
+	.start = lat_start,
	.stop = lat_stop,
	.flag_features = TASK_FEATURE_TSC_RX | TASK_FEATURE_RX_ALL | TASK_FEATURE_ZERO_RX | TASK_FEATURE_NEVER_DISCARDS,
	.size = sizeof(struct task_lat)
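For reference, a small sketch of the unit conversion the patch relies on (all values here are assumed for illustration, not taken from a PROX run): DPDK reports the negotiated link speed in Mbps, while the extrapolation works in bytes per second, hence the `* 125000L`. The same numbers drive the speed-command scaling mentioned in the new comment, where speed=100 corresponds to 10 Gbps.

```c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* DPDK exposes link speed in Mbps, e.g. 40000 for a 40 Gbps port. */
    uint32_t link_speed_mbps = 40000;

    /* 1 Mbps = 1e6 bits/s = 125000 bytes/s, hence the * 125000L in the patch. */
    uint64_t link_speed_Bps = (uint64_t)link_speed_mbps * 125000ULL;
    printf("link speed: %" PRIu64 " bytes/s (%" PRIu64 " Mbps)\n",
           link_speed_Bps, 8 * link_speed_Bps / 1000000);

    /* The gen task keeps 10 Gbps as the base rate for the speed command:
     * speed=100 corresponds to 10 Gbps, so driving this 40 Gbps port at
     * line rate means asking for speed=400. */
    double speed_cmd = (double)link_speed_mbps / 10000.0 * 100.0;
    printf("speed command for line rate: %.0f\n", speed_cmd);
    return 0;
}
```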