1 files changed, 911 insertions, 262 deletions
diff --git a/VNFs/DPPD-PROX/handle_gen.c b/VNFs/DPPD-PROX/handle_gen.c
index 89dbe9e4..2c8a65c7 100644
--- a/VNFs/DPPD-PROX/handle_gen.c
+++ b/VNFs/DPPD-PROX/handle_gen.c
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2010-2017 Intel Corporation
+// Copyright (c) 2010-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -14,6 +14,11 @@
 // limitations under the License.
 */
 
+#include <rte_common.h>
+#ifndef __rte_cache_aligned
+#include <rte_memory.h>
+#endif
+
 #include <rte_mbuf.h>
 #include <pcap.h>
 #include <string.h>
@@ -22,7 +27,9 @@
 #include <rte_version.h>
 #include <rte_byteorder.h>
 #include <rte_ether.h>
+#include <rte_hash.h>
 #include <rte_hash_crc.h>
+#include <rte_malloc.h>
 
 #include "prox_shared.h"
 #include "random.h"
@@ -47,20 +54,36 @@
 #include "arp.h"
 #include "tx_pkt.h"
 #include "handle_master.h"
+#include "defines.h"
+#include "prox_ipv6.h"
+#include "handle_lb_5tuple.h"
 
 struct pkt_template {
 	uint16_t len;
 	uint16_t l2_len;
 	uint16_t l3_len;
-	uint8_t  buf[ETHER_MAX_LEN];
+	uint8_t  *buf;
 };
 
-#define MAX_TEMPLATE_INDEX	65536
-#define TEMPLATE_INDEX_MASK	(MAX_TEMPLATE_INDEX - 1)
-#define MBUF_ARP		MAX_TEMPLATE_INDEX
+#define MAX_STORE_PKT_SIZE	2048
+
+struct packet {
+	unsigned int len;
+	unsigned char buf[MAX_STORE_PKT_SIZE];
+};
 
 #define IP4(x) x & 0xff, (x >> 8) & 0xff, (x >> 16) & 0xff, x >> 24
 
+#define DO_PANIC	1
+#define DO_NOT_PANIC	0
+
+#define FROM_PCAP	1
+#define NOT_FROM_PCAP	0
+
+#define MAX_RANGES	64
+
+#define TASK_OVERWRITE_SRC_MAC_WITH_PORT_MAC 1
+
 static void pkt_template_init_mbuf(struct pkt_template *pkt_template, struct rte_mbuf *mbuf, uint8_t *pkt)
 {
 	const uint32_t pkt_size = pkt_template->len;
@@ -81,12 +104,16 @@ struct task_gen_pcap {
 	uint32_t n_pkts;
 	uint64_t last_tsc;
 	uint64_t *proto_tsc;
+	uint32_t socket_id;
+};
+
+struct flows {
+	uint32_t packet_id;
 };
 
 struct task_gen {
 	struct task_base base;
 	uint64_t hz;
-	uint64_t link_speed;
 	struct token_time token_time;
 	struct local_mbuf local_mbuf;
 	struct pkt_template *pkt_template; /* packet templates used at runtime */
@@ -95,16 +122,22 @@ struct task_gen {
 	uint64_t new_rate_bps;
 	uint64_t pkt_queue_index;
 	uint32_t n_pkts; /* number of packets in pcap */
+	uint32_t orig_n_pkts; /* number of packets in pcap */
 	uint32_t pkt_idx; /* current packet from pcap */
 	uint32_t pkt_count; /* how many pakets to generate */
+	uint32_t max_frame_size;
 	uint32_t runtime_flags;
 	uint16_t lat_pos;
 	uint16_t packet_id_pos;
 	uint16_t accur_pos;
 	uint16_t sig_pos;
+	uint16_t flow_id_pos;
+	uint16_t packet_id_in_flow_pos;
 	uint32_t sig;
+	uint32_t socket_id;
 	uint8_t generator_id;
 	uint8_t n_rands; /* number of randoms */
+	uint8_t n_ranges; /* number of ranges */
 	uint8_t min_bulk_size;
 	uint8_t max_bulk_size;
 	uint8_t lat_enabled;
@@ -116,20 +149,37 @@ struct task_gen {
 		uint16_t rand_offset; /* each random has an offset*/
 		uint8_t rand_len; /* # bytes to take from random (no bias introduced) */
 	} rand[64];
-	uint64_t accur[64];
+	struct range ranges[MAX_RANGES];
+	uint64_t accur[ACCURACY_WINDOW];
 	uint64_t pkt_tsc_offset[64];
 	struct pkt_template *pkt_template_orig; /* packet templates (from inline or from pcap) */
-	struct ether_addr  src_mac;
+	prox_rte_ether_addr  src_mac;
 	uint8_t flags;
 	uint8_t cksum_offload;
 	struct prox_port_cfg *port;
+	uint64_t *bytes_to_tsc;
+	uint32_t imix_pkt_sizes[MAX_IMIX_PKTS];
+	uint32_t imix_nb_pkts;
+	uint32_t new_imix_nb_pkts;
+	uint32_t store_pkt_id;
+	uint32_t store_msk;
+	struct packet *store_buf;
+	FILE *fp;
+	struct rte_hash *flow_id_table;
+	struct flows*flows;
 } __rte_cache_aligned;
 
-static inline uint8_t ipv4_get_hdr_len(struct ipv4_hdr *ip)
+static void task_gen_set_pkt_templates_len(struct task_gen *task, uint32_t *pkt_sizes);
+static void task_gen_reset_pkt_templates_content(struct task_gen *task);
+static void task_gen_pkt_template_recalc_metadata(struct task_gen *task);
+static int check_all_pkt_size(struct task_gen *task, int do_panic);
+static int check_all_fields_in_bounds(struct task_gen *task, int do_panic);
+
+static inline uint8_t ipv4_get_hdr_len(prox_rte_ipv4_hdr *ip)
 {
 	/* Optimize for common case of IPv4 header without options. */
 	if (ip->version_ihl == 0x45)
-		return sizeof(struct ipv4_hdr);
+		return sizeof(prox_rte_ipv4_hdr);
 	if (unlikely(ip->version_ihl >> 4 != 4)) {
 		plog_warn("IPv4 ether_type but IP version = %d != 4", ip->version_ihl >> 4);
 		return 0;
@@ -139,16 +189,16 @@ static inline uint8_t ipv4_get_hdr_len(struct ipv4_hdr *ip)
 
 static void parse_l2_l3_len(uint8_t *pkt, uint16_t *l2_len, uint16_t *l3_len, uint16_t len)
 {
-	*l2_len = sizeof(struct ether_hdr);
+	*l2_len = sizeof(prox_rte_ether_hdr);
 	*l3_len = 0;
-	struct vlan_hdr *vlan_hdr;
-	struct ether_hdr *eth_hdr = (struct ether_hdr*)pkt;
-	struct ipv4_hdr *ip;
+	prox_rte_vlan_hdr *vlan_hdr;
+	prox_rte_ether_hdr *eth_hdr = (prox_rte_ether_hdr*)pkt;
+	prox_rte_ipv4_hdr *ip;
 	uint16_t ether_type = eth_hdr->ether_type;
 
 	// Unstack VLAN tags
-	while (((ether_type == ETYPE_8021ad) || (ether_type == ETYPE_VLAN)) && (*l2_len + sizeof(struct vlan_hdr) < len)) {
-		vlan_hdr = (struct vlan_hdr *)(pkt + *l2_len);
+	while (((ether_type == ETYPE_8021ad) || (ether_type == ETYPE_VLAN)) && (*l2_len + sizeof(prox_rte_vlan_hdr) < len)) {
+		vlan_hdr = (prox_rte_vlan_hdr *)(pkt + *l2_len);
 		*l2_len +=4;
 		ether_type = vlan_hdr->eth_proto;
 	}
@@ -161,11 +211,11 @@ static void parse_l2_l3_len(uint8_t *pkt, uint16_t *l2_len, uint16_t *l3_len, ui
 	case ETYPE_MPLSM:
 		*l2_len +=4;
 		break;
+	case ETYPE_IPv6:
 	case ETYPE_IPv4:
 		break;
 	case ETYPE_EoGRE:
 	case ETYPE_ARP:
-	case ETYPE_IPv6:
 		*l2_len = 0;
 		break;
 	default:
@@ -175,8 +225,9 @@ static void parse_l2_l3_len(uint8_t *pkt, uint16_t *l2_len, uint16_t *l3_len, ui
 	}
 
 	if (*l2_len) {
-		struct ipv4_hdr *ip = (struct ipv4_hdr *)(pkt + *l2_len);
-		*l3_len = ipv4_get_hdr_len(ip);
+		prox_rte_ipv4_hdr *ip = (prox_rte_ipv4_hdr *)(pkt + *l2_len);
+		if (ip->version_ihl >> 4 == 4)
+			*l3_len = ipv4_get_hdr_len(ip);
 	}
 }
 
@@ -185,9 +236,20 @@ static void checksum_packet(uint8_t *hdr, struct rte_mbuf *mbuf, struct pkt_temp
 	uint16_t l2_len = pkt_template->l2_len;
 	uint16_t l3_len = pkt_template->l3_len;
 
-	if (l2_len) {
-		struct ipv4_hdr *ip = (struct ipv4_hdr*)(hdr + l2_len);
+	prox_rte_ipv4_hdr *ip = (prox_rte_ipv4_hdr*)(hdr + l2_len);
+	if (l3_len) {
 		prox_ip_udp_cksum(mbuf, ip, l2_len, l3_len, cksum_offload);
+	} else if (ip->version_ihl >> 4 == 6) {
+		prox_rte_ipv6_hdr *ip6 = (prox_rte_ipv6_hdr *)(hdr + l2_len);
+		if (ip6->proto == IPPROTO_UDP) {
+			prox_rte_udp_hdr *udp = (prox_rte_udp_hdr *)(ip6 + 1);
+			udp->dgram_cksum = 0;
+			udp->dgram_cksum = rte_ipv6_udptcp_cksum(ip6, udp);
+		} else if (ip6->proto == IPPROTO_TCP) {
+			prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)(ip6 + 1);
+			tcp->cksum = 0;
+			tcp->cksum = rte_ipv6_udptcp_cksum(ip6, tcp);
+		}
 	}
 }
 
@@ -261,20 +323,14 @@ static int handle_gen_pcap_bulk(struct task_base *tbase, struct rte_mbuf **mbuf,
 	return task->base.tx_pkt(&task->base, new_pkts, send_bulk, NULL);
 }
 
-static uint64_t bytes_to_tsc(struct task_gen *task, uint32_t bytes)
+static inline uint64_t bytes_to_tsc(struct task_gen *task, uint32_t bytes)
 {
-	const uint64_t hz = task->hz;
-	const uint64_t bytes_per_hz = task->link_speed;
-
-	if (bytes_per_hz == UINT64_MAX)
-		return 0;
-
-	return hz * bytes / bytes_per_hz;
+	return task->bytes_to_tsc[bytes];
 }
 
 static uint32_t task_gen_next_pkt_idx(const struct task_gen *task, uint32_t pkt_idx)
 {
-	return pkt_idx + 1 == task->n_pkts? 0 : pkt_idx + 1;
+	return pkt_idx + 1 >= task->n_pkts? 0 : pkt_idx + 1;
 }
 
 static uint32_t task_gen_offset_pkt_idx(const struct task_gen *task, uint32_t offset)
@@ -354,41 +410,168 @@ static void task_gen_apply_all_random_fields(struct task_gen *task, uint8_t **pk
 		task_gen_apply_random_fields(task, pkt_hdr[i]);
 }
 
-static void task_gen_apply_accur_pos(struct task_gen *task, uint8_t *pkt_hdr, uint32_t accuracy)
+static void task_gen_apply_ranges(struct task_gen *task, uint8_t *pkt_hdr)
 {
-	*(uint32_t *)(pkt_hdr + task->accur_pos) = accuracy;
+	uint32_t ret;
+	if (!task->n_ranges)
+		return;
+
+	for (uint16_t j = 0; j < task->n_ranges; ++j) {
+		if (unlikely(task->ranges[j].value == task->ranges[j].max))
+			task->ranges[j].value = task->ranges[j].min;
+		else
+			task->ranges[j].value++;
+		ret = rte_bswap32(task->ranges[j].value);
+		uint8_t *pret = (uint8_t*)&ret;
+		rte_memcpy(pkt_hdr + task->ranges[j].offset, pret + 4 - task->ranges[j].range_len, task->ranges[j].range_len);
+	}
+}
+
+static void task_gen_apply_all_ranges(struct task_gen *task, uint8_t **pkt_hdr, uint32_t count)
+{
+	uint32_t ret;
+	if (!task->n_ranges)
+		return;
+
+	for (uint16_t i = 0; i < count; ++i) {
+		task_gen_apply_ranges(task, pkt_hdr[i]);
+	}
 }
 
-static void task_gen_apply_sig(struct task_gen *task, uint8_t *pkt_hdr)
+static inline uint32_t gcd(uint32_t a, uint32_t b)
 {
-	*(uint32_t *)(pkt_hdr + task->sig_pos) = task->sig;
+	// Euclidean algorithm
+	uint32_t t;
+	while (b != 0) {
+		t = b;
+		b = a % b;
+		a = t;
+	}
+	return a;
 }
 
-static void task_gen_apply_all_accur_pos(struct task_gen *task, struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count)
+static inline uint32_t lcm(uint32_t a, uint32_t b)
 {
-	if (!task->accur_pos)
-		return;
+	return ((a / gcd(a, b)) * b);
+}
 
-	/* The accuracy of task->pkt_queue_index - 64 is stored in
-	   packet task->pkt_queue_index. The ID modulo 64 is the
-	   same. */
-	for (uint16_t j = 0; j < count; ++j) {
-		if ((mbufs[j]->udata64 & MBUF_ARP) == 0) {
-			uint32_t accuracy = task->accur[(task->pkt_queue_index + j) & 63];
-			task_gen_apply_accur_pos(task, pkt_hdr[j], accuracy);
+static uint32_t get_n_range_flows(struct task_gen *task)
+{
+	uint32_t t = 1;
+	for (int i = 0; i < task->n_ranges; i++) {
+		t = lcm((task->ranges[i].max - task->ranges[i].min) + 1, t);
+	}
+	return t;
+}
+
+// Number of value combinations the random fields can take: 2^(total number of bits set in the random masks)
+static uint32_t get_n_rand_flows(struct task_gen *task)
+{
+	uint32_t t = 0;
+	for (int i = 0; i < task->n_rands; i++)
+		t += __builtin_popcount(task->rand[i].rand_mask);
+	PROX_PANIC(t > 31, "Too many random bits - maximum 31 supported\n");
+	return 1U << t;	// unsigned shift: 1 << 31 would overflow a signed int
+}
+
+//void add_to_hash_table(struct task_gen *task, uint32_t *buffer, uint32_t *idx, uint32_t mask, uint32_t bit_pos, uint32_t val, uint32_t fixed_bits, uint32_t rand_offset) {
+//		uint32_t ret_tmp = val | fixed_bits;
+//		ret_tmp = rte_bswap32(ret_tmp);
+//		uint8_t *pret_tmp = (uint8_t*)&ret_tmp;
+//		rte_memcpy(buf + rand_offset, pret_tmp + 4 - rand_len, rand_len);
+//
+// init idx
+// alloc buffer
+// init/alloc hash_table
+//void build_buffer(struct task_gen *task, uint32_t *buffer, uint32_t *idx, uint32_t mask, uint32_t bit_pos, uint32_t val)
+//{
+//	if (mask == 0) {
+//		buffer[*idx] = val;
+//		*idx = (*idx) + 1;
+//		return;
+//	}
+//	build_buffer(task, but, mask >> 1, bit_pos + 1, val);
+//	if (mask & 1) {
+//		build_buffer(task, but, mask >> 1, bit_pos + 1, val | (1 << bit_pos));
+//}
+
+// Pre-populates flow_id_table with one key per (original template, range values) combination; random fields are not covered
+static void build_flow_table(struct task_gen *task)
+{
+	uint8_t buf[2048], *key_fields;
+	union ipv4_5tuple_host key;
+	uint32_t n_range_flows = get_n_range_flows(task);
+	// uint32_t n_rand_flows = get_n_rand_flows(task);
+	// uint32_t n_flows= n_range_flows * n_rand_flows * task->orig_n_pkts;
+	// for (int i = 0; i < task->n_rands; i++) {
+	// 	build_buffer(task, task->values_buf[i], &task->values_idx[i], task->rand[i].rand_mask, 0, 0);
+	// }
+
+	for (uint32_t k = 0; k < task->orig_n_pkts; k++) {
+		// Template bufs are allocated with max_frame_size bytes: never overrun the local scratch copy
+		PROX_PANIC(task->pkt_template[k].len > sizeof(buf), "Packet template too big (%u bytes) to build flow table\n", task->pkt_template[k].len);
+		memcpy(buf, task->pkt_template[k].buf, task->pkt_template[k].len);
+		for (uint32_t j = 0; j < n_range_flows; j++) {
+			task_gen_apply_ranges(task, buf);
+			key_fields = buf + sizeof(prox_rte_ether_hdr) + offsetof(prox_rte_ipv4_hdr, time_to_live);
+			key.xmm = _mm_loadu_si128((__m128i*)(key_fields));
+			key.pad0 = key.pad1 = 0;
+			int idx = rte_hash_add_key(task->flow_id_table, (const void *)&key);
+			PROX_PANIC(idx < 0, "Unable to add key in table\n");
+			if (idx >= 0)
+				plog_dbg("Added key %d, %x, %x, %x, %x\n", key.proto, key.ip_src, key.ip_dst, key.port_src, key.port_dst);
+		}
+	}
+}
+
+static int32_t task_gen_get_flow_id(struct task_gen *task, uint8_t *pkt_hdr)	// Returns the rte_hash_lookup() result for the packet's key (negative when the key is not in flow_id_table)
+{
+	int ret = 0;
+	union ipv4_5tuple_host key;
+	uint8_t *hdr = pkt_hdr + sizeof(prox_rte_ether_hdr) + offsetof(prox_rte_ipv4_hdr, time_to_live);	// fixed offset: no VLAN/MPLS unstacking is done here
+	// The key is the 16 bytes starting at the IPv4 TTL field, built the same way as in build_flow_table().
+	// pad0/pad1 are cleared afterwards - presumably they overlay TTL and header checksum (TODO confirm in handle_lb_5tuple.h)
+	key.xmm = _mm_loadu_si128((__m128i*)(hdr));
+	key.pad0 = key.pad1 = 0;
+	ret = rte_hash_lookup(task->flow_id_table, (const void *)&key);
+	if (ret < 0) {
+		plog_err("Flow not found: %d, %x, %x, %x, %x\n", key.proto, key.ip_src, key.ip_dst, key.port_src, key.port_dst);
+	}
+	return ret;
+}
+
+static void task_gen_apply_all_flow_id(struct task_gen *task, uint8_t **pkt_hdr, uint32_t count, int32_t *flow_id)
+{
+	if (task->flow_id_pos) {
+		for (uint16_t j = 0; j < count; ++j) {
+			flow_id[j] = task_gen_get_flow_id(task, pkt_hdr[j]);
+			*(int32_t *)(pkt_hdr[j] + task->flow_id_pos) = flow_id[j];
 		}
 	}
 }
 
-static void task_gen_apply_all_sig(struct task_gen *task, struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count)
+static void task_gen_apply_accur_pos(struct task_gen *task, uint8_t *pkt_hdr, uint32_t accuracy)
 {
-	if (!task->sig_pos)
+	*(uint32_t *)(pkt_hdr + task->accur_pos) = accuracy;
+}
+
+static void task_gen_apply_sig(struct task_gen *task, struct pkt_template *dst)
+{
+	if (task->sig_pos)
+		*(uint32_t *)(dst->buf + task->sig_pos) = task->sig;
+}
+
+static void task_gen_apply_all_accur_pos(struct task_gen *task, uint8_t **pkt_hdr, uint32_t count)
+{
+	if (!task->accur_pos)
 		return;
 
+	/* The accuracy of task->pkt_queue_index - ACCURACY_WINDOW is stored in
+	   packet task->pkt_queue_index. The ID modulo ACCURACY_WINDOW is the
+	   same. */
 	for (uint16_t j = 0; j < count; ++j) {
-		if ((mbufs[j]->udata64 & MBUF_ARP) == 0) {
-			task_gen_apply_sig(task, pkt_hdr[j]);
-		}
+		uint32_t accuracy = task->accur[(task->pkt_queue_index + j) & (ACCURACY_WINDOW - 1)];
+		task_gen_apply_accur_pos(task, pkt_hdr[j], accuracy);
 	}
 }
 
@@ -399,16 +582,34 @@ static void task_gen_apply_unique_id(struct task_gen *task, uint8_t *pkt_hdr, co
 	*dst = *id;
 }
 
-static void task_gen_apply_all_unique_id(struct task_gen *task, struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count)
+static void task_gen_apply_all_unique_id(struct task_gen *task, uint8_t **pkt_hdr, uint32_t count)
 {
 	if (!task->packet_id_pos)
 		return;
 
 	for (uint16_t i = 0; i < count; ++i) {
-		if ((mbufs[i]->udata64 & MBUF_ARP) == 0) {
-			struct unique_id id;
-			unique_id_init(&id, task->generator_id, task->pkt_queue_index++);
-			task_gen_apply_unique_id(task, pkt_hdr[i], &id);
+		struct unique_id id;
+		unique_id_init(&id, task->generator_id, task->pkt_queue_index++);
+		task_gen_apply_unique_id(task, pkt_hdr[i], &id);
+	}
+}
+
+static void task_gen_apply_id_in_flows(struct task_gen *task, uint8_t *pkt_hdr, const struct unique_id *id)
+{
+	struct unique_id *dst = (struct unique_id *)(pkt_hdr + task->packet_id_in_flow_pos);
+	*dst = *id;
+}
+
+static void task_gen_apply_all_id_in_flows(struct task_gen *task, uint8_t **pkt_hdr, uint32_t count, int32_t *idx)
+{
+	if (!task->packet_id_in_flow_pos)
+		return;
+
+	for (uint16_t i = 0; i < count; ++i) {
+		struct unique_id id;
+		if (idx[i] >= 0 ) {
+			unique_id_init(&id, task->generator_id, task->flows[idx[i]].packet_id++);
+			task_gen_apply_id_in_flows(task, pkt_hdr[i], &id);
 		}
 	}
 }
@@ -423,11 +624,9 @@ static void task_gen_checksum_packets(struct task_gen *task, struct rte_mbuf **m
 
 	uint32_t pkt_idx = task_gen_offset_pkt_idx(task, - count);
 	for (uint16_t i = 0; i < count; ++i) {
-		if ((mbufs[i]->udata64 & MBUF_ARP) == 0) {
-			struct pkt_template *pkt_template = &task->pkt_template[pkt_idx];
-			checksum_packet(pkt_hdr[i], mbufs[i], pkt_template, task->cksum_offload);
-			pkt_idx = task_gen_next_pkt_idx(task, pkt_idx);
-		}
+		struct pkt_template *pkt_template = &task->pkt_template[pkt_idx];
+		checksum_packet(pkt_hdr[i], mbufs[i], pkt_template, task->cksum_offload);
+		pkt_idx = task_gen_next_pkt_idx(task, pkt_idx);
 	}
 }
 
@@ -447,8 +646,12 @@ static uint64_t task_gen_calc_bulk_duration(struct task_gen *task, uint32_t coun
 	uint32_t pkt_idx = task_gen_offset_pkt_idx(task, - 1);
 	struct pkt_template *last_pkt_template = &task->pkt_template[pkt_idx];
 	uint32_t last_pkt_len = pkt_len_to_wire_size(last_pkt_template->len);
+#ifdef NO_EXTRAPOLATION
+	uint64_t bulk_duration = task->pkt_tsc_offset[count - 1];
+#else
 	uint64_t last_pkt_duration = bytes_to_tsc(task, last_pkt_len);
 	uint64_t bulk_duration = task->pkt_tsc_offset[count - 1] + last_pkt_duration;
+#endif
 
 	return bulk_duration;
 }
@@ -483,6 +686,14 @@ static uint64_t task_gen_write_latency(struct task_gen *task, uint8_t **pkt_hdr,
 	   simply sleeping until delta_t is zero would leave a period
 	   of silence on the line. The error has been introduced
 	   earlier, but the packets have already been sent. */
+
+	/* This happens typically if previous bulk was delayed
+	   by an interrupt e.g.  (with Time in nsec)
+	   Time x: sleep 4 microsec
+	   Time x+4000: send 64 packets (64 packets as 4000 nsec, w/ 10Gbps 64 bytes)
+	   Time x+5000: send 16 packets (16 packets as 1000 nsec)
+	   When we send the 16 packets, the 64 earlier packets are not yet
+	   fully sent */
 	if (tx_tsc < task->earliest_tsc_next_pkt)
 		delta_t = task->earliest_tsc_next_pkt - tx_tsc;
 	else
@@ -491,12 +702,10 @@ static uint64_t task_gen_write_latency(struct task_gen *task, uint8_t **pkt_hdr,
 	for (uint16_t i = 0; i < count; ++i) {
 		uint32_t *pos = (uint32_t *)(pkt_hdr[i] + task->lat_pos);
 		const uint64_t pkt_tsc = tx_tsc + delta_t + task->pkt_tsc_offset[i];
-
 		*pos = pkt_tsc >> LATENCY_ACCURACY;
 	}
 
 	uint64_t bulk_duration = task_gen_calc_bulk_duration(task, count);
-
 	task->earliest_tsc_next_pkt = tx_tsc + delta_t + bulk_duration;
 	write_tsc_after = rte_rdtsc();
 	task->write_duration_estimate = write_tsc_after - write_tsc_before;
@@ -506,6 +715,7 @@ static uint64_t task_gen_write_latency(struct task_gen *task, uint8_t **pkt_hdr,
 	do {
 		tsc_before_tx = rte_rdtsc();
 	} while (tsc_before_tx < tx_tsc);
+
 	return tsc_before_tx;
 }
 
@@ -518,7 +728,7 @@ static void task_gen_store_accuracy(struct task_gen *task, uint32_t count, uint6
 	uint64_t first_accuracy_idx = task->pkt_queue_index - count;
 
 	for (uint32_t i = 0; i < count; ++i) {
-		uint32_t accuracy_idx = (first_accuracy_idx + i) & 63;
+		uint32_t accuracy_idx = (first_accuracy_idx + i) & (ACCURACY_WINDOW - 1);
 
 		task->accur[accuracy_idx] = accur;
 	}
@@ -542,20 +752,218 @@ static void task_gen_build_packets(struct task_gen *task, struct rte_mbuf **mbuf
 		struct pkt_template *pktpl = &task->pkt_template[task->pkt_idx];
 		struct pkt_template *pkt_template = &task->pkt_template[task->pkt_idx];
 		pkt_template_init_mbuf(pkt_template, mbufs[i], pkt_hdr[i]);
-		mbufs[i]->udata64 = task->pkt_idx & TEMPLATE_INDEX_MASK;
-		struct ether_hdr *hdr = (struct ether_hdr *)pkt_hdr[i];
+		prox_rte_ether_hdr *hdr = (prox_rte_ether_hdr *)pkt_hdr[i];
 		if (task->lat_enabled) {
+#ifdef NO_EXTRAPOLATION
+			task->pkt_tsc_offset[i] = 0;
+#else
 			task->pkt_tsc_offset[i] = bytes_to_tsc(task, will_send_bytes);
+#endif
 			will_send_bytes += pkt_len_to_wire_size(pkt_template->len);
 		}
 		task->pkt_idx = task_gen_next_pkt_idx(task, task->pkt_idx);
 	}
 }
 
+static int task_gen_allocate_templates(struct task_gen *task, uint32_t orig_nb_pkts, uint32_t nb_pkts, int do_panic, int pcap)
+{
+	size_t mem_size = nb_pkts * sizeof(*task->pkt_template);
+	size_t orig_mem_size = orig_nb_pkts * sizeof(*task->pkt_template);
+	task->pkt_template = prox_zmalloc(mem_size, task->socket_id);
+	task->pkt_template_orig = prox_zmalloc(orig_mem_size, task->socket_id);
+
+	if (task->pkt_template == NULL || task->pkt_template_orig == NULL) {
+		plog_err_or_panic(do_panic, "Failed to allocate %lu bytes (in huge pages) for %s\n", mem_size, pcap ? "pcap file":"packet template");
+		return -1;
+	}
+
+	for (size_t i = 0; i < orig_nb_pkts; i++) {
+		task->pkt_template_orig[i].buf = prox_zmalloc(task->max_frame_size, task->socket_id);
+		if (task->pkt_template_orig[i].buf == NULL) {
+			plog_err_or_panic(do_panic, "Failed to allocate %u bytes (in huge pages) for %s\n", task->max_frame_size, pcap ? "packet from pcap": "packet");
+			return -1;
+		}
+	}
+	for (size_t i = 0; i < nb_pkts; i++) {
+		task->pkt_template[i].buf = prox_zmalloc(task->max_frame_size, task->socket_id);
+		if (task->pkt_template[i].buf == NULL) {
+			plog_err_or_panic(do_panic, "Failed to allocate %u bytes (in huge pages) for %s\n", task->max_frame_size, pcap ? "packet from pcap": "packet");
+			return -1;
+		}
+	}
+	return 0;
+}
+
+// Resizes task->pkt_template to nb_pkts entries, keeping the entries (and bufs) common to both sizes; returns 0, or -1 on allocation failure
+static int task_gen_reallocate_templates(struct task_gen *task, uint32_t nb_pkts, int do_panic)
+{
+	size_t mem_size = nb_pkts * sizeof(*task->pkt_template);
+	size_t old_mem_size = task->n_pkts * sizeof(*task->pkt_template);
+	if (old_mem_size > mem_size)
+		old_mem_size = mem_size;
+
+	// Allocate the new array before touching the old one: on failure the templates still in use stay intact
+	struct pkt_template *ptr = rte_malloc_socket(NULL, mem_size, RTE_CACHE_LINE_SIZE, task->socket_id);
+	if (ptr == NULL) {
+		plog_err_or_panic(do_panic, "Failed to allocate %lu bytes (in huge pages) for packet template for IMIX\n", mem_size);
+		return -1;
+	}
+
+	// Need to free up bufs allocated in previous (longer) imix
+	for (size_t i = nb_pkts; i < task->n_pkts; i++) {
+		if (task->pkt_template[i].buf) {
+			rte_free(task->pkt_template[i].buf);
+			task->pkt_template[i].buf = NULL;
+		}
+	}
+
+	memcpy(ptr, task->pkt_template, old_mem_size);
+	rte_free(task->pkt_template);
+	task->pkt_template = ptr;
+
+	// Need to allocate bufs for new template but no need to reallocate for existing ones
+	for (size_t i = task->n_pkts; i < nb_pkts; ++i) {
+		task->pkt_template[i].buf = prox_zmalloc(task->max_frame_size, task->socket_id);
+		if (task->pkt_template[i].buf == NULL) {
+			plog_err_or_panic(do_panic, "Failed to allocate %u bytes (in huge pages) for packet %zd in IMIX\n", task->max_frame_size, i);
+			return -1;
+		}
+	}
+	return 0;
+}
+
+static int check_pkt_size(struct task_gen *task, uint32_t pkt_size, int do_panic)
+{
+	const uint16_t min_len = sizeof(prox_rte_ether_hdr) + sizeof(prox_rte_ipv4_hdr);
+	const uint16_t max_len = task->max_frame_size;
+
+	if (do_panic) {
+		PROX_PANIC(pkt_size == 0, "Invalid packet size length (no packet defined?)\n");
+		PROX_PANIC(pkt_size > max_len, "pkt_size out of range (must be <= %u)\n", max_len);
+		PROX_PANIC(pkt_size < min_len, "pkt_size out of range (must be >= %u)\n", min_len);
+		return 0;
+	} else {
+		if (pkt_size == 0) {
+			plog_err("Invalid packet size length (no packet defined?)\n");
+			return -1;
+		}
+		if (pkt_size > max_len) {
+			if (pkt_size >  PROX_RTE_ETHER_MAX_LEN + 2 * PROX_VLAN_TAG_SIZE - 4)
+				plog_err("pkt_size too high and jumbo frames disabled\n");
+			else
+				plog_err("pkt_size out of range (must be <= (mtu=%u))\n", max_len);
+			return -1;
+		}
+		if (pkt_size < min_len) {
+			plog_err("pkt_size out of range (must be >= %u)\n", min_len);
+			return -1;
+		}
+		return 0;
+	}
+}
+
+static int check_fields_in_bounds(struct task_gen *task, uint32_t pkt_size, int do_panic)
+{
+	if (task->lat_enabled) {
+		uint32_t pos_beg = task->lat_pos;
+		uint32_t pos_end = task->lat_pos + 3U;
+
+		if (do_panic)
+			PROX_PANIC(pkt_size <= pos_end, "Writing latency at %u-%u, but packet size is %u bytes\n",
+			   pos_beg, pos_end, pkt_size);
+		else if (pkt_size <= pos_end) {
+			plog_err("Writing latency at %u-%u, but packet size is %u bytes\n", pos_beg, pos_end, pkt_size);
+			return -1;
+		}
+	}
+	if (task->packet_id_pos) {
+		uint32_t pos_beg = task->packet_id_pos;
+		uint32_t pos_end = task->packet_id_pos + 4U;
+
+		if (do_panic)
+			PROX_PANIC(pkt_size <= pos_end, "Writing packet at %u-%u, but packet size is %u bytes\n",
+			   pos_beg, pos_end, pkt_size);
+		else if (pkt_size <= pos_end) {
+			plog_err("Writing packet at %u-%u, but packet size is %u bytes\n", pos_beg, pos_end, pkt_size);
+			return -1;
+		}
+	}
+	if (task->accur_pos) {
+		uint32_t pos_beg = task->accur_pos;
+		uint32_t pos_end = task->accur_pos + 3U;
+
+		if (do_panic)
+			PROX_PANIC(pkt_size <= pos_end, "Writing accuracy at %u-%u, but packet size is %u bytes\n",
+			   pos_beg, pos_end, pkt_size);
+		else if (pkt_size <= pos_end) {
+			plog_err("Writing accuracy at %u-%u, but packet size is %u bytes\n", pos_beg, pos_end, pkt_size);
+			return -1;
+		}
+	}
+	return 0;
+}
+
+// Rewrites IPv4 total length, UDP length and IPv4/UDP/TCP checksums of each template to match pkt_sizes[j]; always returns 0
+static int task_gen_set_eth_ip_udp_sizes(struct task_gen *task, uint32_t orig_n_pkts, uint32_t nb_pkt_sizes, uint32_t *pkt_sizes)
+{
+	uint32_t l4_len;
+	prox_rte_ipv4_hdr *ip;
+	struct pkt_template *template;
+
+	for (size_t j = 0; j < nb_pkt_sizes; ++j) {
+		for (size_t i = 0; i < orig_n_pkts; ++i) {
+			template = &task->pkt_template[j * orig_n_pkts + i];
+			// l3_len stays 0 when parse_l2_l3_len() finds no IPv4 header (e.g. IPv6): do not patch such packets as IPv4
+			if (template->l2_len == 0 || template->l3_len == 0)
+				continue;
+			ip = (prox_rte_ipv4_hdr *)(template->buf + template->l2_len);
+			ip->total_length = rte_bswap16(pkt_sizes[j] - template->l2_len);
+			l4_len = pkt_sizes[j] - template->l2_len - template->l3_len;
+			ip->hdr_checksum = 0;
+			prox_ip_cksum_sw(ip);
+
+			if (ip->next_proto_id == IPPROTO_UDP) {
+				prox_rte_udp_hdr *udp = (prox_rte_udp_hdr *)(((uint8_t *)ip) + template->l3_len);
+				udp->dgram_len = rte_bswap16(l4_len);
+				prox_udp_cksum_sw(udp, l4_len, ip->src_addr, ip->dst_addr);
+			} else if (ip->next_proto_id == IPPROTO_TCP) {
+				prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)(((uint8_t *)ip) + template->l3_len);
+				prox_tcp_cksum_sw(tcp, l4_len, ip->src_addr, ip->dst_addr);
+			}
+		}
+	}
+	return 0;
+}
+
+static int task_gen_apply_imix(struct task_gen *task, int do_panic)	// Applies the pending IMIX (new_imix_nb_pkts sizes from imix_pkt_sizes) to the packet templates
+{
+	struct pkt_template *ptr;	// NOTE(review): unused
+	int rc;
+	task->imix_nb_pkts = task->new_imix_nb_pkts;
+	uint32_t n_pkts = task->imix_nb_pkts * task->orig_n_pkts;	// one copy of every original template per IMIX size
+
+	if ((n_pkts != task->n_pkts) && ((rc = task_gen_reallocate_templates(task, n_pkts, do_panic)) < 0))
+		return rc;	// NOTE(review): imix_nb_pkts has already been updated at this point while n_pkts has not
+
+	task->n_pkts = n_pkts;
+	if (task->pkt_idx >= n_pkts)	// current index may point past the end after shrinking
+		task->pkt_idx = 0;
+	task_gen_set_pkt_templates_len(task, task->imix_pkt_sizes);
+	task_gen_reset_pkt_templates_content(task);
+	task_gen_pkt_template_recalc_metadata(task);
+	check_all_pkt_size(task, DO_NOT_PANIC);	// NOTE(review): result ignored - a rejected size does not stop the update
+	check_all_fields_in_bounds(task, DO_NOT_PANIC);	// NOTE(review): result ignored as well
+	task_gen_set_eth_ip_udp_sizes(task, task->orig_n_pkts, task->imix_nb_pkts, task->imix_pkt_sizes);
+	return 0;
+}
+
 static void task_gen_update_config(struct task_gen *task)
 {
 	if (task->token_time.cfg.bpp != task->new_rate_bps)
 		task_gen_reset_token_time(task);
+	if (task->new_imix_nb_pkts)
+		task_gen_apply_imix(task, DO_NOT_PANIC);
+	task->new_imix_nb_pkts = 0;
 }
 
 static inline void build_value(struct task_gen *task, uint32_t mask, int bit_pos, uint32_t val, uint32_t fixed_bits)
@@ -570,45 +978,86 @@ static inline void build_value(struct task_gen *task, uint32_t mask, int bit_pos
 		register_ip_to_ctrl_plane(tbase->l3.tmaster, rte_cpu_to_be_32(val | fixed_bits), tbase->l3.reachable_port_id, tbase->l3.core_id, tbase->l3.task_id);
 	}
 }
+
+// Recursively enumerates every combination of the bits set in mask, ORs it with fixed_bits and hands the address to register_node_to_ctrl_plane()
+static inline void build_value_ipv6(struct task_gen *task, uint32_t mask, int var_bit_pos, int init_var_bit_pos, struct ipv6_addr val, struct ipv6_addr fixed_bits)
+{
+	struct task_base *tbase = (struct task_base *)task;
+	if (var_bit_pos < 32) {
+		build_value_ipv6(task, mask >> 1, var_bit_pos + 1, init_var_bit_pos, val, fixed_bits);	// current bit left at 0
+		if (mask & 1) {
+			int pos = var_bit_pos + init_var_bit_pos;	// bit index within the address bytes
+			val.bytes[pos / 8] = val.bytes[pos / 8] | (1 << (pos % 8));
+			build_value_ipv6(task, mask >> 1, var_bit_pos + 1, init_var_bit_pos, val, fixed_bits);	// current bit set to 1
+		}
+	} else {
+		for (uint i = 0; i < sizeof(struct ipv6_addr); i++)	// merge every address byte, not only the first sizeof / 8
+			val.bytes[i] = val.bytes[i] | fixed_bits.bytes[i];
+		register_node_to_ctrl_plane(tbase->l3.tmaster, &null_addr, &val, tbase->l3.reachable_port_id, tbase->l3.core_id, tbase->l3.task_id);
+	}
+}
+
 static inline void register_all_ip_to_ctrl_plane(struct task_gen *task)
 {
 	struct task_base *tbase = (struct task_base *)task;
 	int i, len, fixed;
 	unsigned int offset;
-	uint32_t mask;
+	uint32_t mask, ip_len;
+	struct ipv6_addr *ip6_src = NULL;
+	uint32_t *ip_src;
 
 	for (uint32_t i = 0; i < task->n_pkts; ++i) {
 		struct pkt_template *pktpl = &task->pkt_template[i];
 		unsigned int ip_src_pos = 0;
-		int maybe_ipv4 = 0;
-		unsigned int l2_len = sizeof(struct ether_hdr);
+		int ipv4 = 0;
+		unsigned int l2_len = sizeof(prox_rte_ether_hdr);
 
 		uint8_t *pkt = pktpl->buf;
-		struct ether_hdr *eth_hdr = (struct ether_hdr*)pkt;
+		prox_rte_ether_hdr *eth_hdr = (prox_rte_ether_hdr*)pkt;
 		uint16_t ether_type = eth_hdr->ether_type;
-		struct vlan_hdr *vlan_hdr;
+		prox_rte_vlan_hdr *vlan_hdr;
+		prox_rte_ipv4_hdr *ip;
 
 		// Unstack VLAN tags
-		while (((ether_type == ETYPE_8021ad) || (ether_type == ETYPE_VLAN)) && (l2_len + sizeof(struct vlan_hdr) < pktpl->len)) {
-			vlan_hdr = (struct vlan_hdr *)(pkt + l2_len);
+		while (((ether_type == ETYPE_8021ad) || (ether_type == ETYPE_VLAN)) && (l2_len + sizeof(prox_rte_vlan_hdr) < pktpl->len)) {
+			vlan_hdr = (prox_rte_vlan_hdr *)(pkt + l2_len);
 			l2_len +=4;
 			ether_type = vlan_hdr->eth_proto;
 		}
 		if ((ether_type == ETYPE_MPLSU) || (ether_type == ETYPE_MPLSM)) {
 			l2_len +=4;
-			maybe_ipv4 = 1;
-		}
-		if ((ether_type != ETYPE_IPv4) && !maybe_ipv4)
+			ip = (prox_rte_ipv4_hdr *)(pkt + l2_len);
+			if (ip->version_ihl >> 4 == 4)
+				ipv4 = 1;
+			else if (ip->version_ihl >> 4 != 6)	// Version field at same location for IPv4 and IPv6
+				continue;
+		} else if (ether_type == ETYPE_IPv4) {
+			ip = (prox_rte_ipv4_hdr *)(pkt + l2_len);
+			PROX_PANIC(ip->version_ihl >> 4 != 4, "IPv4 ether_type but IP version = %d != 4", ip->version_ihl >> 4);	// Invalid Packet
+			ipv4 = 1;
+		} else if (ether_type == ETYPE_IPv6) {
+			ip = (prox_rte_ipv4_hdr *)(pkt + l2_len);
+			PROX_PANIC(ip->version_ihl >> 4 != 6, "IPv6 ether_type but IP version = %d != 6", ip->version_ihl >> 4);	// Invalid Packet
+		} else {
 			continue;
+		}
 
-		struct ipv4_hdr *ip = (struct ipv4_hdr *)(pkt + l2_len);
-		PROX_PANIC(ip->version_ihl >> 4 != 4, "IPv4 ether_type but IP version = %d != 4", ip->version_ihl >> 4);
-
-		// Even if IPv4 header contains options, options are after ip src and dst
-		ip_src_pos = l2_len + sizeof(struct ipv4_hdr) - 2 * sizeof(uint32_t);
-		uint32_t *ip_src = ((uint32_t *)(pktpl->buf + ip_src_pos));
-		plog_info("\tip_src_pos = %d, ip_src = %x\n", ip_src_pos, *ip_src);
-		register_ip_to_ctrl_plane(tbase->l3.tmaster, *ip_src, tbase->l3.reachable_port_id, tbase->l3.core_id, tbase->l3.task_id);
+		PROX_PANIC(ipv4 && ((prox_cfg.flags & DSF_L3_ENABLED) == 0), "Trying to generate an IPv4 packet in NDP mode => not supported\n");
+		PROX_PANIC((ipv4 == 0) && ((prox_cfg.flags & DSF_NDP_ENABLED) == 0), "Trying to generate an IPv6 packet in L3 (IPv4) mode => not supported\n");
+		if (ipv4) {
+			// Even if IPv4 header contains options, options are after ip src and dst
+			ip_src_pos = l2_len + sizeof(prox_rte_ipv4_hdr) - 2 * sizeof(uint32_t);
+			ip_src = ((uint32_t *)(pktpl->buf + ip_src_pos));
+			plog_info("\tip_src_pos = %d, ip_src = %x\n", ip_src_pos, *ip_src);
+			register_ip_to_ctrl_plane(tbase->l3.tmaster, *ip_src, tbase->l3.reachable_port_id, tbase->l3.core_id, tbase->l3.task_id);
+			ip_len = sizeof(uint32_t);
+		} else {
+			ip_src_pos = l2_len + sizeof(prox_rte_ipv6_hdr) - 2 * sizeof(struct ipv6_addr);
+			ip6_src = ((struct ipv6_addr *)(pktpl->buf + ip_src_pos));
+			plog_info("\tip_src_pos = %d, ip6_src = "IPv6_BYTES_FMT"\n", ip_src_pos, IPv6_BYTES(ip6_src->bytes));
+			register_node_to_ctrl_plane(tbase->l3.tmaster, ip6_src, &null_addr, tbase->l3.reachable_port_id, tbase->l3.core_id, tbase->l3.task_id);
+			ip_len = sizeof(struct ipv6_addr);
+		}
 
 		for (int j = 0; j < task->n_rands; j++) {
 			offset = task->rand[j].rand_offset;
@@ -616,7 +1065,12 @@ static inline void register_all_ip_to_ctrl_plane(struct task_gen *task)
 			mask = task->rand[j].rand_mask;
 			fixed = task->rand[j].fixed_bits;
 			plog_info("offset = %d, len = %d, mask = %x, fixed = %x\n", offset, len, mask, fixed);
-			if ((offset < ip_src_pos + 4) && (offset + len >= ip_src_pos)) {
+			if (offset >= ip_src_pos + ip_len)	// First random bit after IP
+				continue;
+			if (offset + len < ip_src_pos)		// Last random bit before IP
+				continue;
+
+			if (ipv4) {
 				if (offset >= ip_src_pos) {
 					int32_t ip_src_mask = (1 << (4 + ip_src_pos - offset) * 8) - 1;
 					mask = mask & ip_src_mask;
@@ -628,6 +1082,28 @@ static inline void register_all_ip_to_ctrl_plane(struct task_gen *task)
 					fixed = (fixed << bits) | (rte_be_to_cpu_32(*ip_src) & ((1 << bits) - 1));
 					build_value(task, mask, 0, 0, fixed);
 				}
+			} else {
+				// We do not support when random partially covers IP - either starting before or finishing after
+				if (offset + len >= ip_src_pos + ip_len) { // len over the ip
+					plog_err("Not supported: random_offset = %d, random_len = %d, ip_src_pos = %d, ip_len = %d\n", offset, len, ip_src_pos, ip_len);
+					continue;
+				}
+				if (offset < ip_src_pos) {
+					plog_err("Not supported: random_offset = %d, random_len = %d, ip_src_pos = %d, ip_len = %d\n", offset, len, ip_src_pos, ip_len);
+					continue;
+				}
+				// Even for IPv6 the random mask supported by PROX are 32 bits only
+				struct ipv6_addr fixed_ipv6;
+				uint init_var_byte_pos = (offset - ip_src_pos);
+				for (uint i = 0; i < sizeof(struct ipv6_addr); i++) {
+					if (i < init_var_byte_pos)
+						fixed_ipv6.bytes[i] = ip6_src->bytes[i];
+					else if (i < init_var_byte_pos + len)
+						fixed_ipv6.bytes[i] = (fixed >> (i - init_var_byte_pos)) & 0xFF;
+					else
+						fixed_ipv6.bytes[i] = ip6_src->bytes[i];
+				}
+				build_value_ipv6(task, mask, 0, init_var_byte_pos * 8, null_addr, fixed_ipv6);
 			}
 		}
 	}
@@ -641,16 +1117,6 @@ static int handle_gen_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uin
 
 	int i, j;
 
-	// If link is down, link_speed is 0
-	if (unlikely(task->link_speed == 0)) {
-		if (task->port && task->port->link_speed != 0) {
-			task->link_speed = task->port->link_speed * 125000L;
-			plog_info("\tPort %u: link speed is %ld Mbps\n",
-				(uint8_t)(task->port - prox_port_cfg), 8 * task->link_speed / 1000000);
-		} else
-			return 0;
-	}
-
 	task_gen_update_config(task);
 
 	if (task->pkt_count == 0) {
@@ -674,20 +1140,47 @@ static int handle_gen_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uin
 	if (new_pkts == NULL)
 		return 0;
 	uint8_t *pkt_hdr[MAX_RING_BURST];
-
+	int32_t flow_id[MAX_RING_BURST];
 	task_gen_load_and_prefetch(new_pkts, pkt_hdr, send_bulk);
 	task_gen_build_packets(task, new_pkts, pkt_hdr, send_bulk);
 	task_gen_apply_all_random_fields(task, pkt_hdr, send_bulk);
-	task_gen_apply_all_accur_pos(task, new_pkts, pkt_hdr, send_bulk);
-	task_gen_apply_all_sig(task, new_pkts, pkt_hdr, send_bulk);
-	task_gen_apply_all_unique_id(task, new_pkts, pkt_hdr, send_bulk);
+	task_gen_apply_all_ranges(task, pkt_hdr, send_bulk);
+	task_gen_apply_all_accur_pos(task, pkt_hdr, send_bulk);
+	task_gen_apply_all_flow_id(task, pkt_hdr, send_bulk, flow_id);
+	task_gen_apply_all_unique_id(task, pkt_hdr, send_bulk);
+	task_gen_apply_all_id_in_flows(task, pkt_hdr, send_bulk, flow_id);
 
 	uint64_t tsc_before_tx;
 
 	tsc_before_tx = task_gen_write_latency(task, pkt_hdr, send_bulk);
 	task_gen_checksum_packets(task, new_pkts, pkt_hdr, send_bulk);
+	if (task->store_msk) {
+		for (uint32_t i = 0; i < send_bulk; i++) {
+			if (out[i] != OUT_DISCARD) {
+				uint8_t *hdr;
+				hdr = (uint8_t *)rte_pktmbuf_mtod(new_pkts[i], prox_rte_ether_hdr *);
+				memcpy(&task->store_buf[task->store_pkt_id & task->store_msk].buf, hdr, rte_pktmbuf_pkt_len(new_pkts[i]));
+				task->store_buf[task->store_pkt_id & task->store_msk].len = rte_pktmbuf_pkt_len(new_pkts[i]);
+				task->store_pkt_id++;
+			}
+		}
+	}
 	ret = task->base.tx_pkt(&task->base, new_pkts, send_bulk, out);
 	task_gen_store_accuracy(task, send_bulk, tsc_before_tx);
+
+	// If we failed to send some packets, we need to do some clean-up:
+
+	if (unlikely(ret)) {
+		// We need re-use the packets indexes not being sent
+		// Hence non-sent packets will not be considered as lost by the receiver when it looks at
+		// packet ids. This should also increase the percentage of packets used for latency measurements
+		task->pkt_queue_index -= ret;
+
+		// In case of failures, the estimate about when we can send next packet (earliest_tsc_next_pkt) is wrong
+		// This would result in under-estimated latency (up to 0 or negative)
+		uint64_t bulk_duration = task_gen_calc_bulk_duration(task, ret);
+		task->earliest_tsc_next_pkt -= bulk_duration;
+	}
 	return ret;
 }
 
@@ -697,14 +1190,17 @@ static void init_task_gen_seeds(struct task_gen *task)
 		random_init_seed(&task->rand[i].state);
 }
 
-static uint32_t pcap_count_pkts(pcap_t *handle)
+static uint32_t pcap_count_pkts(pcap_t *handle, uint32_t *max_frame_size)
 {
 	struct pcap_pkthdr header;
 	const uint8_t *buf;
 	uint32_t ret = 0;
+	*max_frame_size = 0;
 	long pkt1_fpos = ftell(pcap_file(handle));
 
 	while ((buf = pcap_next(handle, &header))) {
+		if (header.len > *max_frame_size)
+			*max_frame_size = header.len;
 		ret++;
 	}
 	int ret2 = fseek(pcap_file(handle), pkt1_fpos, SEEK_SET);
@@ -721,7 +1217,7 @@ static uint64_t avg_time_stamp(uint64_t *time_stamp, uint32_t n)
 	return (tot_inter_pkt + n / 2)/n;
 }
 
-static int pcap_read_pkts(pcap_t *handle, const char *file_name, uint32_t n_pkts, struct pkt_template *proto, uint64_t *time_stamp)
+static int pcap_read_pkts(pcap_t *handle, const char *file_name, uint32_t n_pkts, struct pkt_template *proto, uint64_t *time_stamp, uint32_t max_frame_size)
 {
 	struct pcap_pkthdr header;
 	const uint8_t *buf;
@@ -732,7 +1228,7 @@ static int pcap_read_pkts(pcap_t *handle, const char *file_name, uint32_t n_pkts
 
 		PROX_PANIC(buf == NULL, "Failed to read packet %d from pcap %s\n", i, file_name);
 		proto[i].len = header.len;
-		len = RTE_MIN(header.len, sizeof(proto[i].buf));
+		len = RTE_MIN(header.len, max_frame_size);
 		if (header.len > len)
 			plogx_warn("Packet truncated from %u to %zu bytes\n", header.len, len);
 
@@ -764,33 +1260,6 @@ static int pcap_read_pkts(pcap_t *handle, const char *file_name, uint32_t n_pkts
 	return 0;
 }
 
-static int check_pkt_size(struct task_gen *task, uint32_t pkt_size, int do_panic)
-{
-	const uint16_t min_len = sizeof(struct ether_hdr) + sizeof(struct ipv4_hdr);
-	const uint16_t max_len = ETHER_MAX_LEN - 4;
-
-	if (do_panic) {
-		PROX_PANIC(pkt_size == 0, "Invalid packet size length (no packet defined?)\n");
-		PROX_PANIC(pkt_size > max_len, "pkt_size out of range (must be <= %u)\n", max_len);
-		PROX_PANIC(pkt_size < min_len, "pkt_size out of range (must be >= %u)\n", min_len);
-		return 0;
-	} else {
-		if (pkt_size == 0) {
-			plog_err("Invalid packet size length (no packet defined?)\n");
-			return -1;
-		}
-		if (pkt_size > max_len) {
-			plog_err("pkt_size out of range (must be <= %u)\n", max_len);
-			return -1;
-		}
-		if (pkt_size < min_len) {
-			plog_err("pkt_size out of range (must be >= %u)\n", min_len);
-			return -1;
-		}
-		return 0;
-	}
-}
-
 static int check_all_pkt_size(struct task_gen *task, int do_panic)
 {
 	int rc;
@@ -801,43 +1270,12 @@ static int check_all_pkt_size(struct task_gen *task, int do_panic)
 	return 0;
 }
 
-static int check_fields_in_bounds(struct task_gen *task, uint32_t pkt_size, int do_panic)
+static int check_all_fields_in_bounds(struct task_gen *task, int do_panic)
 {
-	if (task->lat_enabled) {
-		uint32_t pos_beg = task->lat_pos;
-		uint32_t pos_end = task->lat_pos + 3U;
-
-		if (do_panic)
-			PROX_PANIC(pkt_size <= pos_end, "Writing latency at %u-%u, but packet size is %u bytes\n",
-			   pos_beg, pos_end, pkt_size);
-		else if (pkt_size <= pos_end) {
-			plog_err("Writing latency at %u-%u, but packet size is %u bytes\n", pos_beg, pos_end, pkt_size);
-			return -1;
-		}
-	}
-	if (task->packet_id_pos) {
-		uint32_t pos_beg = task->packet_id_pos;
-		uint32_t pos_end = task->packet_id_pos + 4U;
-
-		if (do_panic)
-			PROX_PANIC(pkt_size <= pos_end, "Writing packet at %u-%u, but packet size is %u bytes\n",
-			   pos_beg, pos_end, pkt_size);
-		else if (pkt_size <= pos_end) {
-			plog_err("Writing packet at %u-%u, but packet size is %u bytes\n", pos_beg, pos_end, pkt_size);
-			return -1;
-		}
-	}
-	if (task->accur_pos) {
-		uint32_t pos_beg = task->accur_pos;
-		uint32_t pos_end = task->accur_pos + 3U;
-
-		if (do_panic)
-			PROX_PANIC(pkt_size <= pos_end, "Writing accuracy at %u%-u, but packet size is %u bytes\n",
-			   pos_beg, pos_end, pkt_size);
-		else if (pkt_size <= pos_end) {
-			plog_err("Writing accuracy at %u%-u, but packet size is %u bytes\n", pos_beg, pos_end, pkt_size);
-			return -1;
-		}
+	int rc;	// result of the per-template check
+	for (uint32_t i = 0; i < task->n_pkts;++i) {	// run check_fields_in_bounds() on every template, using its current length
+		if ((rc = check_fields_in_bounds(task, task->pkt_template[i].len, do_panic)) != 0)
+			return rc;	// stop at the first failing template and hand its code back
 	}
 	return 0;
 }
@@ -855,25 +1293,37 @@ static void task_gen_pkt_template_recalc_metadata(struct task_gen *task)
 static void task_gen_pkt_template_recalc_checksum(struct task_gen *task)
 {
 	struct pkt_template *template;
-	struct ipv4_hdr *ip;
+	prox_rte_ipv4_hdr *ip;
 
 	task->runtime_checksum_needed = 0;
 	for (size_t i = 0; i < task->n_pkts; ++i) {
 		template = &task->pkt_template[i];
 		if (template->l2_len == 0)
 			continue;
-		ip = (struct ipv4_hdr *)(template->buf + template->l2_len);
-
-		ip->hdr_checksum = 0;
-		prox_ip_cksum_sw(ip);
-		uint32_t l4_len = rte_bswap16(ip->total_length) - template->l3_len;
-
-		if (ip->next_proto_id == IPPROTO_UDP) {
-			struct udp_hdr *udp = (struct udp_hdr *)(((uint8_t *)ip) + template->l3_len);
-			prox_udp_cksum_sw(udp, l4_len, ip->src_addr, ip->dst_addr);
-		} else if (ip->next_proto_id == IPPROTO_TCP) {
-			struct tcp_hdr *tcp = (struct tcp_hdr *)(((uint8_t *)ip) + template->l3_len);
-			prox_tcp_cksum_sw(tcp, l4_len, ip->src_addr, ip->dst_addr);
+		ip = (prox_rte_ipv4_hdr *)(template->buf + template->l2_len);
+		if (ip->version_ihl >> 4 == 4) {
+			ip->hdr_checksum = 0;
+			prox_ip_cksum_sw(ip);
+			uint32_t l4_len = rte_bswap16(ip->total_length) - template->l3_len;
+			if (ip->next_proto_id == IPPROTO_UDP) {
+				prox_rte_udp_hdr *udp = (prox_rte_udp_hdr *)(((uint8_t *)ip) + template->l3_len);
+				prox_udp_cksum_sw(udp, l4_len, ip->src_addr, ip->dst_addr);
+			} else if (ip->next_proto_id == IPPROTO_TCP) {
+				prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)(((uint8_t *)ip) + template->l3_len);
+				prox_tcp_cksum_sw(tcp, l4_len, ip->src_addr, ip->dst_addr);
+			}
+		} else if (ip->version_ihl >> 4 == 6) {
+			prox_rte_ipv6_hdr *ip6;
+			ip6 = (prox_rte_ipv6_hdr *)(template->buf + template->l2_len);
+			if (ip6->proto == IPPROTO_UDP) {
+				prox_rte_udp_hdr *udp = (prox_rte_udp_hdr *)(ip6 + 1);
+				udp->dgram_cksum = 0;
+				udp->dgram_cksum = rte_ipv6_udptcp_cksum(ip6, udp);
+			} else if (ip6->proto == IPPROTO_TCP) {
+				prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)(ip6 + 1);
+				tcp->cksum = 0;
+				tcp->cksum = rte_ipv6_udptcp_cksum(ip6, tcp);
+			}
 		}
 
 		/* The current implementation avoids checksum
@@ -895,14 +1345,28 @@ static void task_gen_pkt_template_recalc_all(struct task_gen *task)
 	task_gen_pkt_template_recalc_checksum(task);
 }
 
+// Give every template of group g the length pkt_sizes[g]; a group is orig_n_pkts consecutive templates
+static void task_gen_set_pkt_templates_len(struct task_gen *task, uint32_t *pkt_sizes)
+{
+	const size_t group_len = task->orig_n_pkts;
+	const size_t n_groups = task->n_pkts / group_len;
+
+	for (size_t g = 0; g < n_groups; ++g) {
+		for (size_t k = 0; k < group_len; ++k)
+			task->pkt_template[g * group_len + k].len = pkt_sizes[g];
+	}
+}
+
 static void task_gen_reset_pkt_templates_len(struct task_gen *task)
 {
 	struct pkt_template *src, *dst;
 
-	for (size_t i = 0; i < task->n_pkts; ++i) {
-		src = &task->pkt_template_orig[i];
-		dst = &task->pkt_template[i];
-		dst->len = src->len;
+	for (size_t j = 0; j < task->n_pkts / task->orig_n_pkts; ++j) {	// j: copy number; assumes orig_n_pkts != 0 — TODO confirm
+		for (size_t i = 0; i < task->orig_n_pkts; ++i) {	// i: index of the original template
+			src = &task->pkt_template_orig[i];
+			dst = &task->pkt_template[j * task->orig_n_pkts + i];	// i-th template of the j-th copy
+			dst->len = src->len;	// restore the length recorded in the original
+		}
 	}
 }
 
@@ -910,82 +1374,105 @@ static void task_gen_reset_pkt_templates_content(struct task_gen *task)
 {
 	struct pkt_template *src, *dst;
 
-	for (size_t i = 0; i < task->n_pkts; ++i) {
-		src = &task->pkt_template_orig[i];
-		dst = &task->pkt_template[i];
-		memcpy(dst->buf, src->buf, dst->len);
+	for (size_t j = 0; j < task->n_pkts / task->orig_n_pkts; ++j) {
+		for (size_t i = 0; i < task->orig_n_pkts; ++i) {
+			src = &task->pkt_template_orig[i];
+			dst = &task->pkt_template[j * task->orig_n_pkts + i];
+			memcpy(dst->buf, src->buf, RTE_MAX(src->len, dst->len));
+			if (task->flags & TASK_OVERWRITE_SRC_MAC_WITH_PORT_MAC) {
+				rte_memcpy(&dst->buf[sizeof(prox_rte_ether_addr)], &task->src_mac, sizeof(prox_rte_ether_addr));
+			}
+			task_gen_apply_sig(task, dst);
+		}
 	}
 }
 
 static void task_gen_reset_pkt_templates(struct task_gen *task)
 {
-	task_gen_reset_pkt_templates_len(task);
+	if (task->imix_nb_pkts)	// IMIX configured: template lengths come from the IMIX size list
+		task_gen_set_pkt_templates_len(task, task->imix_pkt_sizes);
+	else	// no IMIX: each template takes the length of its original
+		task_gen_reset_pkt_templates_len(task);
 	task_gen_reset_pkt_templates_content(task);
 	task_gen_pkt_template_recalc_all(task);
 }
 
 static void task_init_gen_load_pkt_inline(struct task_gen *task, struct task_args *targ)
 {
-	const int socket_id = rte_lcore_to_socket_id(targ->lconf->id);
-
-	if (targ->pkt_size > sizeof(task->pkt_template[0].buf))
-		targ->pkt_size = sizeof(task->pkt_template[0].buf);
-	task->n_pkts = 1;
-
-	size_t mem_size = task->n_pkts * sizeof(*task->pkt_template);
-	task->pkt_template = prox_zmalloc(mem_size, socket_id);
-	task->pkt_template_orig = prox_zmalloc(mem_size, socket_id);
+	// The inline packet is the single original template; with IMIX, n_pkts becomes the number of IMIX sizes
 
-	PROX_PANIC(task->pkt_template == NULL ||
-		   task->pkt_template_orig == NULL,
-		   "Failed to allocate %lu bytes (in huge pages) for pcap file\n", mem_size);
+	task->orig_n_pkts = 1;
+	if (task->imix_nb_pkts == 0) {
+		task->n_pkts = 1;
+		task->imix_pkt_sizes[0] = targ->pkt_size;	// no IMIX: the configured pkt_size acts as the only size
+	} else {
+		task->n_pkts = task->imix_nb_pkts;
+	}
+	task_gen_allocate_templates(task, task->orig_n_pkts, task->n_pkts, DO_PANIC, NOT_FROM_PCAP);
 
-	rte_memcpy(task->pkt_template_orig[0].buf, targ->pkt_inline, targ->pkt_size);
-	task->pkt_template_orig[0].len = targ->pkt_size;
+	rte_memcpy(task->pkt_template_orig[0].buf, targ->pkt_inline, task->max_frame_size);	// NOTE(review): copies max_frame_size bytes whatever pkt_size is — assumes pkt_inline is at least that large; confirm
+	task->pkt_template_orig[0].len = task->imix_pkt_sizes[0];
 	task_gen_reset_pkt_templates(task);
-	check_all_pkt_size(task, 1);
-	check_fields_in_bounds(task, task->pkt_template[0].len, 1);
+	check_all_pkt_size(task, DO_PANIC);
+	check_all_fields_in_bounds(task, DO_PANIC);
+
+	// If IMIX was not specified then pkt_size is specified using pkt_size parameter or the length of pkt_inline
+	// In that case, for backward compatibility, we do NOT adapt the length of IP and UDP to the length of the packet
+	task_gen_set_eth_ip_udp_sizes(task, task->orig_n_pkts, task->imix_nb_pkts, task->imix_pkt_sizes);
 }
 
 static void task_init_gen_load_pcap(struct task_gen *task, struct task_args *targ)
 {
-	const int socket_id = rte_lcore_to_socket_id(targ->lconf->id);
 	char err[PCAP_ERRBUF_SIZE];
+	uint32_t max_frame_size;	// largest header.len found in the pcap, filled in by pcap_count_pkts()
 	pcap_t *handle = pcap_open_offline(targ->pcap_file, err);
 	PROX_PANIC(handle == NULL, "Failed to open PCAP file: %s\n", err);
 
-	task->n_pkts = pcap_count_pkts(handle);
-	plogx_info("%u packets in pcap file '%s'\n", task->n_pkts, targ->pcap_file);
+	task->orig_n_pkts = pcap_count_pkts(handle, &max_frame_size);
+	plogx_info("%u packets in pcap file '%s'; max frame size=%d\n", task->orig_n_pkts, targ->pcap_file, max_frame_size);
+	PROX_PANIC(max_frame_size > task->max_frame_size,	// refuse a pcap holding a frame larger than this task's max_frame_size
+		max_frame_size > PROX_RTE_ETHER_MAX_LEN + 2 * PROX_VLAN_TAG_SIZE -4 ?
+			"pkt_size too high and jumbo frames disabled" : "pkt_size > mtu");
 
 	if (targ->n_pkts)
-		task->n_pkts = RTE_MIN(task->n_pkts, targ->n_pkts);
-	PROX_PANIC(task->n_pkts > MAX_TEMPLATE_INDEX, "Too many packets specified in pcap - increase MAX_TEMPLATE_INDEX\n");
+		task->orig_n_pkts = RTE_MIN(task->orig_n_pkts, targ->n_pkts);	// optional cap on how many pcap packets are used
+	if (task->imix_nb_pkts == 0) {
+		task->n_pkts = task->orig_n_pkts;
+	} else {
+		task->n_pkts = task->imix_nb_pkts * task->orig_n_pkts;	// one template per (IMIX size, pcap packet) pair
+	}
+	task_gen_allocate_templates(task, task->orig_n_pkts, task->n_pkts, DO_PANIC, FROM_PCAP);
 	plogx_info("Loading %u packets from pcap\n", task->n_pkts);
-	size_t mem_size = task->n_pkts * sizeof(*task->pkt_template);
-	task->pkt_template = prox_zmalloc(mem_size, socket_id);
-	task->pkt_template_orig = prox_zmalloc(mem_size, socket_id);
-	PROX_PANIC(task->pkt_template == NULL ||
-		   task->pkt_template_orig == NULL,
-		   "Failed to allocate %lu bytes (in huge pages) for pcap file\n", mem_size);
-
-	pcap_read_pkts(handle, targ->pcap_file, task->n_pkts, task->pkt_template_orig, NULL);
+	// Only the orig_n_pkts originals are read from the file; task_gen_reset_pkt_templates() then fills pkt_template
+	pcap_read_pkts(handle, targ->pcap_file, task->orig_n_pkts, task->pkt_template_orig, NULL, max_frame_size);
 	pcap_close(handle);
 	task_gen_reset_pkt_templates(task);
+	check_all_pkt_size(task, DO_PANIC);
+	check_all_fields_in_bounds(task, DO_PANIC);
+	task_gen_set_eth_ip_udp_sizes(task, task->orig_n_pkts, task->imix_nb_pkts, task->imix_pkt_sizes);
 }
 
-static struct rte_mempool *task_gen_create_mempool(struct task_args *targ)
+static struct rte_mempool *task_gen_create_mempool(struct task_args *targ, uint32_t max_frame_size)
 {
 	static char name[] = "gen_pool";
 	struct rte_mempool *ret;
 	const int sock_id = rte_lcore_to_socket_id(targ->lconf->id);
 
 	name[0]++;
-	ret = rte_mempool_create(name, targ->nb_mbuf - 1, MBUF_SIZE,
+	// An element must hold the rte_mbuf header, the headroom and the largest frame; never go below TX_MBUF_SIZE
+	const uint32_t needed = max_frame_size + (unsigned)sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM;
+	const uint32_t mbuf_size = RTE_MAX((uint32_t)TX_MBUF_SIZE, needed);
+	plog_info("\t\tCreating mempool with name '%s'\n", name);
+	ret = rte_mempool_create(name, targ->nb_mbuf - 1, mbuf_size,
 				 targ->nb_cache_mbuf, sizeof(struct rte_pktmbuf_pool_private),
 				 rte_pktmbuf_pool_init, NULL, rte_pktmbuf_init, 0,
 				 sock_id, 0);
 	PROX_PANIC(ret == NULL, "Failed to allocate dummy memory pool on socket %u with %u elements\n",
 		   sock_id, targ->nb_mbuf - 1);
+
+	plog_info("\t\tMempool %p size = %u * %u cache %u, socket %d\n", ret,
+		targ->nb_mbuf - 1, mbuf_size, targ->nb_cache_mbuf, sock_id);
+
 	return ret;
 }
 
@@ -1001,12 +1488,33 @@ int task_gen_set_pkt_size(struct task_base *tbase, uint32_t pkt_size)
 	struct task_gen *task = (struct task_gen *)tbase;
 	int rc;
 
-	if ((rc = check_pkt_size(task, pkt_size, 0)) != 0)
-		return rc;
-	if ((rc = check_fields_in_bounds(task, pkt_size, 0)) != 0)
-		return rc;
-	task->pkt_template[0].len = pkt_size;
-	return rc;
+	for (size_t i = 0; i < task->n_pkts; ++i) {
+		if ((rc = check_pkt_size(task, pkt_size, 0)) != 0)
+			return rc;
+		if ((rc = check_fields_in_bounds(task, pkt_size, 0)) != 0)
+			return rc;
+	}
+	for (size_t i = 0; i < task->n_pkts; ++i) {
+		task->pkt_template[i].len = pkt_size;
+	}
+	return 0;
+}
+
+// Stage a new IMIX size list: sizes and new_imix_nb_pkts are only stored when every size passes the checks
+int task_gen_set_imix(struct task_base *tbase, uint32_t nb_pkt_sizes, uint32_t *pkt_sizes)
+{
+	struct task_gen *task = (struct task_gen *)tbase;
+	int rc;
+	for (size_t i = 0; i < nb_pkt_sizes; ++i) {
+		if ((rc = check_pkt_size(task, pkt_sizes[i], DO_NOT_PANIC)) != 0)
+			return rc;
+		if ((rc = check_fields_in_bounds(task, pkt_sizes[i], DO_NOT_PANIC)) != 0)
+			return rc;
+	}
+	// Copy only after validation, so a rejected request leaves the size list currently in use untouched
+	memcpy(task->imix_pkt_sizes, pkt_sizes, nb_pkt_sizes * sizeof(uint32_t));
+	task->new_imix_nb_pkts = nb_pkt_sizes;
+	return 0;
 }
 
 void task_gen_set_rate(struct task_base *tbase, uint64_t bps)
@@ -1028,10 +1536,20 @@ void task_gen_reset_randoms(struct task_base *tbase)
 	task->n_rands = 0;
 }
 
+// Drop all configured ranges: zero the entries in use, then mark the table empty
+void task_gen_reset_ranges(struct task_base *tbase)
+{
+	struct task_gen *gen = (struct task_gen *)tbase;
+	memset(gen->ranges, 0, gen->n_ranges * sizeof(struct range));
+	gen->n_ranges = 0;
+}
+
 int task_gen_set_value(struct task_base *tbase, uint32_t value, uint32_t offset, uint32_t len)
 {
 	struct task_gen *task = (struct task_gen *)tbase;
 
+	if (offset + len > task->max_frame_size)
+		return -1;
 	for (size_t i = 0; i < task->n_pkts; ++i) {
 		uint32_t to_write = rte_cpu_to_be_32(value) >> ((4 - len) * 8);
 		uint8_t *dst = task->pkt_template[i].buf;
@@ -1049,6 +1567,16 @@ void task_gen_reset_values(struct task_base *tbase)
 	struct task_gen *task = (struct task_gen *)tbase;
 
 	task_gen_reset_pkt_templates_content(task);
+	task_gen_pkt_template_recalc_metadata(task);
+	check_all_pkt_size(task, DO_NOT_PANIC);
+	check_all_fields_in_bounds(task, DO_NOT_PANIC);
+	task_gen_set_eth_ip_udp_sizes(task, task->orig_n_pkts, task->imix_nb_pkts, task->imix_pkt_sizes);
+
+	if (task->flags & TASK_OVERWRITE_SRC_MAC_WITH_PORT_MAC) {
+		for (uint32_t i = 0; i < task->n_pkts; ++i) {
+			rte_memcpy(&task->pkt_template[i].buf[sizeof(prox_rte_ether_addr)], &task->src_mac, sizeof(prox_rte_ether_addr));
+		}
+	}
 }
 
 uint32_t task_gen_get_n_randoms(struct task_base *tbase)
@@ -1058,43 +1586,54 @@ uint32_t task_gen_get_n_randoms(struct task_base *tbase)
 	return task->n_rands;
 }
 
+// Return the task's n_ranges counter
+uint32_t task_gen_get_n_ranges(struct task_base *tbase)
+{
+	const struct task_gen *gen = (const struct task_gen *)tbase;
+	return gen->n_ranges;
+}
+
 static void init_task_gen_pcap(struct task_base *tbase, struct task_args *targ)
 {
 	struct task_gen_pcap *task = (struct task_gen_pcap *)tbase;
-	const uint32_t sockid = rte_lcore_to_socket_id(targ->lconf->id);
+	task->socket_id = rte_lcore_to_socket_id(targ->lconf->id);
+	uint32_t max_frame_size;
 
 	task->loop = targ->loop;
 	task->pkt_idx = 0;
 	task->hz = rte_get_tsc_hz();
 
-	task->local_mbuf.mempool = task_gen_create_mempool(targ);
-
-	PROX_PANIC(!strcmp(targ->pcap_file, ""), "No pcap file defined\n");
-
 	char err[PCAP_ERRBUF_SIZE];
 	pcap_t *handle = pcap_open_offline(targ->pcap_file, err);
 	PROX_PANIC(handle == NULL, "Failed to open PCAP file: %s\n", err);
 
-	task->n_pkts = pcap_count_pkts(handle);
+	task->n_pkts = pcap_count_pkts(handle, &max_frame_size);
 	plogx_info("%u packets in pcap file '%s'\n", task->n_pkts, targ->pcap_file);
 
+	task->local_mbuf.mempool = task_gen_create_mempool(targ, max_frame_size);	// sized from the largest frame in the pcap
+
+	PROX_PANIC(!strcmp(targ->pcap_file, ""), "No pcap file defined\n");	// NOTE(review): runs after pcap_open_offline() already used pcap_file, so an empty name presumably panics there first — confirm
+
 	if (targ->n_pkts) {
 		plogx_info("Configured to load %u packets\n", targ->n_pkts);
 		if (task->n_pkts > targ->n_pkts)
 			task->n_pkts = targ->n_pkts;
 	}
-	PROX_PANIC(task->n_pkts > MAX_TEMPLATE_INDEX, "Too many packets specified in pcap - increase MAX_TEMPLATE_INDEX\n");
-
 	plogx_info("Loading %u packets from pcap\n", task->n_pkts);
 
 	size_t mem_size = task->n_pkts * (sizeof(*task->proto) + sizeof(*task->proto_tsc));
-	uint8_t *mem = prox_zmalloc(mem_size, sockid);
+	uint8_t *mem = prox_zmalloc(mem_size, task->socket_id);
 
 	PROX_PANIC(mem == NULL, "Failed to allocate %lu bytes (in huge pages) for pcap file\n", mem_size);
 	task->proto = (struct pkt_template *) mem;
 	task->proto_tsc = (uint64_t *)(mem + task->n_pkts * sizeof(*task->proto));
 
-	pcap_read_pkts(handle, targ->pcap_file, task->n_pkts, task->proto, task->proto_tsc);
+	for (uint i = 0; i < task->n_pkts; i++) {	// one buffer per template in task->proto: task->n_pkts entries, not the optional (possibly 0 or larger) targ->n_pkts
+		task->proto[i].buf = prox_zmalloc(max_frame_size, task->socket_id);
+		PROX_PANIC(task->proto[i].buf == NULL, "Failed to allocate %u bytes (in huge pages) for pcap file\n", max_frame_size);
+	}
+
+	pcap_read_pkts(handle, targ->pcap_file, task->n_pkts, task->proto, task->proto_tsc, max_frame_size);
 	pcap_close(handle);
 }
 
@@ -1109,6 +1648,26 @@ static int task_gen_find_random_with_offset(struct task_gen *task, uint32_t offs
 	return UINT32_MAX;
 }
 
+int task_gen_add_range(struct task_base *tbase, struct range *range)
+{
+	struct task_gen *task = (struct task_gen *)tbase;
+	if (task->n_ranges == MAX_RANGES) {
+		plog_err("Too many ranges\n");
+		return -1;
+	}
+	struct range *slot = &task->ranges[task->n_ranges];
+	slot->min = range->min;
+	slot->max = range->max;
+	slot->value = range->min;	// the stored value starts at min
+	slot->offset = range->offset;
+	// range_len = number of bytes needed to represent max (0 when max is 0)
+	slot->range_len = 0;
+	for (uint32_t m = range->max; m != 0; m >>= 8)
+		slot->range_len++;
+	task->n_ranges++;
+	return 0;
+}
+
 int task_gen_add_rand(struct task_base *tbase, const char *rand_str, uint32_t offset, uint32_t rand_id)
 {
 	struct task_gen *task = (struct task_gen *)tbase;
@@ -1155,18 +1714,7 @@ static void start(struct task_base *tbase)
 	if (tbase->l3.tmaster) {
 		register_all_ip_to_ctrl_plane(task);
 	}
-	if (task->port) {
-		// task->port->link_speed reports the link speed in Mbps e.g. 40k for a 40 Gbps NIC.
-		// task->link_speed reports link speed in Bytes per sec.
-		// It can be 0 if link is down, and must hence be updated in fast path.
-		task->link_speed = task->port->link_speed * 125000L;
-		if (task->link_speed)
-			plog_info("\tPort %u: link speed is %ld Mbps\n",
-				(uint8_t)(task->port - prox_port_cfg), 8 * task->link_speed / 1000000);
-		else
-			plog_info("\tPort %u: link speed is %ld Mbps - link might be down\n",
-				(uint8_t)(task->port - prox_port_cfg), 8 * task->link_speed / 1000000);
-	}
+
 	/* TODO
 	   Handle the case when two tasks transmit to the same port
 	   and one of them is stopped. In that case ARP (requests or replies)
@@ -1177,6 +1725,31 @@ static void start(struct task_base *tbase)
 	*/
 }
 
+/* Dump the stored packets (if storing is enabled) to task->fp as hex, starting at the slot store_pkt_id points to. */
+static void stop_gen(struct task_base *tbase)
+{
+	struct task_gen *task = (struct task_gen *)tbase;
+
+	if (task->store_msk == 0)
+		return;
+
+	// store_pkt_id & store_msk is the slot handle_gen_bulk() would overwrite next: start there and wrap around
+	const uint32_t n_slots = task->store_msk + 1;
+	const uint32_t first = task->store_pkt_id & task->store_msk;
+	for (uint32_t k = 0; k < n_slots; k++) {
+		const uint32_t i = (first + k) % n_slots;
+		const struct packet *pkt = &task->store_buf[i];
+
+		if (pkt->len == 0)	// skip slots holding no data
+			continue;
+		fprintf(task->fp, "%06d: ", i);
+		for (uint32_t j = 0; j < pkt->len; j++)
+			fprintf(task->fp, "%02x ", pkt->buf[j]);
+		fprintf(task->fp, "\n");
+	}
+	// The stream is not closed anywhere in the visible code, so push the dump out explicitly
+	fflush(task->fp);
+}
 static void start_pcap(struct task_base *tbase)
 {
 	struct task_gen_pcap *task = (struct task_gen_pcap *)tbase;
@@ -1201,16 +1774,29 @@ static void init_task_gen_early(struct task_args *targ)
 static void init_task_gen(struct task_base *tbase, struct task_args *targ)
 {
 	struct task_gen *task = (struct task_gen *)tbase;
+	task->socket_id = rte_lcore_to_socket_id(targ->lconf->id);
 
 	task->packet_id_pos = targ->packet_id_pos;
 
-	task->local_mbuf.mempool = task_gen_create_mempool(targ);
+	struct prox_port_cfg *port = find_reachable_port(targ);
+	// TODO: check that all reachable ports have the same mtu...
+	if (port) {
+		task->cksum_offload = port->requested_tx_offload & (RTE_ETH_TX_OFFLOAD_IPV4_CKSUM | RTE_ETH_TX_OFFLOAD_UDP_CKSUM);
+		task->port = port;
+		task->max_frame_size = port->mtu + PROX_RTE_ETHER_HDR_LEN + 2 * PROX_VLAN_TAG_SIZE;
+	} else {
+		// Not generating to any port...
+		task->max_frame_size = PROX_RTE_ETHER_MAX_LEN;
+	}
+	task->local_mbuf.mempool = task_gen_create_mempool(targ, task->max_frame_size);
 	PROX_PANIC(task->local_mbuf.mempool == NULL, "Failed to create mempool\n");
 	task->pkt_idx = 0;
 	task->hz = rte_get_tsc_hz();
 	task->lat_pos = targ->lat_pos;
 	task->accur_pos = targ->accur_pos;
 	task->sig_pos = targ->sig_pos;
+	task->flow_id_pos = targ->flow_id_pos;
+	task->packet_id_in_flow_pos = targ->packet_id_in_flow_pos;
 	task->sig = targ->sig;
 	task->new_rate_bps = targ->rate_bps;
 
@@ -1241,34 +1827,95 @@ static void init_task_gen(struct task_base *tbase, struct task_args *targ)
 	PROX_PANIC((task->lat_pos || task->accur_pos) && !task->lat_enabled, "lat not enabled by lat pos or accur pos configured\n");
 
 	task->generator_id = targ->generator_id;
-	plog_info("\tGenerator id = %d\n", task->generator_id);
-	task->link_speed = UINT64_MAX;
+	plog_info("\t\tGenerator id = %d\n", task->generator_id);
+
+	// Allocate array holding bytes to tsc for supported frame sizes
+	task->bytes_to_tsc = prox_zmalloc(task->max_frame_size * MAX_PKT_BURST * sizeof(task->bytes_to_tsc[0]), task->socket_id);
+	PROX_PANIC(task->bytes_to_tsc == NULL,
+		"Failed to allocate %u bytes (in huge pages) for bytes_to_tsc\n", task->max_frame_size);
+
+	// task->port->max_link_speed reports the maximum, non negotiated link speed in Mbps e.g. 40k for a 40 Gbps NIC.
+	// It can be UINT32_MAX (virtual devices or not supported by DPDK < 16.04)
+	uint64_t bytes_per_hz = UINT64_MAX;
+	if ((task->port) && (task->port->max_link_speed != UINT32_MAX)) {
+		bytes_per_hz = task->port->max_link_speed * 125000L;
+		plog_info("\t\tPort %u: max link speed is %ld Mbps\n",
+			(uint8_t)(task->port - prox_port_cfg), 8 * bytes_per_hz / 1000000);
+	}
+	// There are cases where hz estimate might be slightly over-estimated
+	// This results in too much extrapolation
+	// Only account for 99% of extrapolation to handle cases with up to 1% error clocks
+	for (unsigned int i = 0; i < task->max_frame_size * MAX_PKT_BURST ; i++) {
+		if (bytes_per_hz == UINT64_MAX)
+			task->bytes_to_tsc[i] = 0;
+		else
+			task->bytes_to_tsc[i] = (task->hz * i * 0.99) / bytes_per_hz;
+	}
 
+	task->imix_nb_pkts = targ->imix_nb_pkts;
+	for (uint32_t i = 0; i < targ->imix_nb_pkts; i++) {
+		task->imix_pkt_sizes[i] = targ->imix_pkt_sizes[i];
+	}
 	if (!strcmp(targ->pcap_file, "")) {
-		plog_info("\tUsing inline definition of a packet\n");
+		plog_info("\t\tUsing inline definition of a packet\n");
 		task_init_gen_load_pkt_inline(task, targ);
 	} else {
-		plog_info("Loading from pcap %s\n", targ->pcap_file);
+		plog_info("\t\tLoading from pcap %s\n", targ->pcap_file);
 		task_init_gen_load_pcap(task, targ);
 	}
 
-	if ((targ->flags & DSF_KEEP_SRC_MAC) == 0 && (targ->nb_txrings || targ->nb_txports)) {
-		uint8_t *src_addr = prox_port_cfg[tbase->tx_params_hw.tx_port_queue->port].eth_addr.addr_bytes;
+	PROX_PANIC(((targ->nb_txrings == 0) && (targ->nb_txports == 0)), "Gen mode requires a tx ring or a tx port");
+	if ((targ->flags & DSF_KEEP_SRC_MAC) == 0) {
+		task->flags |= TASK_OVERWRITE_SRC_MAC_WITH_PORT_MAC;
+		memcpy(&task->src_mac, &prox_port_cfg[task->base.tx_params_hw.tx_port_queue->port].eth_addr, sizeof(prox_rte_ether_addr));
 		for (uint32_t i = 0; i < task->n_pkts; ++i) {
-			rte_memcpy(&task->pkt_template[i].buf[6], src_addr, 6);
+			rte_memcpy(&task->pkt_template[i].buf[sizeof(prox_rte_ether_addr)], &task->src_mac, sizeof(prox_rte_ether_addr));
 		}
 	}
-	memcpy(&task->src_mac, &prox_port_cfg[task->base.tx_params_hw.tx_port_queue->port].eth_addr, sizeof(struct ether_addr));
 	for (uint32_t i = 0; i < targ->n_rand_str; ++i) {
 		PROX_PANIC(task_gen_add_rand(tbase, targ->rand_str[i], targ->rand_offset[i], UINT32_MAX),
 			   "Failed to add random\n");
 	}
-
-	struct prox_port_cfg *port = find_reachable_port(targ);
-	if (port) {
-		task->cksum_offload = port->capabilities.tx_offload_cksum;
-		task->port = port;
+	for (uint32_t i = 0; i < targ->n_ranges; ++i) {
+		PROX_PANIC(task_gen_add_range(tbase, &targ->range[i]), "Failed to add range\n");
+	}
+	if (targ->store_max) {
+		char filename[256];
+		sprintf(filename, "gen_buf_%02d_%02d", targ->lconf->id, targ->task);
+		task->store_msk = targ->store_max - 1;	// NOTE(review): used as an index mask — assumes store_max is a power of two; confirm
+		task->store_buf = (struct packet *)calloc(targ->store_max, sizeof(struct packet));	// zeroed: stop_gen() skips slots whose len is 0
+		PROX_PANIC(task->store_buf == NULL, "Failed to allocate packet store buffer\n");
+		task->fp = fopen(filename, "w+");
+		PROX_PANIC(task->fp == NULL, "Unable to open %s\n", filename);
+	} else {
+		task->store_msk = 0;
 	}
+	uint32_t n_entries = get_n_range_flows(task) * task->orig_n_pkts * 4;
+#ifndef RTE_HASH_BUCKET_ENTRIES
+#define RTE_HASH_BUCKET_ENTRIES	8
+#endif
+	// cuckoo hash requires at least RTE_HASH_BUCKET_ENTRIES (8) entries
+	if (n_entries < RTE_HASH_BUCKET_ENTRIES)
+		n_entries = RTE_HASH_BUCKET_ENTRIES;
+
+	static char hash_name[30];
+	sprintf(hash_name, "A%03d_hash_gen_table", targ->lconf->id);
+	struct rte_hash_parameters hash_params = {
+		.name = hash_name,
+		.entries = n_entries,
+		.key_len = sizeof(union ipv4_5tuple_host),
+		.hash_func = rte_hash_crc,
+		.hash_func_init_val = 0,
+		.socket_id = task->socket_id,
+	};
+	plog_info("\t\thash table name = %s\n", hash_params.name);
+	task->flow_id_table = rte_hash_create(&hash_params);
+	PROX_PANIC(task->flow_id_table == NULL, "Failed to set up flow_id hash table for gen\n");
+	plog_info("\t\tflow_id hash table allocated, with %d entries of size %d\n", hash_params.entries, hash_params.key_len);
+	build_flow_table(task);
+	task->flows = (struct flows *)prox_zmalloc(n_entries * sizeof(struct flows), task->socket_id);
+	PROX_PANIC(task->flows == NULL, "Failed to allocate flows\n");
+	plog_info("\t\t%d flows allocated\n", n_entries);
 }
 
 static struct task_init task_init_gen = {
@@ -1280,11 +1927,12 @@ static struct task_init task_init_gen = {
 #ifdef SOFT_CRC
 	// For SOFT_CRC, no offload is needed. If both NOOFFLOADS and NOMULTSEGS flags are set the
 	// vector mode is used by DPDK, resulting (theoretically) in higher performance.
-	.flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS | TASK_FEATURE_TXQ_FLAGS_NOMULTSEGS,
+	.flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS,
 #else
 	.flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX,
 #endif
-	.size = sizeof(struct task_gen)
+	.size = sizeof(struct task_gen),
+	.stop_last = stop_gen
 };
 
 static struct task_init task_init_gen_l3 = {
@@ -1297,13 +1945,14 @@ static struct task_init task_init_gen_l3 = {
 #ifdef SOFT_CRC
 	// For SOFT_CRC, no offload is needed. If both NOOFFLOADS and NOMULTSEGS flags are set the
 	// vector mode is used by DPDK, resulting (theoretically) in higher performance.
-	.flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS | TASK_FEATURE_TXQ_FLAGS_NOMULTSEGS,
+	.flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS,
 #else
 	.flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX,
 #endif
 	.size = sizeof(struct task_gen)
 };
 
+/* This mode uses time stamps in the pcap file */
 static struct task_init task_init_gen_pcap = {
 	.mode_str = "gen",
 	.sub_mode_str = "pcap",
@@ -1312,7 +1961,7 @@ static struct task_init task_init_gen_pcap = {
 	.start = start_pcap,
 	.early_init = init_task_gen_early,
 #ifdef SOFT_CRC
-	.flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS | TASK_FEATURE_TXQ_FLAGS_NOMULTSEGS,
+	.flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS,
 #else
 	.flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX,
 #endif