From 259a75b97d735cff6cfc91c12bf84801c9dc992b Mon Sep 17 00:00:00 2001 From: fmenguy Date: Wed, 22 Jul 2020 09:33:16 +0200 Subject: NFVBENCH-172: Add quartiles and 99 percentile latency values Change-Id: If223a47bbb039e4565924a49259c07cede356373 Signed-off-by: fmenguy --- docker/Dockerfile | 5 +- nfvbench/cfg.default.yaml | 4 ++ nfvbench/packet_stats.py | 23 +++++++-- nfvbench/stats_manager.py | 2 +- nfvbench/summarizer.py | 92 +++++++++++++++++++++++++++++------- nfvbench/traffic_client.py | 34 +++++++++++-- nfvbench/traffic_gen/dummy.py | 2 +- nfvbench/traffic_gen/traffic_base.py | 2 +- nfvbench/traffic_gen/trex_gen.py | 33 +++++++++++-- requirements.txt | 1 + 10 files changed, 166 insertions(+), 32 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 2ae8c83..d871acc 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -22,6 +22,9 @@ RUN apt-get update && apt-get install -y \ net-tools \ iproute2 \ libelf1 \ + python3-dev \ + libpython3.6-dev \ + gcc \ && ln -s /usr/bin/python3.6 /usr/local/bin/python3 \ && mkdir -p /opt/trex \ && mkdir /var/log/nfvbench \ @@ -43,7 +46,7 @@ RUN apt-get update && apt-get install -y \ && cp xtesting/testcases.yaml /usr/local/lib/python3.6/dist-packages/xtesting/ci/testcases.yaml \ && python3 ./docker/cleanup_generators.py \ && rm -rf /nfvbench/.git \ - && apt-get remove -y wget git \ + && apt-get remove -y wget git python3-dev libpython3.6-dev gcc \ && apt-get autoremove -y && apt-get clean && rm -rf /var/lib/apt/lists/* ENV TREX_EXT_LIBS "/opt/trex/$TREX_VER/external_libs" diff --git a/nfvbench/cfg.default.yaml b/nfvbench/cfg.default.yaml index 1c4e20b..253e8bc 100755 --- a/nfvbench/cfg.default.yaml +++ b/nfvbench/cfg.default.yaml @@ -348,6 +348,10 @@ mbuf_factor: 0.2 # hdrh is enabled by default and requires TRex v2.58 or higher disable_hdrh: false +# List of latency percentiles values returned using hdrh +# elements should be int or float between 0.0 and 100.0 +lat_percentiles: [25, 75, 99] + # ----------------------------------------------------------------------------- # These variables are not likely to be changed diff --git a/nfvbench/packet_stats.py b/nfvbench/packet_stats.py index 63b1f11..d6b9a68 100644 --- a/nfvbench/packet_stats.py +++ b/nfvbench/packet_stats.py @@ -21,6 +21,7 @@ PacketPathStatsManager manages all packet path stats for all chains. import copy +from hdrh.histogram import HdrHistogram from .traffic_gen.traffic_base import Latency class InterfaceStats(object): @@ -141,7 +142,7 @@ class PacketPathStats(object): chain. """ - def __init__(self, if_stats, aggregate=False): + def __init__(self, config, if_stats, aggregate=False): """Create a packet path stats intance with the list of associated if stats. if_stats: a list of interface stats that compose this packet path stats @@ -150,6 +151,7 @@ class PacketPathStats(object): Aggregate packet path stats are the only one that should show counters for shared interface stats """ + self.config = config self.if_stats = if_stats # latency for packets sent from port 0 and 1 self.latencies = [Latency(), Latency()] @@ -170,7 +172,7 @@ class PacketPathStats(object): ifstats.add_if_stats(pps.if_stats[index]) @staticmethod - def get_agg_packet_path_stats(pps_list): + def get_agg_packet_path_stats(config, pps_list): """Get the aggregated packet path stats from a list of packet path stats. Interface counters are added, latency stats are updated. @@ -179,7 +181,7 @@ class PacketPathStats(object): for pps in pps_list: if agg_pps is None: # Get a clone of the first in the list - agg_pps = PacketPathStats(pps.get_cloned_if_stats(), aggregate=True) + agg_pps = PacketPathStats(config, pps.get_cloned_if_stats(), aggregate=True) else: agg_pps.add_packet_path_stats(pps) # aggregate all latencies @@ -239,6 +241,16 @@ class PacketPathStats(object): 'lat_avg_usec': latency.avg_usec} if latency.hdrh: results['hdrh'] = latency.hdrh + decoded_histogram = HdrHistogram.decode(latency.hdrh) + # override min max and avg from hdrh + results['lat_min_usec'] = decoded_histogram.get_min_value() + results['lat_max_usec'] = decoded_histogram.get_max_value() + results['lat_avg_usec'] = decoded_histogram.get_mean_value() + results['lat_percentile'] = {} + for percentile in self.config.lat_percentiles: + results['lat_percentile'][percentile] = decoded_histogram.\ + get_value_at_percentile(percentile) + else: results = {} results['packets'] = counters @@ -251,12 +263,13 @@ class PacketPathStatsManager(object): Each run will generate packet path stats for 1 or more chains. """ - def __init__(self, pps_list): + def __init__(self, config, pps_list): """Create a packet path stats intance with the list of associated if stats. pps_list: a list of packet path stats indexed by the chain id. All packet path stats must have the same length. """ + self.config = config self.pps_list = pps_list def insert_pps_list(self, chain_index, if_stats): @@ -288,7 +301,7 @@ class PacketPathStatsManager(object): chains = {} # insert the aggregated row if applicable if len(self.pps_list) > 1: - agg_pps = PacketPathStats.get_agg_packet_path_stats(self.pps_list) + agg_pps = PacketPathStats.get_agg_packet_path_stats(self.config, self.pps_list) chains['total'] = agg_pps.get_stats(reverse) for index, pps in enumerate(self.pps_list): diff --git a/nfvbench/stats_manager.py b/nfvbench/stats_manager.py index 5ba3a1a..6fa98bd 100644 --- a/nfvbench/stats_manager.py +++ b/nfvbench/stats_manager.py @@ -35,7 +35,7 @@ class StatsManager(object): if self.config.single_run: pps_list = [] self.traffic_client.insert_interface_stats(pps_list) - self.pps_mgr = PacketPathStatsManager(pps_list) + self.pps_mgr = PacketPathStatsManager(self.config, pps_list) else: self.pps_mgr = None self.worker = None diff --git a/nfvbench/summarizer.py b/nfvbench/summarizer.py index de62a73..326de10 100644 --- a/nfvbench/summarizer.py +++ b/nfvbench/summarizer.py @@ -259,6 +259,12 @@ class NFVBenchSummarizer(Summarizer): self.record_header = None self.record_data = None self.sender = sender + + # add percentiles headers if hdrh enabled + if not self.config.disable_hdrh: + for percentile in self.config.lat_percentiles: + self.ndr_pdr_header.append((str(percentile) + ' %ile lat.', Formatter.standard)) + self.single_run_header.append((str(percentile) + ' %ile lat.', Formatter.standard)) # if sender is available initialize record if self.sender: self.__record_init() @@ -394,7 +400,8 @@ class NFVBenchSummarizer(Summarizer): for frame_size, analysis in list(traffic_result.items()): if frame_size == 'warning': continue - summary_table.add_row([ + + row_data = [ 'NDR', frame_size, analysis['ndr']['rate_bps'], @@ -403,21 +410,32 @@ class NFVBenchSummarizer(Summarizer): analysis['ndr']['stats']['overall']['avg_delay_usec'], analysis['ndr']['stats']['overall']['min_delay_usec'], analysis['ndr']['stats']['overall']['max_delay_usec'] - ]) - self.__record_data_put(frame_size, {'ndr': { + ] + if not self.config.disable_hdrh: + self.extract_hdrh_percentiles( + analysis['ndr']['stats']['overall']['lat_percentile'], row_data) + summary_table.add_row(row_data) + + ndr_data = { 'type': 'NDR', 'rate_bps': analysis['ndr']['rate_bps'], 'rate_pps': analysis['ndr']['rate_pps'], + 'offered_tx_rate_bps': analysis['ndr']['stats']['offered_tx_rate_bps'], 'drop_percentage': analysis['ndr']['stats']['overall']['drop_percentage'], 'avg_delay_usec': analysis['ndr']['stats']['overall']['avg_delay_usec'], 'min_delay_usec': analysis['ndr']['stats']['overall']['min_delay_usec'], 'max_delay_usec': analysis['ndr']['stats']['overall']['max_delay_usec'] - }}) + } + if not self.config.disable_hdrh: + self.extract_hdrh_percentiles( + analysis['ndr']['stats']['overall']['lat_percentile'], ndr_data, True) + self.__record_data_put(frame_size, {'ndr': ndr_data}) if self.config['pdr_run']: for frame_size, analysis in list(traffic_result.items()): if frame_size == 'warning': continue - summary_table.add_row([ + + row_data = [ 'PDR', frame_size, analysis['pdr']['rate_bps'], @@ -426,34 +444,62 @@ class NFVBenchSummarizer(Summarizer): analysis['pdr']['stats']['overall']['avg_delay_usec'], analysis['pdr']['stats']['overall']['min_delay_usec'], analysis['pdr']['stats']['overall']['max_delay_usec'] - ]) - self.__record_data_put(frame_size, {'pdr': { + ] + if not self.config.disable_hdrh: + self.extract_hdrh_percentiles( + analysis['pdr']['stats']['overall']['lat_percentile'], row_data) + summary_table.add_row(row_data) + + pdr_data = { 'type': 'PDR', 'rate_bps': analysis['pdr']['rate_bps'], 'rate_pps': analysis['pdr']['rate_pps'], + 'offered_tx_rate_bps': analysis['pdr']['stats']['offered_tx_rate_bps'], 'drop_percentage': analysis['pdr']['stats']['overall']['drop_percentage'], 'avg_delay_usec': analysis['pdr']['stats']['overall']['avg_delay_usec'], 'min_delay_usec': analysis['pdr']['stats']['overall']['min_delay_usec'], 'max_delay_usec': analysis['pdr']['stats']['overall']['max_delay_usec'] - }}) + } + if not self.config.disable_hdrh: + self.extract_hdrh_percentiles( + analysis['pdr']['stats']['overall']['lat_percentile'], pdr_data, True) + self.__record_data_put(frame_size, {'pdr': pdr_data}) if self.config['single_run']: for frame_size, analysis in list(traffic_result.items()): - summary_table.add_row([ + row_data = [ frame_size, analysis['stats']['overall']['drop_rate_percent'], analysis['stats']['overall']['rx']['avg_delay_usec'], analysis['stats']['overall']['rx']['min_delay_usec'], analysis['stats']['overall']['rx']['max_delay_usec'] - ]) - self.__record_data_put(frame_size, {'single_run': { + ] + if not self.config.disable_hdrh: + self.extract_hdrh_percentiles( + analysis['stats']['overall']['rx']['lat_percentile'], row_data) + summary_table.add_row(row_data) + single_run_data = { 'type': 'single_run', + 'offered_tx_rate_bps': analysis['stats']['offered_tx_rate_bps'], 'drop_rate_percent': analysis['stats']['overall']['drop_rate_percent'], 'avg_delay_usec': analysis['stats']['overall']['rx']['avg_delay_usec'], 'min_delay_usec': analysis['stats']['overall']['rx']['min_delay_usec'], 'max_delay_usec': analysis['stats']['overall']['rx']['max_delay_usec'] - }}) + } + if not self.config.disable_hdrh: + self.extract_hdrh_percentiles( + analysis['stats']['overall']['rx']['lat_percentile'], single_run_data, True) + self.__record_data_put(frame_size, {'single_run': single_run_data}) return summary_table + def extract_hdrh_percentiles(self, lat_percentile, data, add_key=False): + if add_key: + data['lat_percentile'] = {} + for percentile in self.config.lat_percentiles: + if add_key: + data['lat_percentile_' + str(percentile)] = lat_percentile[percentile] + else: + data.append(lat_percentile[percentile]) + def __get_config_table(self, run_config, frame_size): config_table = Table(self.config_header) for key, name in zip(self.direction_keys, self.direction_names): @@ -498,21 +544,35 @@ class NFVBenchSummarizer(Summarizer): _annotate_chain_stats(chains) header = [('Chain', Formatter.standard)] + \ [(ifname, Formatter.standard) for ifname in chain_stats['interfaces']] - # add latency columns if available Avg, Min, Max + # add latency columns if available Avg, Min, Max and percentiles lat_keys = [] lat_map = {'lat_avg_usec': 'Avg lat.', 'lat_min_usec': 'Min lat.', 'lat_max_usec': 'Max lat.'} if 'lat_avg_usec' in chains['0']: - lat_keys = ['lat_avg_usec', 'lat_min_usec', 'lat_max_usec'] - for key in lat_keys: + lat_keys = ['lat_avg_usec', 'lat_min_usec', 'lat_max_usec', 'lat_percentile'] + + if not self.config.disable_hdrh: + for percentile in self.config.lat_percentiles: + lat_map['lat_' + str(percentile) + '_percentile'] = str( + percentile) + ' %ile lat.' + + for key in lat_map: header.append((lat_map[key], Formatter.standard)) table = Table(header) for chain in sorted(list(chains.keys()), key=str): row = [chain] + chains[chain]['packets'] for lat_key in lat_keys: - row.append('{:,} usec'.format(chains[chain][lat_key])) + if chains[chain].get(lat_key, None): + if lat_key == 'lat_percentile': + if not self.config.disable_hdrh: + for percentile in chains[chain][lat_key]: + row.append(Formatter.standard(chains[chain][lat_key][percentile])) + else: + row.append(Formatter.standard(chains[chain][lat_key])) + else: + row.append('--') table.add_row(row) return table diff --git a/nfvbench/traffic_client.py b/nfvbench/traffic_client.py index b70e8f9..89653a8 100755 --- a/nfvbench/traffic_client.py +++ b/nfvbench/traffic_client.py @@ -13,7 +13,6 @@ # under the License. """Interface to the traffic generator clients including NDR/PDR binary search.""" - from math import gcd import socket import struct @@ -21,6 +20,7 @@ import time from attrdict import AttrDict import bitmath +from hdrh.histogram import HdrHistogram from netaddr import IPNetwork # pylint: disable=import-error from trex.stl.api import Ether @@ -919,7 +919,7 @@ class TrafficClient(object): def get_stats(self): """Collect final stats for previous run.""" - stats = self.gen.get_stats() + stats = self.gen.get_stats(self.ifstats) retDict = {'total_tx_rate': stats['total_tx_rate'], 'offered_tx_rate_bps': stats['offered_tx_rate_bps']} @@ -967,6 +967,18 @@ class TrafficClient(object): for key in ['pkt_bit_rate', 'pkt_rate']: for dirc in ['tx', 'rx']: retDict['overall'][dirc][key] /= 2.0 + retDict['overall']['hdrh'] = stats.get('hdrh', None) + if retDict['overall']['hdrh']: + decoded_histogram = HdrHistogram.decode(retDict['overall']['hdrh']) + # override min max and avg from hdrh + retDict['overall']['rx']['min_delay_usec'] = decoded_histogram.get_min_value() + retDict['overall']['rx']['max_delay_usec'] = decoded_histogram.get_max_value() + retDict['overall']['rx']['avg_delay_usec'] = decoded_histogram.get_mean_value() + retDict['overall']['rx']['lat_percentile'] = {} + for percentile in self.config.lat_percentiles: + retDict['overall']['rx']['lat_percentile'][percentile] = \ + decoded_histogram.get_value_at_percentile(percentile) + else: retDict['overall'] = retDict[ports[0]] retDict['overall']['drop_rate_percent'] = self.__get_dropped_rate(retDict['overall']) @@ -997,6 +1009,20 @@ class TrafficClient(object): 'min_delay_usec': interface['rx']['min_delay_usec'], } + if key == 'overall': + stats[key]['hdrh'] = interface.get('hdrh', None) + if stats[key]['hdrh']: + decoded_histogram = HdrHistogram.decode(stats[key]['hdrh']) + # override min max and avg from hdrh + stats[key]['min_delay_usec'] = decoded_histogram.get_min_value() + stats[key]['max_delay_usec'] = decoded_histogram.get_max_value() + stats[key]['avg_delay_usec'] = decoded_histogram.get_mean_value() + stats[key]['lat_percentile'] = {} + for percentile in self.config.lat_percentiles: + stats[key]['lat_percentile'][percentile] = decoded_histogram.\ + get_value_at_percentile(percentile) + + return stats def __targets_found(self, rate, targets, results): @@ -1214,7 +1240,7 @@ class TrafficClient(object): for chain_idx in range(self.config.service_chain_count)] # note that we need to make a copy of the ifs list so that any modification in the # list from pps will not change the list saved in self.ifstats - self.pps_list = [PacketPathStats(list(ifs)) for ifs in self.ifstats] + self.pps_list = [PacketPathStats(self.config, list(ifs)) for ifs in self.ifstats] # insert the corresponding pps in the passed list pps_list.extend(self.pps_list) @@ -1233,7 +1259,7 @@ class TrafficClient(object): ] """ if diff: - stats = self.gen.get_stats() + stats = self.gen.get_stats(self.ifstats) for chain_idx, ifs in enumerate(self.ifstats): # each ifs has exactly 2 InterfaceStats and 2 Latency instances # corresponding to the diff --git a/nfvbench/traffic_gen/dummy.py b/nfvbench/traffic_gen/dummy.py index 272990a..25664e5 100644 --- a/nfvbench/traffic_gen/dummy.py +++ b/nfvbench/traffic_gen/dummy.py @@ -102,7 +102,7 @@ class DummyTG(AbstractTrafficGenerator): def clear_streamblock(self): pass - def get_stats(self): + def get_stats(self, ifstats): """Get stats from current run. The binary search mainly looks at 2 results to make the decision: diff --git a/nfvbench/traffic_gen/traffic_base.py b/nfvbench/traffic_gen/traffic_base.py index 3bff7da..abf5a22 100644 --- a/nfvbench/traffic_gen/traffic_base.py +++ b/nfvbench/traffic_gen/traffic_base.py @@ -82,7 +82,7 @@ class AbstractTrafficGenerator(object): LOG.info('Modified traffic stream for port %s, new rate=%s.', port, self.rates[port_index]) @abc.abstractmethod - def get_stats(self): + def get_stats(self, ifstats): # Must be implemented by sub classes return None diff --git a/nfvbench/traffic_gen/trex_gen.py b/nfvbench/traffic_gen/trex_gen.py index 7220e4b..b35d13f 100644 --- a/nfvbench/traffic_gen/trex_gen.py +++ b/nfvbench/traffic_gen/trex_gen.py @@ -18,6 +18,7 @@ import os import random import time import traceback +from functools import reduce from itertools import count # pylint: disable=import-error @@ -29,6 +30,8 @@ from nfvbench.utils import cast_integer from nfvbench.utils import timeout from nfvbench.utils import TimeoutError +from hdrh.histogram import HdrHistogram + # pylint: disable=import-error from trex.common.services.trex_service_arp import ServiceARP from trex.stl.api import bind_layers @@ -116,7 +119,7 @@ class TRex(AbstractTrafficGenerator): pg_id = port * TRex.PORT_PG_ID_MASK | chain_id return pg_id, pg_id | TRex.LATENCY_PG_ID_MASK - def extract_stats(self, in_stats): + def extract_stats(self, in_stats, ifstats): """Extract stats from dict returned by Trex API. :param in_stats: dict as returned by TRex api @@ -158,6 +161,30 @@ class TRex(AbstractTrafficGenerator): result['offered_tx_rate_bps'] = total_tx_bps result["flow_stats"] = in_stats["flow_stats"] result["latency"] = in_stats["latency"] + + # Merge HDRHistogram to have an overall value for all chains and ports + try: + hdrh_list = [] + if ifstats: + for chain_id, _ in enumerate(ifstats): + for ph in self.port_handle: + _, lat_pg_id = self.get_pg_id(ph, chain_id) + hdrh_list.append( + HdrHistogram.decode(in_stats['latency'][lat_pg_id]['latency']['hdrh'])) + else: + for pg_id in in_stats['latency']: + if pg_id != 'global': + hdrh_list.append( + HdrHistogram.decode(in_stats['latency'][pg_id]['latency']['hdrh'])) + + def add_hdrh(x, y): + x.add(y) + return x + decoded_hdrh = reduce(add_hdrh, hdrh_list) + result["hdrh"] = HdrHistogram.encode(decoded_hdrh).decode('utf-8') + except KeyError: + pass + return result def get_stream_stats(self, trex_stats, if_stats, latencies, chain_idx): @@ -865,10 +892,10 @@ class TRex(AbstractTrafficGenerator): self.client.reset(self.port_handle) LOG.info('Cleared all existing streams') - def get_stats(self): + def get_stats(self, if_stats=None): """Get stats from Trex.""" stats = self.client.get_stats() - return self.extract_stats(stats) + return self.extract_stats(stats, if_stats) def get_macs(self): """Return the Trex local port MAC addresses. diff --git a/requirements.txt b/requirements.txt index 430d70c..ba50911 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,3 +23,4 @@ flask>=0.12 fluent-logger>=0.5.3 netaddr>=0.7.19 xtesting>=0.87.0 +hdrhistogram>=0.8.0 \ No newline at end of file -- cgit 1.2.3-korg