summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJulien Meunier <julien.meunier@6wind.com>2018-03-30 10:27:52 +0200
committerJulien Meunier <julien.meunier@6wind.com>2018-04-10 17:09:14 +0200
commit4ba4c2e82f8fbc59b9992f53201e26049ede44c8 (patch)
tree2de99e59cd0b2b11467ae36ca6f35ed4f18801db
parent66029803af7f3d21a0697e3054b7dc3564bc5e7d (diff)
sysmetrics/pidstat: monitor all threads
Since sysstat commit 52977c479d3d ("Introduce new SP_VALUE_100() macro and use it in pidstat"), pidstat can return a wrong CPU usage for all monitored PIDs. For example, if a process like ovs-vswitchd uses 2 threads, the CPU usage of each thread (TID) is equal to 100%, so the CPU usage of the main process (PID) must be equal at least to 200%. However, with this sysstat commit, the CPU usage is restricted to 100%. It is not possible to change this behavior. Now, pidstat is started in order to monitor all threads created by a process. As the output header has changed, readapt the existing code. In order to fix this issue, only the CPU usage is accumulated with all threads. For all other measures, main process should report correct values. JIRA: VSPERF-569 Change-Id: I98aa94f545d04f4de1b994c420fb5756c6f2a387 Signed-off-by: Julien Meunier <julien.meunier@6wind.com>
-rw-r--r--tools/collectors/sysmetrics/pidstat.py48
-rw-r--r--tools/report/report_rst.jinja4
2 files changed, 43 insertions, 9 deletions
diff --git a/tools/collectors/sysmetrics/pidstat.py b/tools/collectors/sysmetrics/pidstat.py
index 245d8d22..277fdb11 100644
--- a/tools/collectors/sysmetrics/pidstat.py
+++ b/tools/collectors/sysmetrics/pidstat.py
@@ -76,7 +76,7 @@ class Pidstat(collector.ICollector):
with open(self._log, 'w') as logfile:
cmd = ['sudo', 'LC_ALL=' + settings.getValue('DEFAULT_CMD_LOCALE'),
'pidstat', settings.getValue('PIDSTAT_OPTIONS'),
- '-p', ','.join(pids),
+ '-t', '-p', ','.join(pids),
str(settings.getValue('PIDSTAT_SAMPLE_INTERVAL'))]
self._logger.debug('%s', ' '.join(cmd))
self._pid = subprocess.Popen(cmd, stdout=logfile, bufsize=0).pid
@@ -116,16 +116,48 @@ class Pidstat(collector.ICollector):
# combine stored header fields with actual values
tmp_res = OrderedDict(zip(tmp_header,
line[8:].split()))
- # use process's name and its pid as unique key
- key = tmp_res.pop('Command') + '_' + tmp_res['PID']
- # store values for given command into results dict
- if key in self._results:
- self._results[key].update(tmp_res)
- else:
- self._results[key] = tmp_res
+ cmd = tmp_res.pop('Command')
+ # remove unused fields (given by option '-t')
+ tmp_res.pop('UID')
+ tmp_res.pop('TID')
+ if '|_' not in cmd: # main process
+ # use process's name and its pid as unique key
+ tmp_pid = tmp_res.pop('TGID')
+ tmp_key = "%s_%s" % (cmd, tmp_pid)
+ # do not trust cpu usage of pid
+ # see VSPERF-569 for more details
+ if 'CPU' not in tmp_header:
+ self.update_results(tmp_key, tmp_res, False)
+ else: # thread
+ # accumulate cpu usage of all threads
+ if 'CPU' in tmp_header:
+ tmp_res.pop('TGID')
+ self.update_results(tmp_key, tmp_res, True)
line = logfile.readline()
+ def update_results(self, key, result, accumulate=False):
+ """
+ Update final results dictionary. If ``accumulate`` param is set to
+ ``True``, try to accumulate existing values.
+ """
+ # store values for given command into results dict
+ if key not in self._results:
+ self._results[key] = result
+ elif accumulate:
+ for field in result:
+ if field not in self._results[key]:
+ self._results[key][field] = result[field]
+ else:
+ try:
+ val = float(self._results[key][field]) + float(result[field])
+ self._results[key][field] = '{0:.2f}'.format(val)
+ except ValueError:
+ # cannot cast to float, let's update with the previous value
+ self._results[key][field] = result[field]
+ else:
+ self._results[key].update(result)
+
def get_results(self):
"""Returns collected statistics.
"""
diff --git a/tools/report/report_rst.jinja b/tools/report/report_rst.jinja
index eda0c01e..6b51807a 100644
--- a/tools/report/report_rst.jinja
+++ b/tools/report/report_rst.jinja
@@ -90,7 +90,9 @@ Testing Activities/Events
~~~~~~~~~~~~~~~~~~~~~~~~~
pidstat is used to collect the process statistics, as such some values such as
%CPU and %USER maybe > 100% as the values are summed across multiple cores. For
-more info on pidstat please see: http://linux.die.net/man/1/pidstat.
+more info on pidstat please see: http://linux.die.net/man/1/pidstat. Please
+note that vsperf recalculates the CPU consumption of a process by aggregating
+the CPU usage of each thread.
Known issues: Some reported metrics have the value "unkown". These values are
marked unknown as they are not values retrieved from the external tester