From 4ba4c2e82f8fbc59b9992f53201e26049ede44c8 Mon Sep 17 00:00:00 2001 From: Julien Meunier Date: Fri, 30 Mar 2018 10:27:52 +0200 Subject: sysmetrics/pidstat: monitor all threads Since sysstat commit 52977c479d3d ("Introduce new SP_VALUE_100() macro and use it in pidstat"), pidstat can return a wrong CPU usage for all monitored PIDs. For example, if a process like ovs-vswitchd uses 2 threads, the CPU usage of each thread (TID) is equal to 100%, so the CPU usage of the main process (PID) must be equal at least to 200%. However, with this sysstat commit, the CPU usage is restricted to 100%. It is not possible to change this behavior. Now, pidstat is started in order to monitor all threads created by a process. As the output header has changed, readapt the existing code. In order to fix this issue, only the CPU usage is accumulated with all threads. For all other measures, main process should report correct values. JIRA: VSPERF-569 Change-Id: I98aa94f545d04f4de1b994c420fb5756c6f2a387 Signed-off-by: Julien Meunier --- tools/collectors/sysmetrics/pidstat.py | 48 ++++++++++++++++++++++++++++------ tools/report/report_rst.jinja | 4 ++- 2 files changed, 43 insertions(+), 9 deletions(-) diff --git a/tools/collectors/sysmetrics/pidstat.py b/tools/collectors/sysmetrics/pidstat.py index 245d8d22..277fdb11 100644 --- a/tools/collectors/sysmetrics/pidstat.py +++ b/tools/collectors/sysmetrics/pidstat.py @@ -76,7 +76,7 @@ class Pidstat(collector.ICollector): with open(self._log, 'w') as logfile: cmd = ['sudo', 'LC_ALL=' + settings.getValue('DEFAULT_CMD_LOCALE'), 'pidstat', settings.getValue('PIDSTAT_OPTIONS'), - '-p', ','.join(pids), + '-t', '-p', ','.join(pids), str(settings.getValue('PIDSTAT_SAMPLE_INTERVAL'))] self._logger.debug('%s', ' '.join(cmd)) self._pid = subprocess.Popen(cmd, stdout=logfile, bufsize=0).pid @@ -116,16 +116,48 @@ class Pidstat(collector.ICollector): # combine stored header fields with actual values tmp_res = OrderedDict(zip(tmp_header, line[8:].split())) - # use process's name and its pid as unique key - key = tmp_res.pop('Command') + '_' + tmp_res['PID'] - # store values for given command into results dict - if key in self._results: - self._results[key].update(tmp_res) - else: - self._results[key] = tmp_res + cmd = tmp_res.pop('Command') + # remove unused fields (given by option '-t') + tmp_res.pop('UID') + tmp_res.pop('TID') + if '|_' not in cmd: # main process + # use process's name and its pid as unique key + tmp_pid = tmp_res.pop('TGID') + tmp_key = "%s_%s" % (cmd, tmp_pid) + # do not trust cpu usage of pid + # see VSPERF-569 for more details + if 'CPU' not in tmp_header: + self.update_results(tmp_key, tmp_res, False) + else: # thread + # accumulate cpu usage of all threads + if 'CPU' in tmp_header: + tmp_res.pop('TGID') + self.update_results(tmp_key, tmp_res, True) line = logfile.readline() + def update_results(self, key, result, accumulate=False): + """ + Update final results dictionary. If ``accumulate`` param is set to + ``True``, try to accumulate existing values. + """ + # store values for given command into results dict + if key not in self._results: + self._results[key] = result + elif accumulate: + for field in result: + if field not in self._results[key]: + self._results[key][field] = result[field] + else: + try: + val = float(self._results[key][field]) + float(result[field]) + self._results[key][field] = '{0:.2f}'.format(val) + except ValueError: + # cannot cast to float, let's update with the previous value + self._results[key][field] = result[field] + else: + self._results[key].update(result) + def get_results(self): """Returns collected statistics. """ diff --git a/tools/report/report_rst.jinja b/tools/report/report_rst.jinja index eda0c01e..6b51807a 100644 --- a/tools/report/report_rst.jinja +++ b/tools/report/report_rst.jinja @@ -90,7 +90,9 @@ Testing Activities/Events ~~~~~~~~~~~~~~~~~~~~~~~~~ pidstat is used to collect the process statistics, as such some values such as %CPU and %USER maybe > 100% as the values are summed across multiple cores. For -more info on pidstat please see: http://linux.die.net/man/1/pidstat. +more info on pidstat please see: http://linux.die.net/man/1/pidstat. Please +note that vsperf recalculates the CPU consumption of a process by aggregating +the CPU usage of each thread. Known issues: Some reported metrics have the value "unkown". These values are marked unknown as they are not values retrieved from the external tester -- cgit 1.2.3-korg