summaryrefslogtreecommitdiffstats
path: root/tools/collectors/sysmetrics/pidstat.py
blob: 277fdb11b17c6ba1be5ba04ab4597d0c930a51b1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
# Copyright 2015-2017 Intel Corporation.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""module for statistics collection by pidstat

Provides system statistics collected between calls of start() and stop()
by command line tool pidstat (part of sysstat package)

This requires the following setting in your config:

* PIDSTAT_MONITOR = ['ovs-vswitchd', 'ovsdb-server', 'kvm']
    processes to be monitored by pidstat

* PIDSTAT_OPTIONS = '-dur'
    options which will be passed to pidstat, i.e. what
    statistics should be collected by pidstat

* LOG_FILE_PIDSTAT = 'pidstat.log'
    log file for pidstat; it defines suffix, which will be added
    to testcase name. Pidstat detailed statistics will be stored separately
    for every testcase.

If any of these settings is missing, the application will raise an
exception (EAFP).
"""

import os
import logging
import subprocess
import time
from collections import OrderedDict
from tools import tasks
from tools import systeminfo
from tools.collectors.collector import collector
from conf import settings

# Directory that contains this module, with symbolic links resolved.
_ROOT_DIR = os.path.split(os.path.realpath(__file__))[0]

class Pidstat(collector.ICollector):
    """A logger of system statistics based on pidstat

    It collects statistics based on configuration: ``start()`` launches
    pidstat for the processes listed in ``PIDSTAT_MONITOR`` and redirects
    its output into a log file; ``stop()`` interrupts pidstat and parses
    the "Average" summary lines of that log file into a results dictionary.
    """
    _logger = logging.getLogger(__name__)

    def __init__(self, results_dir, test_name):
        """
        Initialize collection of statistics

        :param results_dir: directory in which the pidstat log file
            will be created
        :param test_name: name of the testcase; it becomes a part of
            the log file name
        """
        self._log = os.path.join(results_dir,
                                 settings.getValue('LOG_FILE_PIDSTAT') +
                                 '_' + test_name + '.log')
        self._results = OrderedDict()
        # non-zero only between a successful start() and the next stop()
        self._pid = 0

    def start(self):
        """
        Starts collection of statistics by pidstat and stores them
        into the file in directory with test results

        Nothing is started (and no log file is created) if none of the
        processes listed in ``PIDSTAT_MONITOR`` is found.
        """
        monitor = settings.getValue('PIDSTAT_MONITOR')
        self._logger.info('Statistics are requested for: %s', ', '.join(monitor))
        pids = systeminfo.get_pids(monitor)
        if not pids:
            return

        cmd = ['sudo', 'LC_ALL=' + settings.getValue('DEFAULT_CMD_LOCALE'),
               'pidstat', settings.getValue('PIDSTAT_OPTIONS'),
               '-t', '-p', ','.join(pids),
               str(settings.getValue('PIDSTAT_SAMPLE_INTERVAL'))]
        self._logger.debug('%s', ' '.join(cmd))
        with open(self._log, 'w') as logfile:
            # only the pid is kept; the child inherits its own copy of
            # the log file descriptor, so the file can be closed here
            self._pid = subprocess.Popen(cmd, stdout=logfile, bufsize=0).pid

    def stop(self):
        """
        Stops collection of statistics by pidstat and stores statistic summary
        for each monitored process into self._results dictionary

        If pidstat was never started and no log file exists, the results
        are left empty instead of failing on the missing file.
        """
        if self._pid:
            self._pid = 0
            # in python3.4 it's not possible to send signal through pid of sudo
            # process, so all pidstat processes are interrupted instead
            # as a workaround
            tasks.run_task(['sudo', 'pkill', '--signal', '2', 'pidstat'],
                           self._logger)
            # let's give pidstat some time to write down average summary
            time.sleep(2)

        self._results = OrderedDict()
        if not os.path.isfile(self._log):
            # start() creates the log only if some monitored process was found
            self._logger.info(
                'Pidstat log %s not found, no statistics were collected',
                self._log)
            return

        self._logger.info(
            'Pidstat log available at %s', self._log)

        # parse average values from log file and store them to _results dict
        self._parse_summary()

    def _parse_summary(self):
        """
        Parse "Average" lines of the pidstat log and fill self._results.

        A line ending with ``Command`` is taken as a header; every other
        "Average" line is combined with the most recent header. Values of
        main processes are stored for sections without a ``CPU`` column;
        for sections with a ``CPU`` column the values of threads are
        accumulated under the key of the preceding main process.
        """
        header = None   # column names of the current summary section
        key = None      # results key of the most recent main process
        with open(self._log, 'r') as logfile:
            for line in logfile:
                line = line.strip()
                # process only lines with summary
                if not line.startswith('Average'):
                    continue

                # skip the leading "Average:" label
                fields = line[8:].split()
                if line.endswith('Command'):
                    # store header fields if detected
                    header = fields
                    continue
                if header is None:
                    # values can't be interpreted without a header
                    continue

                # combine stored header fields with actual values
                values = OrderedDict(zip(header, fields))
                cmd = values.pop('Command', None)
                if cmd is None:
                    # incomplete line, e.g. pidstat was interrupted
                    # in the middle of a write
                    continue
                # remove unused fields (given by option '-t')
                values.pop('UID', None)
                values.pop('TID', None)

                if '|_' not in cmd:  # main process
                    # use process's name and its pid as unique key
                    key = "%s_%s" % (cmd, values.pop('TGID'))
                    # do not trust cpu usage of pid
                    # see VSPERF-569 for more details
                    if 'CPU' not in header:
                        self.update_results(key, values, False)
                elif 'CPU' in header and key is not None:  # thread
                    # accumulate cpu usage of all threads
                    values.pop('TGID', None)
                    self.update_results(key, values, True)

    def update_results(self, key, result, accumulate=False):
        """
        Update final results dictionary. If ``accumulate`` param is set to
        ``True``, try to accumulate existing values.

        :param key: key of the entry inside the results dictionary
        :param result: dictionary with statistic names and their values
        :param accumulate: if ``True``, values of fields already stored
            under ``key`` are summed with the new ones (formatted with
            two decimal places); otherwise stored fields are overwritten
        """
        # store values for given command into results dict
        if key not in self._results:
            self._results[key] = result
            return

        stored = self._results[key]
        if not accumulate:
            stored.update(result)
            return

        for field, value in result.items():
            if field not in stored:
                stored[field] = value
                continue
            try:
                total = float(stored[field]) + float(value)
                stored[field] = '{0:.2f}'.format(total)
            except ValueError:
                # cannot cast to float, let's replace the stored value
                # by the new one
                stored[field] = value

    def get_results(self):
        """Returns collected statistics.

        :returns: dictionary filled by the last call of ``stop()``
            (or by ``update_results()``), keyed by "<command>_<pid>"
        """
        return self._results

    def print_results(self):
        """Logs collected statistics.
        """
        for process, statistics in self._results.items():
            # strip the trailing "_<pid>" part of the key
            self._logger.info("Process: %s",
                              '_'.join(process.split('_')[:-1]))
            for key, value in statistics.items():
                self._logger.info("         Statistic: %s, Value: %s",
                                  key, value)