diff options
author | mbeierl <mark.beierl@dell.com> | 2017-01-26 11:30:26 -0500 |
---|---|---|
committer | mbeierl <mark.beierl@dell.com> | 2017-01-26 11:45:50 -0500 |
commit | 3d41a65738c44c2859ec9bdba11a0c0714b62b14 (patch) | |
tree | dd3401b659272b77543702d3baeac8d02fa9a9a8 /storperf | |
parent | 29cab6cd9d6e669c74a1dd6960aba8250f539c2f (diff) |
Hardening FIO interaction
Fixes a problem where FIO does not terminate: a second killall is
now scheduled if we get a specific message back from FIO's
stderr.
Introduces a new flavor for StorPerf that has a little more
memory, because larger memory maps for duplicate blocks sometimes
caused the out-of-memory killer to be invoked.
Change-Id: I06856561ad73fef582a81d4136a36a1bea47654a
JIRA: STORPERF-99
Signed-off-by: mbeierl <mark.beierl@dell.com>
Diffstat (limited to 'storperf')
-rw-r--r-- | storperf/fio/fio_invoker.py | 41 | ||||
-rw-r--r-- | storperf/resources/hot/agent-group.yaml | 4 | ||||
-rw-r--r-- | storperf/resources/hot/storperf-agent.yaml | 4 | ||||
-rw-r--r-- | storperf/utilities/data_handler.py | 11 |
4 files changed, 31 insertions, 29 deletions
diff --git a/storperf/fio/fio_invoker.py b/storperf/fio/fio_invoker.py index 2febf25..a201802 100644 --- a/storperf/fio/fio_invoker.py +++ b/storperf/fio/fio_invoker.py @@ -9,7 +9,6 @@ import json import logging -import subprocess from threading import Thread import paramiko @@ -65,7 +64,7 @@ class FIOInvoker(object): "Event listener callback complete") except Exception, e: self.logger.error("Error parsing JSON: %s", e) - except ValueError: + except IOError: pass # We might have read from the closed socket, ignore it stdout.close() @@ -76,6 +75,14 @@ class FIOInvoker(object): for line in iter(stderr.readline, b''): self.logger.error("FIO Error: %s", line.rstrip()) + # Sometime, FIO gets stuck and will give us this message: + # fio: job 'sequential_read' hasn't exited in 60 seconds, + # it appears to be stuck. Doing forceful exit of this job. + # A second killall of fio will release it stuck process. + + if 'it appears to be stuck' in line: + self.terminate() + stderr.close() self.logger.debug("Finished") @@ -121,24 +128,22 @@ class FIOInvoker(object): def terminate(self): self.logger.debug("Terminating fio on " + self.remote_host) - cmd = ['ssh', '-o', 'StrictHostKeyChecking=no', - '-o', 'UserKnownHostsFile=/dev/null', - '-o', 'LogLevel=error', - '-i', 'storperf/resources/ssh/storperf_rsa', - 'storperf@' + self.remote_host, - 'sudo', 'killall', '-9', 'fio'] - kill_process = subprocess.Popen(cmd, - universal_newlines=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + ssh = paramiko.SSHClient() + ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + ssh.connect(self.remote_host, username='storperf', + key_filename='storperf/resources/ssh/storperf_rsa', + timeout=2) - for line in iter(kill_process.stdout.readline, b''): - self.logger.debug("FIO Termination: " + line) + command = "sudo killall fio" - kill_process.stdout.close() + self.logger.debug("Executing on %s: %s" % (self.remote_host, command)) + (_, stdout, stderr) = ssh.exec_command(command) - for 
line in iter(kill_process.stderr.readline, b''): - self.logger.debug("FIO Termination: " + line) + for line in stdout.readlines(): + self.logger.debug(line.strip()) + for line in stderr.readlines(): + self.logger.error(line.strip()) - kill_process.stderr.close() + stdout.close() + stderr.close() diff --git a/storperf/resources/hot/agent-group.yaml b/storperf/resources/hot/agent-group.yaml index fc98c23..a06c847 100644 --- a/storperf/resources/hot/agent-group.yaml +++ b/storperf/resources/hot/agent-group.yaml @@ -16,7 +16,7 @@ parameters: - custom_constraint: neutron.network flavor: type: string - default: "m1.small" + default: "storperf" agent_image: type: string default: 'StorPerf Ubuntu 14.04' @@ -38,7 +38,7 @@ parameters: resources: slaves: type: OS::Heat::ResourceGroup - depends_on: [storperf_subnet, storperf_network_router_interface, + depends_on: [storperf_subnet, storperf_network_router_interface, storperf_open_security_group, storperf_key_pair] properties: count: {get_param: agent_count} diff --git a/storperf/resources/hot/storperf-agent.yaml b/storperf/resources/hot/storperf-agent.yaml index 587b6d8..7bf8b4d 100644 --- a/storperf/resources/hot/storperf-agent.yaml +++ b/storperf/resources/hot/storperf-agent.yaml @@ -12,7 +12,7 @@ heat_template_version: 2013-05-23 parameters: flavor: type: string - default: m1.small + default: storperf image: type: string default: 'Ubuntu 16.04' @@ -96,4 +96,4 @@ resources: outputs: storperf_agent_ip: description: The floating IP address of the agent on the public network - value: { get_attr: [ storperf_floating_ip, floating_ip_address ] }
\ No newline at end of file + value: { get_attr: [ storperf_floating_ip, floating_ip_address ] } diff --git a/storperf/utilities/data_handler.py b/storperf/utilities/data_handler.py index 0aae3b1..2d4194a 100644 --- a/storperf/utilities/data_handler.py +++ b/storperf/utilities/data_handler.py @@ -24,7 +24,7 @@ class DataHandler(object): def __init__(self): self.logger = logging.getLogger(__name__) - self.samples = 11 + self.samples = 10 """ """ @@ -116,12 +116,9 @@ class DataHandler(object): self.logger.debug("Data series: %s" % data_series) if len(data_series) == 0: return False - earliest_timestamp = data_series[0][0] - latest_timestamp = data_series[-1][0] - duration = latest_timestamp - earliest_timestamp - if (duration < 60 * self.samples): - self.logger.debug("Only %s minutes of samples, ignoring" % - ((duration / 60 + 1),)) + number_of_samples = len(data_series) + if (number_of_samples < self.samples): + self.logger.debug("Only %s samples, ignoring" % number_of_samples) return False return SteadyState.steady_state(data_series) |