aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJosep Puigdemont <josep.puigdemont@enea.com>2016-05-06 12:09:58 +0200
committerJosep Puigdemont <josep.puigdemont@gmail.com>2016-06-15 15:00:41 +0000
commit2ea2690762b4a1e547d2ca71674125c7e101d7fd (patch)
tree3a5b43447ab20ca69e5eb8f1aabdee41b2505e98
parent874ca3f24d349ee844bb7339dda34eddb2e91c73 (diff)
ipmi_adapter: simplify, retry if command fails
The method get_node_state has been added to the the IpmiAdapter class. In addition, now the power on/off methods will try several times to perform their IPMI command before giving up, instead of bailing out at the first error. After the power on/off command is completed, the method will wait until the node is in the desired state. NOTE: a command could potentially take several minutes if the defaults are used; each IPMI command can take up to 1 minute, and there can be 3 commands issued per operation, one of them may be retried 20 times with the current defaults. Ideally we would use eventlet or something similar to allow each command a limited time to execute, instead: with eventlet.timeout.Timeout(seconds) as t: power_on/off_command Signed-off-by: Josep Puigdemont <josep.puigdemont@enea.com>
-rw-r--r--deploy/dha_adapters/ipmi_adapter.py101
1 files changed, 42 insertions, 59 deletions
diff --git a/deploy/dha_adapters/ipmi_adapter.py b/deploy/dha_adapters/ipmi_adapter.py
index 4bd6bd378..6ce4012f4 100644
--- a/deploy/dha_adapters/ipmi_adapter.py
+++ b/deploy/dha_adapters/ipmi_adapter.py
@@ -1,5 +1,6 @@
###############################################################################
# Copyright (c) 2015 Ericsson AB and others.
+# (c) 2016 Enea Software AB
# szilard.cserey@ericsson.com
# All rights reserved. This program and the accompanying materials
# are made available under the terms of the Apache License, Version 2.0
@@ -20,8 +21,10 @@ from common import (
class IpmiAdapter(HardwareAdapter):
- def __init__(self, yaml_path):
+ def __init__(self, yaml_path, attempts=20, delay=3):
super(IpmiAdapter, self).__init__(yaml_path)
+ self.attempts = attempts
+ self.delay = delay
def get_access_info(self, node_id):
ip = self.get_node_property(node_id, 'ipmiIp')
@@ -43,69 +46,46 @@ class IpmiAdapter(HardwareAdapter):
mac_list.append(self.get_node_property(node_id, 'pxeMac').lower())
return mac_list
+ def node_get_state(self, node_id):
+ state = exec_cmd('%s chassis power status' % self.ipmi_cmd(node_id),
+ attempts=self.attempts, delay=self.delay,
+ verbose=True)
+ return state
+
+ def _node_power_cmd(self, node_id, cmd):
+ expected = 'Chassis Power is %s' % cmd
+ if self.node_get_state(node_id) == expected:
+ return
+
+ pow_cmd = '%s chassis power %s' % (self.ipmi_cmd(node_id), cmd)
+ exec_cmd(pow_cmd, attempts=self.attempts, delay=self.delay,
+ verbose=True)
+
+ attempts = self.attempts
+ while attempts:
+ state = self.node_get_state(node_id)
+ attempts -= 1
+ if state == expected:
+ return
+ elif attempts != 0:
+ # reinforce our will, but allow the command to fail,
+ # we know our message got across once already...
+ exec_cmd(pow_cmd, check=False)
+
+ err('Could not set chassis %s for node %s' % (cmd, node_id))
+
def node_power_on(self, node_id):
- WAIT_LOOP = 200
- SLEEP_TIME = 3
log('Power ON Node %s' % node_id)
- cmd_prefix = self.ipmi_cmd(node_id)
- state = exec_cmd('%s chassis power status' % cmd_prefix)
- if state == 'Chassis Power is off':
- exec_cmd('%s chassis power on' % cmd_prefix)
- done = False
- for i in range(WAIT_LOOP):
- state, _ = exec_cmd('%s chassis power status' % cmd_prefix,
- False)
- if state == 'Chassis Power is on':
- done = True
- break
- else:
- time.sleep(SLEEP_TIME)
- if not done:
- err('Could Not Power ON Node %s' % node_id)
+ self._node_power_cmd(node_id, 'on')
def node_power_off(self, node_id):
- WAIT_LOOP = 200
- SLEEP_TIME = 3
log('Power OFF Node %s' % node_id)
- cmd_prefix = self.ipmi_cmd(node_id)
- state = exec_cmd('%s chassis power status' % cmd_prefix)
- if state == 'Chassis Power is on':
- done = False
- exec_cmd('%s chassis power off' % cmd_prefix)
- for i in range(WAIT_LOOP):
- state, _ = exec_cmd('%s chassis power status' % cmd_prefix,
- False)
- if state == 'Chassis Power is off':
- done = True
- break
- else:
- time.sleep(SLEEP_TIME)
- if not done:
- err('Could Not Power OFF Node %s' % node_id)
+ self._node_power_cmd(node_id, 'off')
def node_reset(self, node_id):
- WAIT_LOOP = 600
log('RESET Node %s' % node_id)
- cmd_prefix = self.ipmi_cmd(node_id)
- state = exec_cmd('%s chassis power status' % cmd_prefix)
- if state == 'Chassis Power is on':
- was_shut_off = False
- done = False
- exec_cmd('%s chassis power reset' % cmd_prefix)
- for i in range(WAIT_LOOP):
- state, _ = exec_cmd('%s chassis power status' % cmd_prefix,
- False)
- if state == 'Chassis Power is off':
- was_shut_off = True
- elif state == 'Chassis Power is on' and was_shut_off:
- done = True
- break
- time.sleep(1)
- if not done:
- err('Could Not RESET Node %s' % node_id)
- else:
- err('Cannot RESET Node %s because it\'s not Active, state: %s'
- % (node_id, state))
+ cmd = '%s chassis power reset' % self.ipmi_cmd(node_id)
+ exec_cmd(cmd, attempts=self.attempts, delay=self.delay, verbose=True)
def node_set_boot_order(self, node_id, boot_order_list):
log('Set boot order %s on Node %s' % (boot_order_list, node_id))
@@ -114,9 +94,12 @@ class IpmiAdapter(HardwareAdapter):
for dev in boot_order_list:
if dev == 'pxe':
exec_cmd('%s chassis bootdev pxe options=persistent'
- % cmd_prefix)
+ % cmd_prefix, attempts=self.attempts, delay=self.delay,
+ verbose=True)
elif dev == 'iso':
- exec_cmd('%s chassis bootdev cdrom' % cmd_prefix)
+ exec_cmd('%s chassis bootdev cdrom' % cmd_prefix,
+ attempts=self.attempts, delay=self.delay, verbose=True)
elif dev == 'disk':
exec_cmd('%s chassis bootdev disk options=persistent'
- % cmd_prefix)
+ % cmd_prefix, attempts=self.attempts, delay=self.delay,
+ verbose=True)