aboutsummaryrefslogtreecommitdiffstats
path: root/deploy
diff options
context:
space:
mode:
authorMichal Skalski <mskalski@mirantis.com>2016-09-20 09:38:32 +0200
committerMichal Skalski <mskalski@mirantis.com>2016-09-23 00:50:09 +0200
commitb73526033c5c5b93ca96f411a1d90dc5c49b9228 (patch)
tree122d1f0fa57b6e6e1b2fc4f7e99a608b8eab452f /deploy
parent1d4f96facadd5458a87a4d68b0b5298ca47cd1fd (diff)
Change the way we track deployment
Use fuel2 to start the deployment. Since it does not return progress, use the deployment task to provide this information. The currently used 'deploy-changes' will behave the same: https://bugs.launchpad.net/fuel/+bug/1565026 Try to handle the situation when nodes temporarily go offline. With deploy-changes the environment still was in the 'new' state in this situation, which caused timeouts from Jenkins. JIRA: FUEL-196 Change-Id: I6548a5ec807551388e845044c282b7af32eb9100 Signed-off-by: Michal Skalski <mskalski@mirantis.com>
Diffstat (limited to 'deploy')
-rw-r--r--deploy/cloud/deployment.py111
1 files changed, 76 insertions, 35 deletions
diff --git a/deploy/cloud/deployment.py b/deploy/cloud/deployment.py
index 4a9fcd9a8..75bd4ef97 100644
--- a/deploy/cloud/deployment.py
+++ b/deploy/cloud/deployment.py
@@ -9,6 +9,7 @@
import time
import re
+import json
from common import (
N,
@@ -29,9 +30,17 @@ GREP_LINES_OF_LEADING_CONTEXT = 100
GREP_LINES_OF_TRAILING_CONTEXT = 100
LIST_OF_CHAR_TO_BE_ESCAPED = ['[', ']', '"']
-class Deployment(object):
+
class DeployNotStart(Exception):
    """Raised when a deployment task could not be started."""
+
+
class NodesGoOffline(Exception):
    """Raised when nodes go offline while a deployment is in progress."""
+class Deployment(object):
+
def __init__(self, dea, yaml_config_dir, env_id, node_id_roles_dict,
no_health_check, deploy_timeout):
self.dea = dea
@@ -43,7 +52,6 @@ class Deployment(object):
self.pattern = re.compile(
'\d\d\d\d-\d\d-\d\d\s\d\d:\d\d:\d\d')
-
def collect_error_logs(self):
for node_id, roles_blade in self.node_id_roles_dict.iteritems():
log_list = []
@@ -89,7 +97,7 @@ class Deployment(object):
log_msg += details
if log_msg:
- log_list.append(log_msg)
+ log_list.append(log_msg)
if log_list:
role = ('controller' if 'controller' in roles_blade[0]
@@ -99,47 +107,83 @@ class Deployment(object):
for log_msg in log_list:
print(log_msg + '\n')
-
def run_deploy(self):
    """Start deployment of the environment and wait until it finishes.

    A deployment task is started through ``_start_deploy_task`` and then
    polled with ``_deployment_status`` once per ``SLEEP_TIME`` seconds.
    When starting fails (``DeployNotStart``) or nodes go offline
    (``NodesGoOffline``) the task id is dropped and a new task is started,
    up to ``MAX_ATTEMPTS`` such failures in total.  ``self.deploy_timeout``
    is multiplied by 60 to obtain the overall limit in seconds.

    On success a message is logged; on timeout or failure ``err`` is
    called (error logs are collected first in the failure case).
    """
    SLEEP_TIME = 60
    MAX_ATTEMPTS = 3
    abort_after = 60 * int(self.deploy_timeout)
    start = time.time()

    log('Starting deployment of environment %s' % self.env_id)
    deploy_id = None
    ready = False
    timeout = False

    attempts = 0
    while attempts < MAX_ATTEMPTS:
        try:
            if time.time() > start + abort_after:
                timeout = True
                break
            # No task yet, or the previous one was abandoned after a
            # recoverable failure: start a fresh one.
            if deploy_id is None:
                deploy_id = self._start_deploy_task()
            sts, prg, msg = self._deployment_status(deploy_id)
            if sts == 'error':
                log('Error during deployment: {}'.format(msg))
                break
            if sts == 'running':
                log('Environment deployment progress: {}%'.format(prg))
            elif sts == 'ready':
                ready = True
                break
            # Any other status: keep polling.
            time.sleep(SLEEP_TIME)
        except (DeployNotStart, NodesGoOffline) as e:
            log(e)
            attempts += 1
            deploy_id = None
            # Back off a little longer after each failure, but do not
            # wait when no further attempt is going to be made.
            if attempts < MAX_ATTEMPTS:
                time.sleep(SLEEP_TIME * attempts)

    if timeout:
        err('Deployment timed out, environment %s is not operational, '
            'snapshot will not be performed'
            % self.env_id)
    if ready:
        log('Environment %s successfully deployed'
            % self.env_id)
    else:
        self.collect_error_logs()
        err('Deployment failed, environment %s is not operational'
            % self.env_id, self.collect_logs)
def _start_deploy_task(self):
    """Launch deployment of the environment and return the task id.

    Runs ``fuel2 env deploy <env_id>`` and passes the command output to
    ``_deployment_task_id``; whatever that method returns or raises is
    propagated to the caller.
    """
    command = 'fuel2 env deploy {}'.format(self.env_id)
    # NOTE(review): the False argument presumably keeps exec_cmd from
    # aborting on a failed command -- confirm against its definition.
    output, _unused = exec_cmd(command, False)
    return self._deployment_task_id(output)
+
+ def _deployment_task_id(self, response):
+ response = str(response)
+ if response.startswith('Deployment task with id'):
+ for s in response.split():
+ if s.isdigit():
+ return int(s)
+ raise DeployNotStart('Unable to start deployment: {}'.format(response))
+
+ def _deployment_status(self, id):
+ task = self._task_fields(id)
+ if task['status'] == 'error':
+ if task['message'].endswith(
+ 'offline. Remove them from environment and try again.'):
+ raise NodesGoOffline(task['message'])
+ return task['status'], task['progress'], task['message']
+
def _task_fields(self, id):
    """Fetch a task with ``fuel2 task show`` and return its fields.

    :param id: id of the task to look up.
    :returns: dict mapping each field name to its value.  When the
              command output is not valid JSON, ``err`` is called with
              the parse error and, should ``err`` return, the result is
              ``None``.
    """
    try:
        out, _ = exec_cmd('fuel2 task show {} -f json'.format(id), False)
        task_info = json.loads(out)
    except ValueError as e:
        err('Unable to fetch task info: {}'.format(e))
        return None
    # NOTE(review): newer cliff-based CLIs appear to print a "show" result
    # as a single JSON object instead of a list of Field/Value rows --
    # confirm against the installed fuel2.  Both shapes are accepted.
    if isinstance(task_info, dict):
        return task_info
    return {row['Field']: row['Value'] for row in task_info}
def collect_logs(self):
log('Cleaning out any previous deployment logs')
@@ -155,7 +199,6 @@ class Deployment(object):
r, _ = exec_cmd('tar -czhf /root/deploy-%s.log.tar.gz /var/log/remote' % time.strftime("%Y%m%d-%H%M%S"), False)
log(r)
-
def verify_node_status(self):
node_list = parse(exec_cmd('fuel --env %s node' % self.env_id))
failed_nodes = []
@@ -169,7 +212,6 @@ class Deployment(object):
summary += '[node %s, status %s]\n' % (node, status)
err('Deployment failed: %s' % summary, self.collect_logs)
-
def health_check(self):
log('Now running sanity and smoke health checks')
r = exec_cmd('fuel health --env %s --check sanity,smoke --force' % self.env_id)
@@ -177,7 +219,6 @@ class Deployment(object):
if 'failure' in r:
err('Healthcheck failed!', self.collect_logs)
-
def deploy(self):
self.run_deploy()
self.verify_node_status()