summaryrefslogtreecommitdiffstats
path: root/deploy/cloud/deployment.py
diff options
context:
space:
mode:
authorMichal Skalski <mskalski@mirantis.com>2016-09-20 09:38:32 +0200
committerMichal Skalski <mskalski@mirantis.com>2016-10-04 08:42:24 +0000
commitb295ae64a96d2c74ad562d70849e3d67f8b7c1ee (patch)
treee032b3c30fc33d033a06b3781b455f3ce3364629 /deploy/cloud/deployment.py
parent8e501a8589db2c5f29a43b5fb2d253c431397b4f (diff)
Change the way how we track deployment
Use fuel2 for start deployment. Since it does not return progress use deployment task to provide this information. Currently used 'deploy-changes' will behave the same: https://bugs.launchpad.net/fuel/+bug/1565026 Try to handle situation when nodes temporary go offline. With deploy-changes environment still was in 'new' state in this situtation which causes timeouts from jenkins. JIRA: FUEL-196 Change-Id: I6548a5ec807551388e845044c282b7af32eb9100 Signed-off-by: Michal Skalski <mskalski@mirantis.com> (cherry picked from commit b73526033c5c5b93ca96f411a1d90dc5c49b9228)
Diffstat (limited to 'deploy/cloud/deployment.py')
-rw-r--r--deploy/cloud/deployment.py111
1 files changed, 76 insertions, 35 deletions
diff --git a/deploy/cloud/deployment.py b/deploy/cloud/deployment.py
index 4a9fcd9a8..75bd4ef97 100644
--- a/deploy/cloud/deployment.py
+++ b/deploy/cloud/deployment.py
@@ -9,6 +9,7 @@
import time
import re
+import json
from common import (
N,
@@ -29,9 +30,17 @@ GREP_LINES_OF_LEADING_CONTEXT = 100
GREP_LINES_OF_TRAILING_CONTEXT = 100
LIST_OF_CHAR_TO_BE_ESCAPED = ['[', ']', '"']
-class Deployment(object):
+
+class DeployNotStart(Exception):
+ """Unable to start deployment"""
+
+
+class NodesGoOffline(Exception):
+ """Nodes goes offline during deployment"""
+class Deployment(object):
+
def __init__(self, dea, yaml_config_dir, env_id, node_id_roles_dict,
no_health_check, deploy_timeout):
self.dea = dea
@@ -43,7 +52,6 @@ class Deployment(object):
self.pattern = re.compile(
'\d\d\d\d-\d\d-\d\d\s\d\d:\d\d:\d\d')
-
def collect_error_logs(self):
for node_id, roles_blade in self.node_id_roles_dict.iteritems():
log_list = []
@@ -89,7 +97,7 @@ class Deployment(object):
log_msg += details
if log_msg:
- log_list.append(log_msg)
+ log_list.append(log_msg)
if log_list:
role = ('controller' if 'controller' in roles_blade[0]
@@ -99,47 +107,83 @@ class Deployment(object):
for log_msg in log_list:
print(log_msg + '\n')
-
def run_deploy(self):
SLEEP_TIME = 60
- LOG_FILE = 'cloud.log'
+ abort_after = 60 * int(self.deploy_timeout)
+ start = time.time()
log('Starting deployment of environment %s' % self.env_id)
- deploy_proc = run_proc('fuel --env %s deploy-changes | strings > %s'
- % (self.env_id, LOG_FILE))
-
+ deploy_id = None
ready = False
- for i in range(int(self.deploy_timeout)):
- env = parse(exec_cmd('fuel env --env %s' % self.env_id))
- log('Environment status: %s' % env[0][E['status']])
- r, _ = exec_cmd('tail -2 %s | head -1' % LOG_FILE, False)
- if r:
- log(r)
- if env[0][E['status']] == 'operational':
- ready = True
- break
- elif (env[0][E['status']] == 'error'
- or env[0][E['status']] == 'stopped'):
- break
- else:
+ timeout = False
+
+ attempts = 0
+ while attempts < 3:
+ try:
+ if time.time() > start + abort_after:
+ timeout = True
+ break
+ if not deploy_id:
+ deploy_id = self._start_deploy_task()
+ sts, prg, msg = self._deployment_status(deploy_id)
+ if sts == 'error':
+ log('Error during deployment: {}'.format(msg))
+ break
+ if sts == 'running':
+ log('Environmnent deploymnet progress: {}%'.format(prg))
+ elif sts == 'ready':
+ ready = True
+ break
time.sleep(SLEEP_TIME)
-
- if (env[0][E['status']] <> 'operational'
- and env[0][E['status']] <> 'error'
- and env[0][E['status']] <> 'stopped'):
- err('Deployment timed out, environment %s is not operational, snapshot will not be performed'
- % self.env_id, self.collect_logs)
-
- run_proc_wait_terminated(deploy_proc)
- delete(LOG_FILE)
-
+ except (DeployNotStart, NodesGoOffline) as e:
+ log(e)
+ attempts += 1
+ deploy_id = None
+ time.sleep(SLEEP_TIME * attempts)
+
+ if timeout:
+ err('Deployment timed out, environment %s is not operational, '
+ 'snapshot will not be performed'
+ % self.env_id)
if ready:
- log('Environment %s successfully deployed' % self.env_id)
+ log('Environment %s successfully deployed'
+ % self.env_id)
else:
self.collect_error_logs()
err('Deployment failed, environment %s is not operational'
% self.env_id, self.collect_logs)
+ def _start_deploy_task(self):
+ out, _ = exec_cmd('fuel2 env deploy {}'.format(self.env_id), False)
+ id = self._deployment_task_id(out)
+ return id
+
+ def _deployment_task_id(self, response):
+ response = str(response)
+ if response.startswith('Deployment task with id'):
+ for s in response.split():
+ if s.isdigit():
+ return int(s)
+ raise DeployNotStart('Unable to start deployment: {}'.format(response))
+
+ def _deployment_status(self, id):
+ task = self._task_fields(id)
+ if task['status'] == 'error':
+ if task['message'].endswith(
+ 'offline. Remove them from environment and try again.'):
+ raise NodesGoOffline(task['message'])
+ return task['status'], task['progress'], task['message']
+
+ def _task_fields(self, id):
+ try:
+ out, _ = exec_cmd('fuel2 task show {} -f json'.format(id), False)
+ task_info = json.loads(out)
+ properties = {}
+ for d in task_info:
+ properties.update({d['Field']: d['Value']})
+ return properties
+ except ValueError as e:
+ err('Unable to fetch task info: {}'.format(e))
def collect_logs(self):
log('Cleaning out any previous deployment logs')
@@ -155,7 +199,6 @@ class Deployment(object):
r, _ = exec_cmd('tar -czhf /root/deploy-%s.log.tar.gz /var/log/remote' % time.strftime("%Y%m%d-%H%M%S"), False)
log(r)
-
def verify_node_status(self):
node_list = parse(exec_cmd('fuel --env %s node' % self.env_id))
failed_nodes = []
@@ -169,7 +212,6 @@ class Deployment(object):
summary += '[node %s, status %s]\n' % (node, status)
err('Deployment failed: %s' % summary, self.collect_logs)
-
def health_check(self):
log('Now running sanity and smoke health checks')
r = exec_cmd('fuel health --env %s --check sanity,smoke --force' % self.env_id)
@@ -177,7 +219,6 @@ class Deployment(object):
if 'failure' in r:
err('Healthcheck failed!', self.collect_logs)
-
def deploy(self):
self.run_deploy()
self.verify_node_status()