Diffstat (limited to 'deploy/cloud/deployment.py')
-rw-r--r-- | deploy/cloud/deployment.py | 230 |
1 files changed, 0 insertions, 230 deletions
diff --git a/deploy/cloud/deployment.py b/deploy/cloud/deployment.py
deleted file mode 100644
index 4329a4cec..000000000
--- a/deploy/cloud/deployment.py
+++ /dev/null
@@ -1,230 +0,0 @@
-###############################################################################
-# Copyright (c) 2015 Ericsson AB and others.
-# szilard.cserey@ericsson.com
-# All rights reserved. This program and the accompanying materials
-# are made available under the terms of the Apache License, Version 2.0
-# which accompanies this distribution, and is available at
-# http://www.apache.org/licenses/LICENSE-2.0
-###############################################################################
-
-import time
-import re
-import json
-
-from common import (
-    N,
-    exec_cmd,
-    parse,
-    err,
-    log,
-)
-
-SEARCH_TEXT = '(err)'
-LOG_FILE = '/var/log/puppet.log'
-GREP_LINES_OF_LEADING_CONTEXT = 100
-GREP_LINES_OF_TRAILING_CONTEXT = 100
-LIST_OF_CHAR_TO_BE_ESCAPED = ['[', ']', '"']
-ERROR_MSGS = ['Critical nodes are not available for deployment',
-              'offline. Remove them from environment and try again.',
-              'Task[move_to_bootstrap/',
-              'Failed tasks: Task[connectivity-checker/']
-
-
-class DeployNotStart(Exception):
-    """Unable to start deployment"""
-
-
-class NodesGoOffline(Exception):
-    """Nodes goes offline during deployment"""
-
-
-class Deployment(object):
-
-    def __init__(self, dea, yaml_config_dir, env_id, node_id_roles_dict,
-                 no_health_check, deploy_timeout):
-        self.dea = dea
-        self.yaml_config_dir = yaml_config_dir
-        self.env_id = env_id
-        self.node_id_roles_dict = node_id_roles_dict
-        self.no_health_check = no_health_check
-        self.deploy_timeout = deploy_timeout
-        self.pattern = re.compile(
-            '\d\d\d\d-\d\d-\d\d\s\d\d:\d\d:\d\d')
-
-    def collect_error_logs(self):
-        for node_id, roles_blade in self.node_id_roles_dict.iteritems():
-            log_list = []
-            cmd = ('ssh -q node-%s grep \'"%s"\' %s'
-                   % (node_id, SEARCH_TEXT, LOG_FILE))
-            results, _ = exec_cmd(cmd, False)
-            for result in results.splitlines():
-                log_msg = ''
-
-                sub_cmd = '"%s" %s' % (result, LOG_FILE)
-                for c in LIST_OF_CHAR_TO_BE_ESCAPED:
-                    sub_cmd = sub_cmd.replace(c, '\%s' % c)
-                grep_cmd = ('grep -B%s %s'
-                            % (GREP_LINES_OF_LEADING_CONTEXT, sub_cmd))
-                cmd = ('ssh -q node-%s "%s"' % (node_id, grep_cmd))
-
-                details, _ = exec_cmd(cmd, False)
-                details_list = details.splitlines()
-
-                found_prev_log = False
-                for i in range(len(details_list) - 2, -1, -1):
-                    if self.pattern.match(details_list[i]):
-                        found_prev_log = True
-                        break
-                if found_prev_log:
-                    log_msg += '\n'.join(details_list[i:-1]) + '\n'
-
-                grep_cmd = ('grep -A%s %s'
-                            % (GREP_LINES_OF_TRAILING_CONTEXT, sub_cmd))
-                cmd = ('ssh -q node-%s "%s"' % (node_id, grep_cmd))
-
-                details, _ = exec_cmd(cmd, False)
-                details_list = details.splitlines()
-
-                found_next_log = False
-                for i in range(1, len(details_list)):
-                    if self.pattern.match(details_list[i]):
-                        found_next_log = True
-                        break
-                if found_next_log:
-                    log_msg += '\n'.join(details_list[:i])
-                else:
-                    log_msg += details
-
-                if log_msg:
-                    log_list.append(log_msg)
-
-            if log_list:
-                role = ('controller' if 'controller' in roles_blade[0]
-                        else 'compute host')
-                log('_' * 40 + 'Errors in node-%s %s' % (node_id, role)
-                    + '_' * 40)
-                for log_msg in log_list:
-                    print(log_msg + '\n')
-
-    def run_deploy(self):
-        SLEEP_TIME = 60
-        abort_after = 60 * int(self.deploy_timeout)
-        start = time.time()
-
-        log('Starting deployment of environment %s' % self.env_id)
-        deploy_id = None
-        ready = False
-        timeout = False
-
-        attempts = 5
-        while attempts > 0:
-            try:
-                if time.time() > start + abort_after:
-                    timeout = True
-                    break
-                if not deploy_id:
-                    deploy_id = self._start_deploy_task()
-                sts, prg, msg = self._deployment_status(deploy_id)
-                if sts == 'error':
-                    log('Error during deployment: {}'.format(msg))
-                    break
-                if sts == 'running':
-                    log('Environment deployment progress: {}%'.format(prg))
-                elif sts == 'ready':
-                    ready = True
-                    break
-                time.sleep(SLEEP_TIME)
-            except (DeployNotStart, NodesGoOffline) as e:
-                log(e)
-                attempts -= 1
-                deploy_id = None
-                time.sleep(SLEEP_TIME * attempts)
-
-        if timeout:
-            err('Deployment timed out, environment %s is not operational, '
-                'snapshot will not be performed'
-                % self.env_id)
-        if ready:
-            log('Environment %s successfully deployed'
-                % self.env_id)
-        else:
-            self.collect_error_logs()
-            err('Deployment failed, environment %s is not operational'
-                % self.env_id, self.collect_logs)
-
-    def _start_deploy_task(self):
-        out, _ = exec_cmd('fuel2 env deploy {}'.format(self.env_id), False)
-        id = self._deployment_task_id(out)
-        return id
-
-    def _deployment_task_id(self, response):
-        response = str(response)
-        if response.startswith('Deployment task with id'):
-            for s in response.split():
-                if s.isdigit():
-                    return int(s)
-        raise DeployNotStart('Unable to start deployment: {}'.format(response))
-
-    def _deployment_status(self, id):
-        task = self._task_fields(id)
-        if task['status'] == 'error':
-            if any(msg in task['message'] for msg in ERROR_MSGS):
-                raise NodesGoOffline(task['message'])
-        return task['status'], task['progress'], task['message']
-
-    def _task_fields(self, id):
-        try:
-            out, _ = exec_cmd('fuel2 task show {} -f json'.format(id), False)
-            task_info = json.loads(out)
-            properties = {}
-            # for 9.0 this can be list of dicts or dict
-            # see https://bugs.launchpad.net/fuel/+bug/1625518
-            if isinstance(task_info, list):
-                for d in task_info:
-                    properties.update({d['Field']: d['Value']})
-            else:
-                return task_info
-            return properties
-        except ValueError as e:
-            err('Unable to fetch task info: {}'.format(e))
-
-    def collect_logs(self):
-        log('Cleaning out any previous deployment logs')
-        exec_cmd('rm -f /var/log/remote/fuel-snapshot-*', False)
-        exec_cmd('rm -f /root/deploy-*', False)
-        log('Generating Fuel deploy snap-shot')
-        if exec_cmd('fuel snapshot < /dev/null &> snapshot.log', False)[1] != 0:
-            log('Could not create a Fuel snapshot')
-        else:
-            exec_cmd('mv /root/fuel-snapshot* /var/log/remote/', False)
-
-        log('Collecting all Fuel Snapshot & deploy log files')
-        r, _ = exec_cmd('tar -czhf /root/deploy-%s.log.tar.gz /var/log/remote' % time.strftime("%Y%m%d-%H%M%S"), False)
-        log(r)
-
-    def verify_node_status(self):
-        node_list = parse(exec_cmd('fuel --env %s node' % self.env_id))
-        failed_nodes = []
-        for node in node_list:
-            if node[N['status']] != 'ready':
-                failed_nodes.append((node[N['id']], node[N['status']]))
-
-        if failed_nodes:
-            summary = ''
-            for node, status in failed_nodes:
-                summary += '[node %s, status %s]\n' % (node, status)
-            err('Deployment failed: %s' % summary, self.collect_logs)
-
-    def health_check(self):
-        log('Now running sanity and smoke health checks')
-        r = exec_cmd('fuel health --env %s --check sanity,smoke --force' % self.env_id)
-        log(r)
-        if 'failure' in r:
-            err('Healthcheck failed!', self.collect_logs)
-
-    def deploy(self):
-        self.run_deploy()
-        self.verify_node_status()
-        if not self.no_health_check:
-            self.health_check()
-        self.collect_logs()