Diffstat (limited to 'deploy')
-rw-r--r--   deploy/README                                 |   6
-rw-r--r--   deploy/cloud/deployment.py                    | 116
-rw-r--r--   deploy/environments/execution_environment.py  |   4
3 files changed, 86 insertions, 40 deletions
diff --git a/deploy/README b/deploy/README
index 40f95ef92..ee6bc3156 100644
--- a/deploy/README
+++ b/deploy/README
@@ -13,9 +13,11 @@ the following dependencies and python modules are required to be installed:
 
 - for Ubuntu:
 
-sudo apt-get install -y libvirt-bin qemu-kvm python-pip fuseiso mkisofs genisoimage
+sudo apt-get update
+sudo apt-get install -y libvirt-bin qemu-kvm python-pip fuseiso mkisofs \
+genisoimage ipmitool
 sudo apt-get install -y python-dev libz-dev libxml2-dev libxslt-dev libyaml-dev
-sudo pip install pyyaml netaddr paramiko lxml scp pycrypto ecdsa
+sudo pip install pyyaml netaddr paramiko lxml scp pycrypto ecdsa amt
 
 During libvirt install the user is added to the libvirtd group, so you have to
 logout then login back again
diff --git a/deploy/cloud/deployment.py b/deploy/cloud/deployment.py
index 4a9fcd9a8..ecccc241f 100644
--- a/deploy/cloud/deployment.py
+++ b/deploy/cloud/deployment.py
@@ -9,6 +9,7 @@
 
 import time
 import re
+import json
 
 from common import (
     N,
@@ -29,8 +30,16 @@
 GREP_LINES_OF_LEADING_CONTEXT = 100
 GREP_LINES_OF_TRAILING_CONTEXT = 100
 LIST_OF_CHAR_TO_BE_ESCAPED = ['[', ']', '"']
 
-class Deployment(object):
+class DeployNotStart(Exception):
+    """Unable to start deployment"""
+
+
+class NodesGoOffline(Exception):
+    """Nodes go offline during deployment"""
+
+
+class Deployment(object):
 
     def __init__(self, dea, yaml_config_dir, env_id, node_id_roles_dict,
                  no_health_check, deploy_timeout):
@@ -43,7 +52,6 @@ class Deployment(object):
         self.pattern = re.compile(
             '\d\d\d\d-\d\d-\d\d\s\d\d:\d\d:\d\d')
 
-
     def collect_error_logs(self):
         for node_id, roles_blade in self.node_id_roles_dict.iteritems():
             log_list = []
@@ -89,7 +97,7 @@ class Deployment(object):
                     log_msg += details
 
                 if log_msg:
-                     log_list.append(log_msg)
+                    log_list.append(log_msg)
 
             if log_list:
                 role = ('controller' if 'controller' in roles_blade[0]
@@ -99,47 +107,88 @@ class Deployment(object):
             for log_msg in log_list:
                 print(log_msg + '\n')
 
-
     def run_deploy(self):
         SLEEP_TIME = 60
-        LOG_FILE = 'cloud.log'
+        abort_after = 60 * int(self.deploy_timeout)
+        start = time.time()
 
         log('Starting deployment of environment %s' % self.env_id)
-        deploy_proc = run_proc('fuel --env %s deploy-changes | strings > %s'
-                               % (self.env_id, LOG_FILE))
-
+        deploy_id = None
         ready = False
-        for i in range(int(self.deploy_timeout)):
-            env = parse(exec_cmd('fuel env --env %s' % self.env_id))
-            log('Environment status: %s' % env[0][E['status']])
-            r, _ = exec_cmd('tail -2 %s | head -1' % LOG_FILE, False)
-            if r:
-                log(r)
-            if env[0][E['status']] == 'operational':
-                ready = True
-                break
-            elif (env[0][E['status']] == 'error'
-                  or env[0][E['status']] == 'stopped'):
-                break
-            else:
+        timeout = False
+
+        attempts = 0
+        while attempts < 3:
+            try:
+                if time.time() > start + abort_after:
+                    timeout = True
+                    break
+                if not deploy_id:
+                    deploy_id = self._start_deploy_task()
+                sts, prg, msg = self._deployment_status(deploy_id)
+                if sts == 'error':
+                    log('Error during deployment: {}'.format(msg))
+                    break
+                if sts == 'running':
+                    log('Environment deployment progress: {}%'.format(prg))
+                elif sts == 'ready':
+                    ready = True
+                    break
                 time.sleep(SLEEP_TIME)
-
-        if (env[0][E['status']] <> 'operational'
-            and env[0][E['status']] <> 'error'
-            and env[0][E['status']] <> 'stopped'):
-            err('Deployment timed out, environment %s is not operational, snapshot will not be performed'
-                % self.env_id, self.collect_logs)
-
-        run_proc_wait_terminated(deploy_proc)
-        delete(LOG_FILE)
-
+            except (DeployNotStart, NodesGoOffline) as e:
+                log(e)
+                attempts += 1
+                deploy_id = None
+                time.sleep(SLEEP_TIME * attempts)
+
+        if timeout:
+            err('Deployment timed out, environment %s is not operational, '
+                'snapshot will not be performed' % self.env_id)
         if ready:
-            log('Environment %s successfully deployed' % self.env_id)
+            log('Environment %s successfully deployed'
+                % self.env_id)
         else:
             self.collect_error_logs()
             err('Deployment failed, environment %s is not operational'
                 % self.env_id, self.collect_logs)
 
+    def _start_deploy_task(self):
+        out, _ = exec_cmd('fuel2 env deploy {}'.format(self.env_id), False)
+        id = self._deployment_task_id(out)
+        return id
+
+    def _deployment_task_id(self, response):
+        response = str(response)
+        if response.startswith('Deployment task with id'):
+            for s in response.split():
+                if s.isdigit():
+                    return int(s)
+        raise DeployNotStart('Unable to start deployment: {}'.format(response))
+
+    def _deployment_status(self, id):
+        task = self._task_fields(id)
+        if task['status'] == 'error':
+            if task['message'].endswith(
+                    'offline. Remove them from environment and try again.'):
+                raise NodesGoOffline(task['message'])
+        return task['status'], task['progress'], task['message']
+
+    def _task_fields(self, id):
+        try:
+            out, _ = exec_cmd('fuel2 task show {} -f json'.format(id), False)
+            task_info = json.loads(out)
+            properties = {}
+            # for 9.0 this can be list of dicts or dict
+            # see https://bugs.launchpad.net/fuel/+bug/1625518
+            if isinstance(task_info, list):
+                for d in task_info:
+                    properties.update({d['Field']: d['Value']})
+            else:
+                return task_info
+            return properties
+        except ValueError as e:
+            err('Unable to fetch task info: {}'.format(e))
 
     def collect_logs(self):
         log('Cleaning out any previous deployment logs')
@@ -155,7 +204,6 @@ class Deployment(object):
         r, _ = exec_cmd('tar -czhf /root/deploy-%s.log.tar.gz /var/log/remote'
                         % time.strftime("%Y%m%d-%H%M%S"), False)
         log(r)
-
     def verify_node_status(self):
         node_list = parse(exec_cmd('fuel --env %s node' % self.env_id))
         failed_nodes = []
@@ -169,7 +217,6 @@ class Deployment(object):
                summary += '[node %s, status %s]\n' % (node, status)
            err('Deployment failed: %s' % summary, self.collect_logs)
-
     def health_check(self):
         log('Now running sanity and smoke health checks')
         r = exec_cmd('fuel health --env %s --check sanity,smoke --force'
                      % self.env_id)
@@ -177,7 +224,6 @@ class Deployment(object):
         if 'failure' in r:
             err('Healthcheck failed!', self.collect_logs)
 
-
     def deploy(self):
         self.run_deploy()
         self.verify_node_status()
diff --git a/deploy/environments/execution_environment.py b/deploy/environments/execution_environment.py
index af0e130dd..3812902aa 100644
--- a/deploy/environments/execution_environment.py
+++ b/deploy/environments/execution_environment.py
@@ -46,9 +46,7 @@ class ExecutionEnvironment(object):
                 disk_files.append(source_file)
         log('Deleting VM %s with disks %s' % (vm_name, disk_files))
         exec_cmd('virsh destroy %s' % vm_name, False)
-        exec_cmd('virsh undefine %s' % vm_name, False)
-        for file in disk_files:
-            delete(file)
+        exec_cmd('virsh undefine --managed-save --remove-all-storage %s' % vm_name, False)
 
     def overwrite_xml(self, vm_xml, vm_definition_overwrite):
         if not vm_definition_overwrite:
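
Note on the new status polling above: `fuel2 task show <id> -f json` does not always return the same JSON shape (see https://bugs.launchpad.net/fuel/+bug/1625518), which is why the added _task_fields() helper normalizes the output before run_deploy() reads the status, progress and message fields. A minimal standalone sketch of that normalization, using made-up sample payloads purely for illustration (the real data comes from the CLI and may differ between Fuel releases):

import json

# Hypothetical CLI outputs, for illustration only.
LIST_STYLE = '[{"Field": "status", "Value": "running"}, {"Field": "progress", "Value": 42}]'
DICT_STYLE = '{"status": "ready", "progress": 100, "message": ""}'

def task_fields(raw):
    # Return the task properties as a plain dict, whichever shape was emitted.
    task_info = json.loads(raw)
    if isinstance(task_info, list):
        # 9.0-style output: a list of {"Field": ..., "Value": ...} rows
        return dict((row['Field'], row['Value']) for row in task_info)
    return task_info

print(task_fields(LIST_STYLE)['status'])   # running
print(task_fields(DICT_STYLE)['status'])   # ready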
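
The retry logic in run_deploy() is also worth spelling out: the deploy task is (re)started at most three times, the status is polled at a fixed interval, each restart backs off linearly (SLEEP_TIME * attempts), and an overall deadline (abort_after) bounds the whole loop. A condensed sketch of that pattern, with stand-in start/poll callables; the names below are illustrative, not part of the patch:

import time

SLEEP_TIME = 60

def run_with_retries(start_task, poll_status, deadline, max_attempts=3):
    # start_task() returns a task id or raises on failure to start;
    # poll_status(task_id) returns 'running', 'ready' or 'error'.
    task_id = None
    attempts = 0
    while attempts < max_attempts:
        try:
            if time.time() > deadline:
                return 'timeout'
            if task_id is None:
                task_id = start_task()
            status = poll_status(task_id)
            if status in ('ready', 'error'):
                return status
            time.sleep(SLEEP_TIME)
        except Exception:
            # transient failure: forget the task id and back off linearly
            attempts += 1
            task_id = None
            time.sleep(SLEEP_TIME * attempts)
    return 'gave-up'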