From 1fc07d1d0ad750e6d1049f5b763320db2de1b396 Mon Sep 17 00:00:00 2001 From: Jonas Bjurel Date: Wed, 11 May 2016 13:38:31 +0200 Subject: Introducing collection of all fuel and stack deployment logs. The purpose of this patch is to collect all available Fuel snapshots and stack/node deployment logs for later off-line troubleshooting. The intention is that Jenkins, or other deployment robots, will be able to collect all logs from the deployment and store them at some repository where developers can fetch them and perform off-line post-deployment troubleshooting. The following script arguments have been added: CI Arg changes: Added an argument to ci/deploy.sh: -L [Deploy log path and file name], e.g. -L ~/jenkins/deploy/deploy-888.log.tar.gz This will create a tar gzip archive at the path and filename pointed out. If -L is not specified, the log archive will be placed under the CI directory with the following name convention: deploy-YYYYMMDD-HHMMSS.log.tar.gz Fuel Internal deploy changes: Added an argument to deploy/deploy.py: -log [Deploy log path and file name], e.g. -log ~/jenkins/deploy/deploy-888.log.tar.gz This will create a tar gzip archive at the path and filename pointed out. If -log is not specified, the log archive will be placed under the CI directory with the following name convention: deploy-YYYYMMDD-HHMMSS.log.tar.gz READY TO MERGE! VERIFIED! 
Change-Id: Icb75d9d2e66bdd47f75dcca29071943444d5c823 Signed-off-by: Jonas Bjurel --- ci/deploy.sh | 11 +++++++--- deploy/README | 18 ++++++++-------- deploy/cloud/deployment.py | 52 +++++++++++++++++++++++++++++++++------------- deploy/common.py | 15 ++++++++++++- deploy/deploy.py | 15 ++++++++++--- deploy/deploy_env.py | 14 +++++++++++-- deploy/ssh_client.py | 4 ++-- 7 files changed, 95 insertions(+), 34 deletions(-) diff --git a/ci/deploy.sh b/ci/deploy.sh index dc13f1c77..c7a1d1858 100755 --- a/ci/deploy.sh +++ b/ci/deploy.sh @@ -43,6 +43,7 @@ OPTIONS: -h Print this message and exit -H No health check -l Lab-name + -L Deployment log path and file name -p Pod-name -s Deploy-scenario short-name/base-file-name -S Storage dir for VM images @@ -66,6 +67,7 @@ Input parameters to the build script is: -h Print this message and exit -H Do not run fuel built in health-check after successfull deployment -l Lab name as defined in the configuration directory, e.g. lf +-L Deployment log path and name, eg. -L /home/jenkins/logs/job888.log.tar.gz -p POD name as defined in the configuration directory, e.g. 
pod-1 -s Deployment-scenario, this points to a deployment/test scenario file as defined in the configuration directory: @@ -118,7 +120,7 @@ DRY_RUN=0 ############################################################################ # BEGIN of main # -while getopts "b:B:dfFHl:p:s:S:i:he" OPTION +while getopts "b:B:dfFHl:L:p:s:S:i:he" OPTION do case $OPTION in b) @@ -155,6 +157,9 @@ do l) TARGET_LAB=${OPTARG} ;; + L) + DEPLOY_LOG="-log ${OPTARG}" + ;; p) TARGET_POD=${OPTARG} ;; @@ -235,8 +240,8 @@ if [ $DRY_RUN -eq 0 ]; then ISO=${SCRIPT_PATH}/ISO/image.iso fi # Start deployment - echo "python deploy.py $STORAGE_DIR $PXE_BRIDGE $USE_EXISTING_FUEL $FUEL_CREATION_ONLY $NO_HEALTH_CHECK $NO_DEPLOY_ENVIRONMENT -dea ${SCRIPT_PATH}/config/dea.yaml -dha ${SCRIPT_PATH}/config/dha.yaml -iso $ISO" - python deploy.py $STORAGE_DIR $PXE_BRIDGE $USE_EXISTING_FUEL $FUEL_CREATION_ONLY $NO_HEALTH_CHECK $NO_DEPLOY_ENVIRONMENT -dea ${SCRIPT_PATH}/config/dea.yaml -dha ${SCRIPT_PATH}/config/dha.yaml -iso $ISO + echo "python deploy.py $DEPLOY_LOG $STORAGE_DIR $PXE_BRIDGE $USE_EXISTING_FUEL $FUEL_CREATION_ONLY $NO_HEALTH_CHECK $NO_DEPLOY_ENVIRONMENT -dea ${SCRIPT_PATH}/config/dea.yaml -dha ${SCRIPT_PATH}/config/dha.yaml -iso $ISO" + python deploy.py $DEPLOY_LOG $STORAGE_DIR $PXE_BRIDGE $USE_EXISTING_FUEL $FUEL_CREATION_ONLY $NO_HEALTH_CHECK $NO_DEPLOY_ENVIRONMENT -dea ${SCRIPT_PATH}/config/dea.yaml -dha ${SCRIPT_PATH}/config/dha.yaml -iso $ISO fi popd > /dev/null diff --git a/deploy/README b/deploy/README index fd1548344..8de4920c8 100644 --- a/deploy/README +++ b/deploy/README @@ -84,41 +84,41 @@ optional arguments: -np Do not install Fuel Plugins -dt DEPLOY_TIMEOUT Deployment timeout (in minutes) [default: 240] -nde Do not launch environment deployment - + -log [LOG_FILE] Deployment log path and file name * EXAMPLES: - Install Fuel Master and deploy OPNFV Cloud from scratch on Hardware Environment: - sudo python deploy.py -iso ~/ISO/opnfv.iso -dea ~/CONF/hardware/dea.yaml -dha 
~/CONF/hardware/dha.yaml -s /mnt/images -b pxebr + sudo python deploy.py -iso ~/ISO/opnfv.iso -dea ~/CONF/hardware/dea.yaml -dha ~/CONF/hardware/dha.yaml -s /mnt/images -b pxebr -log ~/Deployment-888.log.tar.gz - Install Fuel Master and deploy OPNFV Cloud from scratch on Virtual Environment: - sudo python deploy.py -iso ~/ISO/opnfv.iso -dea ~/CONF/virtual/dea.yaml -dha ~/CONF/virtual/dha.yaml -s /mnt/images + sudo python deploy.py -iso ~/ISO/opnfv.iso -dea ~/CONF/virtual/dea.yaml -dha ~/CONF/virtual/dha.yaml -s /mnt/images -log ~/Deployment-888.log.tar.gz - Deploy OPNFV Cloud on an already active Environment where Fuel Master VM is running so no need to install Fuel again: - sudo python deploy.py -nf -dea ~/CONF/virtual/dea.yaml -dha ~/CONF/virtual/dha.yaml + sudo python deploy.py -nf -dea ~/CONF/virtual/dea.yaml -dha ~/CONF/virtual/dha.yaml -log ~/Deployment-888.log.tar.gz => with plugin installation - sudo python deploy.py -nf -dea ~/CONF/virtual/dea.yaml -dha ~/CONF/virtual/dha.yaml + sudo python deploy.py -nf -dea ~/CONF/virtual/dea.yaml -dha ~/CONF/virtual/dha.yaml -log ~/Deployment-888.log.tar.gz => with cleanup after deployment is finished - sudo python deploy.py -nf -dea ~/CONF/virtual/dea.yaml -dha ~/CONF/virtual/dha.yaml -c + sudo python deploy.py -nf -dea ~/CONF/virtual/dea.yaml -dha ~/CONF/virtual/dha.yaml -c -log ~/Deployment-888.log.tar.gz => no healthcheck after deployment is completed - sudo python deploy.py -nf -dea ~/CONF/virtual/dea.yaml -dha ~/CONF/virtual/dha.yaml -nh + sudo python deploy.py -nf -dea ~/CONF/virtual/dea.yaml -dha ~/CONF/virtual/dha.yaml -nh -log ~/Deployment-888.log.tar.gz - Install Fuel Master only (and Node VMs when using virtual environment): => for virtual environment: - sudo python deploy.py -iso ~/ISO/opnfv.iso -dea ~/CONF/virtual/dea.yaml -dha ~/CONF/virtual/dha.yaml -s /mnt/images + sudo python deploy.py -iso ~/ISO/opnfv.iso -dea ~/CONF/virtual/dea.yaml -dha ~/CONF/virtual/dha.yaml -s /mnt/images -log 
~/Deployment-888.log.tar.gz => for hardware environment: - sudo python deploy.py -iso ~/ISO/opnfv.iso -dea ~/CONF/hardware/dea.yaml -dha ~/CONF/hardware/dha.yaml -s /mnt/images -b pxebr + sudo python deploy.py -iso ~/ISO/opnfv.iso -dea ~/CONF/hardware/dea.yaml -dha ~/CONF/hardware/dha.yaml -s /mnt/images -b pxebr -log ~/Deployment-888.log.tar.gz - Cleanup a running OPNFV environment: diff --git a/deploy/cloud/deployment.py b/deploy/cloud/deployment.py index 0127d2a52..f8e1617f8 100644 --- a/deploy/cloud/deployment.py +++ b/deploy/cloud/deployment.py @@ -7,7 +7,6 @@ # http://www.apache.org/licenses/LICENSE-2.0 ############################################################################### - import time import re @@ -16,6 +15,8 @@ from common import ( E, exec_cmd, run_proc, + run_proc_wait_terminated, + run_proc_kill, parse, err, log, @@ -30,6 +31,7 @@ LIST_OF_CHAR_TO_BE_ESCAPED = ['[', ']', '"'] class Deployment(object): + def __init__(self, dea, yaml_config_dir, env_id, node_id_roles_dict, no_health_check, deploy_timeout): self.dea = dea @@ -41,6 +43,7 @@ class Deployment(object): self.pattern = re.compile( '\d\d\d\d-\d\d-\d\d\s\d\d:\d\d:\d\d') + def collect_error_logs(self): for node_id, roles_blade in self.node_id_roles_dict.iteritems(): log_list = [] @@ -96,13 +99,14 @@ class Deployment(object): for log_msg in log_list: print(log_msg + '\n') + def run_deploy(self): SLEEP_TIME = 60 LOG_FILE = 'cloud.log' log('Starting deployment of environment %s' % self.env_id) - p = run_proc('fuel --env %s deploy-changes | strings > %s' - % (self.env_id, LOG_FILE)) + deploy_proc = run_proc('fuel --env %s deploy-changes | strings > %s' + % (self.env_id, LOG_FILE)) ready = False for i in range(int(self.deploy_timeout)): @@ -120,19 +124,37 @@ class Deployment(object): else: time.sleep(SLEEP_TIME) - p.poll() - if p.returncode == None: - log('The process deploying the changes has not yet finished.') - log('''The file %s won't be deleted''' % LOG_FILE) - else: - delete(LOG_FILE) + if 
(env[0][E['status']] <> 'operational' + and env[0][E['status']] <> 'error' + and env[0][E['status']] <> 'stopped'): + err('Deployment timed out, environment %s is not operational, snapshot will not be performed' + % self.env_id, self.collect_logs) + + run_proc_wait_terminated(deploy_proc) + delete(LOG_FILE) if ready: log('Environment %s successfully deployed' % self.env_id) else: self.collect_error_logs() err('Deployment failed, environment %s is not operational' - % self.env_id) + % self.env_id, self.collect_logs) + + + def collect_logs(self): + log('Cleaning out any previous deployment logs') + exec_cmd('rm -f /var/log/remote/fuel-snapshot-*', False) + exec_cmd('rm -f /root/deploy-*', False) + log('Generating Fuel deploy snap-shot') + if exec_cmd('fuel snapshot < /dev/null &> snapshot.log', False)[1] <> 0: + log('Could not create a Fuel snapshot') + else: + exec_cmd('mv /root/fuel-snapshot* /var/log/remote/', False) + + log('Collecting all Fuel Snapshot & deploy log files') + r, _ = exec_cmd('tar -czhf /root/deploy-%s.log.tar.gz /var/log/remote' % time.strftime("%Y%m%d-%H%M%S"), False) + log(r) + def verify_node_status(self): node_list = parse(exec_cmd('fuel node list')) @@ -145,18 +167,20 @@ class Deployment(object): summary = '' for node, status in failed_nodes: summary += '[node %s, status %s]\n' % (node, status) - err('Deployment failed: %s' % summary) + err('Deployment failed: %s' % summary, self.collect_logs) + def health_check(self): log('Now running sanity and smoke health checks') - r = exec_cmd('fuel health --env %s --check sanity,smoke --force' - % self.env_id) + r = exec_cmd('fuel health --env %s --check sanity,smoke --force' % self.env_id) log(r) if 'failure' in r: - err('Healthcheck failed!') + err('Healthcheck failed!', self.collect_logs) + def deploy(self): self.run_deploy() self.verify_node_status() if not self.no_health_check: self.health_check() + self.collect_logs() diff --git a/deploy/common.py b/deploy/common.py index 3cd3e0e6e..9c0f8abd1 
100644 --- a/deploy/common.py +++ b/deploy/common.py @@ -77,6 +77,17 @@ def run_proc(cmd): return process +def run_proc_wait_terminated(process): + response = process.communicate()[0].strip() + return_code = process.returncode + return response, return_code + + +def run_proc_kill(process): + response = process.kill() + return response + + def parse(printout): parsed_list = [] lines = printout.splitlines() @@ -99,8 +110,10 @@ def clean(lines): return parsed if len(parsed_list) == 1 else parsed_list -def err(message): +def err(message, fun = None, *args): LOG.error('%s\n' % message) + if fun: + fun(*args) sys.exit(1) diff --git a/deploy/deploy.py b/deploy/deploy.py index 179ee7bcb..8064af993 100755 --- a/deploy/deploy.py +++ b/deploy/deploy.py @@ -30,6 +30,7 @@ from common import ( err, warn, check_file_exists, + check_dir_exists, create_dir_if_not_exists, delete, check_if_root, @@ -61,7 +62,7 @@ class AutoDeploy(object): def __init__(self, no_fuel, fuel_only, no_health_check, cleanup_only, cleanup, storage_dir, pxe_bridge, iso_file, dea_file, dha_file, fuel_plugins_dir, fuel_plugins_conf_dir, - no_plugins, deploy_timeout, no_deploy_environment): + no_plugins, deploy_timeout, no_deploy_environment, deploy_log): self.no_fuel = no_fuel self.fuel_only = fuel_only self.no_health_check = no_health_check @@ -77,6 +78,7 @@ class AutoDeploy(object): self.no_plugins = no_plugins self.deploy_timeout = deploy_timeout self.no_deploy_environment = no_deploy_environment + self.deploy_log = deploy_log self.dea = (DeploymentEnvironmentAdapter(dea_file) if not cleanup_only else None) self.dha = DeploymentHardwareAdapter(dha_file) @@ -202,7 +204,7 @@ class AutoDeploy(object): self.fuel_username, self.fuel_password, self.dea_file, self.fuel_plugins_conf_dir, WORK_DIR, self.no_health_check, self.deploy_timeout, - self.no_deploy_environment) + self.no_deploy_environment, self.deploy_log) return dep.deploy() def setup_execution_environment(self): @@ -332,12 +334,17 @@ def 
parse_arguments(): parser.add_argument('-nde', dest='no_deploy_environment', action='store_true', default=False, help=('Do not launch environment deployment')) + parser.add_argument('-log', dest='deploy_log', + action='store', default='../ci/.', + help=('Path and name of the deployment log archive')) args = parser.parse_args() log(args) check_file_exists(args.dha_file) + check_dir_exists(os.path.dirname(args.deploy_log)) + if not args.cleanup_only: check_file_exists(args.dea_file) check_fuel_plugins_dir(args.fuel_plugins_dir) @@ -350,6 +357,7 @@ def parse_arguments(): create_dir_if_not_exists(args.storage_dir) check_bridge(args.pxe_bridge, args.dha_file) + kwargs = {'no_fuel': args.no_fuel, 'fuel_only': args.fuel_only, 'no_health_check': args.no_health_check, 'cleanup_only': args.cleanup_only, 'cleanup': args.cleanup, @@ -360,7 +368,8 @@ def parse_arguments(): 'fuel_plugins_conf_dir': args.fuel_plugins_conf_dir, 'no_plugins': args.no_plugins, 'deploy_timeout': args.deploy_timeout, - 'no_deploy_environment': args.no_deploy_environment} + 'no_deploy_environment': args.no_deploy_environment, + 'deploy_log': args.deploy_log} return kwargs diff --git a/deploy/deploy_env.py b/deploy/deploy_env.py index 5eeaf11e0..93dc3959b 100644 --- a/deploy/deploy_env.py +++ b/deploy/deploy_env.py @@ -20,6 +20,7 @@ from ssh_client import SSHClient from common import ( err, log, + exec_cmd, parse, N, E, @@ -35,7 +36,7 @@ class CloudDeploy(object): def __init__(self, dea, dha, fuel_ip, fuel_username, fuel_password, dea_file, fuel_plugins_conf_dir, work_dir, no_health_check, - deploy_timeout, no_deploy_environment): + deploy_timeout, no_deploy_environment, deploy_log): self.dea = dea self.dha = dha self.fuel_ip = fuel_ip @@ -51,6 +52,7 @@ class CloudDeploy(object): self.no_health_check = no_health_check self.deploy_timeout = deploy_timeout self.no_deploy_environment = no_deploy_environment + self.deploy_log = deploy_log self.file_dir = os.path.dirname(os.path.realpath(__file__)) self.ssh 
= SSHClient(self.fuel_ip, self.fuel_username, self.fuel_password) @@ -256,6 +258,10 @@ class CloudDeploy(object): self.set_boot_order(['pxe', 'disk']) self.power_on_nodes() + def get_put_deploy_log(self): + with self.ssh as s: + s.scp_get("deploy-*", local=self.deploy_log) + def deploy(self): self.set_boot_order_nodes() @@ -272,4 +278,8 @@ class CloudDeploy(object): delete(self.updated_dea_file) - return self.run_cloud_deploy(CLOUD_DEPLOY_FILE) + rc = self.run_cloud_deploy(CLOUD_DEPLOY_FILE) + + self.get_put_deploy_log() + + return rc diff --git a/deploy/ssh_client.py b/deploy/ssh_client.py index df780961f..f6888d52d 100644 --- a/deploy/ssh_client.py +++ b/deploy/ssh_client.py @@ -85,14 +85,14 @@ class SSHClient(object): def scp_get(self, remote, local='.', dir=False): try: - with scp.SCPClient(self.client.get_transport()) as _scp: + with scp.SCPClient(self.client.get_transport(), sanitize=lambda x: x) as _scp: _scp.get(remote, local, dir) except Exception as e: err(e) def scp_put(self, local, remote='.', dir=False): try: - with scp.SCPClient(self.client.get_transport()) as _scp: + with scp.SCPClient(self.client.get_transport(), sanitize=lambda x: x) as _scp: _scp.put(local, remote, dir) except Exception as e: err(e) -- cgit 1.2.3-korg