From e6708c869855ab69f9b53959befd82bb2f32f9ad Mon Sep 17 00:00:00 2001 From: Tomi Juvonen Date: Wed, 28 Nov 2018 11:48:27 +0200 Subject: Bug - Testing in Apex with OpenStack master fails Support yet another path to find config files. Tune config changes to take effect properly for maintenance. transport_url parcing enhanced. Nova reset state to error takes well over 1 second these days and at the end it then sends notification that we use. Only reasonable thing is to send notification straight from the Inspector as it should have been done in the first place. Now we can do 200ms as total time, with just a few millisends actuallly spent on sending the notification. Further one could improve this by having node specific Inspector agent to react even in more Telco grade speed. Change-Id: I787f8e9dd6484842c6c568b15767018d11b36862 Signed-off-by: Tomi Juvonen --- doctor_tests/installer/apex.py | 101 +++++++-------------- doctor_tests/installer/base.py | 58 +++++++++++- .../installer/common/restore_compute_config.py | 22 ++--- .../installer/common/set_compute_config.py | 30 ++---- 4 files changed, 104 insertions(+), 107 deletions(-) (limited to 'doctor_tests/installer') diff --git a/doctor_tests/installer/apex.py b/doctor_tests/installer/apex.py index 3c97378c..79c59e9a 100644 --- a/doctor_tests/installer/apex.py +++ b/doctor_tests/installer/apex.py @@ -6,7 +6,6 @@ # which accompanies this distribution, and is available at # http://www.apache.org/licenses/LICENSE-2.0 ############################################################################## -import re import time from doctor_tests.common.constants import Inspector @@ -36,8 +35,6 @@ class ApexInstaller(BaseInstaller): self.key_file = None self.controllers = list() self.computes = list() - self.controller_clients = list() - self.compute_clients = list() def setup(self): self.log.info('Setup Apex installer start......') @@ -83,26 +80,6 @@ class ApexInstaller(BaseInstaller): host_ips = self._run_cmd_remote(self.client, command) return host_ips[0] - def get_transport_url(self): - client = SSHClient(self.controllers[0], self.node_user_name, - key_filename=self.key_file) - if self.use_containers: - ncbase = "/var/lib/config-data/puppet-generated/nova" - else: - ncbase = "" - command = 'sudo grep "^transport_url" %s/etc/nova/nova.conf' % ncbase - - ret, url = client.ssh(command) - if ret: - raise Exception('Exec command to get host ip from controller(%s)' - 'in Apex installer failed, ret=%s, output=%s' - % (self.controllers[0], ret, url)) - # need to use ip instead of hostname - ret = (re.sub("@.*:", "@%s:" % self.controllers[0], - url[0].split("=", 1)[1])) - self.log.debug('get_transport_url %s' % ret) - return ret - def _set_docker_restart_cmd(self, service): # There can be multiple instances running so need to restart all cmd = "for container in `sudo docker ps | grep " @@ -114,22 +91,6 @@ class ApexInstaller(BaseInstaller): def set_apply_patches(self): self.log.info('Set apply patches start......') - if self.conf.test_case != 'fault_management': - if self.use_containers: - restart_cmd = self._set_docker_restart_cmd("nova-compute") - else: - restart_cmd = 'sudo systemctl restart' \ - ' openstack-nova-compute.service' - for node_ip in self.computes: - client = SSHClient(node_ip, self.node_user_name, - key_filename=self.key_file) - self.compute_clients.append(client) - self._run_apply_patches(client, - restart_cmd, - [self.nc_set_compute_script], - python=self.python) - time.sleep(10) - set_scripts = [self.cm_set_script] if self.use_containers: @@ -157,11 +118,28 @@ class ApexInstaller(BaseInstaller): for node_ip in self.controllers: client = SSHClient(node_ip, self.node_user_name, key_filename=self.key_file) - self.controller_clients.append(client) self._run_apply_patches(client, restart_cmd, set_scripts, python=self.python) + time.sleep(5) + + self.log.info('Set apply patches start......') + + if self.conf.test_case != 'fault_management': + if self.use_containers: + restart_cmd = self._set_docker_restart_cmd("nova") + else: + restart_cmd = 'sudo systemctl restart' \ + ' openstack-nova-compute.service' + for node_ip in self.computes: + client = SSHClient(node_ip, self.node_user_name, + key_filename=self.key_file) + self._run_apply_patches(client, + restart_cmd, + [self.nc_set_compute_script], + python=self.python) + time.sleep(5) def restore_apply_patches(self): self.log.info('restore apply patches start......') @@ -190,39 +168,22 @@ class ApexInstaller(BaseInstaller): restart_cmd += ' openstack-congress-server.service' restore_scripts.append(self.cg_restore_script) - for client, node_ip in zip(self.controller_clients, self.controllers): - retry = 0 - while retry < 2: - try: - self._run_apply_patches(client, - restart_cmd, - restore_scripts, - python=self.python) - except Exception: - if retry > 0: - raise Exception("SSHClient to %s feiled" % node_ip) - client = SSHClient(node_ip, self.node_user_name, - key_filename=self.key_file) - retry += 1 - break + for node_ip in self.controllers: + client = SSHClient(node_ip, self.node_user_name, + key_filename=self.key_file) + self._run_apply_patches(client, + restart_cmd, + restore_scripts, + python=self.python) + if self.conf.test_case != 'fault_management': if self.use_containers: restart_cmd = self._set_docker_restart_cmd("nova-compute") else: restart_cmd = 'sudo systemctl restart' \ ' openstack-nova-compute.service' - for client, node_ip in zip(self.compute_clients, self.computes): - retry = 0 - while retry < 2: - try: - self._run_apply_patches( - client, restart_cmd, - [self.nc_restore_compute_script], - python=self.python) - except Exception: - if retry > 0: - raise Exception("SSHClient to %s feiled" % node_ip) - client = SSHClient(node_ip, self.node_user_name, - key_filename=self.key_file) - retry += 1 - break + for node_ip in self.computes: + self._run_apply_patches( + client, restart_cmd, + [self.nc_restore_compute_script], + python=self.python) diff --git a/doctor_tests/installer/base.py b/doctor_tests/installer/base.py index 124b1910..df781ee1 100644 --- a/doctor_tests/installer/base.py +++ b/doctor_tests/installer/base.py @@ -11,6 +11,7 @@ import getpass import grp import os import pwd +import re import six import stat import subprocess @@ -126,6 +127,48 @@ class BaseInstaller(object): os.chmod(ssh_key, stat.S_IREAD) return ssh_key + def get_transport_url(self): + client = utils.SSHClient(self.controllers[0], self.node_user_name, + key_filename=self.key_file) + if self.use_containers: + ncbase = "/var/lib/config-data/puppet-generated/nova" + else: + ncbase = "" + try: + cmd = 'sudo grep "^transport_url" %s/etc/nova/nova.conf' % ncbase + ret, url = client.ssh(cmd) + if ret: + raise Exception('Exec command to get transport from ' + 'controller(%s) in Apex installer failed, ' + 'ret=%s, output=%s' + % (self.controllers[0], ret, url)) + else: + # need to use ip instead of hostname + ret = (re.sub("@.*:", "@%s:" % self.controllers[0], + url[0].split("=", 1)[1])) + except: + cmd = 'grep -i "^rabbit" %s/etc/nova/nova.conf' % ncbase + ret, lines = client.ssh(cmd) + if ret: + raise Exception('Exec command to get transport from ' + 'controller(%s) in Apex installer failed, ' + 'ret=%s, output=%s' + % (self.controllers[0], ret, url)) + else: + for line in lines.split('\n'): + if line.startswith("rabbit_userid"): + rabbit_userid = line.split("=") + if line.startswith("rabbit_port"): + rabbit_port = line.split("=") + if line.startswith("rabbit_password"): + rabbit_password = line.split("=") + ret = "rabbit://%s:%s@%s:%s/?ssl=0" % (rabbit_userid, + rabbit_password, + self.controllers[0], + rabbit_port) + self.log.debug('get_transport_url %s' % ret) + return ret + def _run_cmd_remote(self, client, command): self.log.info('Run command=%s in %s installer......' % (command, self.conf.installer.type)) @@ -161,14 +204,21 @@ class BaseInstaller(object): for script_name in script_names: script_abs_path = '{0}/{1}/{2}'.format(installer_dir, 'common', script_name) - client.scp(script_abs_path, script_name) - cmd = 'sudo %s %s' % (python, script_name) - ret, output = client.ssh(cmd) + try: + client.scp(script_abs_path, script_name) + except: + client.scp(script_abs_path, script_name) + try: + cmd = 'sudo %s %s' % (python, script_name) + ret, output = client.ssh(cmd) + except: + ret, output = client.ssh(cmd) + if ret: raise Exception('Do the command in remote' ' node failed, ret=%s, cmd=%s, output=%s' % (ret, cmd, output)) - if 'nova-scheduler' in restart_cmd: + if 'nova' in restart_cmd: # Make sure scheduler has proper cpu_allocation_ratio time.sleep(5) client.ssh(restart_cmd) diff --git a/doctor_tests/installer/common/restore_compute_config.py b/doctor_tests/installer/common/restore_compute_config.py index 0e9939fd..82e10a66 100644 --- a/doctor_tests/installer/common/restore_compute_config.py +++ b/doctor_tests/installer/common/restore_compute_config.py @@ -11,18 +11,16 @@ import shutil def restore_cpu_allocation_ratio(): - nova_base = "/var/lib/config-data/puppet-generated/nova" - if not os.path.isdir(nova_base): - nova_base = "" - nova_file = nova_base + '/etc/nova/nova.conf' - nova_file_bak = nova_base + '/etc/nova/nova.bak' - - if not os.path.isfile(nova_file_bak): - print('Bak_file:%s does not exist.' % nova_file_bak) - else: - print('restore: %s' % nova_file) - shutil.copyfile(nova_file_bak, nova_file) - os.remove(nova_file_bak) + for nova_file_bak in ["/var/lib/config-data/puppet-generated/nova_libvirt/etc/nova/nova.bak", # noqa + "/var/lib/config-data/puppet-generated/nova/etc/nova/nova.bak", # noqa + "/etc/nova/nova.bak"]: + if os.path.isfile(nova_file_bak): + nova_file = nova_file_bak.replace(".bak", ".conf") + print('restoring nova.bak.') + shutil.copyfile(nova_file_bak, nova_file) + os.remove(nova_file_bak) + return + print('nova.bak does not exist.') return restore_cpu_allocation_ratio() diff --git a/doctor_tests/installer/common/set_compute_config.py b/doctor_tests/installer/common/set_compute_config.py index 86266085..76ac649b 100644 --- a/doctor_tests/installer/common/set_compute_config.py +++ b/doctor_tests/installer/common/set_compute_config.py @@ -10,29 +10,17 @@ import os import shutil -def make_initial_config(service, dest): - for mk in ["", "/etc", "/%s" % service]: - dest += mk - os.mkdir(dest) - src = "/etc/%s/%s.conf" % (service, service) - dest += "/%s.conf" % service - shutil.copyfile(src, dest) - - def set_cpu_allocation_ratio(): - docker_conf_base_dir = "/var/lib/config-data/puppet-generated" - if not os.path.isdir(docker_conf_base_dir): - nova_base = "" - else: - nova_base = "%s/nova" % docker_conf_base_dir - if not os.path.isdir(nova_base): - # nova.conf to be used might not exist - make_initial_config("nova", nova_base) - nova_file = nova_base + '/etc/nova/nova.conf' - nova_file_bak = nova_base + '/etc/nova/nova.bak' + nova_file_bak = None + for nova_file in ["/var/lib/config-data/puppet-generated/nova_libvirt/etc/nova/nova.conf", # noqa + "/var/lib/config-data/puppet-generated/nova/etc/nova/nova.conf", # noqa + "/etc/nova/nova.conf"]: + if os.path.isfile(nova_file): + nova_file_bak = nova_file.replace(".conf", ".bak") + break - if not os.path.isfile(nova_file): - raise Exception("File doesn't exist: %s." % nova_file) + if nova_file_bak is None: + raise Exception("Could not find nova.conf") # TODO (tojuvone): Unfortunately ConfigParser did not produce working conf fcheck = open(nova_file) found_list = ([ca for ca in fcheck.readlines() if "cpu_allocation_ratio" -- cgit 1.2.3-korg