-rw-r--r--   doctor_tests/inspector/__init__.py                       |   8
-rw-r--r--   doctor_tests/inspector/sample.py                         |  44
-rw-r--r--   doctor_tests/installer/apex.py                           | 101
-rw-r--r--   doctor_tests/installer/base.py                           |  58
-rw-r--r--   doctor_tests/installer/common/restore_compute_config.py |  22
-rw-r--r--   doctor_tests/installer/common/set_compute_config.py     |  30
-rw-r--r--   doctor_tests/main.py                                     |   7
-rw-r--r--   doctor_tests/scenario/fault_management.py                |   4
-rw-r--r--   doctor_tests/scenario/maintenance.py                     |  14
9 files changed, 168 insertions, 120 deletions
diff --git a/doctor_tests/inspector/__init__.py b/doctor_tests/inspector/__init__.py
index 31291baf..50365a61 100644
--- a/doctor_tests/inspector/__init__.py
+++ b/doctor_tests/inspector/__init__.py
@@ -42,6 +42,10 @@ _inspector_name_class_mapping = {
 }
 
-def get_inspector(conf, log):
+def get_inspector(conf, log, transport_url=None):
     inspector_class = _inspector_name_class_mapping[conf.inspector.type]
-    return importutils.import_object(inspector_class, conf, log)
+    if conf.inspector.type == 'sample':
+        return importutils.import_object(inspector_class, conf, log,
+                                         transport_url)
+    else:
+        return importutils.import_object(inspector_class, conf, log)
diff --git a/doctor_tests/inspector/sample.py b/doctor_tests/inspector/sample.py
index a55a12b7..baf0306f 100644
--- a/doctor_tests/inspector/sample.py
+++ b/doctor_tests/inspector/sample.py
@@ -10,6 +10,7 @@ import collections
 from flask import Flask
 from flask import request
 import json
+import oslo_messaging
 import time
 from threading import Thread
 import requests
@@ -26,7 +27,7 @@ from doctor_tests.inspector.base import BaseInspector
 class SampleInspector(BaseInspector):
     event_type = 'compute.host.down'
 
-    def __init__(self, conf, log):
+    def __init__(self, conf, log, trasport_url):
         super(SampleInspector, self).__init__(conf, log)
         self.inspector_url = self.get_inspector_url()
         self.novaclients = list()
@@ -43,6 +44,17 @@ class SampleInspector(BaseInspector):
         self.hostnames = list()
         self.app = None
 
+        try:
+            transport = oslo_messaging.get_notification_transport(self.conf,
+                                                                   trasport_url)
+            self.notif = oslo_messaging.Notifier(transport,
+                                                 'compute.instance.update',
+                                                 driver='messaging',
+                                                 topics=['notifications'])
+            self.notif = self.notif.prepare(publisher_id='sample')
+        except:
+            self.notif = None
+
     def _init_novaclients(self):
         self.NUMBER_OF_CLIENTS = self.conf.instance_count
         auth = get_identity_auth(project=self.conf.doctor_project)
@@ -54,7 +66,7 @@ class SampleInspector(BaseInspector):
     def _init_servers_list(self):
         self.servers.clear()
         opts = {'all_tenants': True}
-        servers = self.nova.servers.list(search_opts=opts)
+        servers = self.nova.servers.list(detailed=True, search_opts=opts)
         for server in servers:
             try:
                 host = server.__dict__.get('OS-EXT-SRV-ATTR:host')
@@ -97,10 +109,14 @@ class SampleInspector(BaseInspector):
             event_type = event['type']
             if event_type == self.event_type:
                 self.hostnames.append(hostname)
+                if self.notif is not None:
+                    thr0 = self._send_notif(hostname)
                 thr1 = self._disable_compute_host(hostname)
                 thr2 = self._vms_reset_state('error', hostname)
                 if self.conf.inspector.update_neutron_port_dp_status:
                     thr3 = self._set_ports_data_plane_status('DOWN', hostname)
+                if self.notif is not None:
+                    thr0.join()
                 thr1.join()
                 thr2.join()
                 if self.conf.inspector.update_neutron_port_dp_status:
@@ -156,8 +172,8 @@ class SampleInspector(BaseInspector):
             nova.servers.reset_state(server, state)
             vmdown_time = time.time()
             self.vm_down_time = vmdown_time
-            self.log.info('doctor mark vm(%s) error at %s'
-                          % (server, vmdown_time))
+            self.log.info('doctor mark vm(%s) %s at %s'
+                          % (server, state, vmdown_time))
 
         thrs = []
         for nova, server in zip(self.novaclients, self.servers[hostname]):
@@ -167,6 +183,26 @@ class SampleInspector(BaseInspector):
             t.join()
 
     @utils.run_async
+    def _send_notif(self, hostname):
+
+        @utils.run_async
+        def _send_notif(server):
+            payload = dict(tenant_id=server.tenant_id,
+                           instance_id=server.id,
+                           state="error")
+            self.notif.info({'some': 'context'}, 'compute.instance.update',
+                            payload)
+            self.log.info('doctor compute.instance.update vm(%s) error %s'
+                          % (server, time.time()))
+
+        thrs = []
+        for server in self.servers[hostname]:
+            t = _send_notif(server)
+            thrs.append(t)
+        for t in thrs:
+            t.join()
+
+    @utils.run_async
     def _set_ports_data_plane_status(self, status, hostname):
         body = {'data_plane_status': status}
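
With this change the sample inspector pushes a compute.instance.update notification for every VM on the failed host, in parallel with disabling the host and resetting VM state. A minimal standalone sketch of that notification path, assuming oslo.messaging is installed and a transport URL is reachable; the URL, project/instance IDs and the bare CONF object below are illustrative, not taken from the patch:

    from oslo_config import cfg
    import oslo_messaging

    # Illustrative endpoint; the patch obtains this via installer.get_transport_url()
    transport_url = 'rabbit://user:secret@192.0.2.10:5672/'

    transport = oslo_messaging.get_notification_transport(cfg.CONF,
                                                          transport_url)
    notifier = oslo_messaging.Notifier(transport,
                                       publisher_id='sample',
                                       driver='messaging',
                                       topics=['notifications'])

    # One INFO-level notification per affected server, mirroring _send_notif()
    payload = {'tenant_id': 'demo-project-id',    # illustrative values
               'instance_id': 'demo-instance-id',
               'state': 'error'}
    notifier.info({}, 'compute.instance.update', payload)
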
diff --git a/doctor_tests/installer/apex.py b/doctor_tests/installer/apex.py
index 3c97378c..79c59e9a 100644
--- a/doctor_tests/installer/apex.py
+++ b/doctor_tests/installer/apex.py
@@ -6,7 +6,6 @@
 # which accompanies this distribution, and is available at
 # http://www.apache.org/licenses/LICENSE-2.0
 ##############################################################################
-import re
 import time
 
 from doctor_tests.common.constants import Inspector
@@ -36,8 +35,6 @@ class ApexInstaller(BaseInstaller):
         self.key_file = None
         self.controllers = list()
         self.computes = list()
-        self.controller_clients = list()
-        self.compute_clients = list()
 
     def setup(self):
         self.log.info('Setup Apex installer start......')
@@ -83,26 +80,6 @@ class ApexInstaller(BaseInstaller):
         host_ips = self._run_cmd_remote(self.client, command)
         return host_ips[0]
 
-    def get_transport_url(self):
-        client = SSHClient(self.controllers[0], self.node_user_name,
-                           key_filename=self.key_file)
-        if self.use_containers:
-            ncbase = "/var/lib/config-data/puppet-generated/nova"
-        else:
-            ncbase = ""
-        command = 'sudo grep "^transport_url" %s/etc/nova/nova.conf' % ncbase
-
-        ret, url = client.ssh(command)
-        if ret:
-            raise Exception('Exec command to get host ip from controller(%s)'
-                            'in Apex installer failed, ret=%s, output=%s'
-                            % (self.controllers[0], ret, url))
-        # need to use ip instead of hostname
-        ret = (re.sub("@.*:", "@%s:" % self.controllers[0],
-               url[0].split("=", 1)[1]))
-        self.log.debug('get_transport_url %s' % ret)
-        return ret
-
     def _set_docker_restart_cmd(self, service):
         # There can be multiple instances running so need to restart all
         cmd = "for container in `sudo docker ps | grep "
@@ -114,22 +91,6 @@ def set_apply_patches(self):
         self.log.info('Set apply patches start......')
 
-        if self.conf.test_case != 'fault_management':
-            if self.use_containers:
-                restart_cmd = self._set_docker_restart_cmd("nova-compute")
-            else:
-                restart_cmd = 'sudo systemctl restart' \
-                              ' openstack-nova-compute.service'
-            for node_ip in self.computes:
-                client = SSHClient(node_ip, self.node_user_name,
-                                   key_filename=self.key_file)
-                self.compute_clients.append(client)
-                self._run_apply_patches(client,
-                                        restart_cmd,
-                                        [self.nc_set_compute_script],
-                                        python=self.python)
-            time.sleep(10)
-
         set_scripts = [self.cm_set_script]
 
         if self.use_containers:
@@ -157,11 +118,28 @@ class ApexInstaller(BaseInstaller):
         for node_ip in self.controllers:
             client = SSHClient(node_ip, self.node_user_name,
                                key_filename=self.key_file)
-            self.controller_clients.append(client)
             self._run_apply_patches(client,
                                     restart_cmd,
                                     set_scripts,
                                     python=self.python)
+        time.sleep(5)
+
+        self.log.info('Set apply patches start......')
+
+        if self.conf.test_case != 'fault_management':
+            if self.use_containers:
+                restart_cmd = self._set_docker_restart_cmd("nova")
+            else:
+                restart_cmd = 'sudo systemctl restart' \
+                              ' openstack-nova-compute.service'
+            for node_ip in self.computes:
+                client = SSHClient(node_ip, self.node_user_name,
+                                   key_filename=self.key_file)
+                self._run_apply_patches(client,
+                                        restart_cmd,
+                                        [self.nc_set_compute_script],
+                                        python=self.python)
+            time.sleep(5)
 
     def restore_apply_patches(self):
         self.log.info('restore apply patches start......')
@@ -190,39 +168,22 @@ class ApexInstaller(BaseInstaller):
             restart_cmd += ' openstack-congress-server.service'
             restore_scripts.append(self.cg_restore_script)
 
-        for client, node_ip in zip(self.controller_clients, self.controllers):
-            retry = 0
-            while retry < 2:
-                try:
-                    self._run_apply_patches(client,
-                                            restart_cmd,
-                                            restore_scripts,
-                                            python=self.python)
-                except Exception:
-                    if retry > 0:
-                        raise Exception("SSHClient to %s feiled" % node_ip)
-                    client = SSHClient(node_ip, self.node_user_name,
-                                       key_filename=self.key_file)
-                    retry += 1
-                break
+        for node_ip in self.controllers:
+            client = SSHClient(node_ip, self.node_user_name,
+                               key_filename=self.key_file)
+            self._run_apply_patches(client,
+                                    restart_cmd,
+                                    restore_scripts,
+                                    python=self.python)
+
         if self.conf.test_case != 'fault_management':
             if self.use_containers:
                 restart_cmd = self._set_docker_restart_cmd("nova-compute")
             else:
                 restart_cmd = 'sudo systemctl restart' \
                               ' openstack-nova-compute.service'
-            for client, node_ip in zip(self.compute_clients, self.computes):
-                retry = 0
-                while retry < 2:
-                    try:
-                        self._run_apply_patches(
-                            client, restart_cmd,
-                            [self.nc_restore_compute_script],
-                            python=self.python)
-                    except Exception:
-                        if retry > 0:
-                            raise Exception("SSHClient to %s feiled" % node_ip)
-                        client = SSHClient(node_ip, self.node_user_name,
-                                           key_filename=self.key_file)
-                        retry += 1
-                    break
+            for node_ip in self.computes:
+                self._run_apply_patches(
+                    client, restart_cmd,
+                    [self.nc_restore_compute_script],
+                    python=self.python)
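
set_apply_patches() now patches and restarts the controllers first and only then the computes, restarting every matching container on a node because several instances of the same service may be running. The hunks above show only the start of the command string built by _set_docker_restart_cmd(); the completion of that shell loop sketched below is an assumption, not the exact command in the file:

    def set_docker_restart_cmd(service):
        # Restart every container whose `sudo docker ps` line mentions the service name
        cmd = "for container in `sudo docker ps | grep "
        cmd += service
        cmd += " | awk '{print $1}'`; do sudo docker restart $container; done"
        return cmd

    print(set_docker_restart_cmd("nova"))
    # for container in `sudo docker ps | grep nova | awk '{print $1}'`; do ...
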
diff --git a/doctor_tests/installer/base.py b/doctor_tests/installer/base.py
index 124b1910..df781ee1 100644
--- a/doctor_tests/installer/base.py
+++ b/doctor_tests/installer/base.py
@@ -11,6 +11,7 @@ import getpass
 import grp
 import os
 import pwd
+import re
 import six
 import stat
 import subprocess
@@ -126,6 +127,48 @@ class BaseInstaller(object):
         os.chmod(ssh_key, stat.S_IREAD)
         return ssh_key
 
+    def get_transport_url(self):
+        client = utils.SSHClient(self.controllers[0], self.node_user_name,
+                                 key_filename=self.key_file)
+        if self.use_containers:
+            ncbase = "/var/lib/config-data/puppet-generated/nova"
+        else:
+            ncbase = ""
+        try:
+            cmd = 'sudo grep "^transport_url" %s/etc/nova/nova.conf' % ncbase
+            ret, url = client.ssh(cmd)
+            if ret:
+                raise Exception('Exec command to get transport from '
+                                'controller(%s) in Apex installer failed, '
+                                'ret=%s, output=%s'
+                                % (self.controllers[0], ret, url))
+            else:
+                # need to use ip instead of hostname
+                ret = (re.sub("@.*:", "@%s:" % self.controllers[0],
+                              url[0].split("=", 1)[1]))
+        except:
+            cmd = 'grep -i "^rabbit" %s/etc/nova/nova.conf' % ncbase
+            ret, lines = client.ssh(cmd)
+            if ret:
+                raise Exception('Exec command to get transport from '
+                                'controller(%s) in Apex installer failed, '
+                                'ret=%s, output=%s'
+                                % (self.controllers[0], ret, url))
+            else:
+                for line in lines.split('\n'):
+                    if line.startswith("rabbit_userid"):
+                        rabbit_userid = line.split("=")
+                    if line.startswith("rabbit_port"):
+                        rabbit_port = line.split("=")
+                    if line.startswith("rabbit_password"):
+                        rabbit_password = line.split("=")
+                ret = "rabbit://%s:%s@%s:%s/?ssl=0" % (rabbit_userid,
+                                                       rabbit_password,
+                                                       self.controllers[0],
+                                                       rabbit_port)
+        self.log.debug('get_transport_url %s' % ret)
+        return ret
+
     def _run_cmd_remote(self, client, command):
         self.log.info('Run command=%s in %s installer......'
                       % (command, self.conf.installer.type))
@@ -161,14 +204,21 @@ class BaseInstaller(object):
         for script_name in script_names:
             script_abs_path = '{0}/{1}/{2}'.format(installer_dir,
                                                    'common', script_name)
-            client.scp(script_abs_path, script_name)
-            cmd = 'sudo %s %s' % (python, script_name)
-            ret, output = client.ssh(cmd)
+            try:
+                client.scp(script_abs_path, script_name)
+            except:
+                client.scp(script_abs_path, script_name)
+            try:
+                cmd = 'sudo %s %s' % (python, script_name)
+                ret, output = client.ssh(cmd)
+            except:
+                ret, output = client.ssh(cmd)
+
             if ret:
                 raise Exception('Do the command in remote'
                                 ' node failed, ret=%s, cmd=%s, output=%s'
                                 % (ret, cmd, output))
-            if 'nova-scheduler' in restart_cmd:
+            if 'nova' in restart_cmd:
                 # Make sure scheduler has proper cpu_allocation_ratio
                 time.sleep(5)
             client.ssh(restart_cmd)
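
get_transport_url() moves from the Apex installer into the base class and gains a fallback: when nova.conf has no transport_url entry, it assembles a rabbit:// URL from the legacy rabbit_* options. A self-contained sketch of that parsing logic, operating on config text already read from the node; the controller IP and sample config are illustrative, and unlike the patch (which keeps the raw split() lists) this sketch extracts the option values:

    import re

    CONTROLLER_IP = '192.0.2.10'  # illustrative controller address

    def transport_url_from_conf(conf_text):
        # Preferred path: rewrite the host part of an explicit transport_url
        for line in conf_text.splitlines():
            if line.startswith('transport_url'):
                url = line.split('=', 1)[1].strip()
                return re.sub('@.*:', '@%s:' % CONTROLLER_IP, url)
        # Fallback: rebuild a rabbit:// URL from legacy rabbit_* options
        rabbit = {}
        for line in conf_text.splitlines():
            if line.startswith('rabbit_'):
                key, _, value = line.partition('=')
                rabbit[key.strip()] = value.strip()
        return 'rabbit://%s:%s@%s:%s/?ssl=0' % (rabbit['rabbit_userid'],
                                                rabbit['rabbit_password'],
                                                CONTROLLER_IP,
                                                rabbit['rabbit_port'])

    print(transport_url_from_conf('rabbit_userid=guest\n'
                                  'rabbit_password=secret\n'
                                  'rabbit_port=5672\n'))
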
diff --git a/doctor_tests/installer/common/restore_compute_config.py b/doctor_tests/installer/common/restore_compute_config.py
index 0e9939fd..82e10a66 100644
--- a/doctor_tests/installer/common/restore_compute_config.py
+++ b/doctor_tests/installer/common/restore_compute_config.py
@@ -11,18 +11,16 @@ import shutil
 
 def restore_cpu_allocation_ratio():
-    nova_base = "/var/lib/config-data/puppet-generated/nova"
-    if not os.path.isdir(nova_base):
-        nova_base = ""
-    nova_file = nova_base + '/etc/nova/nova.conf'
-    nova_file_bak = nova_base + '/etc/nova/nova.bak'
-
-    if not os.path.isfile(nova_file_bak):
-        print('Bak_file:%s does not exist.' % nova_file_bak)
-    else:
-        print('restore: %s' % nova_file)
-        shutil.copyfile(nova_file_bak, nova_file)
-        os.remove(nova_file_bak)
+    for nova_file_bak in ["/var/lib/config-data/puppet-generated/nova_libvirt/etc/nova/nova.bak",  # noqa
+                          "/var/lib/config-data/puppet-generated/nova/etc/nova/nova.bak",  # noqa
+                          "/etc/nova/nova.bak"]:
+        if os.path.isfile(nova_file_bak):
+            nova_file = nova_file_bak.replace(".bak", ".conf")
+            print('restoring nova.bak.')
+            shutil.copyfile(nova_file_bak, nova_file)
+            os.remove(nova_file_bak)
+            return
+    print('nova.bak does not exist.')
     return
 
 
 restore_cpu_allocation_ratio()
diff --git a/doctor_tests/installer/common/set_compute_config.py b/doctor_tests/installer/common/set_compute_config.py
index 86266085..76ac649b 100644
--- a/doctor_tests/installer/common/set_compute_config.py
+++ b/doctor_tests/installer/common/set_compute_config.py
@@ -10,29 +10,17 @@ import os
 import shutil
 
 
-def make_initial_config(service, dest):
-    for mk in ["", "/etc", "/%s" % service]:
-        dest += mk
-        os.mkdir(dest)
-    src = "/etc/%s/%s.conf" % (service, service)
-    dest += "/%s.conf" % service
-    shutil.copyfile(src, dest)
-
-
 def set_cpu_allocation_ratio():
-    docker_conf_base_dir = "/var/lib/config-data/puppet-generated"
-    if not os.path.isdir(docker_conf_base_dir):
-        nova_base = ""
-    else:
-        nova_base = "%s/nova" % docker_conf_base_dir
-        if not os.path.isdir(nova_base):
-            # nova.conf to be used might not exist
-            make_initial_config("nova", nova_base)
-    nova_file = nova_base + '/etc/nova/nova.conf'
-    nova_file_bak = nova_base + '/etc/nova/nova.bak'
+    nova_file_bak = None
+    for nova_file in ["/var/lib/config-data/puppet-generated/nova_libvirt/etc/nova/nova.conf",  # noqa
+                      "/var/lib/config-data/puppet-generated/nova/etc/nova/nova.conf",  # noqa
+                      "/etc/nova/nova.conf"]:
+        if os.path.isfile(nova_file):
+            nova_file_bak = nova_file.replace(".conf", ".bak")
+            break
 
-    if not os.path.isfile(nova_file):
-        raise Exception("File doesn't exist: %s."
-                        % nova_file)
+    if nova_file_bak is None:
+        raise Exception("Could not find nova.conf")
 
     # TODO (tojuvone): Unfortunately ConfigParser did not produce working conf
     fcheck = open(nova_file)
     found_list = ([ca for ca in fcheck.readlines() if "cpu_allocation_ratio"
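
Both compute-config helpers now probe a fixed list of candidate nova.conf locations (containerized nova_libvirt, containerized nova, then the bare host path) instead of deriving the path, and pair each nova.conf with a nova.bak next to it. A compact sketch of that backup/restore pairing, using the same candidate paths as the patch (running it for real requires root on a compute node):

    import os
    import shutil

    CANDIDATES = [
        "/var/lib/config-data/puppet-generated/nova_libvirt/etc/nova/nova.conf",
        "/var/lib/config-data/puppet-generated/nova/etc/nova/nova.conf",
        "/etc/nova/nova.conf",
    ]

    def backup_nova_conf():
        # Copy the first nova.conf that exists to nova.bak before editing it
        for conf in CANDIDATES:
            if os.path.isfile(conf):
                shutil.copyfile(conf, conf.replace(".conf", ".bak"))
                return conf
        raise Exception("Could not find nova.conf")

    def restore_nova_conf():
        # Put the backup back in place, if one was made, and drop the .bak file
        for conf in CANDIDATES:
            bak = conf.replace(".conf", ".bak")
            if os.path.isfile(bak):
                shutil.copyfile(bak, conf)
                os.remove(bak)
                return
        print('nova.bak does not exist.')
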
diff --git a/doctor_tests/main.py b/doctor_tests/main.py
index 438d8324..351d5f19 100644
--- a/doctor_tests/main.py
+++ b/doctor_tests/main.py
@@ -53,9 +53,10 @@ class DoctorTest(object):
     def test_fault_management(self):
         try:
             LOG.info('doctor fault management test starting.......')
-
+            transport_url = self.installer.get_transport_url()
             self.fault_management = \
-                FaultManagement(self.conf, self.installer, self.user, LOG)
+                FaultManagement(self.conf, self.installer, self.user, LOG,
+                                transport_url)
 
             # prepare test env
             self.fault_management.setup()
@@ -79,6 +80,7 @@ class DoctorTest(object):
         except Exception as e:
             LOG.error('doctor fault management test failed, '
                       'Exception=%s' % e)
+            LOG.error(format_exc())
             sys.exit(1)
         finally:
             self.fault_management.cleanup()
@@ -143,6 +145,7 @@ class DoctorTest(object):
                           % function)
         except Exception as e:
             LOG.error('doctor test failed, Exception=%s' % e)
+            LOG.error(format_exc())
             sys.exit(1)
         finally:
             self.cleanup()
diff --git a/doctor_tests/scenario/fault_management.py b/doctor_tests/scenario/fault_management.py
index 869311bd..a110b88a 100644
--- a/doctor_tests/scenario/fault_management.py
+++ b/doctor_tests/scenario/fault_management.py
@@ -40,7 +40,7 @@ sleep 1
 
 class FaultManagement(object):
 
-    def __init__(self, conf, installer, user, log):
+    def __init__(self, conf, installer, user, log, transport_url):
         self.conf = conf
         self.log = log
         self.user = user
@@ -55,7 +55,7 @@ class FaultManagement(object):
         self.network = Network(self.conf, log)
         self.instance = Instance(self.conf, log)
         self.alarm = Alarm(self.conf, log)
-        self.inspector = get_inspector(self.conf, log)
+        self.inspector = get_inspector(self.conf, log, transport_url)
         self.monitor = get_monitor(self.conf,
                                    self.inspector.get_inspector_url(),
                                    log)
diff --git a/doctor_tests/scenario/maintenance.py b/doctor_tests/scenario/maintenance.py
index 09795c2a..a2129f61 100644
--- a/doctor_tests/scenario/maintenance.py
+++ b/doctor_tests/scenario/maintenance.py
@@ -40,7 +40,7 @@ class Maintenance(object):
         else:
             self.endpoint = 'v1/maintenance'
         self.app_manager = get_app_manager(self.stack, self.conf, self.log)
-        self.inspector = get_inspector(self.conf, self.log)
+        self.inspector = get_inspector(self.conf, self.log, trasport_url)
 
     def get_external_network(self):
         ext_net = None
@@ -68,8 +68,16 @@ class Maintenance(object):
             raise Exception('not enough vcpus (%d) on %s'
                             % (vcpus, hostname))
         if vcpus_used > 0:
-            raise Exception('%d vcpus used on %s'
-                            % (vcpus_used, hostname))
+            if self.conf.test_case == 'all':
+                # VCPU might not yet be free after fault_management test
+                self.log.info('%d vcpus used on %s, retry...'
+                              % (vcpus_used, hostname))
+                time.sleep(15)
+                hvisor = self.nova.hypervisors.get(hvisor.id)
+                vcpus_used = hvisor.__getattr__('vcpus_used')
+            if vcpus_used > 0:
+                raise Exception('%d vcpus used on %s'
+                                % (vcpus_used, hostname))
         if prev_vcpus != 0 and prev_vcpus != vcpus:
             raise Exception('%d vcpus on %s does not match to'
                             '%d on %s'
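
The maintenance scenario now tolerates vCPUs that are still reported as used right after the fault_management test, retrying once before giving up. A generic version of that polling idea, independent of novaclient; the helper name, retry count and delay below are illustrative (the patch itself sleeps 15 seconds and re-reads the hypervisor exactly once):

    import time

    def wait_for_free_vcpus(fetch_vcpus_used, hostname, retries=1, delay=15):
        # Raise if vcpus_used on hostname does not drop to zero in time
        vcpus_used = fetch_vcpus_used()
        for _ in range(retries):
            if vcpus_used == 0:
                break
            print('%d vcpus used on %s, retry...' % (vcpus_used, hostname))
            time.sleep(delay)
            vcpus_used = fetch_vcpus_used()
        if vcpus_used > 0:
            raise Exception('%d vcpus used on %s' % (vcpus_used, hostname))

    # Hypothetical usage with novaclient:
    #   wait_for_free_vcpus(
    #       lambda: nova.hypervisors.get(hvisor.id).vcpus_used, hostname)
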