summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- doctor_tests/inspector/__init__.py 8
-rw-r--r-- doctor_tests/inspector/sample.py 44
-rw-r--r-- doctor_tests/installer/apex.py 101
-rw-r--r-- doctor_tests/installer/base.py 58
-rw-r--r-- doctor_tests/installer/common/restore_compute_config.py 22
-rw-r--r-- doctor_tests/installer/common/set_compute_config.py 30
-rw-r--r-- doctor_tests/main.py 7
-rw-r--r-- doctor_tests/scenario/fault_management.py 4
-rw-r--r-- doctor_tests/scenario/maintenance.py 14
9 files changed, 168 insertions, 120 deletions
diff --git a/doctor_tests/inspector/__init__.py b/doctor_tests/inspector/__init__.py
index 31291baf..50365a61 100644
--- a/doctor_tests/inspector/__init__.py
+++ b/doctor_tests/inspector/__init__.py
@@ -42,6 +42,10 @@ _inspector_name_class_mapping = {
}
-def get_inspector(conf, log):
+def get_inspector(conf, log, transport_url=None):
+    """Create the configured inspector; only 'sample' gets transport_url."""
inspector_class = _inspector_name_class_mapping[conf.inspector.type]
- return importutils.import_object(inspector_class, conf, log)
+    ctor_args = [conf, log]
+    if conf.inspector.type == 'sample':
+        ctor_args.append(transport_url)
+    return importutils.import_object(inspector_class, *ctor_args)
diff --git a/doctor_tests/inspector/sample.py b/doctor_tests/inspector/sample.py
index a55a12b7..baf0306f 100644
--- a/doctor_tests/inspector/sample.py
+++ b/doctor_tests/inspector/sample.py
@@ -10,6 +10,7 @@ import collections
from flask import Flask
from flask import request
import json
+import oslo_messaging
import time
from threading import Thread
import requests
@@ -26,7 +27,7 @@ from doctor_tests.inspector.base import BaseInspector
class SampleInspector(BaseInspector):
event_type = 'compute.host.down'
- def __init__(self, conf, log):
+ def __init__(self, conf, log, trasport_url):
super(SampleInspector, self).__init__(conf, log)
self.inspector_url = self.get_inspector_url()
self.novaclients = list()
@@ -43,6 +44,17 @@ class SampleInspector(BaseInspector):
self.hostnames = list()
self.app = None
+ try:
+ transport = oslo_messaging.get_notification_transport(self.conf,
+ trasport_url)
+ self.notif = oslo_messaging.Notifier(transport,
+ 'compute.instance.update',
+ driver='messaging',
+ topics=['notifications'])
+ self.notif = self.notif.prepare(publisher_id='sample')
+ except:
+ self.notif = None
+
def _init_novaclients(self):
self.NUMBER_OF_CLIENTS = self.conf.instance_count
auth = get_identity_auth(project=self.conf.doctor_project)
@@ -54,7 +66,7 @@ class SampleInspector(BaseInspector):
def _init_servers_list(self):
self.servers.clear()
opts = {'all_tenants': True}
- servers = self.nova.servers.list(search_opts=opts)
+ servers = self.nova.servers.list(detailed=True, search_opts=opts)
for server in servers:
try:
host = server.__dict__.get('OS-EXT-SRV-ATTR:host')
@@ -97,10 +109,14 @@ class SampleInspector(BaseInspector):
event_type = event['type']
if event_type == self.event_type:
self.hostnames.append(hostname)
+ if self.notif is not None:
+ thr0 = self._send_notif(hostname)
thr1 = self._disable_compute_host(hostname)
thr2 = self._vms_reset_state('error', hostname)
if self.conf.inspector.update_neutron_port_dp_status:
thr3 = self._set_ports_data_plane_status('DOWN', hostname)
+ if self.notif is not None:
+ thr0.join()
thr1.join()
thr2.join()
if self.conf.inspector.update_neutron_port_dp_status:
@@ -156,8 +172,8 @@ class SampleInspector(BaseInspector):
nova.servers.reset_state(server, state)
vmdown_time = time.time()
self.vm_down_time = vmdown_time
- self.log.info('doctor mark vm(%s) error at %s'
- % (server, vmdown_time))
+ self.log.info('doctor mark vm(%s) %s at %s'
+ % (server, state, vmdown_time))
thrs = []
for nova, server in zip(self.novaclients, self.servers[hostname]):
@@ -167,6 +183,26 @@ class SampleInspector(BaseInspector):
t.join()
@utils.run_async
+    def _send_notif(self, hostname):
+        """Emit a compute.instance.update notification per VM on hostname.
+
+        Starts one sender per entry of self.servers[hostname], then joins.
+        """
+        @utils.run_async
+        def notify(vm):
+            body = {'tenant_id': vm.tenant_id,
+                    'instance_id': vm.id,
+                    'state': "error"}
+            self.notif.info({'some': 'context'}, 'compute.instance.update',
+                            body)
+            self.log.info('doctor compute.instance.update vm(%s) error %s'
+                          % (vm, time.time()))
+
+        senders = [notify(vm) for vm in self.servers[hostname]]
+        for sender in senders:
+            sender.join()
+
+ @utils.run_async
def _set_ports_data_plane_status(self, status, hostname):
body = {'data_plane_status': status}
diff --git a/doctor_tests/installer/apex.py b/doctor_tests/installer/apex.py
index 3c97378c..79c59e9a 100644
--- a/doctor_tests/installer/apex.py
+++ b/doctor_tests/installer/apex.py
@@ -6,7 +6,6 @@
# which accompanies this distribution, and is available at
# http://www.apache.org/licenses/LICENSE-2.0
##############################################################################
-import re
import time
from doctor_tests.common.constants import Inspector
@@ -36,8 +35,6 @@ class ApexInstaller(BaseInstaller):
self.key_file = None
self.controllers = list()
self.computes = list()
- self.controller_clients = list()
- self.compute_clients = list()
def setup(self):
self.log.info('Setup Apex installer start......')
@@ -83,26 +80,6 @@ class ApexInstaller(BaseInstaller):
host_ips = self._run_cmd_remote(self.client, command)
return host_ips[0]
- def get_transport_url(self):
- client = SSHClient(self.controllers[0], self.node_user_name,
- key_filename=self.key_file)
- if self.use_containers:
- ncbase = "/var/lib/config-data/puppet-generated/nova"
- else:
- ncbase = ""
- command = 'sudo grep "^transport_url" %s/etc/nova/nova.conf' % ncbase
-
- ret, url = client.ssh(command)
- if ret:
- raise Exception('Exec command to get host ip from controller(%s)'
- 'in Apex installer failed, ret=%s, output=%s'
- % (self.controllers[0], ret, url))
- # need to use ip instead of hostname
- ret = (re.sub("@.*:", "@%s:" % self.controllers[0],
- url[0].split("=", 1)[1]))
- self.log.debug('get_transport_url %s' % ret)
- return ret
-
def _set_docker_restart_cmd(self, service):
# There can be multiple instances running so need to restart all
cmd = "for container in `sudo docker ps | grep "
@@ -114,22 +91,6 @@ class ApexInstaller(BaseInstaller):
def set_apply_patches(self):
self.log.info('Set apply patches start......')
- if self.conf.test_case != 'fault_management':
- if self.use_containers:
- restart_cmd = self._set_docker_restart_cmd("nova-compute")
- else:
- restart_cmd = 'sudo systemctl restart' \
- ' openstack-nova-compute.service'
- for node_ip in self.computes:
- client = SSHClient(node_ip, self.node_user_name,
- key_filename=self.key_file)
- self.compute_clients.append(client)
- self._run_apply_patches(client,
- restart_cmd,
- [self.nc_set_compute_script],
- python=self.python)
- time.sleep(10)
-
set_scripts = [self.cm_set_script]
if self.use_containers:
@@ -157,11 +118,28 @@ class ApexInstaller(BaseInstaller):
for node_ip in self.controllers:
client = SSHClient(node_ip, self.node_user_name,
key_filename=self.key_file)
- self.controller_clients.append(client)
self._run_apply_patches(client,
restart_cmd,
set_scripts,
python=self.python)
+ time.sleep(5)
+
+ self.log.info('Set apply patches start......')
+
+ if self.conf.test_case != 'fault_management':
+ if self.use_containers:
+ restart_cmd = self._set_docker_restart_cmd("nova")
+ else:
+ restart_cmd = 'sudo systemctl restart' \
+ ' openstack-nova-compute.service'
+ for node_ip in self.computes:
+ client = SSHClient(node_ip, self.node_user_name,
+ key_filename=self.key_file)
+ self._run_apply_patches(client,
+ restart_cmd,
+ [self.nc_set_compute_script],
+ python=self.python)
+ time.sleep(5)
def restore_apply_patches(self):
self.log.info('restore apply patches start......')
@@ -190,39 +168,22 @@ class ApexInstaller(BaseInstaller):
restart_cmd += ' openstack-congress-server.service'
restore_scripts.append(self.cg_restore_script)
- for client, node_ip in zip(self.controller_clients, self.controllers):
- retry = 0
- while retry < 2:
- try:
- self._run_apply_patches(client,
- restart_cmd,
- restore_scripts,
- python=self.python)
- except Exception:
- if retry > 0:
- raise Exception("SSHClient to %s feiled" % node_ip)
- client = SSHClient(node_ip, self.node_user_name,
- key_filename=self.key_file)
- retry += 1
- break
+ for node_ip in self.controllers:
+ client = SSHClient(node_ip, self.node_user_name,
+ key_filename=self.key_file)
+ self._run_apply_patches(client,
+ restart_cmd,
+ restore_scripts,
+ python=self.python)
+
if self.conf.test_case != 'fault_management':
if self.use_containers:
restart_cmd = self._set_docker_restart_cmd("nova-compute")
else:
restart_cmd = 'sudo systemctl restart' \
' openstack-nova-compute.service'
- for client, node_ip in zip(self.compute_clients, self.computes):
- retry = 0
- while retry < 2:
- try:
- self._run_apply_patches(
- client, restart_cmd,
- [self.nc_restore_compute_script],
- python=self.python)
- except Exception:
- if retry > 0:
- raise Exception("SSHClient to %s feiled" % node_ip)
- client = SSHClient(node_ip, self.node_user_name,
- key_filename=self.key_file)
- retry += 1
- break
+ for node_ip in self.computes:
+ self._run_apply_patches(
+ client, restart_cmd,
+ [self.nc_restore_compute_script],
+ python=self.python)
diff --git a/doctor_tests/installer/base.py b/doctor_tests/installer/base.py
index 124b1910..df781ee1 100644
--- a/doctor_tests/installer/base.py
+++ b/doctor_tests/installer/base.py
@@ -11,6 +11,7 @@ import getpass
import grp
import os
import pwd
+import re
import six
import stat
import subprocess
@@ -126,6 +127,48 @@ class BaseInstaller(object):
os.chmod(ssh_key, stat.S_IREAD)
return ssh_key
+    def get_transport_url(self):
+        """Build the messaging transport URL from the first controller.
+
+        Greps ``transport_url`` from nova.conf over SSH and swaps the host
+        for the controller IP; if that fails in any way, falls back to
+        assembling a rabbit:// URL from the ``rabbit_*`` options.
+        Raises Exception when neither lookup yields a usable URL.
+        """
+        host = self.controllers[0]
+        client = utils.SSHClient(host, self.node_user_name,
+                                 key_filename=self.key_file)
+        ncbase = ("/var/lib/config-data/puppet-generated/nova"
+                  if self.use_containers else "")
+        failed = ('Exec command to get transport from controller(%s) in '
+                  'Apex installer failed, ret=%s, output=%s')
+        try:
+            cmd = 'sudo grep "^transport_url" %s/etc/nova/nova.conf' % ncbase
+            ret, url = client.ssh(cmd)
+            if ret:
+                raise Exception(failed % (host, ret, url))
+            # need to use ip instead of hostname
+            ret = re.sub("@.*:", "@%s:" % host,
+                         url[0].split("=", 1)[1].strip())
+        except Exception:
+            # Deliberate best effort: any failure above triggers the fallback
+            cmd = 'grep -i "^rabbit" %s/etc/nova/nova.conf' % ncbase
+            ret, lines = client.ssh(cmd)
+            if ret:
+                # report this command's own output, not the earlier `url`
+                raise Exception(failed % (host, ret, lines))
+            # output is a list of lines (cf. url[0]); take the text after "="
+            opts = dict((k.strip(), v.strip()) for k, _, v in
+                        (line.partition("=") for line in lines))
+            try:
+                ret = "rabbit://%s:%s@%s:%s/?ssl=0" % (
+                    opts["rabbit_userid"], opts["rabbit_password"],
+                    host, opts["rabbit_port"])
+            except KeyError as missing:
+                raise Exception('%s not set in nova.conf' % missing)
+        self.log.debug('get_transport_url %s' % ret)
+        return ret
+
def _run_cmd_remote(self, client, command):
self.log.info('Run command=%s in %s installer......'
% (command, self.conf.installer.type))
@@ -161,14 +204,21 @@ class BaseInstaller(object):
for script_name in script_names:
script_abs_path = '{0}/{1}/{2}'.format(installer_dir,
'common', script_name)
- client.scp(script_abs_path, script_name)
- cmd = 'sudo %s %s' % (python, script_name)
- ret, output = client.ssh(cmd)
+ try:
+ client.scp(script_abs_path, script_name)
+ except:
+ client.scp(script_abs_path, script_name)
+ try:
+ cmd = 'sudo %s %s' % (python, script_name)
+ ret, output = client.ssh(cmd)
+ except:
+ ret, output = client.ssh(cmd)
+
if ret:
raise Exception('Do the command in remote'
' node failed, ret=%s, cmd=%s, output=%s'
% (ret, cmd, output))
- if 'nova-scheduler' in restart_cmd:
+ if 'nova' in restart_cmd:
# Make sure scheduler has proper cpu_allocation_ratio
time.sleep(5)
client.ssh(restart_cmd)
diff --git a/doctor_tests/installer/common/restore_compute_config.py b/doctor_tests/installer/common/restore_compute_config.py
index 0e9939fd..82e10a66 100644
--- a/doctor_tests/installer/common/restore_compute_config.py
+++ b/doctor_tests/installer/common/restore_compute_config.py
@@ -11,18 +11,16 @@ import shutil
def restore_cpu_allocation_ratio():
- nova_base = "/var/lib/config-data/puppet-generated/nova"
- if not os.path.isdir(nova_base):
- nova_base = ""
- nova_file = nova_base + '/etc/nova/nova.conf'
- nova_file_bak = nova_base + '/etc/nova/nova.bak'
-
- if not os.path.isfile(nova_file_bak):
- print('Bak_file:%s does not exist.' % nova_file_bak)
- else:
- print('restore: %s' % nova_file)
- shutil.copyfile(nova_file_bak, nova_file)
- os.remove(nova_file_bak)
+    """Restore nova.conf from the first nova.bak found; delete that backup."""
+    candidates = ("/var/lib/config-data/puppet-generated/nova_libvirt/etc/nova/nova.bak",  # noqa
+                  "/var/lib/config-data/puppet-generated/nova/etc/nova/nova.bak",  # noqa
+                  "/etc/nova/nova.bak")
+    for nova_file_bak in (p for p in candidates if os.path.isfile(p)):
+        print('restoring nova.bak.')
+        shutil.copyfile(nova_file_bak, nova_file_bak.replace(".bak", ".conf"))
+        os.remove(nova_file_bak)
+        return
+    print('nova.bak does not exist.')
return
restore_cpu_allocation_ratio()
diff --git a/doctor_tests/installer/common/set_compute_config.py b/doctor_tests/installer/common/set_compute_config.py
index 86266085..76ac649b 100644
--- a/doctor_tests/installer/common/set_compute_config.py
+++ b/doctor_tests/installer/common/set_compute_config.py
@@ -10,29 +10,17 @@ import os
import shutil
-def make_initial_config(service, dest):
- for mk in ["", "/etc", "/%s" % service]:
- dest += mk
- os.mkdir(dest)
- src = "/etc/%s/%s.conf" % (service, service)
- dest += "/%s.conf" % service
- shutil.copyfile(src, dest)
-
-
def set_cpu_allocation_ratio():
- docker_conf_base_dir = "/var/lib/config-data/puppet-generated"
- if not os.path.isdir(docker_conf_base_dir):
- nova_base = ""
- else:
- nova_base = "%s/nova" % docker_conf_base_dir
- if not os.path.isdir(nova_base):
- # nova.conf to be used might not exist
- make_initial_config("nova", nova_base)
- nova_file = nova_base + '/etc/nova/nova.conf'
- nova_file_bak = nova_base + '/etc/nova/nova.bak'
+ nova_file_bak = None
+ for nova_file in ["/var/lib/config-data/puppet-generated/nova_libvirt/etc/nova/nova.conf", # noqa
+ "/var/lib/config-data/puppet-generated/nova/etc/nova/nova.conf", # noqa
+ "/etc/nova/nova.conf"]:
+ if os.path.isfile(nova_file):
+ nova_file_bak = nova_file.replace(".conf", ".bak")
+ break
- if not os.path.isfile(nova_file):
- raise Exception("File doesn't exist: %s." % nova_file)
+ if nova_file_bak is None:
+ raise Exception("Could not find nova.conf")
# TODO (tojuvone): Unfortunately ConfigParser did not produce working conf
fcheck = open(nova_file)
found_list = ([ca for ca in fcheck.readlines() if "cpu_allocation_ratio"
diff --git a/doctor_tests/main.py b/doctor_tests/main.py
index 438d8324..351d5f19 100644
--- a/doctor_tests/main.py
+++ b/doctor_tests/main.py
@@ -53,9 +53,10 @@ class DoctorTest(object):
def test_fault_management(self):
try:
LOG.info('doctor fault management test starting.......')
-
+ transport_url = self.installer.get_transport_url()
self.fault_management = \
- FaultManagement(self.conf, self.installer, self.user, LOG)
+ FaultManagement(self.conf, self.installer, self.user, LOG,
+ transport_url)
# prepare test env
self.fault_management.setup()
@@ -79,6 +80,7 @@ class DoctorTest(object):
except Exception as e:
LOG.error('doctor fault management test failed, '
'Exception=%s' % e)
+ LOG.error(format_exc())
sys.exit(1)
finally:
self.fault_management.cleanup()
@@ -143,6 +145,7 @@ class DoctorTest(object):
% function)
except Exception as e:
LOG.error('doctor test failed, Exception=%s' % e)
+ LOG.error(format_exc())
sys.exit(1)
finally:
self.cleanup()
diff --git a/doctor_tests/scenario/fault_management.py b/doctor_tests/scenario/fault_management.py
index 869311bd..a110b88a 100644
--- a/doctor_tests/scenario/fault_management.py
+++ b/doctor_tests/scenario/fault_management.py
@@ -40,7 +40,7 @@ sleep 1
class FaultManagement(object):
- def __init__(self, conf, installer, user, log):
+ def __init__(self, conf, installer, user, log, transport_url):
self.conf = conf
self.log = log
self.user = user
@@ -55,7 +55,7 @@ class FaultManagement(object):
self.network = Network(self.conf, log)
self.instance = Instance(self.conf, log)
self.alarm = Alarm(self.conf, log)
- self.inspector = get_inspector(self.conf, log)
+ self.inspector = get_inspector(self.conf, log, transport_url)
self.monitor = get_monitor(self.conf,
self.inspector.get_inspector_url(),
log)
diff --git a/doctor_tests/scenario/maintenance.py b/doctor_tests/scenario/maintenance.py
index 09795c2a..a2129f61 100644
--- a/doctor_tests/scenario/maintenance.py
+++ b/doctor_tests/scenario/maintenance.py
@@ -40,7 +40,7 @@ class Maintenance(object):
else:
self.endpoint = 'v1/maintenance'
self.app_manager = get_app_manager(self.stack, self.conf, self.log)
- self.inspector = get_inspector(self.conf, self.log)
+ self.inspector = get_inspector(self.conf, self.log, trasport_url)
def get_external_network(self):
ext_net = None
@@ -68,8 +68,16 @@ class Maintenance(object):
raise Exception('not enough vcpus (%d) on %s' %
(vcpus, hostname))
if vcpus_used > 0:
- raise Exception('%d vcpus used on %s'
- % (vcpus_used, hostname))
+ if self.conf.test_case == 'all':
+ # VCPU might not yet be free after fault_management test
+ self.log.info('%d vcpus used on %s, retry...'
+ % (vcpus_used, hostname))
+ time.sleep(15)
+ hvisor = self.nova.hypervisors.get(hvisor.id)
+ vcpus_used = hvisor.__getattr__('vcpus_used')
+ if vcpus_used > 0:
+ raise Exception('%d vcpus used on %s'
+ % (vcpus_used, hostname))
if prev_vcpus != 0 and prev_vcpus != vcpus:
raise Exception('%d vcpus on %s does not match to'
'%d on %s'