summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authordongwenjuan <dong.wenjuan@zte.com.cn>2017-08-14 16:58:13 +0800
committerdongwenjuan <dong.wenjuan@zte.com.cn>2017-09-04 22:30:39 +0800
commit19f7ba75850c52e1ae163766b64b6d153e8d7e1b (patch)
treef76b13d9d9092209788624609f65872ca8d0dea5
parent836a8932d6c6a502980009b9578f0c6ecf64cb47 (diff)
refactor failure inject
JIRA: DOCTOR-116 Change-Id: I14deda4ccb47414cff139a522a9196b68e92500e Signed-off-by: dongwenjuan <dong.wenjuan@zte.com.cn>
-rw-r--r--tests/alarm.py4
-rw-r--r--tests/common/__init__.py8
-rw-r--r--tests/common/constants.py12
-rw-r--r--tests/common/utils.py (renamed from tests/utils.py)7
-rw-r--r--tests/consumer/sample.py6
-rw-r--r--tests/inspector/sample.py2
-rw-r--r--tests/installer/apex.py40
-rw-r--r--tests/installer/base.py4
-rw-r--r--tests/installer/local.py18
-rw-r--r--tests/logger.py5
-rw-r--r--tests/main.py78
-rw-r--r--tests/monitor/base.py2
-rw-r--r--tests/monitor/collectd.py13
-rw-r--r--tests/monitor/sample.py23
-rw-r--r--tests/scenario/__init__.py8
-rw-r--r--tests/scenario/common.py40
-rw-r--r--tests/scenario/network_failure.py71
17 files changed, 287 insertions, 54 deletions
diff --git a/tests/alarm.py b/tests/alarm.py
index 0582085e..916f4405 100644
--- a/tests/alarm.py
+++ b/tests/alarm.py
@@ -26,9 +26,7 @@ class Alarm(object):
def __init__(self, conf, log):
self.conf = conf
self.log = log
- self.auth = get_identity_auth(username=self.conf.doctor_user,
- password=self.conf.doctor_passwd,
- project=self.conf.doctor_project)
+ self.auth = get_identity_auth(project=self.conf.doctor_project)
self.aodh = \
aodh_client(conf.aodh_version,
get_session(auth=self.auth))
diff --git a/tests/common/__init__.py b/tests/common/__init__.py
new file mode 100644
index 00000000..e68a3070
--- /dev/null
+++ b/tests/common/__init__.py
@@ -0,0 +1,8 @@
+##############################################################################
+# Copyright (c) 2017 ZTE Corporation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+############################################################################## \ No newline at end of file
diff --git a/tests/common/constants.py b/tests/common/constants.py
new file mode 100644
index 00000000..72d037af
--- /dev/null
+++ b/tests/common/constants.py
@@ -0,0 +1,12 @@
+##############################################################################
+# Copyright (c) 2017 ZTE Corporation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+from collections import namedtuple
+
+
+Host = namedtuple('Host', ['name', 'ip'])
diff --git a/tests/utils.py b/tests/common/utils.py
index fd8c4cd7..38fd97d8 100644
--- a/tests/utils.py
+++ b/tests/common/utils.py
@@ -51,7 +51,7 @@ class SSHClient(object):
def ssh(self, command):
if self.log:
- self.log.debug("Executing: %s" % command)
+ self.log.info("Executing: %s" % command)
stdin, stdout, stderr = self.client.exec_command(command)
ret = stdout.channel.recv_exit_status()
output = list()
@@ -59,12 +59,12 @@ class SSHClient(object):
output.append(line.decode('utf-8'))
if ret:
if self.log:
- self.log.debug("*** FAILED to run command %s (%s)" % (command, ret))
+ self.log.info("*** FAILED to run command %s (%s)" % (command, ret))
raise Exception(
"Unable to run \ncommand: %s\nret: %s"
% (command, ret))
if self.log:
- self.log.debug("*** SUCCESSFULLY run command %s" % command)
+ self.log.info("*** SUCCESSFULLY run command %s" % command)
return ret, output
def scp(self, source, dest, method='put'):
@@ -77,6 +77,7 @@ class SSHClient(object):
ftp.get(source, dest)
ftp.close()
+
def run_async(func):
from threading import Thread
from functools import wraps
diff --git a/tests/consumer/sample.py b/tests/consumer/sample.py
index a698623a..20ad9d57 100644
--- a/tests/consumer/sample.py
+++ b/tests/consumer/sample.py
@@ -55,9 +55,9 @@ class ConsumerApp(Thread):
@app.route('/failure', methods=['POST'])
def event_posted():
self.log.info('doctor consumer notified at %s' % time.time())
- self.log.info('received data = %s' % request.data)
- data = json.loads(request.data)
- return "OK"
+ self.log.info('sample consumer received data = %s' % request.data)
+ data = json.loads(request.data.decode('utf8'))
+ return 'OK'
@app.route('/shutdown', methods=['POST'])
def shutdown():
diff --git a/tests/inspector/sample.py b/tests/inspector/sample.py
index b364e825..1c05cede 100644
--- a/tests/inspector/sample.py
+++ b/tests/inspector/sample.py
@@ -14,12 +14,12 @@ import time
from threading import Thread
import requests
+from common import utils
from identity_auth import get_identity_auth
from identity_auth import get_session
from os_clients import nova_client
from os_clients import neutron_client
from inspector.base import BaseInspector
-import utils
class SampleInspector(BaseInspector):
diff --git a/tests/installer/apex.py b/tests/installer/apex.py
index e0960a5f..98eb6c9c 100644
--- a/tests/installer/apex.py
+++ b/tests/installer/apex.py
@@ -11,10 +11,11 @@ import grp
import os
import pwd
import stat
+import subprocess
import sys
+from common.utils import SSHClient
from installer.base import BaseInstaller
-from utils import SSHClient
class ApexInstaller(BaseInstaller):
@@ -30,20 +31,28 @@ class ApexInstaller(BaseInstaller):
self.key_file = None
self.controllers = list()
self.controller_clients = list()
+ self.servers = list()
def setup(self):
self.log.info('Setup Apex installer start......')
- self.key_file = self.get_ssh_key_from_installer()
+ self.get_ssh_key_from_installer()
self.get_controller_ips()
self.set_apply_patches()
+ self.setup_stunnel()
def cleanup(self):
self.restore_apply_patches()
+ for server in self.servers:
+ server.terminate()
def get_ssh_key_from_installer(self):
self.log.info('Get SSH keys from Apex installer......')
+ if self.key_file is not None:
+ self.log.info('Already have SSH keys from Apex installer......')
+ return self.key_file
+
self.client.scp('/home/stack/.ssh/id_rsa', './instack_key', method='get')
user = getpass.getuser()
uid = pwd.getpwnam(user).pw_uid
@@ -51,7 +60,8 @@ class ApexInstaller(BaseInstaller):
os.chown('./instack_key', uid, gid)
os.chmod('./instack_key', stat.S_IREAD)
current_dir = sys.path[0]
- return '{0}/{1}'.format(current_dir, 'instack_key')
+ self.key_file = '{0}/{1}'.format(current_dir, 'instack_key')
+ return self.key_file
def get_controller_ips(self):
self.log.info('Get controller ips from Apex installer......')
@@ -63,8 +73,32 @@ class ApexInstaller(BaseInstaller):
if ret:
raise Exception('Exec command to get controller ips in Apex installer failed'
'ret=%s, output=%s' % (ret, controllers))
+ self.log.info('Get controller_ips:%s from Apex installer' % controllers)
self.controllers = controllers
+ def get_host_ip_from_hostname(self, hostname):
+ self.log.info('Get host ip from host name in Apex installer......')
+
+ hostname_in_undercloud = hostname.split('.')[0]
+
+ command = "source stackrc; nova show %s | awk '/ ctlplane network /{print $5}'" % (hostname_in_undercloud)
+ ret, host_ip = self.client.ssh(command)
+ if ret:
+ raise Exception('Exec command to get host ip from hostname(%s) in Apex installer failed'
+ 'ret=%s, output=%s' % (hostname, ret, host_ip))
+ self.log.info('Get host_ip:%s from host_name:%s in Apex installer' % (host_ip, hostname))
+ return host_ip[0]
+
+ def setup_stunnel(self):
+ self.log.info('Setup ssh stunnel in controller nodes in Apex installer......')
+ for node_ip in self.controllers:
+ cmd = "sudo ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -i %s %s@%s -R %s:localhost:%s sleep 600 > ssh_tunnel.%s.log 2>&1 < /dev/null &" \
+ % (self.key_file, self.node_user_name, node_ip,
+ self.conf.consumer.port, self.conf.consumer.port, node_ip)
+ server = subprocess.Popen(cmd, shell=True)
+ self.servers.append(server)
+ server.communicate()
+
def set_apply_patches(self):
self.log.info('Set apply patches start......')
diff --git a/tests/installer/base.py b/tests/installer/base.py
index f3837f15..fa39816a 100644
--- a/tests/installer/base.py
+++ b/tests/installer/base.py
@@ -24,6 +24,10 @@ class BaseInstaller(object):
pass
@abc.abstractmethod
+ def get_host_ip_from_hostname(self, hostname):
+ pass
+
+ @abc.abstractmethod
def setup(self):
pass
diff --git a/tests/installer/local.py b/tests/installer/local.py
index abe0ba25..dcdf41e3 100644
--- a/tests/installer/local.py
+++ b/tests/installer/local.py
@@ -8,10 +8,11 @@
##############################################################################
import os
import shutil
+import subprocess
from installer.base import BaseInstaller
-from utils import load_json_file
-from utils import write_json_file
+from common.utils import load_json_file
+from common.utils import write_json_file
class LocalInstaller(BaseInstaller):
@@ -34,7 +35,18 @@ class LocalInstaller(BaseInstaller):
def get_ssh_key_from_installer(self):
self.log.info('Assuming SSH keys already exchanged with computer for local installer type')
- return
+ return None
+
+ def get_host_ip_from_hostname(self, hostname):
+ self.log.info('Get host ip from host name in local installer......')
+
+ cmd = "getent hosts %s | awk '{ print $1 }'" % (hostname)
+ server = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
+ stdout, stderr = server.communicate()
+ host_ip = stdout.strip()
+
+ self.log.info('Get host_ip:%s from host_name:%s in local installer' % (host_ip, hostname))
+ return host_ip
def set_apply_patches(self):
self._set_nova_policy()
diff --git a/tests/logger.py b/tests/logger.py
index 80d19bb9..b7a49fdb 100644
--- a/tests/logger.py
+++ b/tests/logger.py
@@ -21,8 +21,6 @@ class Logger(object):
CI_DEBUG = os.getenv('CI_DEBUG')
- filename = '%s.log' % logger_name
- logging.basicConfig(filemode='w', filename=filename)
self.logger = logging.getLogger(logger_name)
self.logger.propagate = 0
self.logger.setLevel(logging.DEBUG)
@@ -38,7 +36,8 @@ class Logger(object):
ch.setLevel(logging.INFO)
self.logger.addHandler(ch)
- file_handler = logging.FileHandler(filename)
+ filename = '%s.log' % logger_name
+ file_handler = logging.FileHandler(filename, mode='w')
file_handler.setFormatter(formatter)
file_handler.setLevel(logging.DEBUG)
self.logger.addHandler(file_handler)
diff --git a/tests/main.py b/tests/main.py
index db2fafd9..7e7c3bc2 100644
--- a/tests/main.py
+++ b/tests/main.py
@@ -8,19 +8,27 @@
##############################################################################
import os
from os.path import isfile, join
+import random
import sys
+import time
from alarm import Alarm
+from common.constants import Host
import config
from consumer import get_consumer
+from identity_auth import get_identity_auth
+from identity_auth import get_session
from image import Image
from instance import Instance
from inspector import get_inspector
from installer import get_installer
import logger as doctor_log
-from user import User
from network import Network
from monitor import get_monitor
+from os_clients import nova_client
+from scenario.common import calculate_notification_time
+from scenario.network_failure import NetworkFault
+from user import User
LOG = doctor_log.Logger('doctor').getLogger()
@@ -35,12 +43,17 @@ class DoctorTest(object):
self.network = Network(self.conf, LOG)
self.instance = Instance(self.conf, LOG)
self.alarm = Alarm(self.conf, LOG)
+ self.installer = get_installer(self.conf, LOG)
self.inspector = get_inspector(self.conf, LOG)
self.monitor = get_monitor(self.conf,
self.inspector.get_inspector_url(),
LOG)
self.consumer = get_consumer(self.conf, LOG)
- self.installer = get_installer(self.conf, LOG)
+ self.fault = NetworkFault(self.conf, self.installer, LOG)
+ auth = get_identity_auth(project=self.conf.doctor_project)
+ self.nova = nova_client(self.conf.nova_version,
+ get_session(auth=auth))
+ self.down_host = None
def setup(self):
# prepare the cloud env
@@ -63,7 +76,10 @@ class DoctorTest(object):
# starting doctor sample components...
self.inspector.start()
- self.monitor.start()
+
+ self.down_host = self.get_host_info_for_random_vm()
+ self.monitor.start(self.down_host)
+
self.consumer.start()
def run(self):
@@ -71,30 +87,76 @@ class DoctorTest(object):
try:
LOG.info('doctor test starting.......')
+ # prepare test env
self.setup()
+ # wait for aodh alarms are updated in caches for event evaluator,
+ # sleep time should be larger than event_alarm_cache_ttl(default 60)
+ time.sleep(60)
+
# injecting host failure...
# NOTE (umar) add INTERFACE_NAME logic to host injection
+ self.fault.start(self.down_host)
+ time.sleep(10)
+
# verify the test results
# NOTE (umar) copy remote monitor.log file when monitor=collectd
-
+ self.check_host_status(self.down_host.name, 'down')
+
+ notification_time = calculate_notification_time()
+ if notification_time < 1 and notification_time > 0:
+ LOG.info('doctor test successfully, notification_time=%s' % notification_time)
+ else:
+ LOG.error('doctor test failed, notification_time=%s' % notification_time)
+ sys.exit(1)
except Exception as e:
LOG.error('doctor test failed, Exception=%s' % e)
sys.exit(1)
finally:
self.cleanup()
+ def get_host_info_for_random_vm(self):
+ num = random.randint(0, self.conf.instance_count - 1)
+ vm_name = "%s%d" % (self.conf.instance_basename, num)
+
+ servers = \
+ {getattr(server, 'name'): server
+ for server in self.nova.servers.list()}
+ server = servers.get(vm_name)
+ if not server:
+ raise \
+ Exception('Can not find instance: vm_name(%s)' % vm_name)
+ host_name = server.__dict__.get('OS-EXT-SRV-ATTR:hypervisor_hostname')
+ host_ip = self.installer.get_host_ip_from_hostname(host_name)
+
+ LOG.info('Get host info(name:%s, ip:%s) which vm(%s) launched at'
+ % (host_name, host_ip, vm_name))
+ return Host(host_name, host_ip)
+
+ def check_host_status(self, hostname, state):
+ service = self.nova.services.list(host=hostname, binary='nova-compute')
+ host_state = service[0].__dict__.get('state')
+ assert host_state == state
+
+ def unset_forced_down_hosts(self):
+ if self.down_host:
+ self.nova.services.force_down(self.down_host.name, 'nova-compute', False)
+ time.sleep(2)
+ self.check_host_status(self.down_host.name, 'up')
+
def cleanup(self):
+ self.unset_forced_down_hosts()
+ self.inspector.stop()
+ self.monitor.stop()
+ self.consumer.stop()
+ self.installer.cleanup()
self.alarm.delete()
self.instance.delete()
self.network.delete()
self.image.delete()
- self.inspector.stop()
+ self.fault.cleanup()
self.user.delete()
- self.monitor.stop()
- self.consumer.stop()
- self.installer.cleanup()
def main():
diff --git a/tests/monitor/base.py b/tests/monitor/base.py
index ccb647cf..119c8a1c 100644
--- a/tests/monitor/base.py
+++ b/tests/monitor/base.py
@@ -19,7 +19,7 @@ class BaseMonitor(object):
self.inspector_url = inspector_url
@abc.abstractmethod
- def start(self):
+ def start(self, host):
pass
@abc.abstractmethod
diff --git a/tests/monitor/collectd.py b/tests/monitor/collectd.py
index f7a4f442..e2a800ea 100644
--- a/tests/monitor/collectd.py
+++ b/tests/monitor/collectd.py
@@ -12,8 +12,6 @@ import socket
import getpass
import sys
-from identity_auth import get_session
-from os_clients import nova_client
from monitor.base import BaseMonitor
@@ -21,13 +19,6 @@ class CollectdMonitor(BaseMonitor):
def __init__(self, conf, inspector_url, log):
super(CollectdMonitor, self).__init__(conf, inspector_url, log)
self.top_dir = os.path.dirname(sys.path[0])
- self.session = get_session()
- self.nova = nova_client(conf.nova_version, self.session)
- self.compute_hosts = self.nova.hypervisors.list(detailed=True)
- for host in self.compute_hosts:
- host_dict = host.__dict__
- self.compute_host = host_dict['hypervisor_hostname']
- self.compute_ip = host_dict['host_ip']
tmp_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
tmp_sock.connect(("8.8.8.8", 80))
@@ -50,8 +41,10 @@ class CollectdMonitor(BaseMonitor):
self.project_domain_id = os.environ.get('OS_PROJECT_DOMAIN_ID')
self.ssh_opts_cpu = '-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no'
- def start(self):
+ def start(self, host):
self.log.info("Collectd monitor start.........")
+ self.compute_host = host.name
+ self.compute_ip = host.ip
f = open("%s/tests/collectd.conf" % self.top_dir, 'w')
collectd_conf_file = """
Hostname %s
diff --git a/tests/monitor/sample.py b/tests/monitor/sample.py
index 1333a2ec..9ac1bccf 100644
--- a/tests/monitor/sample.py
+++ b/tests/monitor/sample.py
@@ -14,7 +14,6 @@ from threading import Thread
import time
from identity_auth import get_session
-from os_clients import nova_client
from monitor.base import BaseMonitor
@@ -24,26 +23,18 @@ class SampleMonitor(BaseMonitor):
def __init__(self, conf, inspector_url, log):
super(SampleMonitor, self).__init__(conf, inspector_url, log)
self.session = get_session()
- self.nova = nova_client(conf.nova_version, self.session)
- self.hosts = self.nova.hypervisors.list(detailed=True)
- self.pingers = []
+ self.pinger = None
- def start(self):
+ def start(self, host):
self.log.info('sample monitor start......')
- for host in self.hosts:
- host_dict = host.__dict__
- host_name = host_dict['hypervisor_hostname']
- host_ip = host_dict['host_ip']
- pinger = Pinger(host_name, host_ip, self, self.log)
- pinger.start()
- self.pingers.append(pinger)
+ self.pinger = Pinger(host.name, host.ip, self, self.log)
+ self.pinger.start()
def stop(self):
self.log.info('sample monitor stop......')
- for pinger in self.pingers:
- pinger.stop()
- pinger.join()
- del self.pingers
+ if self.pinger is not None:
+ self.pinger.stop()
+ self.pinger.join()
def report_error(self, hostname):
self.log.info('sample monitor report error......')
diff --git a/tests/scenario/__init__.py b/tests/scenario/__init__.py
new file mode 100644
index 00000000..48893ae6
--- /dev/null
+++ b/tests/scenario/__init__.py
@@ -0,0 +1,8 @@
+##############################################################################
+# Copyright (c) 2017 ZTE Corporation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
diff --git a/tests/scenario/common.py b/tests/scenario/common.py
new file mode 100644
index 00000000..e880e8b2
--- /dev/null
+++ b/tests/scenario/common.py
@@ -0,0 +1,40 @@
+##############################################################################
+# Copyright (c) 2017 ZTE Corporation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+import os
+import re
+import sys
+
+
+def match_rep_in_file(regex, full_path):
+ if not os.path.isfile(full_path):
+ raise Exception('File(%s) does not exist' % full_path)
+
+ with open(full_path, 'r') as file:
+ for line in file:
+ result = re.search(regex, line)
+ if result:
+ return result.group(0)
+
+ return None
+
+
+def calculate_notification_time():
+ log_file = '{0}/{1}'.format(sys.path[0], 'doctor.log')
+
+ reg = '(?<=doctor monitor detected at )\d+.\d+'
+ detected = match_rep_in_file(reg, log_file)
+ if not detected:
+ raise Exception('Can not find detected time')
+
+ reg = '(?<=doctor consumer notified at )\d+.\d+'
+ notified = match_rep_in_file(reg, log_file)
+ if not notified:
+ raise Exception('Can not find notified time')
+
+ return float(notified) - float(detected) \ No newline at end of file
diff --git a/tests/scenario/network_failure.py b/tests/scenario/network_failure.py
new file mode 100644
index 00000000..1d9027a2
--- /dev/null
+++ b/tests/scenario/network_failure.py
@@ -0,0 +1,71 @@
+##############################################################################
+# Copyright (c) 2017 ZTE Corporation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+from identity_auth import get_session
+from os_clients import nova_client
+from common.utils import SSHClient
+
+LINK_DOWN_SCRIPT = """
+#!/bin/bash -x
+dev=$(sudo ip a | awk '/ {compute_ip}\//{{print $NF}}')
+sleep 1
+sudo ip link set $dev down
+echo "doctor set link down at" $(date "+%s.%N")
+sleep 10
+sudo ip link set $dev up
+sleep 1
+"""
+
+
+class NetworkFault(object):
+
+ def __init__(self, conf, installer, log):
+ self.conf = conf
+ self.log = log
+ self.installer = installer
+ self.nova = nova_client(self.conf.nova_version, get_session())
+ self.host = None
+ self.GetLog = False
+
+ def start(self, host):
+ self.log.info('fault inject start......')
+ self._set_link_down(host.ip)
+ self.host = host
+ self.log.info('fault inject end......')
+
+ def cleanup(self):
+ self.log.info('fault inject cleanup......')
+ self.get_diable_network_log()
+
+ def get_diable_network_log(self):
+ if not self.GetLog:
+ self.log.info('Already get the disable_netork.log from down_host......')
+ return
+ if self.host is not None:
+ client = SSHClient(self.host.ip,
+ self.installer.node_user_name,
+ key_filename=self.installer.get_ssh_key_from_installer(),
+ look_for_keys=True,
+ log=self.log)
+ client.scp('disable_network.log', './disable_network.log', method='get')
+ self.log.info('Get the disable_netork.log from down_host(host_name:%s, host_ip:%s)'
+ % (self.host.name, self.host.ip))
+ self.GetLog = True
+
+ def _set_link_down(self, compute_ip):
+ file_name = './disable_network.sh'
+ with open(file_name, 'w') as file:
+ file.write(LINK_DOWN_SCRIPT.format(compute_ip=compute_ip))
+ client = SSHClient(compute_ip,
+ self.installer.node_user_name,
+ key_filename=self.installer.get_ssh_key_from_installer(),
+ look_for_keys=True,
+ log=self.log)
+ client.scp('./disable_network.sh', 'disable_network.sh')
+ command = 'bash disable_network.sh > disable_network.log 2>&1 &'
+ client.ssh(command)