summaryrefslogtreecommitdiffstats
path: root/doctor_tests/main.py
diff options
context:
space:
mode:
Diffstat (limited to 'doctor_tests/main.py')
-rw-r--r--doctor_tests/main.py215
1 files changed, 215 insertions, 0 deletions
diff --git a/doctor_tests/main.py b/doctor_tests/main.py
new file mode 100644
index 00000000..006aac9f
--- /dev/null
+++ b/doctor_tests/main.py
@@ -0,0 +1,215 @@
+##############################################################################
+# Copyright (c) 2017 ZTE Corporation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+import os
+from os.path import isfile, join
+import random
+import sys
+import time
+
+from doctor_tests.alarm import Alarm
+from doctor_tests.common.constants import Host
+from doctor_tests.common.utils import match_rep_in_file
+from doctor_tests import config
+from doctor_tests.consumer import get_consumer
+from doctor_tests.identity_auth import get_identity_auth
+from doctor_tests.identity_auth import get_session
+from doctor_tests.image import Image
+from doctor_tests.instance import Instance
+from doctor_tests.inspector import get_inspector
+from doctor_tests.installer import get_installer
+import doctor_tests.logger as doctor_log
+from doctor_tests.network import Network
+from doctor_tests.monitor import get_monitor
+from doctor_tests.os_clients import nova_client
+from doctor_tests.profiler_poc import main as profiler_main
+from doctor_tests.scenario.common import calculate_notification_time
+from doctor_tests.scenario.network_failure import NetworkFault
+from doctor_tests.user import User
+
+
+LOG = doctor_log.Logger('doctor').getLogger()
+
+
+class DoctorTest(object):
+
+ def __init__(self, conf):
+ self.conf = conf
+ self.image = Image(self.conf, LOG)
+ self.user = User(self.conf, LOG)
+ self.network = Network(self.conf, LOG)
+ self.instance = Instance(self.conf, LOG)
+ self.alarm = Alarm(self.conf, LOG)
+ self.installer = get_installer(self.conf, LOG)
+ self.inspector = get_inspector(self.conf, LOG)
+ self.monitor = get_monitor(self.conf,
+ self.inspector.get_inspector_url(),
+ LOG)
+ self.consumer = get_consumer(self.conf, LOG)
+ self.fault = NetworkFault(self.conf, self.installer, LOG)
+ auth = get_identity_auth(project=self.conf.doctor_project)
+ self.nova = nova_client(self.conf.nova_version,
+ get_session(auth=auth))
+ self.down_host = None
+
+ def setup(self):
+ # prepare the cloud env
+ self.installer.setup()
+
+ # preparing VM image...
+ self.image.create()
+
+ # creating test user...
+ self.user.create()
+ self.user.update_quota()
+
+ # creating VM...
+ self.network.create()
+ self.instance.create()
+ self.instance.wait_for_vm_launch()
+
+ # creating alarm...
+ self.alarm.create()
+
+ # starting doctor sample components...
+ self.inspector.start()
+
+ self.down_host = self.get_host_info_for_random_vm()
+ self.monitor.start(self.down_host)
+
+ self.consumer.start()
+
+ def run(self):
+ """run doctor test"""
+ try:
+ LOG.info('doctor test starting.......')
+
+ # prepare test env
+ self.setup()
+
+ # wait for aodh alarms are updated in caches for event evaluator,
+ # sleep time should be larger than event_alarm_cache_ttl(default 60)
+ time.sleep(60)
+
+ # injecting host failure...
+ # NOTE (umar) add INTERFACE_NAME logic to host injection
+
+ self.fault.start(self.down_host)
+ time.sleep(10)
+
+ # verify the test results
+ # NOTE (umar) copy remote monitor.log file when monitor=collectd
+ self.check_host_status(self.down_host.name, 'down')
+
+ notification_time = calculate_notification_time()
+ if notification_time < 1 and notification_time > 0:
+ LOG.info('doctor test successfully, notification_time=%s' % notification_time)
+ else:
+ LOG.error('doctor test failed, notification_time=%s' % notification_time)
+ sys.exit(1)
+
+ if self.conf.profiler_type:
+ LOG.info('doctor test begin to run profile.......')
+ self.collect_logs()
+ self.run_profiler()
+ except Exception as e:
+ LOG.error('doctor test failed, Exception=%s' % e)
+ sys.exit(1)
+ finally:
+ self.cleanup()
+
+ def get_host_info_for_random_vm(self):
+ num = random.randint(0, self.conf.instance_count - 1)
+ vm_name = "%s%d" % (self.conf.instance_basename, num)
+
+ servers = \
+ {getattr(server, 'name'): server
+ for server in self.nova.servers.list()}
+ server = servers.get(vm_name)
+ if not server:
+ raise \
+ Exception('Can not find instance: vm_name(%s)' % vm_name)
+ host_name = server.__dict__.get('OS-EXT-SRV-ATTR:hypervisor_hostname')
+ host_ip = self.installer.get_host_ip_from_hostname(host_name)
+
+ LOG.info('Get host info(name:%s, ip:%s) which vm(%s) launched at'
+ % (host_name, host_ip, vm_name))
+ return Host(host_name, host_ip)
+
+ def check_host_status(self, hostname, state):
+ service = self.nova.services.list(host=hostname, binary='nova-compute')
+ host_state = service[0].__dict__.get('state')
+ assert host_state == state
+
+ def unset_forced_down_hosts(self):
+ if self.down_host:
+ self.nova.services.force_down(self.down_host.name, 'nova-compute', False)
+ time.sleep(2)
+ self.check_host_status(self.down_host.name, 'up')
+
+ def collect_logs(self):
+ self.fault.get_disable_network_log()
+
+ def run_profiler(self):
+
+ log_file = '{0}/{1}'.format(sys.path[0], 'disable_network.log')
+ reg = '(?<=doctor set link down at )\d+.\d+'
+ linkdown = float(match_rep_in_file(reg, log_file).group(0))
+
+ log_file = '{0}/{1}'.format(sys.path[0], 'doctor.log')
+ reg = '(.* doctor mark vm.* error at )(\d+.\d+)'
+ vmdown = float(match_rep_in_file(reg, log_file).group(2))
+
+ reg = '(?<=doctor mark host.* down at )\d+.\d+'
+ hostdown = float(match_rep_in_file(reg, log_file).group(2))
+
+ reg = '(?<=doctor monitor detected at )\d+.\d+'
+ detected = float(match_rep_in_file(reg, log_file).group(0))
+
+ reg = '(?<=doctor consumer notified at )\d+.\d+'
+ notified = float(match_rep_in_file(reg, log_file).group(0))
+
+ # TODO(yujunz) check the actual delay to verify time sync status
+ # expected ~1s delay from $trigger to $linkdown
+ relative_start = linkdown
+ os.environ['DOCTOR_PROFILER_T00'] = str(int((linkdown - relative_start)*1000))
+ os.environ['DOCTOR_PROFILER_T01'] = str(int((detected - relative_start) * 1000))
+ os.environ['DOCTOR_PROFILER_T03'] = str(int((vmdown - relative_start) * 1000))
+ os.environ['DOCTOR_PROFILER_T04'] = str(int((hostdown - relative_start) * 1000))
+ os.environ['DOCTOR_PROFILER_T09'] = str(int((notified - relative_start) * 1000))
+
+ profiler_main(log=LOG)
+
+ def cleanup(self):
+ self.unset_forced_down_hosts()
+ self.inspector.stop()
+ self.monitor.stop()
+ self.consumer.stop()
+ self.installer.cleanup()
+ self.alarm.delete()
+ self.instance.delete()
+ self.network.delete()
+ self.image.delete()
+ self.fault.cleanup()
+ self.user.delete()
+
+
+def main():
+ """doctor main"""
+ test_dir = os.path.split(os.path.realpath(__file__))[0]
+ doctor_root_dir = os.path.dirname(test_dir)
+
+ config_file_dir = '{0}/{1}'.format(doctor_root_dir, 'etc/')
+ config_files = [join(config_file_dir, f) for f in os.listdir(config_file_dir)
+ if isfile(join(config_file_dir, f))]
+
+ conf = config.prepare_conf(args=sys.argv[1:],
+ config_files=config_files)
+
+ doctor = DoctorTest(conf)
+ doctor.run()