diff options
Diffstat (limited to 'tests')
-rw-r--r-- | tests/consumer.py | 9 | ||||
-rw-r--r-- | tests/functions-common | 72 | ||||
-rw-r--r-- | tests/inspector.py | 26 | ||||
-rw-r--r-- | tests/lib/inspector | 30 | ||||
-rw-r--r-- | tests/lib/inspectors/congress | 69 | ||||
-rw-r--r-- | tests/lib/inspectors/sample | 16 | ||||
-rw-r--r-- | tests/lib/installer | 34 | ||||
-rw-r--r-- | tests/lib/installers/apex | 24 | ||||
-rw-r--r-- | tests/lib/installers/fuel | 107 | ||||
-rw-r--r-- | tests/lib/installers/local | 21 | ||||
-rw-r--r-- | tests/logger.py | 47 | ||||
-rw-r--r-- | tests/monitor.py | 45 | ||||
-rw-r--r-- | tests/profiler-poc.py | 87 | ||||
-rwxr-xr-x | tests/run.sh | 350 |
14 files changed, 620 insertions, 317 deletions
diff --git a/tests/consumer.py b/tests/consumer.py index 9b3230fe..3c012b4f 100644 --- a/tests/consumer.py +++ b/tests/consumer.py @@ -11,17 +11,20 @@ import argparse from flask import Flask from flask import request import json +import logger as doctor_log import os import time +LOG = doctor_log.Logger('doctor_consumer').getLogger() + app = Flask(__name__) @app.route('/failure', methods=['POST']) def event_posted(): - app.logger.debug('doctor consumer notified at %s' % time.time()) - app.logger.debug('received data = %s' % request.data) + LOG.info('doctor consumer notified at %s' % time.time()) + LOG.info('received data = %s' % request.data) d = json.loads(request.data) return "OK" @@ -35,7 +38,7 @@ def get_args(): def main(): args = get_args() - app.run(host="0.0.0.0", port=args.port, debug=True) + app.run(host="0.0.0.0", port=args.port) if __name__ == '__main__': diff --git a/tests/functions-common b/tests/functions-common new file mode 100644 index 00000000..db2565a3 --- /dev/null +++ b/tests/functions-common @@ -0,0 +1,72 @@ +#!/bin/bash + +# Test if the named environment variable is set and not zero length +# is_set env-var +function is_set { + local var=\$"$1" + eval "[ -n \"$var\" ]" +} + +# Prints backtrace info +# filename:lineno:function +# backtrace level +function backtrace { + local level=$1 + local deep + deep=$((${#BASH_SOURCE[@]} - 1)) + echo "[Call Trace]" + while [ $level -le $deep ]; do + echo "${BASH_SOURCE[$deep]}:${BASH_LINENO[$deep-1]}:${FUNCNAME[$deep-1]}" + deep=$((deep - 1)) + done +} + +# Prints line number and "message" in error format +# err $LINENO "message" +function err { + local exitcode=$? + local xtrace + xtrace=$(set +o | grep xtrace) + set +o xtrace + local msg="[ERROR] ${BASH_SOURCE[2]}:$1 $2" + echo $msg 1>&2; + if [[ -n ${LOGDIR} ]]; then + echo $msg >> "${LOGDIR}/error.log" + fi + $xtrace + return $exitcode +} + +# Prints line number and "message" then exits +# die $LINENO "message" +function die { + local exitcode=$? + set +o xtrace + local line=$1; shift + if [ $exitcode == 0 ]; then + exitcode=1 + fi + backtrace 2 + err $line "$*" + # Give buffers a second to flush + sleep 1 + exit $exitcode +} + +# Checks an environment variable is not set or has length 0 OR if the +# exit code is non-zero and prints "message" and exits +# NOTE: env-var is the variable name without a '$' +# die_if_not_set $LINENO env-var "message" +function die_if_not_set { + local exitcode=$? + local xtrace + xtrace=$(set +o | grep xtrace) + set +o xtrace + local line=$1; shift + local evar=$1; shift + if ! is_set $evar || [ $exitcode != 0 ]; then + die $line "$*" + fi + $xtrace +} + diff --git a/tests/inspector.py b/tests/inspector.py index 62614158..c1f95697 100644 --- a/tests/inspector.py +++ b/tests/inspector.py @@ -12,6 +12,7 @@ import collections from flask import Flask from flask import request import json +import logger as doctor_log import os import time @@ -19,6 +20,8 @@ import novaclient.client as novaclient import nova_force_down +LOG = doctor_log.Logger('doctor_inspector').getLogger() + class DoctorInspectorSample(object): @@ -44,13 +47,14 @@ class DoctorInspectorSample(object): try: host=server.__dict__.get('OS-EXT-SRV-ATTR:host') self.servers[host].append(server) - app.logger.debug('get hostname=%s from server=%s' % (host, server)) + LOG.debug('get hostname=%s from server=%s' % (host, server)) except Exception as e: - app.logger.debug('can not get hostname from server=%s' % server) + LOG.error('can not get hostname from server=%s' % server) def disable_compute_host(self, hostname): for server in self.servers[hostname]: self.nova.servers.reset_state(server, 'error') + LOG.info('doctor mark vm(%s) error at %s' % (server, time.time())) # NOTE: We use our own client here instead of this novaclient for a # workaround. Once keystone provides v2.1 nova api endpoint @@ -60,23 +64,24 @@ class DoctorInspectorSample(object): # self.nova.services.force_down(hostname, 'nova-compute', True) # nova_force_down.force_down(hostname) + LOG.info('doctor mark host(%s) down at %s' % (hostname, time.time())) app = Flask(__name__) -app.debug = True inspector = DoctorInspectorSample() @app.route('/events', methods=['POST']) def event_posted(): - app.logger.debug('event posted at %s' % time.time()) - app.logger.debug('inspector = %s' % inspector) - app.logger.debug('received data = %s' % request.data) + LOG.info('event posted at %s' % time.time()) + LOG.info('inspector = %s' % inspector) + LOG.info('received data = %s' % request.data) d = json.loads(request.data) - hostname = d['hostname'] - event_type = d['type'] - if event_type == 'compute.host.down': - inspector.disable_compute_host(hostname) + for event in d: + hostname = event['details']['hostname'] + event_type = event['type'] + if event_type == 'compute.host.down': + inspector.disable_compute_host(hostname) return "OK" @@ -91,5 +96,6 @@ def main(): args = get_args() app.run(port=args.port) + if __name__ == '__main__': main() diff --git a/tests/lib/inspector b/tests/lib/inspector new file mode 100644 index 00000000..2fb7c409 --- /dev/null +++ b/tests/lib/inspector @@ -0,0 +1,30 @@ +#!/bin/bash + +INSPECTOR_TYPE=${INSPECTOR_TYPE:-sample} + +function is_inspector_supported { + local inspector="$1" + [[ -f $TOP_DIR/lib/inspectors/$inspector ]] +} + +function is_inspector { + local inspector="$1" + [[ $inspector == $INSPECTOR_TYPE ]] +} + +function start_inspector { + if ! is_inspector_supported $INSPECTOR_TYPE; then + die $LINENO"INSPECTOR_TYPE=$INSPECTOR_TYPE is not supported." + fi + + source $TOP_DIR/lib/inspectors/$INSPECTOR_TYPE + start_inspector_$INSPECTOR_TYPE +} + +function stop_inspector { + stop_inspector_$INSPECTOR_TYPE +} + +function cleanup_inspector { + cleanup_inspector_$INSPECTOR_TYPE +} diff --git a/tests/lib/inspectors/congress b/tests/lib/inspectors/congress new file mode 100644 index 00000000..04825252 --- /dev/null +++ b/tests/lib/inspectors/congress @@ -0,0 +1,69 @@ +#!/bin/bash + +function _congress_add_rule { + name=$1 + policy=$2 + rule=$3 + + if ! openstack congress policy rule list $policy | grep -q -e "// Name: $name$" ; then + openstack congress policy rule create --name $name $policy "$rule" + fi +} + +function _congress_del_rule { + name=$1 + policy=$2 + + if openstack congress policy rule list $policy | grep -q -e "^// Name: $name$" ; then + openstack congress policy rule delete $policy $name + fi +} + +function _congress_add_rules { + _congress_add_rule host_down classification \ + 'host_down(host) :- + doctor:events(hostname=host, type="compute.host.down", status="down")' + + _congress_add_rule active_instance_in_host classification \ + 'active_instance_in_host(vmid, host) :- + nova:servers(id=vmid, host_name=host, status="ACTIVE")' + + _congress_add_rule host_force_down classification \ + 'execute[nova:services.force_down(host, "nova-compute", "True")] :- + host_down(host)' + + _congress_add_rule error_vm_states classification \ + 'execute[nova:servers.reset_state(vmid, "error")] :- + host_down(host), + active_instance_in_host(vmid, host)' +} + +function start_inspector_congress { + nova_api_min_version="2.11" + nova_api_version=$(openstack congress datasource list | \ + grep nova | grep -Po "(?<='api_version': ')[^']*") + [[ -z $nova_api_version ]] && nova_api_version="2.0" + if [[ "$nova_api_version" < "$nova_api_min_version" ]]; then + echo "ERROR: Congress Nova datasource API version < $nova_api_min_version ($nova_api_version)" + exit 1 + fi + openstack congress driver list | grep -q " doctor " + openstack congress datasource list | grep -q " doctor " || { + openstack congress datasource create doctor doctor + } + _congress_add_rules + +} + +function stop_inspector_congress { + _congress_del_rule host_force_down classification + _congress_del_rule error_vm_states classification + _congress_del_rule active_instance_in_host classification + _congress_del_rule host_down classification + +} + +function cleanup_inspector_congress { + # Noop + return +} diff --git a/tests/lib/inspectors/sample b/tests/lib/inspectors/sample new file mode 100644 index 00000000..cd21a008 --- /dev/null +++ b/tests/lib/inspectors/sample @@ -0,0 +1,16 @@ +#!/bin/bash + +function start_inspector_sample { + pgrep -f "python inspector.py" && return 0 + python inspector.py "$INSPECTOR_PORT" > inspector.log 2>&1 & +} + +function stop_inspector_sample { + pgrep -f "python inspector.py" || return 0 + kill $(pgrep -f "python inspector.py") +} + +function cleanup_inspector_sample { + # Noop + return +} diff --git a/tests/lib/installer b/tests/lib/installer new file mode 100644 index 00000000..cdde6eff --- /dev/null +++ b/tests/lib/installer @@ -0,0 +1,34 @@ +#!/bin/bash + +INSTALLER_TYPE=${INSTALLER_TYPE:-local} +INSTALLER_IP=${INSTALLER_IP:-none} +ssh_opts_cpu="$ssh_opts" + +function is_installer_supported { + local installer="$1" + [[ -f $TOP_DIR/lib/installers/$installer ]] +} + +function is_installer { + local installer="$1" + [[ $installer == $INSTALLER_TYPE ]] +} + +function setup_installer { + if ! is_installer_supported $INSTALLER_TYPE; then + die $LINENO"INSTALLER_TYPE=$INSTALLER_TYPE is not supported." + fi + + source $TOP_DIR/lib/installers/$INSTALLER_TYPE + + if ! is_set INSTALLER_IP; then + get_installer_ip + fi + + installer_get_ssh_keys + installer_apply_patches +} + +function cleanup_installer { + cleanup_installer_$INSTALLER_TYPE +} diff --git a/tests/lib/installers/apex b/tests/lib/installers/apex new file mode 100644 index 00000000..54b3dce2 --- /dev/null +++ b/tests/lib/installers/apex @@ -0,0 +1,24 @@ +#!/bin/bash + +function get_installer_ip { + local instack_mac=$(sudo virsh domiflist instack | awk '/default/{print $5}') + INSTALLER_IP=$(/usr/sbin/arp -e | grep ${instack_mac} | awk '{print $1}') + die_if_not_set $LINENO $INSTALLER_IP "No installer IP" +} + +function installer_get_ssh_keys { + sudo scp $ssh_opts root@"$INSTALLER_IP":/home/stack/.ssh/id_rsa instack_key + sudo chown $(whoami):$(whoami) instack_key + chmod 400 instack_key + ssh_opts_cpu+=" -i instack_key" +} + +function installer_apply_patches { + # Noop + return +} + +function cleanup_installer_apex { + # Noop + return +} diff --git a/tests/lib/installers/fuel b/tests/lib/installers/fuel new file mode 100644 index 00000000..34a86922 --- /dev/null +++ b/tests/lib/installers/fuel @@ -0,0 +1,107 @@ +#!/bin/bash + +function get_installer_ip { + local instack_mac=$(sudo virsh domiflist fuel-opnfv | awk '/pxebr/{print $5}') + INSTALLER_IP=$(/usr/sbin/arp -e | grep ${instack_mac} | awk '{print $1}') + die_if_not_set $LINENO $INSTALLER_IP "No installer IP" +} + +function installer_get_ssh_keys { + sshpass -p r00tme scp $ssh_opts root@${INSTALLER_IP}:.ssh/id_rsa instack_key + sudo chown $(whoami):$(whoami) instack_key + chmod 400 instack_key + ssh_opts_cpu+=" -i instack_key" +} + +function installer_apply_patches { + cat > set_conf.sh << 'END_TXT' +#!/bin/bash +if [ -e /etc/ceilometer/event_pipeline.yaml ]; then + if ! grep -q '^ *- notifier://?topic=alarm.all$' /etc/ceilometer/event_pipeline.yaml; then + sed -i 's|- notifier://|- notifier://?topic=alarm.all|' /etc/ceilometer/event_pipeline.yaml + echo "modify the ceilometer config" + service ceilometer-agent-notification restart + fi +else + echo "ceilometer event_pipeline.yaml file does not exist" + exit 1 +fi +if [ -e /etc/nova/nova.conf ]; then + if ! grep -q '^notification_driver=messaging$' /etc/nova/nova.conf; then + sed -i -r 's/notification_driver=/notification_driver=messaging/g' /etc/nova/nova.conf + echo "modify nova config" + service nova-api restart + fi +else + echo "nova.conf file does not exist" + exit 1 +fi +exit 0 +END_TXT + + chmod +x set_conf.sh + CONTROLLER_IP=$(sshpass -p r00tme ssh 2>/dev/null $ssh_opts root@${INSTALLER_IP} \ + "fuel node | grep controller | cut -d '|' -f 5|xargs") + for node in $CONTROLLER_IP;do + scp $ssh_opts_cpu set_conf.sh "root@$node:" + ssh $ssh_opts_cpu "root@$node" './set_conf.sh > set_conf.log 2>&1 &' + sleep 1 + scp $ssh_opts_cpu "root@$node:set_conf.log" set_conf_$node.log + done + + if grep -q "modify the ceilometer config" set_conf_*.log ; then + NEED_TO_RESTORE_CEILOMETER=true + fi + if grep -q "modify nova config" set_conf_*.log ; then + NEED_TO_RESTORE_NOVA=true + fi + + echo "waiting service restart..." + sleep 60 + +} + +function cleanup_installer_fuel { + if ! ($NEED_TO_RESTORE_CEILOMETER || $NEED_TO_RESTORE_NOVA) ; then + echo "Don't need to restore config" + exit 0 + fi + + echo "restore the configuration..." + cat > restore_conf.sh << 'END_TXT' +#!/bin/bash +if @NEED_TO_RESTORE_CEILOMETER@ ; then + if [ -e /etc/ceilometer/event_pipeline.yaml ]; then + if grep -q '^ *- notifier://?topic=alarm.all$' /etc/ceilometer/event_pipeline.yaml; then + sed -i 's|- notifier://?topic=alarm.all|- notifier://|' /etc/ceilometer/event_pipeline.yaml + service ceilometer-agent-notification restart + fi + else + echo "ceilometer event_pipeline.yaml file does not exist" + exit 1 + fi +fi +if @NEED_TO_RESTORE_NOVA@ ; then + if [ -e /etc/nova/nova.conf ]; then + if grep -q '^notification_driver=messaging$' /etc/nova/nova.conf; then + sed -i -r 's/notification_driver=messaging/notification_driver=/g' /etc/nova/nova.conf + service nova-api restart + fi + else + echo "nova.conf file does not exist" + exit 1 + fi +fi +exit 0 +END_TXT + sed -i -e "s/@NEED_TO_RESTORE_CEILOMETER@/$NEED_TO_RESTORE_CEILOMETER/" restore_conf.sh + sed -i -e "s/@NEED_TO_RESTORE_NOVA@/$NEED_TO_RESTORE_NOVA/" restore_conf.sh + chmod +x restore_conf.sh + for node in $CONTROLLER_IP;do + scp $ssh_opts_cpu restore_conf.sh "root@$node:" + ssh $ssh_opts_cpu "root@$node" './restore_conf.sh > restore_conf.log 2>&1 &' + done + + echo "waiting service restart..." + sleep 60 +} diff --git a/tests/lib/installers/local b/tests/lib/installers/local new file mode 100644 index 00000000..e7aed14f --- /dev/null +++ b/tests/lib/installers/local @@ -0,0 +1,21 @@ +#!/bin/bash + +function get_installer_ip { + # Noop + return +} + +function installer_get_ssh_keys { + echo "INSTALLER_TYPE set to 'local'. Assuming SSH keys already exchanged with $COMPUTE_HOST" + return +} + +function installer_apply_patches { + # Noop + return +} + +function cleanup_installer_local { + # Noop + return +} diff --git a/tests/logger.py b/tests/logger.py new file mode 100644 index 00000000..a4f33234 --- /dev/null +++ b/tests/logger.py @@ -0,0 +1,47 @@ +############################################################################## +# Copyright (c) 2016 ZTE Corporation and others. +# +# All rights reserved. This program and the accompanying materials +# are made available under the terms of the Apache License, Version 2.0 +# which accompanies this distribution, and is available at +# http://www.apache.org/licenses/LICENSE-2.0 +############################################################################## +# Usage: +# import doctor_logger +# logger = doctor_logger.Logger("script_name").getLogger() +# logger.info("message to be shown with - INFO - ") +# logger.debug("message to be shown with - DEBUG -") + +import logging +import os + + +class Logger: + def __init__(self, logger_name): + + CI_DEBUG = os.getenv('CI_DEBUG') + + self.logger = logging.getLogger(logger_name) + self.logger.propagate = 0 + self.logger.setLevel(logging.DEBUG) + + formatter = logging.Formatter('%(asctime)s %(filename)s %(lineno)d ' + '%(levelname)-6s %(message)s') + + ch = logging.StreamHandler() + ch.setFormatter(formatter) + if CI_DEBUG is not None and CI_DEBUG.lower() == "true": + ch.setLevel(logging.DEBUG) + else: + ch.setLevel(logging.INFO) + self.logger.addHandler(ch) + + file_handler = logging.FileHandler('%s.log' % logger_name) + file_handler.setFormatter(formatter) + file_handler.setLevel(logging.DEBUG) + self.logger.addHandler(file_handler) + + + def getLogger(self): + return self.logger + diff --git a/tests/monitor.py b/tests/monitor.py index caf4c321..75d82392 100644 --- a/tests/monitor.py +++ b/tests/monitor.py @@ -10,6 +10,7 @@ import argparse from datetime import datetime import json +import logger as doctor_log import os import requests import socket @@ -26,6 +27,8 @@ ICMP_ECHO_MESSAGE = '\x08\x00\xf7\xff\x00\x00\x00\x00' SUPPORTED_INSPECTOR_TYPES = ['sample', 'congress'] +LOG = doctor_log.Logger('doctor_monitor').getLogger() + class DoctorMonitorSample(object): interval = 0.1 # second @@ -58,8 +61,8 @@ class DoctorMonitorSample(object): (congress_endpoint, doctor_ds['id'])) def start_loop(self): - print "start ping to host %(h)s (ip=%(i)s)" % {'h': self.hostname, - 'i': self.ip_addr} + LOG.debug("start ping to host %(h)s (ip=%(i)s)" % {'h': self.hostname, + 'i': self.ip_addr}) sock = socket.socket(socket.AF_INET, socket.SOCK_RAW, socket.IPPROTO_ICMP) sock.settimeout(self.timeout) @@ -68,40 +71,38 @@ class DoctorMonitorSample(object): sock.sendto(ICMP_ECHO_MESSAGE, (self.ip_addr, 0)) data = sock.recv(4096) except socket.timeout: - print "doctor monitor detected at %s" % time.time() + LOG.info("doctor monitor detected at %s" % time.time()) self.report_error() - print "ping timeout, quit monitoring..." + LOG.info("ping timeout, quit monitoring...") return time.sleep(self.interval) def report_error(self): + payload = [ + { + 'id': 'monitor_sample_id1', + 'time': datetime.now().isoformat(), + 'type': self.event_type, + 'details': { + 'hostname': self.hostname, + 'status': 'down', + 'monitor': 'monitor_sample', + 'monitor_event_id': 'monitor_sample_event1' + }, + }, + ] + data = json.dumps(payload) + if self.inspector_type == 'sample': - payload = {"type": self.event_type, "hostname": self.hostname} - data = json.dumps(payload) headers = {'content-type': 'application/json'} requests.post(self.inspector_url, data=data, headers=headers) elif self.inspector_type == 'congress': - data = [ - { - 'id': 'monitor_sample_id1', - 'time': datetime.now().isoformat(), - 'type': self.event_type, - 'details': { - 'hostname': self.hostname, - 'status': 'down', - 'monitor': 'monitor_sample', - 'monitor_event_id': 'monitor_sample_event1' - }, - }, - ] - headers = { 'Content-Type': 'application/json', 'Accept': 'application/json', 'X-Auth-Token':self.session.get_token(), } - - requests.put(self.inspector_url, data=json.dumps(data), headers=headers) + requests.put(self.inspector_url, data=data, headers=headers) def get_args(): diff --git a/tests/profiler-poc.py b/tests/profiler-poc.py new file mode 100644 index 00000000..71034781 --- /dev/null +++ b/tests/profiler-poc.py @@ -0,0 +1,87 @@ +############################################################################## +# Copyright (c) 2016 ZTE Corporation and others. +# +# All rights reserved. This program and the accompanying materials +# are made available under the terms of the Apache License, Version 2.0 +# which accompanies this distribution, and is available at +# http://www.apache.org/licenses/LICENSE-2.0 +############################################################################## + +""" +PoC of performance profiler for OPNFV doctor project + +Usage: + +Export environment variables to set timestamp at each checkpoint in millisecond. +Valid check points are: DOCTOR_PROFILER_T{00-09} + +See also: https://goo.gl/98Osig +""" + +import json +import os + +LOGFILE = 'performance-profile' +PREFIX = 'DOCTOR_PROFILER' +TOTAL_CHECK_POINTS = 10 +MODULE_CHECK_POINTS = ['T00', 'T01', 'T04', 'T05', 'T06', 'T09'] +TAG_FORMAT = "{:<5}" +# Inspired by https://github.com/reorx/httpstat +TEMPLATE = """ +Total time cost: {total}(ms) +==============================================================================> + |Monitor|Inspector |Controller|Notifier|Evaluator | + |{M00} |{M01} |{M02} |{M03} |{M04} | + | | | | | | | | | | +host down:{T00}| | | | | | | | | + raw failure:{T01}| | | | | | | | + found affected:{T02}| | | | | | | + set VM error:{T03}| | | | | | + marked host down:{T04}| | | | | + notified VM error:{T05} | | | | + transformed event:{T06}| | | + evaluated event:{T07}| | + fired alarm:{T08}| + received alarm:{T09} +""" + + +def main(): + check_points = ["T{:02d}".format(i) for i in range(TOTAL_CHECK_POINTS)] + module_map = {"M{:02d}".format(i): + (MODULE_CHECK_POINTS[i], MODULE_CHECK_POINTS[i + 1]) + for i in range(len(MODULE_CHECK_POINTS) - 1)} + + # check point tags + elapsed_ms = {cp: os.getenv("{}_{}".format(PREFIX, cp)) + for cp in check_points} + + def format_tag(tag): + return TAG_FORMAT.format(tag or '?') + + tags = {cp: format_tag(ms) for cp, ms in elapsed_ms.iteritems()} + + def time_cost(cp): + if elapsed_ms[cp[0]] and elapsed_ms[cp[1]]: + return int(elapsed_ms[cp[1]]) - int(elapsed_ms[cp[0]]) + else: + return None + + # module time cost tags + modules_cost_ms = {module: time_cost(cp) + for module, cp in module_map.iteritems()} + + tags.update({module: format_tag(cost) + for module, cost in modules_cost_ms.iteritems()}) + + tags.update({'total': time_cost((check_points[0], check_points[-1]))}) + + profile = TEMPLATE.format(**tags) + + logfile = open('{}.json'.format(LOGFILE), 'w') + logfile.write(json.dumps(tags)) + + print profile + +if __name__ == '__main__': + main() diff --git a/tests/run.sh b/tests/run.sh index 99e8feff..206f6a40 100755 --- a/tests/run.sh +++ b/tests/run.sh @@ -8,6 +8,8 @@ # http://www.apache.org/licenses/LICENSE-2.0 ############################################################################## +# Configuration + [[ "${CI_DEBUG:-true}" == [Tt]rue ]] && set -x IMAGE_URL=https://launchpad.net/cirros/trunk/0.3.0/+download/cirros-0.3.0-x86_64-disk.img @@ -25,231 +27,71 @@ DOCTOR_PW=doctor DOCTOR_PROJECT=doctor #TODO: change back to `_member_` when JIRA DOCTOR-55 is done DOCTOR_ROLE=admin +PROFILER_TYPE=${PROFILER_TYPE:-none} -SUPPORTED_INSTALLER_TYPES="apex fuel local" -INSTALLER_TYPE=${INSTALLER_TYPE:-local} -INSTALLER_IP=${INSTALLER_IP:-none} - -SUPPORTED_INSPECTOR_TYPES="sample congress" -INSPECTOR_TYPE=${INSPECTOR_TYPE:-sample} +TOP_DIR=$(cd $(dirname "$0") && pwd) ssh_opts="-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" as_doctor_user="--os-username $DOCTOR_USER --os-password $DOCTOR_PW --os-tenant-name $DOCTOR_PROJECT" -if [[ ! "$SUPPORTED_INSTALLER_TYPES" =~ "$INSTALLER_TYPE" ]] ; then - echo "ERROR: INSTALLER_TYPE=$INSTALLER_TYPE is not supported." - exit 1 -fi - -if [[ ! "$SUPPORTED_INSPECTOR_TYPES" =~ "$INSPECTOR_TYPE" ]] ; then - echo "ERROR: INSPECTOR_TYPE=$INSPECTOR_TYPE is not supported." - exit 1 -fi - -get_installer_ip() { - if [[ "$INSTALLER_TYPE" == "apex" ]] ; then - if [[ "$INSTALLER_IP" == "none" ]] ; then - instack_mac=$(sudo virsh domiflist instack | awk '/default/{print $5}') - INSTALLER_IP=$(/usr/sbin/arp -e | grep ${instack_mac} | awk '{print $1}') - fi - elif [[ "$INSTALLER_TYPE" == "fuel" ]] ; then - if [[ "$INSTALLER_IP" == "none" ]] ; then - instack_mac=$(sudo virsh domiflist fuel-opnfv | awk '/pxebr/{print $5}') - INSTALLER_IP=$(/usr/sbin/arp -e | grep ${instack_mac} | awk '{print $1}') - fi - fi - - if [[ "$INSTALLER_TYPE" != "local" ]] ; then - if [[ -z "$INSTALLER_IP" ]] ; then - echo "ERROR: no installer ip" - exit 1 - fi - fi -} - -prepare_ssh_to_cloud() { - ssh_opts_cpu="$ssh_opts" - - # get ssh key from installer node - if [[ "$INSTALLER_TYPE" == "apex" ]] ; then - sudo scp $ssh_opts root@"$INSTALLER_IP":/home/stack/.ssh/id_rsa instack_key - sudo chown $(whoami):$(whoami) instack_key - chmod 400 instack_key - ssh_opts_cpu+=" -i instack_key" - elif [[ "$INSTALLER_TYPE" == "fuel" ]] ; then - sshpass -p r00tme scp $ssh_opts root@${INSTALLER_IP}:.ssh/id_rsa instack_key - sudo chown $(whoami):$(whoami) instack_key - chmod 400 instack_key - ssh_opts_cpu+=" -i instack_key" - elif [[ "$INSTALLER_TYPE" == "local" ]] ; then - echo "INSTALLER_TYPE set to 'local'. Assuming SSH keys already exchanged with $COMPUTE_HOST" - fi -} -prepare_test_env() { - #TODO delete it when fuel support the configuration - if [[ "$INSTALLER_TYPE" == "fuel" ]] ; then - echo "modify the configuration..." - cat > set_conf.sh << 'END_TXT' -#!/bin/bash -if [ -e /etc/ceilometer/event_pipeline.yaml ]; then - if ! grep -q '^ *- notifier://?topic=alarm.all$' /etc/ceilometer/event_pipeline.yaml; then - sed -i 's|- notifier://|- notifier://?topic=alarm.all|' /etc/ceilometer/event_pipeline.yaml - echo "modify the ceilometer config" - service ceilometer-agent-notification restart - fi -else - echo "ceilometer event_pipeline.yaml file does not exist" - exit 1 -fi -if [ -e /etc/nova/nova.conf ]; then - if ! grep -q '^notification_driver=messaging$' /etc/nova/nova.conf; then - sed -i -r 's/notification_driver=/notification_driver=messaging/g' /etc/nova/nova.conf - echo "modify nova config" - service nova-api restart - fi -else - echo "nova.conf file does not exist" - exit 1 -fi -exit 0 -END_TXT - chmod +x set_conf.sh - CONTROLLER_IP=$(sshpass -p r00tme ssh 2>/dev/null $ssh_opts root@${INSTALLER_IP} \ - "fuel node | grep controller | cut -d '|' -f 5|xargs") - for node in $CONTROLLER_IP;do - scp $ssh_opts_cpu set_conf.sh "root@$node:" - ssh $ssh_opts_cpu "root@$node" './set_conf.sh > set_conf.log 2>&1 &' - sleep 1 - scp $ssh_opts_cpu "root@$node:set_conf.log" set_conf_$node.log - done - - if grep -q "modify the ceilometer config" set_conf_*.log ; then - NEED_TO_RESTORE_CEILOMETER=true - fi - if grep -q "modify nova config" set_conf_*.log ; then - NEED_TO_RESTORE_NOVA=true - fi - - echo "waiting service restart..." - sleep 60 - fi -} - -restore_test_env() { - #TODO delete it when fuel support the configuration - if [[ "$INSTALLER_TYPE" == "fuel" ]] ; then - if ! ($NEED_TO_RESTORE_CEILOMETER || $NEED_TO_RESTORE_NOVA) ; then - echo "Don't need to restore config" - exit 0 - fi - - echo "restore the configuration..." - cat > restore_conf.sh << 'END_TXT' -#!/bin/bash -if @NEED_TO_RESTORE_CEILOMETER@ ; then - if [ -e /etc/ceilometer/event_pipeline.yaml ]; then - if grep -q '^ *- notifier://?topic=alarm.all$' /etc/ceilometer/event_pipeline.yaml; then - sed -i 's|- notifier://?topic=alarm.all|- notifier://|' /etc/ceilometer/event_pipeline.yaml - service ceilometer-agent-notification restart - fi - else - echo "ceilometer event_pipeline.yaml file does not exist" - exit 1 - fi -fi -if @NEED_TO_RESTORE_NOVA@ ; then - if [ -e /etc/nova/nova.conf ]; then - if grep -q '^notification_driver=messaging$' /etc/nova/nova.conf; then - sed -i -r 's/notification_driver=messaging/notification_driver=/g' /etc/nova/nova.conf - service nova-api restart - fi - else - echo "nova.conf file does not exist" - exit 1 - fi -fi -exit 0 -END_TXT - sed -i -e "s/@NEED_TO_RESTORE_CEILOMETER@/$NEED_TO_RESTORE_CEILOMETER/" restore_conf.sh - sed -i -e "s/@NEED_TO_RESTORE_NOVA@/$NEED_TO_RESTORE_NOVA/" restore_conf.sh - chmod +x restore_conf.sh - for node in $CONTROLLER_IP;do - scp $ssh_opts_cpu restore_conf.sh "root@$node:" - ssh $ssh_opts_cpu "root@$node" './restore_conf.sh > restore_conf.log 2>&1 &' - done - - echo "waiting service restart..." - sleep 60 - fi -} +# Functions get_compute_host_info() { # get computer host info which VM boot in COMPUTE_HOST=$(openstack $as_doctor_user server show $VM_NAME | grep "OS-EXT-SRV-ATTR:host" | awk '{ print $4 }') compute_host_in_undercloud=${COMPUTE_HOST%%.*} - if [[ -z "$COMPUTE_HOST" ]] ; then - echo "ERROR: failed to get compute hostname" - exit 1 - fi + die_if_not_set $LINENO COMPUTE_HOST "Failed to get compute hostname" - if [[ "$INSTALLER_TYPE" == "apex" ]] ; then + if is_installer apex; then COMPUTE_USER=${COMPUTE_USER:-heat-admin} COMPUTE_IP=$(sudo ssh $ssh_opts $INSTALLER_IP \ "source stackrc; \ nova show $compute_host_in_undercloud \ | awk '/ ctlplane network /{print \$5}'") - elif [[ "$INSTALLER_TYPE" == "fuel" ]] ; then + elif is_installer fuel; then COMPUTE_USER=${COMPUTE_USER:-root} node_id=$(echo $compute_host_in_undercloud | cut -d "-" -f 2) COMPUTE_IP=$(sshpass -p r00tme ssh 2>/dev/null $ssh_opts root@${INSTALLER_IP} \ "fuel node|awk -F '|' -v id=$node_id '{if (\$1 == id) print \$5}' |xargs") - elif [[ "$INSTALLER_TYPE" == "local" ]] ; then + elif is_installer local; then COMPUTE_USER=${COMPUTE_USER:-$(whoami)} COMPUTE_IP=$(getent hosts "$COMPUTE_HOST" | awk '{ print $1 }') fi - if [[ -z "$COMPUTE_IP" ]]; then - echo "ERROR: Could not resolve $COMPUTE_HOST. Either manually set COMPUTE_IP or enable DNS resolution." - exit 1 - fi + die_if_not_set $LINENO COMPUTE_IP "Could not resolve $COMPUTE_HOST. Either manually set COMPUTE_IP or enable DNS resolution." echo "COMPUTE_HOST=$COMPUTE_HOST" echo "COMPUTE_IP=$COMPUTE_IP" # verify connectivity to target compute host ping -c 1 "$COMPUTE_IP" if [[ $? -ne 0 ]] ; then - echo "ERROR: can not ping to computer host" - exit 1 + die $LINENO "Can not ping to computer host" fi # verify ssh to target compute host ssh $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP" 'exit' if [[ $? -ne 0 ]] ; then - echo "ERROR: can not ssh to computer host" - exit 1 + die $LINENO "Can not ssh to computer host" fi } get_consumer_ip() { local get_consumer_command="ip route get $COMPUTE_IP | awk '/ src /{print \$NF}'" - if [[ "$INSTALLER_TYPE" == "apex" ]] ; then + if is_installer apex; then CONSUMER_IP=$(sudo ssh $ssh_opts root@$INSTALLER_IP \ "$get_consumer_command") - elif [[ "$INSTALLER_TYPE" == "fuel" ]] ; then + elif is_installer fuel; then CONSUMER_IP=$(sudo sshpass -p r00tme ssh $ssh_opts root@${INSTALLER_IP} \ "$get_consumer_command") - elif [[ "$INSTALLER_TYPE" == "local" ]] ; then + elif is_installer local; then CONSUMER_IP=`$get_consumer_command` fi echo "CONSUMER_IP=$CONSUMER_IP" - if [[ -z "$CONSUMER_IP" ]]; then - echo "ERROR: Could not get CONSUMER_IP." - exit 1 - fi + die_if_not_set $LINENO CONSUMER_IP "Could not get CONSUMER_IP." } download_image() { @@ -312,12 +154,6 @@ create_alarm() { -q "traits.state=string::error; traits.instance_id=string::$vm_id" } -print_log() { - log_file=$1 - echo "$log_file:" - sed -e 's/^/ /' "$log_file" -} - start_monitor() { pgrep -f "python monitor.py" && return 0 sudo -E python monitor.py "$COMPUTE_HOST" "$COMPUTE_IP" "$INSPECTOR_TYPE" \ @@ -327,79 +163,6 @@ start_monitor() { stop_monitor() { pgrep -f "python monitor.py" || return 0 sudo kill $(pgrep -f "python monitor.py") - print_log monitor.log -} - -congress_add_rule() { - name=$1 - policy=$2 - rule=$3 - - if ! openstack congress policy rule list $policy | grep -q -e "// Name: $name$" ; then - openstack congress policy rule create --name $name $policy "$rule" - fi -} - -congress_del_rule() { - name=$1 - policy=$2 - - if openstack congress policy rule list $policy | grep -q -e "^// Name: $name$" ; then - openstack congress policy rule delete $policy $name - fi -} - -congress_setup_rules() { - congress_add_rule host_down classification \ - 'host_down(host) :- - doctor:events(hostname=host, type="compute.host.down", status="down")' - - congress_add_rule active_instance_in_host classification \ - 'active_instance_in_host(vmid, host) :- - nova:servers(id=vmid, host_name=host, status="ACTIVE")' - - congress_add_rule host_force_down classification \ - 'execute[nova:services.force_down(host, "nova-compute", "True")] :- - host_down(host)' - - congress_add_rule error_vm_states classification \ - 'execute[nova:servers.reset_state(vmid, "error")] :- - host_down(host), - active_instance_in_host(vmid, host)' -} - -start_inspector() { - if [[ "$INSPECTOR_TYPE" == "sample" ]] ; then - pgrep -f "python inspector.py" && return 0 - python inspector.py "$INSPECTOR_PORT" > inspector.log 2>&1 & - elif [[ "$INSPECTOR_TYPE" == "congress" ]] ; then - nova_api_min_version="2.11" - nova_api_version=$(openstack congress datasource list | \ - grep nova | grep -Po "(?<='api_version': ')[^']*") - [[ -z $nova_api_version ]] && nova_api_version="2.0" - if [[ "$nova_api_version" < "$nova_api_min_version" ]]; then - echo "ERROR: Congress Nova datasource API version < $nova_api_min_version ($nova_api_version)" - exit 1 - fi - openstack congress driver list | grep -q " doctor " - openstack congress datasource list | grep -q " doctor " || { - openstack congress datasource create doctor doctor - } - congress_setup_rules - fi -} - -stop_inspector() { - if [[ "$INSPECTOR_TYPE" == "sample" ]] ; then - pgrep -f "python inspector.py" || return 0 - kill $(pgrep -f "python inspector.py") - print_log inspector.log - elif [[ "$INSPECTOR_TYPE" == "congress" ]] ; then - congress_del_rule host_force_down classification - congress_del_rule error_vm_states classification - congress_del_rule active_instance_in_host classification - congress_del_rule host_down classification - fi } start_consumer() { @@ -409,21 +172,18 @@ start_consumer() { # NOTE(r-mibu): create tunnel to the controller nodes, so that we can # avoid some network problems dpends on infra and installers. # This tunnel will be terminated by stop_consumer() or after 10 mins passed. - if [[ "$INSTALLER_TYPE" != "local" ]] ; then - if [[ "$INSTALLER_TYPE" == "apex" ]] ; then + if ! is_installer local; then + if is_installer apex; then CONTROLLER_IPS=$(sudo ssh $ssh_opts $INSTALLER_IP \ "source stackrc; \ nova list | grep ' overcloud-controller-[0-9] ' \ | sed -e 's/^.*ctlplane=//' -e 's/ *|\$//'") - elif [[ "$INSTALLER_TYPE" == "fuel" ]] ; then + elif is_installer fuel; then CONTROLLER_IPS=$(sshpass -p r00tme ssh 2>/dev/null $ssh_opts root@${INSTALLER_IP} \ "fuel node | grep controller | cut -d '|' -f 5|xargs") fi - if [[ -z "$CONTROLLER_IPS" ]]; then - echo "ERROR: Could not get CONTROLLER_IPS." - exit 1 - fi + die_if_not_set $LINENO CONTROLLER_IPS "Could not get CONTROLLER_IPS." for ip in $CONTROLLER_IPS do forward_rule="-R $CONSUMER_PORT:localhost:$CONSUMER_PORT" @@ -436,16 +196,14 @@ start_consumer() { stop_consumer() { pgrep -f "python consumer.py" || return 0 kill $(pgrep -f "python consumer.py") - print_log consumer.log # NOTE(r-mibu): terminate tunnels to the controller nodes - if [[ "$INSTALLER_TYPE" != "local" ]] ; then + if ! is_installer local; then for ip in $CONTROLLER_IPS do forward_rule="-R $CONSUMER_PORT:localhost:$CONSUMER_PORT" tunnel_command="sudo ssh $ssh_opts_cpu $COMPUTE_USER@$ip $forward_rule sleep 600" kill $(pgrep -f "$tunnel_command") - print_log "ssh_tunnel.${ip}.log" done fi } @@ -463,12 +221,13 @@ wait_for_vm_launch() { sleep 5 return 0 fi - [[ "$state" == "ERROR" ]] && echo "vm state is ERROR" && exit 1 + if [[ "$state" == "ERROR" ]]; then + die $LINENO "vm state is ERROR" + fi count=$(($count+1)) sleep 1 done - echo "ERROR: time out while waiting for vm launch" - exit 1 + die $LINENO "Time out while waiting for VM launch" } inject_failure() { @@ -478,6 +237,7 @@ inject_failure() { dev=$(sudo ip a | awk '/ @COMPUTE_IP@\//{print $7}') sleep 1 sudo ip link set $dev down +echo "doctor set host down at" $(date "+%s.%N") sleep 180 sudo ip link set $dev up sleep 1 @@ -488,13 +248,40 @@ END_TXT ssh $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP" 'nohup ./disable_network.sh > disable_network.log 2>&1 &' } +profile_performance_poc() { + triggered=$(grep "^doctor set host down at" disable_network.log |\ + sed -e "s/^.* at //") + vmdown=$(grep "doctor mark vm.* error at" inspector.log |tail -n 1 |\ + sed -e "s/^.* at //") + hostdown=$(grep "doctor mark host.* down at" inspector.log |\ + sed -e "s/^.* at //") + + #calculate the relative interval to triggered(T00) + export DOCTOR_PROFILER_T00=0 + export DOCTOR_PROFILER_T01=$(echo "($detected-$triggered)*1000/1" |bc) + export DOCTOR_PROFILER_T03=$(echo "($vmdown-$triggered)*1000/1" |bc) + export DOCTOR_PROFILER_T04=$(echo "($hostdown-$triggered)*1000/1" |bc) + export DOCTOR_PROFILER_T09=$(echo "($notified-$triggered)*1000/1" |bc) + + python profiler-poc.py +} + calculate_notification_time() { - detected=$(grep "doctor monitor detected at" monitor.log | awk '{print $5}') - notified=$(grep "doctor consumer notified at" consumer.log | awk '{print $5}') if ! grep -q "doctor consumer notified at" consumer.log ; then - echo "ERROR: consumer hasn't received fault notification." - exit 1 + die $LINENO "Consumer hasn't received fault notification." + fi + + #keep 'at' as the last keyword just before the value, and + #use regex to get value instead of the fixed column + detected=$(grep "doctor monitor detected at" monitor.log |\ + sed -e "s/^.* at //") + notified=$(grep "doctor consumer notified at" consumer.log |\ + sed -e "s/^.* at //") + + if [[ "$PROFILER_TYPE" == "poc" ]]; then + profile_performance_poc fi + echo "$notified $detected" | \ awk '{ d = $1 - $2; @@ -509,14 +296,11 @@ check_host_status() { host_status_line=$(openstack $as_doctor_user --os-compute-api-version 2.16 \ server show $VM_NAME | grep "host_status") host_status=$(echo $host_status_line | awk '{print $4}') - if [ -z "$host_status" ] ; then - echo "ERROR: host_status not reported by: nova show $VM_NAME" - exit 1 - elif [[ "$expected_state" =~ "$host_status" ]] ; then + die_if_not_set $LINENO host_status "host_status not reported by: nova show $VM_NAME" + if [[ "$expected_state" =~ "$host_status" ]] ; then echo "$VM_NAME showing host_status: $host_status" else - echo "ERROR: host_status:$host_status not equal to expected_state: $expected_state" - exit 1 + die $LINENO "host_status:$host_status not equal to expected_state: $expected_state" fi } @@ -532,7 +316,6 @@ cleanup() { sleep 240 check_host_status "UP" scp $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP:disable_network.log" . - print_log disable_network.log openstack $as_doctor_user server list | grep -q " $VM_NAME " && openstack $as_doctor_user server delete "$VM_NAME" sleep 1 @@ -552,18 +335,21 @@ cleanup() { openstack project delete "$DOCTOR_PROJECT" openstack user delete "$DOCTOR_USER" - restore_test_env + cleanup_installer + cleanup_inspector } +# Main process echo "Note: doctor/tests/run.sh has been executed." trap cleanup EXIT -echo "preparing test env..." -get_installer_ip -prepare_ssh_to_cloud -prepare_test_env +source $TOP_DIR/functions-common +source $TOP_DIR/lib/installer +source $TOP_DIR/lib/inspector + +setup_installer echo "preparing VM image..." download_image |