diff options
-rw-r--r-- | UPSTREAM | 25 | ||||
-rw-r--r-- | docs/development/requirements/06-summary.rst | 24 | ||||
-rw-r--r-- | docs/development/requirements/index.rst | 13 | ||||
-rw-r--r-- | tests/identity_auth.py | 34 | ||||
-rw-r--r-- | tests/inspector.py | 25 | ||||
-rw-r--r-- | tests/lib/inspectors/sample | 4 | ||||
-rw-r--r-- | tests/lib/installers/apex | 4 | ||||
-rw-r--r-- | tests/monitor.py | 23 | ||||
-rw-r--r-- | tests/nova_force_down.py | 60 | ||||
-rwxr-xr-x | tests/run.sh | 104 |
10 files changed, 141 insertions, 175 deletions
@@ -25,15 +25,28 @@ - url: https://blueprints.launchpad.net/congress/+spec/push-type-datasource-driver system: Launchpad -#- -# url: https://review.openstack.org/#/c/314915/ -# system: Gerrit - url: https://blueprints.launchpad.net/cinder/+spec/mark-services-down system: Launchpad - url: https://blueprints.launchpad.net/python-cinderclient/+spec/mark-service-down-cli system: Launchpad -#- -# url: https://bugs.launchpad.net/neutron/+bug/1513144 -# system: Launchpad-bug +- + url: https://bugs.launchpad.net/neutron/+bug/1513144 + system: Launchpad-bug +- + url: https://review.openstack.org/314915 + system: Gerrit + # shortest path +- + url: https://review.openstack.org/424868/ + system: Gerrit + # endswith '/' +- + url: https://review.openstack.org/#/c/430428 + system: Gerrit + # comment path +- + url: https://review.openstack.org/424340 + system: Gerrit + # WIP diff --git a/docs/development/requirements/06-summary.rst b/docs/development/requirements/06-summary.rst deleted file mode 100644 index 61bf3f47..00000000 --- a/docs/development/requirements/06-summary.rst +++ /dev/null @@ -1,24 +0,0 @@ -.. This work is licensed under a Creative Commons Attribution 4.0 International License. -.. http://creativecommons.org/licenses/by/4.0 - -Summary and conclusion -====================== - -The Doctor project aimed at detailing NFVI fault management and NFVI maintenance -requirements. These are indispensable operations for an Operator, and extremely -necessary to realize telco-grade high availability. High availability is a large -topic; the objective of Doctor is not to realize a complete high availability -architecture and implementation. Instead, Doctor limited itself to addressing -the fault events in NFVI, and proposes enhancements necessary in VIM, e.g. -OpenStack, to ensure VNFs availability in such fault events, taking a Telco VNFs -application level management system into account. 
- -The Doctor project performed a robust analysis of the requirements from NFVI -fault management and NFVI maintenance operation, concretely found out gaps in -between such requirements and the current implementation of OpenStack, and -proposed potential development plans to fill out such gaps in OpenStack. -Blueprints are already under investigation and the next step is to fill out -those gaps in OpenStack by code development in the coming releases. - -.. - vim: set tabstop=4 expandtab textwidth=80: diff --git a/docs/development/requirements/index.rst b/docs/development/requirements/index.rst index fcbfb88e..540f2ac7 100644 --- a/docs/development/requirements/index.rst +++ b/docs/development/requirements/index.rst @@ -26,19 +26,6 @@ Doctor: Fault Management and Maintenance realization for a NFVI fault management and maintenance solution in open source software. -:History: - - ========== ===================================================== - Date Description - ========== ===================================================== - 02.12.2014 Project creation - 14.04.2015 Initial version of the deliverable uploaded to Gerrit - 18.05.2015 Stable version of the Doctor deliverable - 25.02.2016 Updated version for the Brahmaputra release - 26.09.2016 Updated version for the Colorado release - xx.xx.2017 Updated version for the Danube release - ========== ===================================================== - .. raw:: latex \newpage diff --git a/tests/identity_auth.py b/tests/identity_auth.py new file mode 100644 index 00000000..4726ca37 --- /dev/null +++ b/tests/identity_auth.py @@ -0,0 +1,34 @@ +############################################################################## +# Copyright (c) 2017 NEC Corporation and others. +# +# All rights reserved. 
This program and the accompanying materials +# are made available under the terms of the Apache License, Version 2.0 +# which accompanies this distribution, and is available at +# http://www.apache.org/licenses/LICENSE-2.0 +############################################################################## + +import os + +from keystoneauth1.identity import v2 +from keystoneauth1.identity import v3 + + +def get_identity_auth(): + auth_url = os.environ['OS_AUTH_URL'] + username = os.environ['OS_USERNAME'] + password = os.environ['OS_PASSWORD'] + user_domain_name = os.environ.get('OS_USER_DOMAIN_NAME') + project_name = os.environ.get('OS_PROJECT_NAME') or os.environ.get('OS_TENANT_NAME') + project_domain_name = os.environ.get('OS_PROJECT_DOMAIN_NAME') + if auth_url.endswith('v3'): + return v3.Password(auth_url=auth_url, + username=username, + password=password, + user_domain_name=user_domain_name, + project_name=project_name, + project_domain_name=project_domain_name) + else: + return v2.Password(auth_url=auth_url, + username=username, + password=password, + tenant_name=project_name) diff --git a/tests/inspector.py b/tests/inspector.py index 7195969a..ba00f40e 100644 --- a/tests/inspector.py +++ b/tests/inspector.py @@ -17,9 +17,10 @@ import os import threading import time +from keystoneauth1 import session import novaclient.client as novaclient -import nova_force_down +import identity_auth LOG = doctor_log.Logger('doctor_inspector').getLogger() @@ -39,7 +40,7 @@ class ThreadedResetState(threading.Thread): class DoctorInspectorSample(object): - NOVA_API_VERSION = '2.11' + NOVA_API_VERSION = '2.34' NUMBER_OF_CLIENTS = 50 # TODO(tojuvone): This could be enhanced in future with dynamic # reuse of self.novaclients when all threads in use and @@ -49,14 +50,13 @@ class DoctorInspectorSample(object): def __init__(self): self.servers = collections.defaultdict(list) self.novaclients = list() + auth=identity_auth.get_identity_auth() + sess=session.Session(auth=auth) # Pool of 
novaclients for redundant usage for i in range(self.NUMBER_OF_CLIENTS): - self.novaclients.append(novaclient.Client(self.NOVA_API_VERSION, - os.environ['OS_USERNAME'], - os.environ['OS_PASSWORD'], - os.environ['OS_TENANT_NAME'], - os.environ['OS_AUTH_URL'], - connection_pool=True)) + self.novaclients.append( + novaclient.Client(self.NOVA_API_VERSION, session=sess, + connection_pool=True)) # Normally we use this client for non redundant API calls self.nova=self.novaclients[0] self.nova.servers.list(detailed=False) @@ -87,14 +87,7 @@ class DoctorInspectorSample(object): threads.append(t) for t in threads: t.join() - # NOTE: We use our own client here instead of this novaclient for a - # workaround. Once keystone provides v2.1 nova api endpoint - # in the service catalog which is configured by OpenStack - # installer, we can use this: - # - # self.nova.services.force_down(hostname, 'nova-compute', True) - # - nova_force_down.force_down(hostname) + self.nova.services.force_down(hostname, 'nova-compute', True) LOG.info('doctor mark host(%s) down at %s' % (hostname, time.time())) diff --git a/tests/lib/inspectors/sample b/tests/lib/inspectors/sample index d09f7ad0..75d5c022 100644 --- a/tests/lib/inspectors/sample +++ b/tests/lib/inspectors/sample @@ -1,9 +1,9 @@ #!/bin/bash +INSPECTOR_PORT=${INSPECTOR_PORT:-12345} + function start_inspector_sample { pgrep -f "python inspector.py" && return 0 - # TODO (r-mibu): make sample inspector use keystone v3 api - OS_AUTH_URL=${OS_AUTH_URL/v3/v2.0} \ python inspector.py "$INSPECTOR_PORT" > inspector.log 2>&1 & } diff --git a/tests/lib/installers/apex b/tests/lib/installers/apex index e353d251..d9c41f80 100644 --- a/tests/lib/installers/apex +++ b/tests/lib/installers/apex @@ -28,6 +28,10 @@ function setup_installer { get_installer_ip installer_get_ssh_keys get_controller_ips + + # NOTE: while executing command as doctor user, + # 'OS_PROJECT_ID' env parameter makes openstack clients confused.
+ unset OS_PROJECT_ID } function get_compute_ip_from_hostname { diff --git a/tests/monitor.py b/tests/monitor.py index 75d82392..8e8aa7a5 100644 --- a/tests/monitor.py +++ b/tests/monitor.py @@ -18,8 +18,8 @@ import sys import time from congressclient.v1 import client -from keystoneclient import session as ksc_session -from keystoneclient.auth.identity import v2 + +import identity_auth # NOTE: icmp message with all zero data (checksum = 0xf7ff) # see https://tools.ietf.org/html/rfc792 @@ -29,6 +29,7 @@ SUPPORTED_INSPECTOR_TYPES = ['sample', 'congress'] LOG = doctor_log.Logger('doctor_monitor').getLogger() + class DoctorMonitorSample(object): interval = 0.1 # second @@ -40,18 +41,15 @@ class DoctorMonitorSample(object): raise Exception("Inspector type '%s' not supported", args.inspector_type) self.hostname = args.hostname - self.inspector_url = args.inspector_url self.inspector_type = args.inspector_type self.ip_addr = args.ip or socket.gethostbyname(self.hostname) - if self.inspector_type == 'congress': - auth = v2.Password(auth_url=os.environ['OS_AUTH_URL'], - username=os.environ['OS_USERNAME'], - password=os.environ['OS_PASSWORD'], - tenant_name=os.environ['OS_TENANT_NAME']) - self.session = ksc_session.Session(auth=auth) - - congress = client.Client(session=self.session, service_type='policy') + if self.inspector_type == 'sample': + self.inspector_url = 'http://127.0.0.1:12345/events' + elif self.inspector_type == 'congress': + auth=identity_auth.get_identity_auth() + sess=session.Session(auth=auth) + congress = client.Client(session=sess, service_type='policy') ds = congress.list_datasources()['results'] doctor_ds = next((item for item in ds if item['driver'] == 'doctor'), None) @@ -114,9 +112,6 @@ def get_args(): parser.add_argument('inspector_type', metavar='INSPECTOR_TYPE', type=str, nargs='?', help='inspector to report', default='sample') - parser.add_argument('inspector_url', metavar='INSPECTOR_URL', type=str, nargs='?', - help='inspector url to report 
error', - default='http://127.0.0.1:12345/events') return parser.parse_args() diff --git a/tests/nova_force_down.py b/tests/nova_force_down.py deleted file mode 100644 index abea5671..00000000 --- a/tests/nova_force_down.py +++ /dev/null @@ -1,60 +0,0 @@ -############################################################################## -# Copyright (c) 2016 NEC Corporation and others. -# -# All rights reserved. This program and the accompanying materials -# are made available under the terms of the Apache License, Version 2.0 -# which accompanies this distribution, and is available at -# http://www.apache.org/licenses/LICENSE-2.0 -############################################################################## - -import argparse -import json -import os - -from keystoneclient.v2_0 import client -import requests - - -def force_down(hostname, force_down=True): - keystone = client.Client(username=os.environ['OS_USERNAME'], - password=os.environ['OS_PASSWORD'], - tenant_name=os.environ['OS_TENANT_NAME'], - auth_url=os.environ['OS_AUTH_URL']) - - for service in keystone.auth_ref['serviceCatalog']: - if service['type'] == 'compute': - base_url = service['endpoints'][0]['internalURL'] - break - - url = '%s/os-services/force-down' % base_url.replace('/v2/', '/v2.1/') - data = { - 'forced_down': force_down, - 'binary': 'nova-compute', - 'host': hostname, - } - headers = { - 'Content-Type': 'application/json', - 'Accept': 'application/json', - 'X-Auth-Token': keystone.auth_ref['token']['id'], - 'X-OpenStack-Nova-API-Version': '2.11', - } - - print requests.put(url, data=json.dumps(data), headers=headers) - - -def get_args(): - parser = argparse.ArgumentParser(description='Doctor Test Cleaner') - parser.add_argument('hostname', metavar='HOSTNAME', type=str, nargs='?', - help='a nova-compute hostname to force down') - parser.add_argument('--unset', action='store_true', default=False, - help='unset force_down flag') - return parser.parse_args() - - -def main(): - args = get_args() - 
force_down(args.hostname, not(args.unset)) - - -if __name__ == '__main__': - main() diff --git a/tests/run.sh b/tests/run.sh index 855ecfc6..d97a5c9c 100755 --- a/tests/run.sh +++ b/tests/run.sh @@ -21,8 +21,9 @@ VM_BASENAME=doctor_vm VM_FLAVOR=m1.tiny #if VM_COUNT set, use that instead VM_COUNT=${VM_COUNT:-1} +NET_NAME=doctor_net +NET_CIDR=192.168.168.0/24 ALARM_BASENAME=doctor_alarm -INSPECTOR_PORT=12345 CONSUMER_PORT=12346 DOCTOR_USER=doctor DOCTOR_PW=doctor @@ -34,7 +35,10 @@ PROFILER_TYPE=${PROFILER_TYPE:-none} TOP_DIR=$(cd $(dirname "$0") && pwd) as_doctor_user="--os-username $DOCTOR_USER --os-password $DOCTOR_PW - --os-tenant-name $DOCTOR_PROJECT" + --os-project-name $DOCTOR_PROJECT --os-tenant-name $DOCTOR_PROJECT" +# NOTE: ceilometer command still requires '--os-tenant-name'. +#ceilometer="ceilometer ${as_doctor_user/--os-project-name/--os-tenant-name}" +ceilometer="ceilometer $as_doctor_user" # Functions @@ -110,17 +114,17 @@ create_test_user() { openstack user create "$DOCTOR_USER" --password "$DOCTOR_PW" \ --project "$DOCTOR_PROJECT" } - openstack user role list "$DOCTOR_USER" --project "$DOCTOR_PROJECT" \ - | grep -q " $DOCTOR_ROLE " || { - openstack role add "$DOCTOR_ROLE" --user "$DOCTOR_USER" \ - --project "$DOCTOR_PROJECT" + openstack role show "$DOCTOR_ROLE" || { + openstack role create "$DOCTOR_ROLE" } + openstack role add "$DOCTOR_ROLE" --user "$DOCTOR_USER" \ + --project "$DOCTOR_PROJECT" # tojuvone: openstack quota show is broken and have to use nova # https://bugs.launchpad.net/manila/+bug/1652118 # Note! while it is encouraged to use openstack client it has proven # quite buggy. 
# QUOTA=$(openstack quota show $DOCTOR_PROJECT) - DOCTOR_QUOTA=$(nova quota-show --tenant DOCTOR_PROJECT) + DOCTOR_QUOTA=$(nova quota-show --tenant $DOCTOR_PROJECT) # We make sure that quota allows number of instances and cores OLD_INSTANCE_QUOTA=$(echo "${DOCTOR_QUOTA}" | grep " instances " | \ awk '{print $4}') @@ -138,26 +142,35 @@ create_test_user() { boot_vm() { # test VM done with test user, so can test non-admin + + if ! openstack $as_doctor_user network show $NET_NAME; then + openstack $as_doctor_user network create $NET_NAME + fi + if ! openstack $as_doctor_user subnet show $NET_NAME; then + openstack $as_doctor_user subnet create $NET_NAME \ + --network $NET_NAME --subnet-range $NET_CIDR --no-dhcp + fi + net_id=$(openstack $as_doctor_user network show $NET_NAME -f value -c id) + servers=$(openstack $as_doctor_user server list) for i in `seq $VM_COUNT`; do echo "${servers}" | grep -q " $VM_BASENAME$i " && continue openstack $as_doctor_user server create --flavor "$VM_FLAVOR" \ - --image "$IMAGE_NAME" \ - "$VM_BASENAME$i" + --image "$IMAGE_NAME" --nic net-id=$net_id "$VM_BASENAME$i" done sleep 1 } create_alarm() { # get vm_id as test user - alarm_list=$(ceilometer $as_doctor_user alarm-list) + alarm_list=$($ceilometer alarm-list) vms=$(openstack $as_doctor_user server list) for i in `seq $VM_COUNT`; do echo "${alarm_list}" | grep -q " $ALARM_BASENAME$i " || { vm_id=$(echo "${vms}" | grep " $VM_BASENAME$i " | awk '{print $2}') # TODO(r-mibu): change notification endpoint from localhost to the # consumer. IP address (functest container). 
- ceilometer $as_doctor_user alarm-event-create \ + $ceilometer alarm-event-create \ --name "$ALARM_BASENAME$i" \ --alarm-action "http://localhost:$CONSUMER_PORT/failure" \ --description "VM failure" \ @@ -174,7 +187,7 @@ create_alarm() { start_monitor() { pgrep -f "python monitor.py" && return 0 sudo -E python monitor.py "$COMPUTE_HOST" "$COMPUTE_IP" "$INSPECTOR_TYPE" \ - "http://127.0.0.1:$INSPECTOR_PORT/events" > monitor.log 2>&1 & + > monitor.log 2>&1 & } stop_monitor() { @@ -300,19 +313,6 @@ calculate_notification_time() { }' } -wait_ping() { - local interval=5 - local rounds=$(($1 / $interval)) - for i in `seq $rounds`; do - ping -c 1 "$COMPUTE_IP" - if [[ $? -ne 0 ]] ; then - sleep $interval - continue - fi - return 0 - done -} - check_host_status() { # Check host related to first Doctor VM is in wanted state # $1 Expected state @@ -340,25 +340,36 @@ check_host_status() { } unset_forced_down_hosts() { - for host in $(openstack compute service list --service nova-compute \ - -f value -c Host -c State | sed -n -e '/down$/s/ *down$//p') + # for debug + openstack compute service list --service nova-compute + + downed_computes=$(openstack compute service list --service nova-compute \ + -f value -c Host -c State | grep ' down$' \ + | sed -e 's/ *down$//') + echo "downed_computes: $downed_computes" + for host in $downed_computes do - # TODO (r-mibu): make sample inspector use keystone v3 api - OS_AUTH_URL=${OS_AUTH_URL/v3/v2.0} \ - python ./nova_force_down.py $host --unset + # TODO(r-mibu): use openstack client + #openstack compute service set --up $host nova-compute + nova service-force-down --unset $host nova-compute done echo "waiting disabled compute host back to be enabled..." wait_until 'openstack compute service list --service nova-compute -f value -c State | grep -q down' 240 5 + + for host in $downed_computes + do + # TODO(r-mibu): improve 'get_compute_ip_from_hostname' + get_compute_ip_from_hostname $host + wait_until "! 
ping -c 1 $COMPUTE_IP" 120 5 + done } collect_logs() { - unset_forced_down_hosts - # TODO: We need to make sure the target compute host is back to IP - # reachable. wait_ping() will be added by tojuvone . - sleep 110 - scp $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP:disable_network.log" . + if [[ -n "$COMPUTE_IP" ]];then + scp $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP:disable_network.log" . + fi # TODO(yujunz) collect other logs, e.g. nova, aodh } @@ -398,10 +409,8 @@ cleanup() { stop_consumer unset_forced_down_hosts + collect_logs - wait_ping 120 - - scp $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP:disable_network.log" . vms=$(openstack $as_doctor_user server list) vmstodel="" for i in `seq $VM_COUNT`; do @@ -409,12 +418,15 @@ cleanup() { vmstodel+=" $VM_BASENAME$i" done [[ $vmstodel ]] && openstack $as_doctor_user server delete $vmstodel - alarm_list=$(ceilometer $as_doctor_user alarm-list) + alarm_list=$($ceilometer alarm-list) for i in `seq $VM_COUNT`; do alarm_id=$(echo "${alarm_list}" | grep " $ALARM_BASENAME$i " | awk '{print $2}') - [ -n "$alarm_id" ] && ceilometer $as_doctor_user alarm-delete "$alarm_id" + [ -n "$alarm_id" ] && $ceilometer alarm-delete "$alarm_id" done + openstack $as_doctor_user subnet delete $NET_NAME + sleep 1 + openstack $as_doctor_user network delete $NET_NAME sleep 1 image_id=$(openstack image list | grep " $IMAGE_NAME " | awk '{print $2}') @@ -427,9 +439,20 @@ cleanup() { --project "$DOCTOR_PROJECT" openstack project delete "$DOCTOR_PROJECT" openstack user delete "$DOCTOR_USER" + # NOTE: remove role only for doctor test. + #openstack role delete "$DOCTOR_ROLE" cleanup_installer cleanup_inspector + + # NOTE: Temporary log printer. + for f in $(find . -name '*.log') + do + echo + echo "[$f]" + sed -e 's/^/ | /' $f + echo + done } # Main process @@ -472,6 +495,7 @@ inject_failure check_host_status "(DOWN|UNKNOWN)" 60 calculate_notification_time +unset_forced_down_hosts collect_logs run_profiler |