diff options
Diffstat (limited to 'doctor_tests')
-rw-r--r-- | doctor_tests/admin_tool/fenix/Dockerfile | 27 | ||||
-rwxr-xr-x | doctor_tests/admin_tool/fenix/run | 6 | ||||
-rw-r--r-- | doctor_tests/app_manager/__init__.py | 8 | ||||
-rw-r--r-- | doctor_tests/app_manager/sample.py | 21 | ||||
-rw-r--r-- | doctor_tests/app_manager/vnfm.py | 441 | ||||
-rw-r--r-- | doctor_tests/common/utils.py | 22 | ||||
-rw-r--r-- | doctor_tests/image.py | 13 | ||||
-rw-r--r-- | doctor_tests/inspector/sample.py | 6 | ||||
-rw-r--r-- | doctor_tests/installer/__init__.py | 8 | ||||
-rw-r--r-- | doctor_tests/installer/base.py | 54 | ||||
-rw-r--r-- | doctor_tests/installer/common/set_compute_config.py | 4 | ||||
-rw-r--r-- | doctor_tests/installer/common/set_fenix.sh | 88 | ||||
-rw-r--r-- | doctor_tests/installer/devstack.py | 151 | ||||
-rw-r--r-- | doctor_tests/installer/local.py | 118 | ||||
-rw-r--r-- | doctor_tests/installer/mcp.py | 170 | ||||
-rw-r--r-- | doctor_tests/main.py | 86 | ||||
-rw-r--r-- | doctor_tests/scenario/fault_management.py | 5 | ||||
-rw-r--r-- | doctor_tests/scenario/maintenance.py | 59 | ||||
-rw-r--r-- | doctor_tests/user.py | 1 |
19 files changed, 991 insertions, 297 deletions
diff --git a/doctor_tests/admin_tool/fenix/Dockerfile b/doctor_tests/admin_tool/fenix/Dockerfile index 90039b0d..202380eb 100644 --- a/doctor_tests/admin_tool/fenix/Dockerfile +++ b/doctor_tests/admin_tool/fenix/Dockerfile @@ -1,4 +1,4 @@ -FROM gliderlabs/alpine:3.5 +FROM gliderlabs/alpine:3.6 ARG BRANCH=master ARG OPENSTACK=master @@ -11,23 +11,24 @@ RUN echo "Building Fenix container against OpenStack $OPENSTACK" && \ mkdir -p /var/tmp/fenix WORKDIR /var/tmp/fenix COPY fenix*.conf /etc/fenix/ + RUN apk --no-cache add ca-certificates && \ apk --no-cache add --update python3 sshpass py-pip git curl && \ apk --no-cache add --virtual .build-deps --update \ - python-dev python3-dev build-base linux-headers libffi-dev \ + python3-dev build-base linux-headers libffi-dev \ openssl-dev libjpeg-turbo-dev && \ - curl https://git.openstack.org/cgit/openstack/requirements/plain/upper-constraints.txt?h=$OPENSTACK > upper-constraints.txt && \ - pip install --upgrade pip && \ - pip install alembic aodhclient ast decorator \ - eventlet flask Flask-RESTful importlib \ - keystoneauth1 logging python-novaclient oslo.config oslo.db \ - oslo.log oslo.messaging oslo.serialization oslo.service \ - oslotest oslo.utils pbr pymysql setuptools six sqlalchemy \ - wsgiref -cupper-constraints.txt && \ - git clone https://git.openstack.org/openstack/fenix -b $BRANCH /fenix && \ + curl https://opendev.org/openstack/requirements/raw/branch/$OPENSTACK/upper-constraints.txt > upper-constraints.txt && \ + if [ ! -e /usr/bin/pip ]; then ln -s pip3 /usr/bin/pip ; fi && \ + if [[ ! -e /usr/bin/python ]]; then ln -sf /usr/bin/python3 /usr/bin/python; fi && \ + pip3 install --upgrade pip && \ + pip3 install alembic aodhclient decorator flask Flask-RESTful eventlet jsonschema \ + keystoneauth1 keystonemiddleware python-novaclient oslo.config pecan \ + oslo.db oslo.log oslo.messaging oslo.serialization oslo.service oslo_policy \ + oslotest oslo.utils pbr pymysql six sqlalchemy -cupper-constraints.txt && \ + git clone https://opendev.org/x/fenix -b $BRANCH /fenix && \ rm -fr /var/tmp/fenix COPY run /fenix -COPY overcloudrc /fenix +COPY keystonercv3 /fenix WORKDIR /fenix -RUN python setup.py install +RUN python3 setup.py install CMD ./run diff --git a/doctor_tests/admin_tool/fenix/run b/doctor_tests/admin_tool/fenix/run index 2a2e37cd..50ae68e7 100755 --- a/doctor_tests/admin_tool/fenix/run +++ b/doctor_tests/admin_tool/fenix/run @@ -1,8 +1,8 @@ #!/bin/sh -. overcloudrc +. keystonercv3 # Start the first process -nohup python /fenix/fenix/cmd/engine.py > /var/log/fenix-engine.log& +nohup python3 /fenix/fenix/cmd/engine.py > /var/log/fenix-engine.log& status=$? if [ $status -ne 0 ]; then echo "Failed to start engine.py: $status" @@ -10,7 +10,7 @@ if [ $status -ne 0 ]; then fi # Start the second process -nohup python /fenix/fenix/cmd/api.py > /var/log/fenix-api.log& +nohup python3 /fenix/fenix/cmd/api.py > /var/log/fenix-api.log& status=$? if [ $status -ne 0 ]; then echo "Failed to start api.py: $status" diff --git a/doctor_tests/app_manager/__init__.py b/doctor_tests/app_manager/__init__.py index 717d6587..c2f75918 100644 --- a/doctor_tests/app_manager/__init__.py +++ b/doctor_tests/app_manager/__init__.py @@ -8,12 +8,13 @@ ############################################################################## from oslo_config import cfg from oslo_utils import importutils +import os OPTS = [ cfg.StrOpt('type', - default='sample', - choices=['sample'], + default=os.environ.get('APP_MANAGER_TYPE', 'sample'), + choices=['sample', 'vnfm'], help='the component of doctor app manager', required=True), cfg.StrOpt('ip', @@ -28,7 +29,8 @@ OPTS = [ _app_manager_name_class_mapping = { - 'sample': 'doctor_tests.app_manager.sample.SampleAppManager' + 'sample': 'doctor_tests.app_manager.sample.SampleAppManager', + 'vnfm': 'doctor_tests.app_manager.vnfm.VNFM', } diff --git a/doctor_tests/app_manager/sample.py b/doctor_tests/app_manager/sample.py index a7bc4126..7ca35b97 100644 --- a/doctor_tests/app_manager/sample.py +++ b/doctor_tests/app_manager/sample.py @@ -17,6 +17,7 @@ import requests from doctor_tests.app_manager.base import BaseAppManager from doctor_tests.identity_auth import get_identity_auth from doctor_tests.identity_auth import get_session +from doctor_tests.os_clients import neutron_client from doctor_tests.os_clients import nova_client @@ -56,12 +57,16 @@ class AppManager(Thread): self.app_manager = app_manager self.log = log self.intance_ids = None + self.auth = get_identity_auth(project=self.conf.doctor_project) + self.session = get_session(auth=self.auth) + self.nova = nova_client(self.conf.nova_version, + self.session) + self.neutron = neutron_client(session=self.session) self.headers = { 'Content-Type': 'application/json', 'Accept': 'application/json'} - self.auth = get_identity_auth(project=self.conf.doctor_project) - self.nova = nova_client(self.conf.nova_version, - get_session(auth=self.auth)) + if self.conf.admin_tool.type == 'fenix': + self.headers['X-Auth-Token'] = self.session.get_token() self.orig_number_of_instances = self.number_of_instances() self.ha_instances = self.get_ha_instances() self.floating_ip = None @@ -85,7 +90,13 @@ class AppManager(Thread): if instance.id != self.active_instance_id: self.log.info('Switch over to: %s %s' % (instance.name, instance.id)) - instance.add_floating_ip(self.floating_ip) + # Deprecated, need to use neutron instead + # instance.add_floating_ip(self.floating_ip) + port = self.neutron.list_ports(device_id=instance.id)['ports'][0]['id'] # noqa + floating_id = self.neutron.list_floatingips(floating_ip_address=self.floating_ip)['floatingips'][0]['id'] # noqa + self.neutron.update_floatingip(floating_id, {'floatingip': {'port_id': port}}) # noqa + # Have to update ha_instances as floating_ip changed + self.ha_instances = self.get_ha_instances() self.active_instance_id = instance.id break @@ -154,7 +165,7 @@ class AppManager(Thread): data = json.loads(request.data.decode('utf8')) try: payload = self._alarm_traits_decoder(data) - except: + except Exception: payload = ({t[0]: t[2] for t in data['reason_data']['event']['traits']}) self.log.error('cannot parse alarm data: %s' % payload) diff --git a/doctor_tests/app_manager/vnfm.py b/doctor_tests/app_manager/vnfm.py new file mode 100644 index 00000000..68fdbb88 --- /dev/null +++ b/doctor_tests/app_manager/vnfm.py @@ -0,0 +1,441 @@ +############################################################################## +# Copyright (c) 2018 Nokia Corporation and others. +# +# All rights reserved. This program and the accompanying materials +# are made available under the terms of the Apache License, Version 2.0 +# which accompanies this distribution, and is available at +# http://www.apache.org/licenses/LICENSE-2.0 +############################################################################## +from flask import Flask +from flask import request +import json +import requests +from threading import Thread +import time +import uuid +import yaml + +from doctor_tests.app_manager.base import BaseAppManager +from doctor_tests.identity_auth import get_identity_auth +from doctor_tests.identity_auth import get_session +from doctor_tests.os_clients import neutron_client +from doctor_tests.os_clients import nova_client +from doctor_tests.os_clients import keystone_client + + +class VNFM(BaseAppManager): + + def __init__(self, stack, conf, log): + super(VNFM, self).__init__(conf, log) + self.stack = stack + self.app = None + + def start(self): + self.log.info('VNFM start......') + self.app = VNFManager(self.stack, self.conf, self, self.log) + self.app.start() + + def stop(self): + self.log.info('VNFM stop......') + if not self.app: + return + self.app.delete_constraints() + headers = { + 'Content-Type': 'application/json', + 'Accept': 'application/json', + } + url = 'http://%s:%d/shutdown'\ + % (self.conf.app_manager.ip, + self.conf.app_manager.port) + requests.post(url, data='', headers=headers) + + +class VNFManager(Thread): + + def __init__(self, stack, conf, app_manager, log): + Thread.__init__(self) + self.stack = stack + self.conf = conf + self.port = self.conf.app_manager.port + self.app_manager = app_manager + self.log = log + self.intance_ids = None + self.auth = get_identity_auth(project=self.conf.doctor_project) + self.session = get_session(auth=self.auth) + self.keystone = keystone_client( + self.conf.keystone_version, self.session) + self.nova = nova_client(self.conf.nova_version, + self.session) + self.neutron = neutron_client(session=self.session) + self.headers = { + 'Content-Type': 'application/json', + 'Accept': 'application/json'} + if self.conf.admin_tool.type == 'fenix': + self.headers['X-Auth-Token'] = self.session.get_token() + self.orig_number_of_instances = self.number_of_instances() + # List of instances + self.ha_instances = [] + self.nonha_instances = [] + # Different instance_id specific constraints {instanse_id: {},...} + self.instance_constraints = None + # Update existing instances to instance lists + self.update_instances() + nonha_instances = len(self.nonha_instances) + if nonha_instances < 7: + self.scale = 2 + self.max_impacted = 2 + else: + self.scale = int((nonha_instances) / 2) + self.max_impacted = self.scale - 1 + self.log.info('Init nonha_instances: %s scale: %s: max_impacted %s' % + (nonha_instances, self.scale, self.max_impacted)) + # Different instance groups constraints dict + self.ha_group = None + self.nonha_group = None + # Floating IP used in HA instance + self.floating_ip = None + # VNF project_id + self.project_id = None + # HA instance_id that is active / has floating IP + self.active_instance_id = self.active_instance_id() + + services = self.keystone.services.list() + for service in services: + if service.type == 'maintenance': + self.log.info('maintenance service: %s:%s type %s' + % (service.name, service.id, service.type)) + maint_id = service.id + self.maint_endpoint = [ep.url for ep in self.keystone.endpoints.list() + if ep.service_id == maint_id and + ep.interface == 'public'][0] + self.log.info('maintenance endpoint: %s' % self.maint_endpoint) + self.update_constraints_lock = False + self.update_constraints() + + def delete_remote_instance_constraints(self, instance_id): + url = "%s/instance/%s" % (self.maint_endpoint, instance_id) + self.log.info('DELETE: %s' % url) + ret = requests.delete(url, data=None, headers=self.headers) + if ret.status_code != 200 and ret.status_code != 204: + raise Exception(ret.text) + + def update_remote_instance_constraints(self, instance): + url = "%s/instance/%s" % (self.maint_endpoint, instance["instance_id"]) + self.log.info('PUT: %s' % url) + ret = requests.put(url, data=json.dumps(instance), + headers=self.headers) + if ret.status_code != 200 and ret.status_code != 204: + raise Exception(ret.text) + + def delete_remote_group_constraints(self, instance_group): + url = "%s/instance_group/%s" % (self.maint_endpoint, + instance_group["group_id"]) + self.log.info('DELETE: %s' % url) + ret = requests.delete(url, data=None, headers=self.headers) + if ret.status_code != 200 and ret.status_code != 204: + raise Exception(ret.text) + + def update_remote_group_constraints(self, instance_group): + url = "%s/instance_group/%s" % (self.maint_endpoint, + instance_group["group_id"]) + self.log.info('PUT: %s' % url) + ret = requests.put(url, data=json.dumps(instance_group), + headers=self.headers) + if ret.status_code != 200 and ret.status_code != 204: + raise Exception(ret.text) + + def delete_constraints(self): + if self.conf.admin_tool.type == 'fenix': + self.headers['X-Auth-Token'] = self.session.get_token() + for instance_id in self.instance_constraints: + self.delete_remote_instance_constraints(instance_id) + self.delete_remote_group_constraints(self.nonha_group) + self.delete_remote_group_constraints(self.ha_group) + + def update_constraints(self): + while self.update_constraints_lock: + self.log.info('Waiting update_constraints_lock...') + time.sleep(1) + self.update_constraints_lock = True + self.log.info('Update constraints') + if self.project_id is None: + self.project_id = self.keystone.projects.list( + name=self.conf.doctor_project)[0].id + if self.nonha_group is None: + # Nova does not support groupping instances that do not belong to + # anti-affinity server_groups. Anyhow all instances need groupping + self.nonha_group = { + "group_id": str(uuid.uuid4()), + "project_id": self.project_id, + "group_name": "doctor_nonha_app_group", + "anti_affinity_group": False, + "max_instances_per_host": 0, + "max_impacted_members": self.max_impacted, + "recovery_time": 2, + "resource_mitigation": True} + self.log.info('create doctor_nonha_app_group constraints: %s' + % self.nonha_group) + self.update_remote_group_constraints(self.nonha_group) + if self.ha_group is None: + group_id = [sg.id for sg in self.nova.server_groups.list() + if sg.name == "doctor_ha_app_group"][0] + self.ha_group = { + "group_id": group_id, + "project_id": self.project_id, + "group_name": "doctor_ha_app_group", + "anti_affinity_group": True, + "max_instances_per_host": 1, + "max_impacted_members": 1, + "recovery_time": 4, + "resource_mitigation": True} + self.log.info('create doctor_ha_app_group constraints: %s' + % self.ha_group) + self.update_remote_group_constraints(self.ha_group) + instance_constraints = {} + for ha_instance in self.ha_instances: + instance = { + "instance_id": ha_instance.id, + "project_id": self.project_id, + "group_id": self.ha_group["group_id"], + "instance_name": ha_instance.name, + "max_interruption_time": 120, + "migration_type": "MIGRATE", + "resource_mitigation": True, + "lead_time": 40} + self.log.info('create ha instance constraints: %s' + % instance) + instance_constraints[ha_instance.id] = instance + for nonha_instance in self.nonha_instances: + instance = { + "instance_id": nonha_instance.id, + "project_id": self.project_id, + "group_id": self.nonha_group["group_id"], + "instance_name": nonha_instance.name, + "max_interruption_time": 120, + "migration_type": "MIGRATE", + "resource_mitigation": True, + "lead_time": 40} + self.log.info('create nonha instance constraints: %s' + % instance) + instance_constraints[nonha_instance.id] = instance + if not self.instance_constraints: + # Initial instance constraints + self.log.info('create initial instances constraints...') + for instance in [instance_constraints[i] for i + in instance_constraints]: + self.update_remote_instance_constraints(instance) + self.instance_constraints = instance_constraints.copy() + else: + self.log.info('check instances constraints changes...') + added = [i for i in instance_constraints.keys() + if i not in self.instance_constraints] + deleted = [i for i in self.instance_constraints.keys() + if i not in instance_constraints] + modified = [i for i in instance_constraints.keys() + if (i not in added and i not in deleted and + instance_constraints[i] != + self.instance_constraints[i])] + for instance_id in deleted: + self.delete_remote_instance_constraints(instance_id) + updated = added + modified + for instance in [instance_constraints[i] for i in updated]: + self.update_remote_instance_constraints(instance) + if updated or deleted: + # Some instance constraints have changed + self.instance_constraints = instance_constraints.copy() + self.update_constraints_lock = False + + def active_instance_id(self): + # Need rertry as it takes time after heat template done before + # Floating IP in place + retry = 5 + while retry > 0: + for instance in self.ha_instances: + network_interfaces = next(iter(instance.addresses.values())) + for network_interface in network_interfaces: + _type = network_interface.get('OS-EXT-IPS:type') + if _type == "floating": + if not self.floating_ip: + self.floating_ip = network_interface.get('addr') + self.log.debug('active_instance: %s %s' % + (instance.name, instance.id)) + return instance.id + time.sleep(2) + self.update_instances() + retry -= 1 + raise Exception("No active instance found") + + def switch_over_ha_instance(self): + for instance in self.ha_instances: + if instance.id != self.active_instance_id: + self.log.info('Switch over to: %s %s' % (instance.name, + instance.id)) + # Deprecated, need to use neutron instead + # instance.add_floating_ip(self.floating_ip) + port = self.neutron.list_ports(device_id=instance.id)['ports'][0]['id'] # noqa + floating_id = self.neutron.list_floatingips(floating_ip_address=self.floating_ip)['floatingips'][0]['id'] # noqa + self.neutron.update_floatingip(floating_id, {'floatingip': {'port_id': port}}) # noqa + # Have to update ha_instances as floating_ip changed + self.update_instances() + self.active_instance_id = instance.id + break + + def get_instance_ids(self): + ret = list() + for instance in self.nova.servers.list(detailed=False): + ret.append(instance.id) + return ret + + def update_instances(self): + instances = self.nova.servers.list(detailed=True) + self.ha_instances = [i for i in instances + if "doctor_ha_app_" in i.name] + self.nonha_instances = [i for i in instances + if "doctor_nonha_app_" in i.name] + + def _alarm_data_decoder(self, data): + if "[" in data or "{" in data: + # string to list or dict removing unicode + data = yaml.load(data.replace("u'", "'")) + return data + + def _alarm_traits_decoder(self, data): + return ({str(t[0]): self._alarm_data_decoder(str(t[2])) + for t in data['reason_data']['event']['traits']}) + + def get_session_instance_ids(self, url, session_id): + ret = requests.get(url, data=None, headers=self.headers) + if ret.status_code != 200: + raise Exception(ret.text) + self.log.info('get_instance_ids %s' % ret.json()) + return ret.json()['instance_ids'] + + def scale_instances(self, number_of_instances): + number_of_instances_before = self.number_of_instances() + + parameters = self.stack.parameters + parameters['nonha_intances'] += number_of_instances + self.stack.update(self.stack.stack_name, + self.stack.stack_id, + self.stack.template, + parameters=parameters, + files=self.stack.files) + + number_of_instances_after = self.number_of_instances() + if (number_of_instances_before + number_of_instances != + number_of_instances_after): + self.log.error('scale_instances with: %d from: %d ends up to: %d' + % (number_of_instances, number_of_instances_before, + number_of_instances_after)) + raise Exception('scale_instances failed') + + self.log.info('scaled instances from %d to %d' % + (number_of_instances_before, + number_of_instances_after)) + + def number_of_instances(self): + return len(self.nova.servers.list(detailed=False)) + + def run(self): + app = Flask('VNFM') + + @app.route('/maintenance', methods=['POST']) + def maintenance_alarm(): + data = json.loads(request.data.decode('utf8')) + try: + payload = self._alarm_traits_decoder(data) + except Exception: + payload = ({t[0]: t[2] for t in + data['reason_data']['event']['traits']}) + self.log.error('cannot parse alarm data: %s' % payload) + raise Exception('VNFM cannot parse alarm.' + 'Possibly trait data over 256 char') + + self.log.info('VNFM received data = %s' % payload) + + state = payload['state'] + reply_state = None + reply = dict() + + self.log.info('VNFM state: %s' % state) + + if state == 'MAINTENANCE': + instance_ids = (self.get_session_instance_ids( + payload['instance_ids'], + payload['session_id'])) + my_instance_ids = self.get_instance_ids() + invalid_instances = ( + [instance_id for instance_id in instance_ids + if instance_id not in my_instance_ids]) + if invalid_instances: + self.log.error('Invalid instances: %s' % invalid_instances) + reply_state = 'NACK_MAINTENANCE' + else: + reply_state = 'ACK_MAINTENANCE' + + elif state == 'SCALE_IN': + # scale down "self.scale" instances that is VCPUS equaling + # at least a single compute node + self.scale_instances(-self.scale) + reply_state = 'ACK_SCALE_IN' + + elif state == 'MAINTENANCE_COMPLETE': + # possibly need to upscale + number_of_instances = self.number_of_instances() + if self.orig_number_of_instances > number_of_instances: + scale_instances = (self.orig_number_of_instances - + number_of_instances) + self.scale_instances(scale_instances) + reply_state = 'ACK_MAINTENANCE_COMPLETE' + + elif state == 'PREPARE_MAINTENANCE': + # TBD from contraints + if "MIGRATE" not in payload['allowed_actions']: + raise Exception('MIGRATE not supported') + instance_ids = payload['instance_ids'][0] + self.log.info('VNFM got instance: %s' % instance_ids) + if instance_ids == self.active_instance_id: + self.switch_over_ha_instance() + # optional also in contraints + reply['instance_action'] = "MIGRATE" + reply_state = 'ACK_PREPARE_MAINTENANCE' + + elif state == 'PLANNED_MAINTENANCE': + # TBD from contraints + if "MIGRATE" not in payload['allowed_actions']: + raise Exception('MIGRATE not supported') + instance_ids = payload['instance_ids'][0] + self.log.info('VNFM got instance: %s' % instance_ids) + if instance_ids == self.active_instance_id: + self.switch_over_ha_instance() + # optional also in contraints + reply['instance_action'] = "MIGRATE" + reply_state = 'ACK_PLANNED_MAINTENANCE' + + elif state == 'INSTANCE_ACTION_DONE': + # TBD was action done in allowed window + self.log.info('%s' % payload['instance_ids']) + else: + raise Exception('VNFM received event with' + ' unknown state %s' % state) + + if reply_state: + if self.conf.admin_tool.type == 'fenix': + self.headers['X-Auth-Token'] = self.session.get_token() + reply['state'] = reply_state + url = payload['reply_url'] + self.log.info('VNFM reply: %s' % reply) + requests.put(url, data=json.dumps(reply), headers=self.headers) + + return 'OK' + + @app.route('/shutdown', methods=['POST']) + def shutdown(): + self.log.info('shutdown VNFM server at %s' % time.time()) + func = request.environ.get('werkzeug.server.shutdown') + if func is None: + raise RuntimeError('Not running with the Werkzeug Server') + func() + return 'VNFM shutting down...' + + app.run(host="0.0.0.0", port=self.port) diff --git a/doctor_tests/common/utils.py b/doctor_tests/common/utils.py index 1a8840dd..67ca4f4b 100644 --- a/doctor_tests/common/utils.py +++ b/doctor_tests/common/utils.py @@ -10,6 +10,7 @@ import json import os import paramiko import re +import subprocess def load_json_file(full_path): @@ -97,6 +98,27 @@ class SSHClient(object): ftp.close() +class LocalSSH(object): + + def __init__(self, log): + self.log = log + self.log.info('Init local ssh client') + + def ssh(self, cmd): + ret = 0 + output = "%s failed!!!" % cmd + try: + output = subprocess.check_output((cmd), shell=True, + universal_newlines=True) + except subprocess.CalledProcessError: + ret = 1 + return ret, output + + def scp(self, src_file, dst_file): + return subprocess.check_output("cp %s %s" % (src_file, dst_file), + shell=True) + + def run_async(func): from threading import Thread from functools import wraps diff --git a/doctor_tests/image.py b/doctor_tests/image.py index 9961b22d..50841ef6 100644 --- a/doctor_tests/image.py +++ b/doctor_tests/image.py @@ -7,7 +7,11 @@ # http://www.apache.org/licenses/LICENSE-2.0 ############################################################################## import os -import urllib.request +try: + from urllib.request import urlopen +except Exception: + from urllib2 import urlopen + from oslo_config import cfg @@ -46,11 +50,14 @@ class Image(object): def create(self): self.log.info('image create start......') - images = {image.name: image for image in self.glance.images.list()} + if self.conf.image_name == 'cirros': + cirros = [image for image in images if 'cirros' in image] + if cirros: + self.conf.image_name = cirros[0] if self.conf.image_name not in images: if not os.path.exists(self.conf.image_filename): - resp = urllib.request.urlopen(self.conf.image_download_url) + resp = urlopen(self.conf.image_download_url) with open(self.conf.image_filename, "wb") as file: file.write(resp.read()) self.image = \ diff --git a/doctor_tests/inspector/sample.py b/doctor_tests/inspector/sample.py index baf0306f..c44db95d 100644 --- a/doctor_tests/inspector/sample.py +++ b/doctor_tests/inspector/sample.py @@ -52,7 +52,7 @@ class SampleInspector(BaseInspector): driver='messaging', topics=['notifications']) self.notif = self.notif.prepare(publisher_id='sample') - except: + except Exception: self.notif = None def _init_novaclients(self): @@ -72,7 +72,7 @@ class SampleInspector(BaseInspector): host = server.__dict__.get('OS-EXT-SRV-ATTR:host') self.servers[host].append(server) self.log.debug('get hostname=%s from server=%s' - % (host, server)) + % (host, str(server.name))) except Exception as e: self.log.info('can not get hostname from server=%s, error=%s' % (server, e)) @@ -135,7 +135,7 @@ class SampleInspector(BaseInspector): def maintenance(self, data): try: payload = self._alarm_traits_decoder(data) - except: + except Exception: payload = ({t[0]: t[2] for t in data['reason_data']['event']['traits']}) self.log.error('cannot parse alarm data: %s' % payload) diff --git a/doctor_tests/installer/__init__.py b/doctor_tests/installer/__init__.py index 2b9ad83d..00a01667 100644 --- a/doctor_tests/installer/__init__.py +++ b/doctor_tests/installer/__init__.py @@ -13,8 +13,8 @@ from oslo_utils import importutils OPTS = [ cfg.StrOpt('type', - default=os.environ.get('INSTALLER_TYPE', 'local'), - choices=['local', 'apex', 'daisy', 'fuel'], + default=os.environ.get('INSTALLER_TYPE', 'devstack'), + choices=['apex', 'daisy', 'fuel', 'devstack'], help='the type of installer', required=True), cfg.StrOpt('ip', @@ -28,10 +28,10 @@ OPTS = [ _installer_name_class_mapping = { - 'local': 'doctor_tests.installer.local.LocalInstaller', 'apex': 'doctor_tests.installer.apex.ApexInstaller', 'daisy': 'doctor_tests.installer.daisy.DaisyInstaller', - 'fuel': 'doctor_tests.installer.mcp.McpInstaller' + 'fuel': 'doctor_tests.installer.mcp.McpInstaller', + 'devstack': 'doctor_tests.installer.devstack.DevstackInstaller' } diff --git a/doctor_tests/installer/base.py b/doctor_tests/installer/base.py index 7e2658e5..de4d2f2e 100644 --- a/doctor_tests/installer/base.py +++ b/doctor_tests/installer/base.py @@ -11,7 +11,6 @@ import getpass import grp import os import pwd -import re import six import stat import subprocess @@ -127,47 +126,9 @@ class BaseInstaller(object): os.chmod(ssh_key, stat.S_IREAD) return ssh_key + @abc.abstractmethod def get_transport_url(self): - client = utils.SSHClient(self.controllers[0], self.node_user_name, - key_filename=self.key_file) - if self.use_containers: - ncbase = "/var/lib/config-data/puppet-generated/nova" - else: - ncbase = "" - try: - cmd = 'sudo grep "^transport_url" %s/etc/nova/nova.conf' % ncbase - ret, url = client.ssh(cmd) - if ret: - raise Exception('Exec command to get transport from ' - 'controller(%s) in Apex installer failed, ' - 'ret=%s, output=%s' - % (self.controllers[0], ret, url)) - else: - # need to use ip instead of hostname - ret = (re.sub("@.*:", "@%s:" % self.controllers[0], - url[0].split("=", 1)[1])) - except: - cmd = 'grep -i "^rabbit" %s/etc/nova/nova.conf' % ncbase - ret, lines = client.ssh(cmd) - if ret: - raise Exception('Exec command to get transport from ' - 'controller(%s) in Apex installer failed, ' - 'ret=%s, output=%s' - % (self.controllers[0], ret, url)) - else: - for line in lines.split('\n'): - if line.startswith("rabbit_userid"): - rabbit_userid = line.split("=") - if line.startswith("rabbit_port"): - rabbit_port = line.split("=") - if line.startswith("rabbit_password"): - rabbit_password = line.split("=") - ret = "rabbit://%s:%s@%s:%s/?ssl=0" % (rabbit_userid, - rabbit_password, - self.controllers[0], - rabbit_port) - self.log.debug('get_transport_url %s' % ret) - return ret + pass def _run_cmd_remote(self, client, command): self.log.info('Run command=%s in %s installer......' @@ -199,14 +160,15 @@ class BaseInstaller(object): def _run_apply_patches(self, client, restart_cmd, script_names, python='python3'): installer_dir = os.path.dirname(os.path.realpath(__file__)) - if isinstance(script_names, list): for script_name in script_names: script_abs_path = '{0}/{1}/{2}'.format(installer_dir, 'common', script_name) + if self.conf.installer.type == "devstack": + script_name = "/opt/stack/%s" % script_name try: client.scp(script_abs_path, script_name) - except: + except Exception: client.scp(script_abs_path, script_name) try: if ".py" in script_name: @@ -216,14 +178,14 @@ class BaseInstaller(object): script_name) ret, output = client.ssh(cmd) self.log.info('Command %s output %s' % (cmd, output)) - except: + except Exception: ret, output = client.ssh(cmd) - + self.log.info('Command %s output %s' % (cmd, output)) if ret: raise Exception('Do the command in remote' ' node failed, ret=%s, cmd=%s, output=%s' % (ret, cmd, output)) - if 'nova' in restart_cmd: + if 'nova' in restart_cmd or 'devstack@n-' in restart_cmd: # Make sure scheduler has proper cpu_allocation_ratio time.sleep(5) client.ssh(restart_cmd) diff --git a/doctor_tests/installer/common/set_compute_config.py b/doctor_tests/installer/common/set_compute_config.py index 76ac649b..615f1895 100644 --- a/doctor_tests/installer/common/set_compute_config.py +++ b/doctor_tests/installer/common/set_compute_config.py @@ -26,9 +26,9 @@ def set_cpu_allocation_ratio(): found_list = ([ca for ca in fcheck.readlines() if "cpu_allocation_ratio" in ca]) fcheck.close() + change = False + found = False if found_list and len(found_list): - change = False - found = False for car in found_list: if car.startswith('#'): continue diff --git a/doctor_tests/installer/common/set_fenix.sh b/doctor_tests/installer/common/set_fenix.sh index a660af79..bd1eae47 100644 --- a/doctor_tests/installer/common/set_fenix.sh +++ b/doctor_tests/installer/common/set_fenix.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash ############################################################################## -# Copyright (c) 2018 Nokia Corporation and others. +# Copyright (c) 2019 Nokia Corporation and others. # # All rights reserved. This program and the accompanying materials # are made available under the terms of the Apache License, Version 2.0 @@ -10,18 +10,80 @@ ############################################################################## # Config files +docker -v >/dev/null || { +echo "Fenix needs docker to be installed..." +ver=`grep "UBUNTU_CODENAME" /etc/os-release | cut -d '=' -f 2` +curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - +add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $ver stable" +apt install apt-transport-https ca-certificates curl software-properties-common +apt update +apt-cache policy docker-ce +apt-get install -y docker-ce docker-ce-cli containerd.io +dpkg -r --force-depends golang-docker-credential-helpers +} + +docker ps | grep fenix -q && { +REMOTE=`git ls-remote https://opendev.org/x/fenix HEAD | awk '{ print $1}'` +LOCAL=`docker exec -t fenix git rev-parse @` +if [[ "$LOCAL" =~ "$REMOTE" ]]; then + # Difference in above string ending marks, so cannot compare equal + echo "Fenix start: Already running latest $LOCAL equals $REMOTE" + exit 0 +else + echo "Fenix container needs to be recreated $LOCAL not $REMOTE" + # Remove previous container + for img in `docker image list | grep "^fenix" | awk '{print $1}'`; do + for dock in `docker ps --all -f "ancestor=$img" | grep "$img" | awk '{print $1}'`; do + docker stop $dock; docker rm $dock; + done; + docker image rm $img; + done +fi +} || echo "Fenix container needs to be created..." + +cp /root/keystonercv3 . + +transport=`grep -m1 "^transport" /etc/nova/nova.conf` +. keystonercv3 + echo "[DEFAULT]" > fenix.conf -echo "[DEFAULT]" > fenix-api.conf echo "port = 12347" >> fenix.conf -echo "port = 12347" >> fenix-api.conf -grep -m1 "^transport" /var/lib/config-data/puppet-generated/nova/etc/nova/nova.conf >> fenix.conf -grep -m1 "^transport" /var/lib/config-data/puppet-generated/nova/etc/nova/nova.conf >> fenix-api.conf +echo $transport >> fenix.conf + echo "[database]" >> fenix.conf -MYSQLIP=`grep -m1 "^connection=mysql" /var/lib/config-data/puppet-generated/nova/etc/nova/nova.conf | sed -e "s/.*@//;s/\/.*//"` -echo "connection=mysql+pymysql://fenix:fenix@$MYSQLIP/fenix?read_default_group=tripleo&read_default_file=/etc/my.cnf.d/tripleo.cnf" >> fenix.conf +MYSQLIP=`grep -m1 "^connection" /etc/nova/nova.conf | sed -e "s/.*@//;s/\/.*//"` +echo "connection = mysql+pymysql://fenix:fenix@$MYSQLIP/fenix" >> fenix.conf + +echo "[service_user]" >> fenix.conf +echo "os_auth_url = $OS_AUTH_URL" >> fenix.conf +echo "os_username = $OS_USERNAME" >> fenix.conf +echo "os_password = $OS_PASSWORD" >> fenix.conf +echo "os_user_domain_name = $OS_USER_DOMAIN_NAME" >> fenix.conf +echo "os_project_name = $OS_PROJECT_NAME" >> fenix.conf +echo "os_project_domain_name = $OS_PROJECT_DOMAIN_NAME" >> fenix.conf + +echo "[DEFAULT]" > fenix-api.conf +echo "port = 12347" >> fenix-api.conf +echo $transport >> fenix-api.conf + +echo "[keystone_authtoken]" >> fenix-api.conf +echo "auth_url = $OS_AUTH_URL" >> fenix-api.conf +echo "auth_type = password" >> fenix-api.conf +echo "project_domain_name = $OS_PROJECT_DOMAIN_NAME" >> fenix-api.conf +echo "project_name = $OS_PROJECT_NAME" >> fenix-api.conf +echo "user_domain_name = $OS_PROJECT_DOMAIN_NAME" >> fenix-api.conf +echo "password = $OS_PASSWORD" >> fenix-api.conf +echo "username = $OS_USERNAME" >> fenix-api.conf +echo "cafile = /opt/stack/data/ca-bundle.pem" >> fenix-api.conf + +openstack service list | grep -q maintenance || { +openstack service create --name fenix --enable maintenance +openstack endpoint create --region $OS_REGION_NAME --enable fenix public http://localhost:12347/v1 +} # Mysql pw -MYSQLPW=`cat /var/lib/config-data/mysql/etc/puppet/hieradata/service_configs.json | grep mysql | grep root_password | awk -F": " '{print $2}' | awk -F"\"" '{print $2}'` +# MYSQLPW=`cat /var/lib/config-data/mysql/etc/puppet/hieradata/service_configs.json | grep mysql | grep root_password | awk -F": " '{print $2}' | awk -F"\"" '{print $2}'` +MYSQLPW=root # Fenix DB [ `mysql -uroot -p$MYSQLPW -e "SELECT host, user FROM mysql.user;" | grep fenix | wc -l` -eq 0 ] && { @@ -31,17 +93,9 @@ MYSQLPW=`cat /var/lib/config-data/mysql/etc/puppet/hieradata/service_configs.jso mysql -ufenix -pfenix -hlocalhost -e "DROP DATABASE IF EXISTS fenix;" mysql -ufenix -pfenix -hlocalhost -e "CREATE DATABASE fenix CHARACTER SET utf8;" -# Remove previous container -for img in `docker image list | grep "^fenix" | awk '{print $1}'`; do - for dock in `docker ps --all -f "ancestor=$img" | grep "$img" | awk '{print $1}'`; do - docker stop $dock; docker rm $dock; - done; - docker image rm $img; -done - # Build Fenix container and run it chmod 700 run -docker build --build-arg OPENSTACK=master --build-arg BRANCH=master --network host /home/heat-admin -t fenix | tail -1 +docker build --build-arg OPENSTACK=master --build-arg BRANCH=master --network host $PWD -t fenix | tail -1 docker run --network host -d --name fenix -p 12347:12347 -ti fenix if [ $? -eq 0 ]; then echo "Fenix start: OK" diff --git a/doctor_tests/installer/devstack.py b/doctor_tests/installer/devstack.py new file mode 100644 index 00000000..02f3601a --- /dev/null +++ b/doctor_tests/installer/devstack.py @@ -0,0 +1,151 @@ +############################################################################## +# Copyright (c) 2019 Nokia Corporation and others. +# +# All rights reserved. This program and the accompanying materials +# are made available under the terms of the Apache License, Version 2.0 +# which accompanies this distribution, and is available at +# http://www.apache.org/licenses/LICENSE-2.0 +############################################################################## +import os +import socket +import time + +from doctor_tests.common.utils import SSHClient +from doctor_tests.common.utils import LocalSSH +from doctor_tests.identity_auth import get_session +from doctor_tests.installer.base import BaseInstaller +from doctor_tests.os_clients import nova_client + + +class DevstackInstaller(BaseInstaller): + node_user_name = None + cm_set_script = 'set_config.py' + nc_set_compute_script = 'set_compute_config.py' + cm_restore_script = 'restore_config.py' + nc_restore_compute_script = 'restore_compute_config.py' + ac_restart_script = 'restart_aodh.py' + ac_restore_script = 'restore_aodh.py' + python = 'python' + + def __init__(self, conf, log): + super(DevstackInstaller, self).__init__(conf, log) + # Run Doctor under users home. sudo hides other env param to be used + home, self.node_user_name = (iter(os.environ.get('VIRTUAL_ENV') + .split('/', 3)[1:3])) + # Migration needs to work so ssh should have proper key defined + self.key_file = '/%s/%s/.ssh/id_rsa' % (home, self.node_user_name) + self.log.info('ssh uses: %s and %s' % (self.node_user_name, + self.key_file)) + self.controllers = ([ip for ip in + socket.gethostbyname_ex(socket.gethostname())[2] + if not ip.startswith('127.')] or + [[(s.connect(('8.8.8.8', 53)), + s.getsockname()[0], s.close()) + for s in [socket.socket(socket.AF_INET, + socket.SOCK_DGRAM)]][0][1]]) + conf.admin_tool.ip = self.controllers[0] + self.computes = list() + self.nova = nova_client(conf.nova_version, get_session()) + + def setup(self): + self.log.info('Setup Devstack installer start......') + self._get_devstack_conf() + self.create_flavor() + self.set_apply_patches() + + def cleanup(self): + self.restore_apply_patches() + + def get_ssh_key_from_installer(self): + return self.key_file + + def get_transport_url(self): + client = LocalSSH(self.log) + cmd = 'sudo grep -m1 "^transport_url" /etc/nova/nova.conf' + ret, url = client.ssh(cmd) + url = url.split("= ", 1)[1][:-1] + self.log.info('get_transport_url %s' % url) + return url + + def get_host_ip_from_hostname(self, hostname): + return [hvisor.__getattr__('host_ip') for hvisor in self.hvisors + if hvisor.__getattr__('hypervisor_hostname') == hostname][0] + + def _get_devstack_conf(self): + self.log.info('Get devstack config details for Devstack installer' + '......') + self.hvisors = self.nova.hypervisors.list(detailed=True) + self.log.info('checking hypervisors.......') + self.computes = [hvisor.__getattr__('host_ip') for hvisor in + self.hvisors] + self.use_containers = False + self.log.info('controller_ips:%s' % self.controllers) + self.log.info('compute_ips:%s' % self.computes) + self.log.info('use_containers:%s' % self.use_containers) + + def _set_docker_restart_cmd(self, service): + # There can be multiple instances running so need to restart all + cmd = "for container in `sudo docker ps | grep " + cmd += service + cmd += " | awk '{print $1}'`; do sudo docker restart $container; \ + done;" + return cmd + + def set_apply_patches(self): + self.log.info('Set apply patches start......') + + set_scripts = [self.cm_set_script] + + restart_cmd = 'sudo systemctl restart' \ + ' devstack@ceilometer-anotification.service' + + client = LocalSSH(self.log) + self._run_apply_patches(client, + restart_cmd, + set_scripts, + python=self.python) + time.sleep(7) + + self.log.info('Set apply patches start......') + + if self.conf.test_case != 'fault_management': + restart_cmd = 'sudo systemctl restart' \ + ' devstack@n-cpu.service' + for node_ip in self.computes: + client = SSHClient(node_ip, self.node_user_name, + key_filename=self.key_file) + self._run_apply_patches(client, + restart_cmd, + [self.nc_set_compute_script], + python=self.python) + time.sleep(7) + + def restore_apply_patches(self): + self.log.info('restore apply patches start......') + + restore_scripts = [self.cm_restore_script] + + restart_cmd = 'sudo systemctl restart' \ + ' devstack@ceilometer-anotification.service' + + if self.conf.test_case != 'fault_management': + restart_cmd += ' devstack@n-sch.service' + restore_scripts.append(self.nc_restore_compute_script) + + client = LocalSSH(self.log) + self._run_apply_patches(client, + restart_cmd, + restore_scripts, + python=self.python) + + if self.conf.test_case != 'fault_management': + + restart_cmd = 'sudo systemctl restart' \ + ' devstack@n-cpu.service' + for node_ip in self.computes: + client = SSHClient(node_ip, self.node_user_name, + key_filename=self.key_file) + self._run_apply_patches( + client, restart_cmd, + [self.nc_restore_compute_script], + python=self.python) diff --git a/doctor_tests/installer/local.py b/doctor_tests/installer/local.py deleted file mode 100644 index fee14f33..00000000 --- a/doctor_tests/installer/local.py +++ /dev/null @@ -1,118 +0,0 @@ -############################################################################## -# Copyright (c) 2017 ZTE Corporation and others. -# -# All rights reserved. This program and the accompanying materials -# are made available under the terms of the Apache License, Version 2.0 -# which accompanies this distribution, and is available at -# http://www.apache.org/licenses/LICENSE-2.0 -############################################################################## -import os -import shutil -import subprocess - -from doctor_tests.installer.base import BaseInstaller -from doctor_tests.installer.common.vitrage import \ - set_vitrage_host_down_template -from doctor_tests.common.constants import Inspector -from doctor_tests.common.utils import load_json_file -from doctor_tests.common.utils import write_json_file - - -class LocalInstaller(BaseInstaller): - node_user_name = 'root' - - nova_policy_file = '/etc/nova/policy.json' - nova_policy_file_backup = '%s%s' % (nova_policy_file, '.bak') - - def __init__(self, conf, log): - super(LocalInstaller, self).__init__(conf, log) - self.policy_modified = False - self.add_policy_file = False - - def setup(self): - self.get_ssh_key_from_installer() - self.set_apply_patches() - - def cleanup(self): - self.restore_apply_patches() - - def get_ssh_key_from_installer(self): - self.log.info('Assuming SSH keys already exchanged with computer' - 'for local installer type') - return None - - def get_host_ip_from_hostname(self, hostname): - self.log.info('Get host ip from host name in local installer......') - - cmd = "getent hosts %s | awk '{ print $1 }'" % (hostname) - server = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True) - stdout, stderr = server.communicate() - host_ip = stdout.strip().decode("utf-8") - - self.log.info('Get host_ip:%s from host_name:%s in local installer' - % (host_ip, hostname)) - return host_ip - - def set_apply_patches(self): - self._set_nova_policy() - if self.conf.inspector.type == Inspector.VITRAGE: - set_vitrage_host_down_template() - os.system('sudo systemctl restart devstack@vitrage-graph.service') - - def restore_apply_patches(self): - self._restore_nova_policy() - - def _set_nova_policy(self): - host_status_policy = 'os_compute_api:servers:show:host_status' - host_status_rule = 'rule:admin_or_owner' - policy_data = { - 'context_is_admin': 'role:admin', - 'owner': 'user_id:%(user_id)s', - 'admin_or_owner': 'rule:context_is_admin or rule:owner', - host_status_policy: host_status_rule - } - - if os.path.isfile(self.nova_policy_file): - data = load_json_file(self.nova_policy_file) - if host_status_policy in data: - rule_origion = data[host_status_policy] - if host_status_rule == rule_origion: - self.log.info('Do not need to modify nova policy.') - self.policy_modified = False - else: - # update the host_status_policy - data[host_status_policy] = host_status_rule - self.policy_modified = True - else: - # add the host_status_policy, if the admin_or_owner is not - # defined, add it also - for policy, rule in policy_data.items(): - if policy not in data: - data[policy] = rule - self.policy_modified = True - if self.policy_modified: - self.log.info('Nova policy is Modified.') - shutil.copyfile(self.nova_policy_file, - self.nova_policy_file_backup) - else: - # file does not exit, create a new one and add the policy - self.log.info('Nova policy file not exist. Creating a new one') - data = policy_data - self.add_policy_file = True - - if self.policy_modified or self.add_policy_file: - write_json_file(self.nova_policy_file, data) - os.system('sudo systemctl restart devstack@n-api.service') - - def _restore_nova_policy(self): - if self.policy_modified: - shutil.copyfile(self.nova_policy_file_backup, - self.nova_policy_file) - os.remove(self.nova_policy_file_backup) - elif self.add_policy_file: - os.remove(self.nova_policy_file) - - if self.add_policy_file or self.policy_modified: - os.system('sudo systemctl restart devstack@n-api.service') - self.add_policy_file = False - self.policy_modified = False diff --git a/doctor_tests/installer/mcp.py b/doctor_tests/installer/mcp.py index 80e559ed..7659c9e2 100644 --- a/doctor_tests/installer/mcp.py +++ b/doctor_tests/installer/mcp.py @@ -1,5 +1,5 @@ ############################################################################## -# Copyright (c) 2018 ZTE Corporation and others. +# Copyright (c) 2019 ZTE Corporation and others. # # All rights reserved. This program and the accompanying materials # are made available under the terms of the Apache License, Version 2.0 @@ -7,7 +7,11 @@ # http://www.apache.org/licenses/LICENSE-2.0 ############################################################################## from os.path import isfile +import re +import time +from doctor_tests.common.constants import is_fenix +from doctor_tests.common.utils import get_doctor_test_root_dir from doctor_tests.common.utils import SSHClient from doctor_tests.installer.base import BaseInstaller @@ -15,6 +19,15 @@ from doctor_tests.installer.base import BaseInstaller class McpInstaller(BaseInstaller): node_user_name = 'ubuntu' + cm_set_script = 'set_config.py' + nc_set_compute_script = 'set_compute_config.py' + fe_set_script = 'set_fenix.sh' + cm_restore_script = 'restore_config.py' + nc_restore_compute_script = 'restore_compute_config.py' + ac_restart_script = 'restart_aodh.py' + ac_restore_script = 'restore_aodh.py' + python = 'python3' + def __init__(self, conf, log): super(McpInstaller, self).__init__(conf, log) self.key_file = self.get_ssh_key_from_installer() @@ -24,40 +37,87 @@ class McpInstaller(BaseInstaller): look_for_keys=True) self.controllers = list() self.controller_clients = list() + self.computes = list() def setup(self): self.log.info('Setup MCP installer start......') - - self.controllers = self.get_controller_ips() + self.get_node_ips() self.create_flavor() - self.set_apply_patches() + if is_fenix(self.conf): + self.set_apply_patches() self.setup_stunnel() def cleanup(self): - self.restore_apply_patches() + if is_fenix(self.conf): + self.restore_apply_patches() for server in self.servers: server.terminate() def get_ssh_key_from_installer(self): self.log.info('Get SSH keys from MCP......') - # Assuming mcp.rsa is already mapped to functest container - # if not, only the test runs on jumphost can get the ssh_key - # default in path /var/lib/opnfv/mcp.rsa + # Default in path /var/lib/opnfv/mcp.rsa ssh_key = '/root/.ssh/id_rsa' mcp_key = '/var/lib/opnfv/mcp.rsa' - return ssh_key if isfile(ssh_key) else mcp_key + return mcp_key if isfile(mcp_key) else ssh_key + + def get_transport_url(self): + client = SSHClient(self.controllers[0], self.node_user_name, + key_filename=self.key_file) + try: + cmd = 'sudo grep -m1 "^transport_url" /etc/nova/nova.conf' + ret, url = client.ssh(cmd) + + if ret: + raise Exception('Exec command to get transport from ' + 'controller(%s) in MCP installer failed, ' + 'ret=%s, output=%s' + % (self.controllers[0], ret, url)) + elif self.controllers[0] not in url: + # need to use ip instead of hostname + url = (re.sub("@.*:", "@%s:" % self.controllers[0], + url[0].split("=", 1)[1])) + except Exception: + cmd = 'grep -i "^rabbit" /etc/nova/nova.conf' + ret, lines = client.ssh(cmd) + if ret: + raise Exception('Exec command to get transport from ' + 'controller(%s) in MCP installer failed, ' + 'ret=%s, output=%s' + % (self.controllers[0], ret, url)) + else: + for line in lines.split('\n'): + if line.startswith("rabbit_userid"): + rabbit_userid = line.split("=") + if line.startswith("rabbit_port"): + rabbit_port = line.split("=") + if line.startswith("rabbit_password"): + rabbit_password = line.split("=") + url = "rabbit://%s:%s@%s:%s/?ssl=0" % (rabbit_userid, + rabbit_password, + self.controllers[0], + rabbit_port) + self.log.info('get_transport_url %s' % url) + return url + + def _copy_overcloudrc_to_controllers(self): + for ip in self.controllers: + cmd = "scp overcloudrc %s@%s:" % (self.node_user_name, ip) + self._run_cmd_remote(self.client, cmd) - def get_controller_ips(self): - self.log.info('Get controller ips from Mcp installer......') + def get_node_ips(self): + self.log.info('Get node ips from Mcp installer......') - command = "sudo salt --out yaml 'ctl*' " \ - "pillar.get _param:openstack_control_address |" \ - "awk '{print $2}'" - controllers = self._run_cmd_remote(self.client, command) - self.log.info('Get controller_ips:%s from Mcp installer' - % controllers) - return controllers + command = 'sudo salt "*" --out yaml pillar.get _param:single_address' + node_details = self._run_cmd_remote(self.client, command) + + self.controllers = [line.split()[1] for line in node_details + if line.startswith("ctl")] + self.computes = [line.split()[1] for line in node_details + if line.startswith("cmp")] + + self.log.info('controller_ips:%s' % self.controllers) + self.log.info('compute_ips:%s' % self.computes) def get_host_ip_from_hostname(self, hostname): command = "sudo salt --out yaml '%s*' " \ @@ -68,6 +128,80 @@ class McpInstaller(BaseInstaller): def set_apply_patches(self): self.log.info('Set apply patches start......') + fenix_files = None + set_scripts = [self.cm_set_script] + thrs = [] + + restart_cmd = 'sudo systemctl restart' \ + ' ceilometer-agent-notification.service' + + if self.conf.test_case != 'fault_management': + if is_fenix(self.conf): + set_scripts.append(self.fe_set_script) + testdir = get_doctor_test_root_dir() + fenix_files = ["Dockerfile", "run"] + restart_cmd += ' nova-scheduler.service' + set_scripts.append(self.nc_set_compute_script) + + for node_ip in self.controllers: + client = SSHClient(node_ip, self.node_user_name, + key_filename=self.key_file) + if fenix_files is not None: + for fenix_file in fenix_files: + src_file = '{0}/{1}/{2}'.format(testdir, + 'admin_tool/fenix', + fenix_file) + client.scp(src_file, fenix_file) + thrs.append(self._run_apply_patches(client, + restart_cmd, + set_scripts, + python=self.python)) + time.sleep(5) + + self.log.info('Set apply patches start......') + + if self.conf.test_case != 'fault_management': + restart_cmd = 'sudo systemctl restart nova-compute.service' + for node_ip in self.computes: + client = SSHClient(node_ip, self.node_user_name, + key_filename=self.key_file) + thrs.append(self._run_apply_patches( + client, + restart_cmd, + [self.nc_set_compute_script], + python=self.python)) + time.sleep(5) + # If Fenix container ir build, it needs to be ready before continue + for thr in thrs: + thr.join() def restore_apply_patches(self): self.log.info('restore apply patches start......') + + restore_scripts = [self.cm_restore_script] + + restore_scripts.append(self.ac_restore_script) + restart_cmd = 'sudo systemctl restart' \ + ' ceilometer-agent-notification.service' + + if self.conf.test_case != 'fault_management': + restart_cmd += ' nova-scheduler.service' + restore_scripts.append(self.nc_restore_compute_script) + + for node_ip in self.controllers: + client = SSHClient(node_ip, self.node_user_name, + key_filename=self.key_file) + self._run_apply_patches(client, + restart_cmd, + restore_scripts, + python=self.python) + + if self.conf.test_case != 'fault_management': + restart_cmd = 'sudo systemctl restart nova-compute.service' + for node_ip in self.computes: + client = SSHClient(node_ip, self.node_user_name, + key_filename=self.key_file) + self._run_apply_patches( + client, restart_cmd, + [self.nc_restore_compute_script], + python=self.python) diff --git a/doctor_tests/main.py b/doctor_tests/main.py index 351d5f19..7573faec 100644 --- a/doctor_tests/main.py +++ b/doctor_tests/main.py @@ -1,5 +1,5 @@ ############################################################################## -# Copyright (c) 2017 ZTE Corporation and others. +# Copyright (c) 2019 ZTE Corporation and others. # # All rights reserved. This program and the accompanying materials # are made available under the terms of the Apache License, Version 2.0 @@ -43,7 +43,6 @@ class DoctorTest(object): def setup(self): # prepare the cloud env self.installer.setup() - # preparing VM image... self.image.create() @@ -51,39 +50,50 @@ class DoctorTest(object): self.user.create() def test_fault_management(self): - try: - LOG.info('doctor fault management test starting.......') - transport_url = self.installer.get_transport_url() - self.fault_management = \ - FaultManagement(self.conf, self.installer, self.user, LOG, - transport_url) - - # prepare test env - self.fault_management.setup() - - # wait for aodh alarms are updated in caches for event evaluator, - # sleep time should be larger than event_alarm_cache_ttl - # (default 60) - # (tojuvone) Fraser currently needs 120 - time.sleep(120) - - # injecting host failure... - # NOTE (umar) add INTERFACE_NAME logic to host injection - self.fault_management.start() - time.sleep(30) - - # verify the test results - # NOTE (umar) copy remote monitor.log file when monitor=collectd - self.fault_management.check_host_status('down') - self.fault_management.check_notification_time() - - except Exception as e: - LOG.error('doctor fault management test failed, ' - 'Exception=%s' % e) - LOG.error(format_exc()) - sys.exit(1) - finally: - self.fault_management.cleanup() + retry = 2 + # Retry once if notified_time is None + while retry > 0: + try: + self.fault_management = None + LOG.info('doctor fault management test starting.......') + transport_url = self.installer.get_transport_url() + self.fault_management = \ + FaultManagement(self.conf, self.installer, self.user, LOG, + transport_url) + + # prepare test env + self.fault_management.setup() + + # wait for aodh alarms are updated in caches for event + # evaluator,sleep time should be larger than + # event_alarm_cache_ttl (default 60) + # (tojuvone) Fraser currently needs 120 + time.sleep(120) + + # injecting host failure... + # NOTE (umar) add INTERFACE_NAME logic to host injection + self.fault_management.start() + time.sleep(30) + + # verify the test results + # NOTE (umar) copy remote monitor.log file when + # monitor=collectd + self.fault_management.check_host_status('down') + self.fault_management.check_notification_time() + retry = 0 + + except Exception as e: + LOG.error('doctor fault management test failed, ' + 'Exception=%s' % e) + if 'notified_time=None' in str(e): + retry -= 1 + LOG.info('doctor fault management retry') + continue + LOG.error(format_exc()) + sys.exit(1) + finally: + if self.fault_management is not None: + self.fault_management.cleanup() def _amount_compute_nodes(self): services = self.nova.services.list(binary='nova-compute') @@ -96,11 +106,12 @@ class DoctorTest(object): LOG.info('not enough compute nodes, skipping doctor ' 'maintenance test') return - elif self.conf.installer.type != 'apex': + elif self.conf.installer.type not in ['apex', 'fuel', 'devstack']: LOG.info('not supported installer, skipping doctor ' 'maintenance test') return try: + maintenance = None LOG.info('doctor maintenance test starting.......') trasport_url = self.installer.get_transport_url() maintenance = Maintenance(trasport_url, self.conf, LOG) @@ -122,7 +133,8 @@ class DoctorTest(object): LOG.error(format_exc()) sys.exit(1) finally: - maintenance.cleanup_maintenance() + if maintenance is not None: + maintenance.cleanup_maintenance() def run(self): """run doctor tests""" diff --git a/doctor_tests/scenario/fault_management.py b/doctor_tests/scenario/fault_management.py index 5216aa80..0271dffe 100644 --- a/doctor_tests/scenario/fault_management.py +++ b/doctor_tests/scenario/fault_management.py @@ -111,7 +111,10 @@ class FaultManagement(object): server = servers.get(vm_name) if not server: raise Exception('Can not find instance: vm_name(%s)' % vm_name) - host_name = server.__dict__.get('OS-EXT-SRV-ATTR:hypervisor_hostname') + # use hostname without domain name which is mapped to the cell + hostname = \ + server.__dict__.get('OS-EXT-SRV-ATTR:hypervisor_hostname') + host_name = hostname.split('.')[0] host_ip = self.installer.get_host_ip_from_hostname(host_name) self.log.info('Get host info(name:%s, ip:%s) which vm(%s) launched at' diff --git a/doctor_tests/scenario/maintenance.py b/doctor_tests/scenario/maintenance.py index 7c2c17e0..e6cdcccd 100644 --- a/doctor_tests/scenario/maintenance.py +++ b/doctor_tests/scenario/maintenance.py @@ -1,5 +1,5 @@ ############################################################################## -# Copyright (c) 2018 Nokia Corporation and others. +# Copyright (c) 2019 Nokia Corporation and others. # # All rights reserved. This program and the accompanying materials # are made available under the terms of the Apache License, Version 2.0 @@ -28,17 +28,23 @@ class Maintenance(object): def __init__(self, trasport_url, conf, log): self.conf = conf self.log = log + self.admin_session = get_session() self.keystone = keystone_client( self.conf.keystone_version, get_session()) self.nova = nova_client(conf.nova_version, get_session()) auth = get_identity_auth(project=self.conf.doctor_project) self.neutron = neutron_client(get_session(auth=auth)) self.stack = Stack(self.conf, self.log) + if self.conf.installer.type == "devstack": + self.endpoint_ip = trasport_url.split("@", 1)[1].split(":", 1)[0] + else: + self.endpoint_ip = self.conf.admin_tool.ip + self.endpoint = "http://%s:12347/" % self.endpoint_ip if self.conf.admin_tool.type == 'sample': self.admin_tool = get_admin_tool(trasport_url, self.conf, self.log) - self.endpoint = 'maintenance' + self.endpoint += 'maintenance' else: - self.endpoint = 'v1/maintenance' + self.endpoint += 'v1/maintenance' self.app_manager = get_app_manager(self.stack, self.conf, self.log) self.inspector = get_inspector(self.conf, self.log, trasport_url) @@ -127,8 +133,9 @@ class Maintenance(object): else: # TBD Now we expect Fenix is running in self.conf.admin_tool.port pass - self.app_manager.start() + # Inspector before app_manager, as floating ip might come late self.inspector.start() + self.app_manager.start() def start_maintenance(self): self.log.info('start maintenance.......') @@ -137,16 +144,15 @@ class Maintenance(object): for hvisor in hvisors: hostname = hvisor.__getattr__('hypervisor_hostname') maintenance_hosts.append(hostname) - - url = ('http://%s:%s/%s' % - (self.conf.admin_tool.ip, - self.conf.admin_tool.port, - self.endpoint)) + url = self.endpoint headers = { 'Content-Type': 'application/json', 'Accept': 'application/json'} - + if self.conf.admin_tool.type == 'fenix': + headers['X-Auth-Token'] = self.admin_session.get_token() + self.log.info('url %s headers %s' % (url, headers)) retries = 12 + ret = None while retries > 0: # let's start maintenance 20sec from now, so projects will have # time to ACK to it before that @@ -156,8 +162,12 @@ class Maintenance(object): data = {'state': 'MAINTENANCE', 'maintenance_at': maintenance_at, - 'metadata': {'openstack_version': 'Rocky'}, - 'workflow': 'default'} + 'metadata': {'openstack_version': 'Train'}} + + if self.conf.app_manager.type == 'vnfm': + data['workflow'] = 'vnf' + else: + data['workflow'] = 'default' if self.conf.admin_tool.type == 'sample': data['hosts'] = maintenance_hosts @@ -166,7 +176,7 @@ class Maintenance(object): try: ret = requests.post(url, data=json.dumps(data), headers=headers) - except: + except Exception: if retries == 0: raise Exception('admin tool did not respond in 120s') else: @@ -175,36 +185,39 @@ class Maintenance(object): time.sleep(10) continue break + if not ret: + raise Exception("admin tool did not respond") if ret.status_code != 200: raise Exception(ret.text) return ret.json()['session_id'] def remove_maintenance_session(self, session_id): self.log.info('remove maintenance session %s.......' % session_id) - url = ('http://%s:%s/%s/%s' % - (self.conf.admin_tool.ip, - self.conf.admin_tool.port, - self.endpoint, - session_id)) + + url = ('%s/%s' % (self.endpoint, session_id)) headers = { 'Content-Type': 'application/json', 'Accept': 'application/json'} + if self.conf.admin_tool.type == 'fenix': + headers['X-Auth-Token'] = self.admin_session.get_token() + ret = requests.delete(url, data=None, headers=headers) if ret.status_code != 200: raise Exception(ret.text) def get_maintenance_state(self, session_id): - url = ('http://%s:%s/%s/%s' % - (self.conf.admin_tool.ip, - self.conf.admin_tool.port, - self.endpoint, - session_id)) + + url = ('%s/%s' % (self.endpoint, session_id)) headers = { 'Content-Type': 'application/json', 'Accept': 'application/json'} + + if self.conf.admin_tool.type == 'fenix': + headers['X-Auth-Token'] = self.admin_session.get_token() + ret = requests.get(url, data=None, headers=headers) if ret.status_code != 200: raise Exception(ret.text) diff --git a/doctor_tests/user.py b/doctor_tests/user.py index 29aa004b..2cd9757f 100644 --- a/doctor_tests/user.py +++ b/doctor_tests/user.py @@ -129,7 +129,6 @@ class User(object): def _add_user_role_in_project(self, is_admin=False): """add test user with test role in test project""" - project = self.projects.get(self.conf.doctor_project) user_name = 'admin' if is_admin else self.conf.doctor_user |