summaryrefslogtreecommitdiffstats
path: root/doctor_tests
diff options
context:
space:
mode:
Diffstat (limited to 'doctor_tests')
-rw-r--r--doctor_tests/admin_tool/__init__.py37
-rw-r--r--doctor_tests/admin_tool/base.py26
-rw-r--r--doctor_tests/admin_tool/sample.py726
-rw-r--r--doctor_tests/app_manager/__init__.py38
-rw-r--r--doctor_tests/app_manager/base.py26
-rw-r--r--doctor_tests/app_manager/sample.py255
-rw-r--r--doctor_tests/config.py4
-rw-r--r--doctor_tests/consumer/__init__.py2
-rw-r--r--doctor_tests/inspector/sample.py39
-rw-r--r--doctor_tests/installer/__init__.py4
-rw-r--r--doctor_tests/installer/apex.py83
-rw-r--r--doctor_tests/installer/base.py39
-rw-r--r--doctor_tests/installer/common/restore_compute_config.py25
-rw-r--r--doctor_tests/installer/common/restore_config.py (renamed from doctor_tests/installer/common/restore_ceilometer.py)28
-rw-r--r--doctor_tests/installer/common/set_ceilometer.py45
-rw-r--r--doctor_tests/installer/common/set_compute_config.py48
-rw-r--r--doctor_tests/installer/common/set_config.py139
-rw-r--r--doctor_tests/installer/mcp.py3
-rw-r--r--doctor_tests/main.py32
-rw-r--r--doctor_tests/maintenance_hot_tpl.yaml119
-rw-r--r--doctor_tests/os_clients.py7
-rw-r--r--doctor_tests/scenario/fault_management.py2
-rw-r--r--doctor_tests/scenario/maintenance.py192
-rw-r--r--doctor_tests/stack.py106
-rw-r--r--doctor_tests/user.py65
25 files changed, 1993 insertions, 97 deletions
diff --git a/doctor_tests/admin_tool/__init__.py b/doctor_tests/admin_tool/__init__.py
new file mode 100644
index 00000000..e8b12817
--- /dev/null
+++ b/doctor_tests/admin_tool/__init__.py
@@ -0,0 +1,37 @@
+##############################################################################
+# Copyright (c) 2018 Nokia Corporation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+from oslo_config import cfg
+from oslo_utils import importutils
+
+
# Configuration options describing the doctor admin_tool component.
OPTS = [
    cfg.StrOpt('type',
               default='sample',
               choices=['sample'],
               help='the component of doctor admin_tool',
               required=True),
    cfg.StrOpt('ip',
               default='127.0.0.1',
               help='the ip of admin_tool',
               required=True),
    # The default of an IntOpt is given as an integer, not as a string.
    cfg.IntOpt('port',
               default=12347,
               help='the port of doctor admin_tool',
               required=True),
]


# Maps the value of the 'type' option to the dotted path of the class
# implementing that admin tool.
_admin_tool_name_class_mapping = {
    'sample': 'doctor_tests.admin_tool.sample.SampleAdminTool'
}
+
+
def get_admin_tool(trasport_url, conf, log):
    """Build the admin tool selected by ``conf.admin_tool.type``.

    The configured type is looked up in ``_admin_tool_name_class_mapping``
    and the resulting class path is imported and instantiated with
    ``(trasport_url, conf, log)``.
    """
    tool_type = conf.admin_tool.type
    class_path = _admin_tool_name_class_mapping.get(tool_type)
    return importutils.import_object(class_path, trasport_url, conf, log)
diff --git a/doctor_tests/admin_tool/base.py b/doctor_tests/admin_tool/base.py
new file mode 100644
index 00000000..0f0b2dcd
--- /dev/null
+++ b/doctor_tests/admin_tool/base.py
@@ -0,0 +1,26 @@
+##############################################################################
+# Copyright (c) 2018 Nokia Corporation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+import abc
+import six
+
+
@six.add_metaclass(abc.ABCMeta)
class BaseAdminTool(object):
    """Abstract interface every admin tool implementation has to provide."""

    def __init__(self, conf, log):
        """Keep the configuration object and the logger for subclasses."""
        self.log = log
        self.conf = conf

    @abc.abstractmethod
    def start(self):
        """Start the admin tool; must be implemented by subclasses."""

    @abc.abstractmethod
    def stop(self):
        """Stop the admin tool; must be implemented by subclasses."""
diff --git a/doctor_tests/admin_tool/sample.py b/doctor_tests/admin_tool/sample.py
new file mode 100644
index 00000000..892a4c83
--- /dev/null
+++ b/doctor_tests/admin_tool/sample.py
@@ -0,0 +1,726 @@
+##############################################################################
+# Copyright (c) 2018 Nokia Corporation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+import datetime
+from flask import Flask
+from flask import request
+import json
+from novaclient.exceptions import BadRequest
+import oslo_messaging as messaging
+import requests
+import time
+from threading import Thread
+from traceback import format_exc
+from uuid import uuid1 as generate_uuid
+
+from doctor_tests.admin_tool.base import BaseAdminTool
+from doctor_tests.identity_auth import get_identity_auth
+from doctor_tests.identity_auth import get_session
+from doctor_tests.os_clients import aodh_client
+from doctor_tests.os_clients import nova_client
+
+
class SampleAdminTool(BaseAdminTool):
    """Sample admin tool that runs an ``AdminTool`` thread."""

    def __init__(self, trasport_url, conf, log):
        """Remember the transport URL; the thread is created in start()."""
        super(SampleAdminTool, self).__init__(conf, log)
        self.app = None
        self.trasport_url = trasport_url

    def start(self):
        """Create the ``AdminTool`` thread and start it."""
        self.log.info('sample admin tool start......')
        self.app = AdminTool(self.trasport_url, self.conf, self, self.log)
        self.app.start()

    def stop(self):
        """POST to the ``/shutdown`` endpoint if the tool was started."""
        self.log.info('sample admin tool stop......')
        if self.app:
            admin_conf = self.conf.admin_tool
            url = 'http://%s:%d/shutdown' % (admin_conf.ip, admin_conf.port)
            json_headers = {
                'Content-Type': 'application/json',
                'Accept': 'application/json',
            }
            requests.post(url, data='', headers=json_headers)
+
+
class AdminMain(Thread):
    """Thread driving a single maintenance session as a state machine.

    ``run()`` moves ``self.state`` from MAINTENANCE, optionally through
    DOWN_SCALE and PREPARE_MAINTENANCE, to PLANNED_MAINTENANCE and
    PLANNED_MAINTENANCE_COMPLETE until MAINTENANCE_COMPLETE is reached.
    Projects are informed with notifications sent by ``_project_notify()``
    and their answers are stored by ``project_input()``.
    """

    def __init__(self, trasport_url, session_id, data, parent, conf, log):
        """Set up clients, notifiers and bookkeeping for one session.

        :param trasport_url: URL handed to ``messaging.get_transport``.
        :param session_id: identifier of this maintenance session.
        :param data: dict providing the keys 'hosts', 'maintenance_at',
                     'metadata' and 'state'.
        :param parent: object that created the session (only stored).
        :param conf: configuration object.
        :param log: logger used for all output of the session.
        """
        Thread.__init__(self)
        self.session_id = session_id
        self.parent = parent
        self.log = log
        self.conf = conf
        # Set by stop() and checked by run() between state transitions.
        self.stopped = False
        self.url = 'http://0.0.0.0:%s' % conf.admin_tool.port
        self.projects_state = dict()  # current state for each project
        self.proj_server_actions = dict()  # actions for each project server
        self.projects_servers = dict()  # servers processed in current state
        self.maint_proj_servers = dict()  # servers under whole maintenance
        self.hosts = data['hosts']
        self.maintenance_at = data['maintenance_at']
        self.computes_disabled = list()
        self.metadata = data['metadata']
        self.auth = get_identity_auth(project=self.conf.doctor_project)
        self.state = data['state']
        self.aodh = aodh_client(self.conf.aodh_version,
                                get_session(auth=self.auth))
        self.nova = nova_client(self.conf.nova_version,
                                get_session(auth=self.auth))
        self.log.info('transport_url %s' % trasport_url)
        transport = messaging.get_transport(self.conf, trasport_url)
        self.notif_proj = messaging.Notifier(transport,
                                             'maintenance.planned',
                                             driver='messaging',
                                             topics=['notifications'])
        self.notif_proj = self.notif_proj.prepare(publisher_id='admin_tool')
        self.notif_admin = messaging.Notifier(transport,
                                              'maintenance.host',
                                              driver='messaging',
                                              topics=['notifications'])
        self.notif_admin = self.notif_admin.prepare(publisher_id='admin_tool')
        self.log.info('Admin tool session %s initialized' % self.session_id)

    def cleanup(self):
        """Re-enable nova-compute on every host this session disabled."""
        for host in self.computes_disabled:
            self.log.info('enable nova-compute on %s' % host)
            self.nova.services.enable(host, 'nova-compute')

    def _projects_not_in_wanted_states(self, wanted_states):
        """Return True if any project state is outside ``wanted_states``."""
        return any(state not in wanted_states
                   for state in self.projects_state.values())

    def projects_not_in_state(self, state):
        """Return True if any project is in another state than ``state``."""
        return any(current != state
                   for current in self.projects_state.values())

    def wait_projects_state(self, wanted_states, wait_seconds):
        """Poll once per second until all projects are in ``wanted_states``.

        :raises Exception: if some project is still in another state after
                           ``wait_seconds`` polls.
        """
        retries = wait_seconds
        while (retries > 0 and
               self._projects_not_in_wanted_states(wanted_states)):
            time.sleep(1)
            retries = retries - 1
        if self._projects_not_in_wanted_states(wanted_states):
            self.log.error('Admin tool session %s: projects in invalid states '
                           '%s' % (self.session_id, self.projects_state))
            raise Exception('Admin tool session %s: not all projects in states'
                            ' %s' % (self.session_id, wanted_states))
        else:
            self.log.info('all projects replied')

    def _wait_all_ack(self, statebase, wait_seconds):
        """Wait for every project to answer and require ACK_<statebase>.

        :raises Exception: if projects did not answer in time or any of them
                           answered something else than the ACK state.
        """
        state_ack = 'ACK_%s' % statebase
        state_nack = 'NACK_%s' % statebase
        self.wait_projects_state([state_ack, state_nack], wait_seconds)
        if self.projects_not_in_state(state_ack):
            raise Exception('Admin tool session %s: all states not %s %s' %
                            (self.session_id, state_ack, self.projects_state))

    def _instances_url(self, project):
        """Return the per-project maintenance URL sent as ``instance_ids``."""
        return '%s/%s/maintenance' % (self.url, project)

    @staticmethod
    def _actions_at(wait_seconds):
        """Return the UTC time ``wait_seconds`` from now as a string."""
        deadline = (datetime.datetime.utcnow() +
                    datetime.timedelta(seconds=wait_seconds))
        return deadline.strftime('%Y-%m-%d %H:%M:%S')

    def _project_notify(self, project_id, instance_ids, allowed_actions,
                        actions_at, state, metadata):
        """Send a 'maintenance.scheduled' notification to one project."""
        reply_url = '%s/%s/maintenance' % (self.url, project_id)

        payload = dict(project_id=project_id,
                       instance_ids=instance_ids,
                       allowed_actions=allowed_actions,
                       state=state,
                       actions_at=actions_at,
                       session_id=self.session_id,
                       metadata=metadata,
                       reply_url=reply_url)

        self.log.debug('Sending "maintenance.planned" to project: %s' %
                       payload)

        self.notif_proj.info({'some': 'context'}, 'maintenance.scheduled',
                             payload)

    def _admin_notify(self, project, host, state, session_id):
        """Send a 'maintenance.host' notification about ``host``."""
        payload = dict(project_id=project, host=host, state=state,
                       session_id=session_id)

        self.log.debug('Sending "maintenance.host": %s' % payload)

        self.notif_admin.info({'some': 'context'}, 'maintenance.host', payload)

    def down_scale(self):
        """Notify every project with the current state and wait for ACKs.

        :raises Exception: unless all projects reply ACK_DOWN_SCALE.
        """
        # Defined before the loop so it also exists when there is no project.
        wait_seconds = 120
        for project in self.projects_servers:
            self.log.info('DOWN_SCALE to project %s' % project)
            self.log.debug('instance_ids %s' % self.projects_servers[project])
            self._project_notify(project, self._instances_url(project), [],
                                 self._actions_at(wait_seconds), self.state,
                                 self.metadata)
        self._wait_all_ack('DOWN_SCALE', wait_seconds)

    def maintenance(self):
        """Announce the maintenance to every project and wait for ACKs.

        Projects get time to answer until ``self.maintenance_at``.

        :raises Exception: if fewer than 10 seconds are left before the
                           maintenance, or unless all projects reply
                           ACK_MAINTENANCE.
        """
        # Computed before the loop so it also exists when there is no project.
        maint_at = self.str_to_datetime(self.maintenance_at)
        td = maint_at - datetime.datetime.utcnow()
        wait_seconds = int(td.total_seconds())
        if wait_seconds < 10:
            raise Exception('Admin tool session %s: No time for project to'
                            ' answer: %s' %
                            (self.session_id, wait_seconds))
        for project in self.projects_servers:
            self.log.info('\nMAINTENANCE to project %s\n' % project)
            self.log.debug('instance_ids %s' % self.projects_servers[project])
            self._project_notify(project, self._instances_url(project), [],
                                 self.maintenance_at, self.state,
                                 self.metadata)
        self._wait_all_ack('MAINTENANCE', wait_seconds)

    def maintenance_complete(self):
        """Tell every project the maintenance is complete and wait for ACKs.

        :raises Exception: unless all projects reply
                           ACK_MAINTENANCE_COMPLETE.
        """
        wait_seconds = 120
        for project in self.projects_servers:
            self.log.info('MAINTENANCE_COMPLETE to project %s' % project)
            self._project_notify(project, self._instances_url(project), [],
                                 self._actions_at(wait_seconds),
                                 'MAINTENANCE_COMPLETE', self.metadata)
        self._wait_all_ack('MAINTENANCE_COMPLETE', wait_seconds)

    def need_down_scale(self, host_servers):
        """Return True if there is room for at most one more instance.

        ``host_servers`` maps host -> project -> {server_id: server_name}.
        """
        room_for_instances = 0
        for projects in host_servers.values():
            instances = sum(len(servers) for servers in projects.values())
            # Room is counted against a hard coded 2 instances per host.
            room_for_instances += (2 - instances)
        self.log.info('there is room for %d instances' % room_for_instances)
        return room_for_instances <= 1

    def find_host_to_be_empty(self, host_servers):
        """Pick the host without HA instances having fewest other instances.

        An instance counts as HA when its name contains 'doctor_ha_app_'.
        Returns None if every host has at least one HA instance.
        """
        host_to_be_empty = None
        host_nonha_instances = 0
        for host in host_servers:
            ha_instances = 0
            nonha_instances = 0
            for servers in host_servers[host].values():
                for server_name in servers.values():
                    if 'doctor_ha_app_' in server_name:
                        ha_instances += 1
                    else:
                        nonha_instances += 1
            self.log.info('host %s has %d ha and %d non ha instances' %
                          (host, ha_instances, nonha_instances))
            if ha_instances == 0:
                # The first candidate always wins; later ones only when they
                # have strictly fewer instances.
                if (not host_to_be_empty or
                        nonha_instances < host_nonha_instances):
                    host_to_be_empty = host
                    host_nonha_instances = nonha_instances
        self.log.info('host %s selected to be empty' % host_to_be_empty)
        return host_to_be_empty

    def make_compute_host_empty(self, host, projects_servers, statebase):
        """Ask projects to move their servers away from ``host``.

        :param host: compute host that has to become empty.
        :param projects_servers: project -> {server_id: server_name} of the
                                 servers on ``host``.
        :param statebase: state sent to the projects; the replies expected
                          are ACK_<statebase> / NACK_<statebase>.
        :raises Exception: unless all projects reply ACK_<statebase>.
        """
        # Defined before the loop so it also exists when there is no project.
        wait_seconds = 120
        for project in projects_servers:
            # self.projects_servers must have servers under action
            self.projects_servers[project] = projects_servers[project].copy()
            self.log.info('%s to project %s' % (statebase, project))
            self.project_servers_log_info(project, projects_servers)
            self._project_notify(project, self._instances_url(project),
                                 ['MIGRATE', 'LIVE_MIGRATE', 'OWN_ACTION'],
                                 self._actions_at(wait_seconds), statebase,
                                 self.metadata)
        self._wait_all_ack(statebase, wait_seconds)
        self.actions_to_have_empty_host(host)

    def notify_action_done(self, project, instance_id):
        """Notify ``project`` that the action on ``instance_id`` is done."""
        self._project_notify(project, instance_id, [], None,
                             "INSTANCE_ACTION_DONE", None)

    def actions_to_have_empty_host(self, host):
        """Execute the actions projects requested and wait for empty host.

        :raises Exception: if no actions were received, or an action other
                           than MIGRATE / OWN_ACTION was requested.
        """
        retry = 0
        while not self.proj_server_actions:
            time.sleep(2)
            if retry == 10:
                raise Exception('Admin tool session %s: project server actions'
                                ' not set' % self.session_id)
            retry += 1
        for project in self.proj_server_actions:
            for server, action in self.proj_server_actions[project].items():
                self.log.info('Action %s server %s: %s' % (action, server,
                              self.projects_servers[project][server]))
                if action == 'MIGRATE':
                    self.migrate_server(server)
                    self.notify_action_done(project, server)
                elif action == 'OWN_ACTION':
                    # Nothing is done by the admin tool for this action.
                    pass
                else:
                    raise Exception('Admin tool session %s: server %s action '
                                    '%s not supported' %
                                    (self.session_id, server, action))
        self.proj_server_actions = dict()
        self._wait_host_empty(host)

    def migrate_server(self, server_id):
        """Migrate a server and confirm the resize.

        The migration is retried when nova answers ``BadRequest``.

        :raises Exception: on migration failure or when the server does not
                           reach the 'resized' state in time.
        """
        server = self.nova.servers.get(server_id)
        vm_state = server.__dict__.get('OS-EXT-STS:vm_state')
        self.log.info('server %s state %s' % (server_id, vm_state))
        last_vm_state = vm_state
        retry_migrate = 5
        while True:
            try:
                server.migrate()
                time.sleep(5)
                retries = 36
                while vm_state != 'resized' and retries > 0:
                    # try to confirm within 3min
                    server = self.nova.servers.get(server_id)
                    vm_state = server.__dict__.get('OS-EXT-STS:vm_state')
                    if vm_state == 'resized':
                        server.confirm_resize()
                        self.log.info('server %s migration confirmed' %
                                      server_id)
                        return
                    if last_vm_state != vm_state:
                        self.log.info('server %s state: %s' % (server_id,
                                      vm_state))
                    if vm_state == 'error':
                        raise Exception('server %s migration failed, state: %s'
                                        % (server_id, vm_state))
                    time.sleep(5)
                    retries = retries - 1
                    last_vm_state = vm_state
                # Timeout waiting state to change
                break

            except BadRequest:
                if retry_migrate == 0:
                    raise Exception('server %s migrate failed' % server_id)
                # Might take time for scheduler to sync inconsistent instance
                # list for host
                retry_time = 180 - (retry_migrate * 30)
                self.log.info('server %s migrate failed, retry in %s sec'
                              % (server_id, retry_time))
                time.sleep(retry_time)
            except Exception as e:
                self.log.error('server %s migration failed, Exception=%s' %
                               (server_id, e))
                self.log.error(format_exc())
                raise Exception('server %s migration failed, state: %s' %
                                (server_id, vm_state))
            finally:
                retry_migrate = retry_migrate - 1
        raise Exception('server %s migration timeout, state: %s' %
                        (server_id, vm_state))

    def _wait_host_empty(self, host):
        """Poll the hypervisor of ``host`` until it reports no used vcpus.

        :raises Exception: if the host still has used vcpus after 48 polls.
        """
        hid = self.nova.hypervisors.search(host)[0].id
        vcpus_used_last = 0
        # wait 4min to get host empty
        for _ in range(48):
            hvisor = self.nova.hypervisors.get(hid)
            vcpus_used = hvisor.vcpus_used
            if vcpus_used > 0:
                # Log only when the figure changed since the previous poll.
                if vcpus_used != vcpus_used_last:
                    self.log.info('%s still has %d vcpus reserved. wait...'
                                  % (host, vcpus_used))
                vcpus_used_last = vcpus_used
                time.sleep(5)
            else:
                self.log.info('%s empty' % host)
                return
        raise Exception('%s host not empty' % host)

    def projects_listen_alarm(self, match_event):
        """Return True if every known project has an alarm on the event."""
        match_projects = ([str(alarm['project_id']) for alarm in
                          self.aodh.alarm.list() if
                          str(alarm['event_rule']['event_type']) ==
                          match_event])
        all_projects_match = True
        for project in list(self.projects_state):
            if project not in match_projects:
                self.log.error('Admin tool session %s: project %s not '
                               'listening to %s' %
                               (self.session_id, project, match_event))
                all_projects_match = False
        return all_projects_match

    def project_servers_log_info(self, project, host_servers):
        """Log the servers listed for ``project`` in ``host_servers``."""
        info = 'Project servers:\n'
        for server in host_servers[project]:
            info += ('  %s: %s\n' %
                     (server, host_servers[project][server]))
        self.log.info('%s' % info)

    def servers_log_info(self, host_servers):
        """Log, for every session host, its projects and their servers."""
        info = '\n'
        for host in self.hosts:
            info += '%s:\n' % host
            if host in host_servers:
                for project in host_servers[host]:
                    info += '  %s:\n' % project
                    for server in host_servers[host][project]:
                        info += ('    %s: %s\n' %
                                 (server, host_servers[host][project][server]))
        self.log.info('%s' % info)

    def update_server_info(self):
        """Refresh the server bookkeeping from nova.

        ``self.projects_servers`` is rebuilt and projects seen for the first
        time get the state None.

        :returns: dict host -> project -> {server_id: server_name} limited
                  to the hosts of this session.
        """
        opts = {'all_tenants': True}
        servers = self.nova.servers.list(search_opts=opts)
        self.projects_servers = dict()
        host_servers = dict()
        for server in servers:
            try:
                host = str(server.__dict__.get('OS-EXT-SRV-ATTR:host'))
                project = str(server.tenant_id)
                server_name = str(server.name)
                server_id = str(server.id)
            except Exception:
                raise Exception('can not get params from server=%s' %
                                server)
            if host not in self.hosts:
                continue
            host_projects = host_servers.setdefault(host, dict())
            host_projects.setdefault(project, dict())[server_id] = server_name
            self.projects_servers.setdefault(
                project, dict())[server_id] = server_name
            self.projects_state.setdefault(project, None)
        return host_servers

    def str_to_datetime(self, dt_str):
        """Convert a 'YYYY-MM-DD HH:MM:SS' string to a datetime."""
        mdate, mtime = dt_str.split()
        year, month, day = map(int, mdate.split('-'))
        hours, minutes, seconds = map(int, mtime.split(':'))
        return datetime.datetime(year, month, day, hours, minutes, seconds)

    def host_maintenance(self, host):
        """Placeholder for the real maintenance; only logs and sleeps."""
        self.log.info('maintaining host %s' % host)
        # no implementation to make real maintenance
        time.sleep(5)

    def _maintain_host(self, admin_project, host):
        """Maintain one empty host and enable its nova-compute again."""
        self.log.info('IN_MAINTENANCE host %s' % host)
        self._admin_notify(admin_project, host, 'IN_MAINTENANCE',
                           self.session_id)
        self.host_maintenance(host)
        self._admin_notify(admin_project, host,
                           'MAINTENANCE_COMPLETE',
                           self.session_id)
        self.nova.services.enable(host, 'nova-compute')
        self.computes_disabled.remove(host)
        self.log.info('MAINTENANCE_COMPLETE host %s' % host)

    def run(self):
        """Process session states until MAINTENANCE_COMPLETE or stop().

        :raises Exception: when a state cannot be completed or the session
                           is in an unknown state.
        """
        # NOTE(review): host_servers is first bound while processing the
        # MAINTENANCE state; a session created directly in a later state
        # would fail on it - confirm sessions always start in MAINTENANCE.
        while self.state != 'MAINTENANCE_COMPLETE' and not self.stopped:
            self.log.info('--==session %s: processing state %s==--' %
                          (self.session_id, self.state))
            if self.state == 'MAINTENANCE':
                host_servers = self.update_server_info()
                self.servers_log_info(host_servers)

                if not self.projects_listen_alarm('maintenance.scheduled'):
                    raise Exception('all projects do not listen maintenance '
                                    'alarm')
                self.maintenance()

                maint_at = self.str_to_datetime(self.maintenance_at)
                if maint_at > datetime.datetime.utcnow():
                    time_now = (datetime.datetime.utcnow().strftime(
                                '%Y-%m-%d %H:%M:%S'))
                    self.log.info('Time now: %s maintenance starts: %s....' %
                                  (time_now, self.maintenance_at))
                    td = maint_at - datetime.datetime.utcnow()
                    # The start time may pass between the check above and
                    # this line; never hand a negative value to sleep().
                    time.sleep(max(0, td.total_seconds()))
                time_now = (datetime.datetime.utcnow().strftime(
                            '%Y-%m-%d %H:%M:%S'))
                self.log.info('Time to start maintenance starts: %s' %
                              time_now)

                # check if we have empty compute host
                # True -> PLANNED_MAINTENANCE
                # False -> check if we can migrate VMs to get empty host
                # True -> PREPARE_MAINTENANCE
                # False -> DOWN_SCALE
                maintenance_empty_hosts = ([h for h in self.hosts if h not in
                                           host_servers])

                if len(maintenance_empty_hosts) == 0:
                    if self.need_down_scale(host_servers):
                        self.log.info('Need to down scale')
                        self.state = 'DOWN_SCALE'
                    else:
                        self.log.info('Free capacity, but need empty host')
                        self.state = 'PREPARE_MAINTENANCE'
                else:
                    self.log.info('Free capacity, but need empty host')
                    self.state = 'PLANNED_MAINTENANCE'
                self.log.info('--==State change from MAINTENANCE to %s==--'
                              % self.state)
            elif self.state == 'DOWN_SCALE':
                # Test case is hard coded to have all compute capacity used
                # We need to down scale to have one empty compute host
                self.down_scale()
                self.state = 'PREPARE_MAINTENANCE'
                host_servers = self.update_server_info()
                self.servers_log_info(host_servers)
                self.log.info('--==State change from DOWN_SCALE to'
                              ' %s==--' % self.state)

            elif self.state == 'PREPARE_MAINTENANCE':
                # It might be down scale did not free capacity on a single
                # compute host, so we need to arrange free capacity to a single
                # compute host
                self.maint_proj_servers = self.projects_servers.copy()
                maintenance_empty_hosts = ([h for h in self.hosts if h not in
                                           host_servers])
                if len(maintenance_empty_hosts) == 0:
                    self.log.info('no empty hosts for maintenance')
                    if self.need_down_scale(host_servers):
                        raise Exception('Admin tool session %s: Not enough '
                                        'free capacity for maintenance' %
                                        self.session_id)
                    host = self.find_host_to_be_empty(host_servers)
                    if host:
                        self.make_compute_host_empty(host, host_servers[host],
                                                     'PREPARE_MAINTENANCE')
                    else:
                        # We do not currently support another down scale if
                        # first was not enough
                        raise Exception('Admin tool session %s: No host '
                                        'candidate to be emptied' %
                                        self.session_id)
                else:
                    for host in maintenance_empty_hosts:
                        self.log.info('%s already empty '
                                      'for maintenance' % host)
                self.state = 'PLANNED_MAINTENANCE'
                host_servers = self.update_server_info()
                self.servers_log_info(host_servers)
                self.log.info('--==State change from PREPARE_MAINTENANCE to %s'
                              '==--' % self.state)
            elif self.state == 'PLANNED_MAINTENANCE':
                maintenance_hosts = list()
                maintenance_empty_hosts = list()
                # TODO This should be admin. hack for now to have it work
                admin_project = list(self.projects_state)[0]
                for host in self.hosts:
                    self.log.info('disable nova-compute on host %s' % host)
                    self.nova.services.disable_log_reason(host, 'nova-compute',
                                                          'maintenance')
                    self.computes_disabled.append(host)
                    if host in host_servers and len(host_servers[host]):
                        maintenance_hosts.append(host)
                    else:
                        maintenance_empty_hosts.append(host)
                self.log.info('--==Start to maintain empty hosts==--\n%s' %
                              maintenance_empty_hosts)
                for host in maintenance_empty_hosts:
                    # scheduler has problems, let's see if just down scaled
                    # host is really empty
                    self._wait_host_empty(host)
                    self._maintain_host(admin_project, host)
                self.log.info('--==Start to maintain occupied hosts==--\n%s' %
                              maintenance_hosts)
                for host in maintenance_hosts:
                    self.log.info('PLANNED_MAINTENANCE host %s' % host)
                    self.make_compute_host_empty(host, host_servers[host],
                                                 'PLANNED_MAINTENANCE')
                    self._maintain_host(admin_project, host)
                self.state = 'PLANNED_MAINTENANCE_COMPLETE'
                host_servers = self.update_server_info()
                self.servers_log_info(host_servers)
            elif self.state == 'PLANNED_MAINTENANCE_COMPLETE':
                self.log.info('Projects still need to up scale back to full '
                              'capcity')
                self.maintenance_complete()
                host_servers = self.update_server_info()
                self.servers_log_info(host_servers)
                self.state = 'MAINTENANCE_COMPLETE'
            else:
                raise Exception('Admin tool session %s: session in invalid '
                                'state %s' % (self.session_id, self.state))
        if self.state != 'MAINTENANCE_COMPLETE':
            # The loop was left because stop() was called.
            self.log.info('Admin tool session %s: stopped in state %s' %
                          (self.session_id, self.state))
            return
        self.log.info('--==Maintenance session %s: '
                      'MAINTENANCE SESSION COMPLETE==--' % self.session_id)

    def project_input(self, project_id, data):
        """Store the reply of a project.

        The 'state' of ``data`` becomes the project state and an optional
        'instance_actions' dict is copied as the actions for its servers.
        """
        self.log.debug('Admin tool session %s: project %s input' %
                       (self.session_id, project_id))
        if 'instance_actions' in data:
            self.proj_server_actions[project_id] = (
                data['instance_actions'].copy())
        self.projects_state[project_id] = data['state']

    def project_get_instances(self, project_id):
        """Return the ids of the servers currently handled for a project."""
        ret = list(self.projects_servers[project_id])
        self.log.debug('Admin tool session %s: project %s GET return: %s' %
                       (self.session_id, project_id, ret))
        return ret

    def stop(self):
        """Ask run() to end after the state it is currently processing."""
        self.stopped = True
+
+
class AdminTool(Thread):
    """Thread serving the admin tool REST API with Flask.

    Maintenance sessions are kept in ``self.maint_sessions`` keyed by their
    session id.
    """

    def __init__(self, trasport_url, conf, admin_tool, log):
        """Store configuration; the Flask app is created in ``run()``.

        :param trasport_url: URL passed on to every ``AdminMain`` session.
        :param conf: configuration object; ``conf.admin_tool.port`` is the
                     port the API listens on.
        :param admin_tool: object that created this thread (only stored).
        :param log: logger.
        """
        Thread.__init__(self)
        self.admin_tool = admin_tool
        self.log = log
        self.conf = conf
        self.port = self.conf.admin_tool.port
        self.maint_sessions = {}
        self.projects = {}
        self.maintenance_hosts = []
        self.trasport_url = trasport_url

    def _handle_maintenance_post(self, data):
        """Create, remove or acknowledge a maintenance session.

        Without 'session_id' in ``data`` a new ``AdminMain`` session is
        created and started. With 'session_id' and the state
        'REMOVE_MAINTENANCE_SESSION' that session is cleaned up, stopped and
        removed. Any other state of an existing session is only
        acknowledged.

        :returns: dict with 'session_id' and the 'ACK_<state>' reply state.
        """
        if 'session_id' in data:
            # Bound for every state, as the reply below always needs it.
            session_id = data['session_id']
            if data['state'] == 'REMOVE_MAINTENANCE_SESSION':
                self.log.info('remove session %s'
                              % session_id)
                self.maint_sessions[session_id].cleanup()
                self.maint_sessions[session_id].stop()
                del self.maint_sessions[session_id]
        else:
            session_id = str(generate_uuid())
            self.log.info('creating session: %s' % session_id)
            self.maint_sessions[session_id] = (
                AdminMain(self.trasport_url,
                          session_id,
                          data,
                          self,
                          self.conf,
                          self.log))
            self.maint_sessions[session_id].start()
        return {'session_id': session_id,
                'state': 'ACK_%s' % data['state']}

    def run(self):
        """Register the REST routes and run the Flask application."""
        app = Flask('admin_tool')

        @app.route('/maintenance', methods=['POST'])
        def admin_maintenance_api_post():
            data = json.loads(request.data.decode('utf8'))
            self.log.info('maintenance message: %s' % data)
            reply = json.dumps(self._handle_maintenance_post(data))
            self.log.debug('reply: %s' % reply)
            return reply, 200, None

        @app.route('/maintenance', methods=['GET'])
        def admin_maintenance_api_get():
            data = json.loads(request.data.decode('utf8'))
            self.log.debug('Admin get maintenance: %s' % data)
            session_id = data['session_id']
            reply = json.dumps({'state':
                               self.maint_sessions[session_id].state})
            self.log.debug('reply: %s' % reply)
            return reply, 200, None

        @app.route('/<project_id>/maintenance', methods=['PUT'])
        def project_maintenance_api_put(project_id=None):
            data = json.loads(request.data.decode('utf8'))
            self.log.debug('%s project put: %s' % (project_id, data))
            self.project_input(project_id, data)
            return 'OK'

        @app.route('/<project_id>/maintenance', methods=['GET'])
        def project_maintenance_api_get(project_id=None):
            data = json.loads(request.data.decode('utf8'))
            self.log.debug('%s project get %s' % (project_id, data))
            instances = self.project_get_instances(project_id, data)
            reply = json.dumps({'instance_ids': instances})
            self.log.debug('%s reply: %s' % (project_id, reply))
            return reply, 200, None

        @app.route('/shutdown', methods=['POST'])
        def shutdown():
            for session in self.maint_sessions:
                self.log.info('shutdown admin tool session %s thread' %
                              session)
                self.maint_sessions[session].cleanup()
                self.maint_sessions[session].stop()
            self.log.info('shutdown admin_tool server at %s' % time.time())
            func = request.environ.get('werkzeug.server.shutdown')
            if func is None:
                raise RuntimeError('Not running with the Werkzeug Server')
            func()
            return 'admin_tool app shutting down...'

        app.run(host='0.0.0.0', port=self.port)

    def project_input(self, project_id, data):
        """Forward a project reply to the session named in ``data``."""
        session_id = data['session_id']
        self.maint_sessions[session_id].project_input(project_id, data)

    def project_get_instances(self, project_id, data):
        """Return the project's instances from the session in ``data``."""
        session_id = data['session_id']
        return self.maint_sessions[session_id].project_get_instances(
            project_id)
diff --git a/doctor_tests/app_manager/__init__.py b/doctor_tests/app_manager/__init__.py
new file mode 100644
index 00000000..717d6587
--- /dev/null
+++ b/doctor_tests/app_manager/__init__.py
@@ -0,0 +1,38 @@
+##############################################################################
+# Copyright (c) 2018 Nokia Corporation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+from oslo_config import cfg
+from oslo_utils import importutils
+
+
# Configuration options describing the doctor app manager component.
OPTS = [
    cfg.StrOpt('type',
               default='sample',
               choices=['sample'],
               help='the component of doctor app manager',
               required=True),
    cfg.StrOpt('ip',
               default='127.0.0.1',
               help='the ip of app manager',
               required=True),
    # The default of an IntOpt is given as an integer, not as a string.
    cfg.IntOpt('port',
               default=12348,
               help='the port of doctor app manager',
               required=True),
]


# Maps the value of the 'type' option to the dotted path of the class
# implementing that app manager.
_app_manager_name_class_mapping = {
    'sample': 'doctor_tests.app_manager.sample.SampleAppManager'
}
+
+
def get_app_manager(stack, conf, log):
    """Build the app manager selected by ``conf.app_manager.type``.

    The configured type is looked up in ``_app_manager_name_class_mapping``
    and the resulting class path is imported and instantiated with
    ``(stack, conf, log)``.
    """
    manager_type = conf.app_manager.type
    class_path = _app_manager_name_class_mapping.get(manager_type)
    return importutils.import_object(class_path, stack, conf, log)
diff --git a/doctor_tests/app_manager/base.py b/doctor_tests/app_manager/base.py
new file mode 100644
index 00000000..0d424083
--- /dev/null
+++ b/doctor_tests/app_manager/base.py
@@ -0,0 +1,26 @@
+##############################################################################
+# Copyright (c) 2018 Nokia Corporation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+import abc
+import six
+
+
@six.add_metaclass(abc.ABCMeta)
class BaseAppManager(object):
    """Abstract interface every app manager implementation has to provide."""

    def __init__(self, conf, log):
        """Keep the configuration object and the logger for subclasses."""
        self.log = log
        self.conf = conf

    @abc.abstractmethod
    def start(self):
        """Start the app manager; must be implemented by subclasses."""

    @abc.abstractmethod
    def stop(self):
        """Stop the app manager; must be implemented by subclasses."""
diff --git a/doctor_tests/app_manager/sample.py b/doctor_tests/app_manager/sample.py
new file mode 100644
index 00000000..94926ee2
--- /dev/null
+++ b/doctor_tests/app_manager/sample.py
@@ -0,0 +1,255 @@
+##############################################################################
+# Copyright (c) 2018 Nokia Corporation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+from flask import Flask
+from flask import request
+import json
+import yaml
+import time
+from threading import Thread
+import requests
+
+from doctor_tests.app_manager.base import BaseAppManager
+from doctor_tests.identity_auth import get_identity_auth
+from doctor_tests.identity_auth import get_session
+from doctor_tests.os_clients import nova_client
+
+
+class SampleAppManager(BaseAppManager):
+    """Sample app manager that runs an AppManager thread for a stack."""
+
+    def __init__(self, stack, conf, log):
+        super(SampleAppManager, self).__init__(conf, log)
+        self.app = None
+        self.stack = stack
+
+    def start(self):
+        """Create the AppManager thread and start it."""
+        self.log.info('sample app manager start......')
+        self.app = AppManager(self.stack, self.conf, self, self.log)
+        self.app.start()
+
+    def stop(self):
+        """POST to the server's /shutdown URL if the thread was created."""
+        self.log.info('sample app manager stop......')
+        if self.app:
+            shutdown_url = 'http://%s:%d/shutdown' % (
+                self.conf.app_manager.ip,
+                self.conf.app_manager.port)
+            json_headers = {
+                'Content-Type': 'application/json',
+                'Accept': 'application/json',
+            }
+            requests.post(shutdown_url, data='', headers=json_headers)
+
+
+class AppManager(Thread):
+    """Thread running the sample app manager's Flask server.
+
+    The server accepts POST /maintenance (maintenance alarms) and
+    POST /shutdown. For most alarm states it sends an acknowledgement with
+    an HTTP PUT to the ``reply_url`` carried in the alarm.
+    """
+
+    def __init__(self, stack, conf, app_manager, log):
+        Thread.__init__(self)
+        self.stack = stack
+        self.conf = conf
+        self.port = self.conf.app_manager.port
+        self.app_manager = app_manager
+        self.log = log
+        self.intance_ids = None
+        self.headers = {
+            'Content-Type': 'application/json',
+            'Accept': 'application/json'}
+        self.auth = get_identity_auth(project=self.conf.doctor_project)
+        self.nova = nova_client(self.conf.nova_version,
+                                get_session(auth=self.auth))
+        self.orig_number_of_instances = self.number_of_instances()
+        self.ha_instances = self.get_ha_instances()
+        self.floating_ip = None
+        # NOTE: after this assignment the instance attribute (an id) shadows
+        # the method of the same name on this object.
+        self.active_instance_id = self.active_instance_id()
+
+    def active_instance_id(self):
+        """Return the id of the HA instance that has a floating address.
+
+        Stores the floating address in ``self.floating_ip`` if none is
+        stored yet. Raises Exception when no HA instance has one.
+        """
+        for instance in self.ha_instances:
+            # only the first network of the instance is inspected
+            network_interfaces = next(iter(instance.addresses.values()))
+            for network_interface in network_interfaces:
+                _type = network_interface.get('OS-EXT-IPS:type')
+                if _type == "floating":
+                    if not self.floating_ip:
+                        self.floating_ip = network_interface.get('addr')
+                    self.log.debug('active_instance: %s %s' %
+                                   (instance.name, instance.id))
+                    return instance.id
+        raise Exception("No active instance found")
+
+    def switch_over_ha_instance(self):
+        """Move the floating IP to the first HA instance that is not active."""
+        for instance in self.ha_instances:
+            if instance.id != self.active_instance_id:
+                self.log.info('Switch over to: %s %s' % (instance.name,
+                                                         instance.id))
+                instance.add_floating_ip(self.floating_ip)
+                self.active_instance_id = instance.id
+                break
+
+    def get_instance_ids(self):
+        """Return the ids of all servers listed by nova."""
+        return [instance.id
+                for instance in self.nova.servers.list(detailed=False)]
+
+    def get_ha_instances(self):
+        """Return the servers whose name contains 'doctor_ha_app_'."""
+        ha_instances = list()
+        for instance in self.nova.servers.list(detailed=True):
+            if "doctor_ha_app_" in instance.name:
+                ha_instances.append(instance)
+                self.log.debug('ha_instances: %s' % instance.name)
+        return ha_instances
+
+    def _alarm_data_decoder(self, data):
+        """Turn a trait string that looks like a list or dict into one.
+
+        Strings without '[' or '{' are returned unchanged.
+        """
+        if "[" in data or "{" in data:
+            # string to list or dict removing unicode
+            # SECURITY: the data arrives over HTTP, so it is parsed with
+            # safe_load; plain yaml.load can construct arbitrary objects and
+            # needs an explicit Loader in recent PyYAML releases.
+            data = yaml.safe_load(data.replace("u'", "'"))
+        return data
+
+    def _alarm_traits_decoder(self, data):
+        """Return the alarm's event traits as a {name: decoded value} dict.
+
+        Element 0 of each trait is used as the name and element 2 as the
+        value.
+        """
+        return ({str(t[0]): self._alarm_data_decoder(str(t[2]))
+                 for t in data['reason_data']['event']['traits']})
+
+    def get_session_instance_ids(self, url, session_id):
+        """GET ``url`` for the session and return its 'instance_ids'.
+
+        Raises Exception with the response text on a non-200 status.
+        """
+        data = {'session_id': session_id}
+        ret = requests.get(url, data=json.dumps(data), headers=self.headers)
+        if ret.status_code != 200:
+            raise Exception(ret.text)
+        self.log.info('get_instance_ids %s' % ret.json())
+        return ret.json()['instance_ids']
+
+    def scale_instances(self, number_of_instances):
+        """Change the stack's 'nonha_intances' parameter by the given delta.
+
+        Updates the stack and raises Exception unless the number of servers
+        listed by nova changed by exactly ``number_of_instances``.
+        """
+        number_of_instances_before = self.number_of_instances()
+
+        parameters = self.stack.parameters
+        parameters['nonha_intances'] += number_of_instances
+        self.stack.update(self.stack.stack_name,
+                          self.stack.stack_id,
+                          self.stack.template,
+                          parameters=parameters,
+                          files=self.stack.files)
+
+        number_of_instances_after = self.number_of_instances()
+        if (number_of_instances_before + number_of_instances !=
+                number_of_instances_after):
+            self.log.error('scale_instances with: %d from: %d ends up to: %d'
+                           % (number_of_instances, number_of_instances_before,
+                              number_of_instances_after))
+            raise Exception('scale_instances failed')
+
+        self.log.info('scaled insances from %d to %d' %
+                      (number_of_instances_before,
+                       number_of_instances_after))
+
+    def number_of_instances(self):
+        """Return how many servers nova lists."""
+        return len(self.nova.servers.list(detailed=False))
+
+    def run(self):
+        """Thread body: define the Flask routes and serve on ``self.port``."""
+        app = Flask('app_manager')
+
+        @app.route('/maintenance', methods=['POST'])
+        def maintenance_alarm():
+            data = json.loads(request.data.decode('utf8'))
+            try:
+                payload = self._alarm_traits_decoder(data)
+            except Exception:
+                # log the raw traits so the undecodable value can be seen
+                payload = ({t[0]: t[2] for t in
+                            data['reason_data']['event']['traits']})
+                self.log.error('cannot parse alarm data: %s' % payload)
+                raise Exception('sample app manager cannot parse alarm.'
+                                'Possibly trait data over 256 char')
+
+            self.log.info('sample app manager received data = %s' % payload)
+
+            state = payload['state']
+            reply_state = None
+            reply = dict()
+
+            self.log.info('sample app manager state: %s' % state)
+
+            if state == 'MAINTENANCE':
+                instance_ids = (self.get_session_instance_ids(
+                                payload['instance_ids'],
+                                payload['session_id']))
+                reply['instance_ids'] = instance_ids
+                reply_state = 'ACK_MAINTENANCE'
+
+            elif state == 'DOWN_SCALE':
+                # scale down 2 instances, that is VCPUS equaling to a single
+                # compute node
+                self.scale_instances(-2)
+                reply['instance_ids'] = self.get_instance_ids()
+                reply_state = 'ACK_DOWN_SCALE'
+
+            elif state == 'MAINTENANCE_COMPLETE':
+                # possibly need to upscale
+                number_of_instances = self.number_of_instances()
+                if self.orig_number_of_instances > number_of_instances:
+                    scale_instances = (self.orig_number_of_instances -
+                                       number_of_instances)
+                    self.scale_instances(scale_instances)
+                reply_state = 'ACK_MAINTENANCE_COMPLETE'
+
+            elif state == 'PREPARE_MAINTENANCE':
+                if "MIGRATE" not in payload['allowed_actions']:
+                    raise Exception('MIGRATE not supported')
+
+                instance_ids = (self.get_session_instance_ids(
+                                payload['instance_ids'],
+                                payload['session_id']))
+                self.log.info('sample app manager got instances: %s' %
+                              instance_ids)
+                instance_actions = dict()
+                for instance_id in instance_ids:
+                    instance_actions[instance_id] = "MIGRATE"
+                    if instance_id == self.active_instance_id:
+                        self.switch_over_ha_instance()
+                reply['instance_actions'] = instance_actions
+                reply_state = 'ACK_PREPARE_MAINTENANCE'
+
+            elif state == 'PLANNED_MAINTENANCE':
+                if "MIGRATE" not in payload['allowed_actions']:
+                    raise Exception('MIGRATE not supported')
+
+                instance_ids = (self.get_session_instance_ids(
+                                payload['instance_ids'],
+                                payload['session_id']))
+                self.log.info('sample app manager got instances: %s' %
+                              instance_ids)
+                instance_actions = dict()
+                for instance_id in instance_ids:
+                    instance_actions[instance_id] = "MIGRATE"
+                    if instance_id == self.active_instance_id:
+                        self.switch_over_ha_instance()
+                reply['instance_actions'] = instance_actions
+                reply_state = 'ACK_PLANNED_MAINTENANCE'
+
+            elif state == 'INSTANCE_ACTION_DONE':
+                # informational only, no reply is sent
+                self.log.info('%s' % payload['instance_ids'])
+
+            else:
+                raise Exception('sample app manager received event with'
+                                ' unknown state %s' % state)
+
+            if reply_state:
+                reply['session_id'] = payload['session_id']
+                reply['state'] = reply_state
+                url = payload['reply_url']
+                self.log.info('sample app manager reply: %s' % reply)
+                requests.put(url, data=json.dumps(reply), headers=self.headers)
+
+            return 'OK'
+
+        @app.route('/shutdown', methods=['POST'])
+        def shutdown():
+            self.log.info('shutdown app manager server at %s' % time.time())
+            func = request.environ.get('werkzeug.server.shutdown')
+            if func is None:
+                raise RuntimeError('Not running with the Werkzeug Server')
+            func()
+            return 'app manager shutting down...'
+
+        app.run(host="0.0.0.0", port=self.port)
diff --git a/doctor_tests/config.py b/doctor_tests/config.py
index dc05c0d8..cea1f0c9 100644
--- a/doctor_tests/config.py
+++ b/doctor_tests/config.py
@@ -11,6 +11,8 @@ import itertools
from oslo_config import cfg
from doctor_tests import alarm
+from doctor_tests import admin_tool
+from doctor_tests import app_manager
from doctor_tests import consumer
from doctor_tests import image
from doctor_tests import instance
@@ -30,6 +32,8 @@ def list_opts():
('monitor', monitor.OPTS),
('inspector', inspector.OPTS),
('consumer', consumer.OPTS),
+ ('admin_tool', admin_tool.OPTS),
+ ('app_manager', app_manager.OPTS),
('DEFAULT', itertools.chain(
os_clients.OPTS,
image.OPTS,
diff --git a/doctor_tests/consumer/__init__.py b/doctor_tests/consumer/__init__.py
index 2c66a547..e5a36506 100644
--- a/doctor_tests/consumer/__init__.py
+++ b/doctor_tests/consumer/__init__.py
@@ -21,7 +21,7 @@ OPTS = [
help='the ip of consumer',
required=True),
cfg.IntOpt('port',
- default='12346',
+ default=12346,
help='the port of doctor consumer',
required=True),
]
diff --git a/doctor_tests/inspector/sample.py b/doctor_tests/inspector/sample.py
index 7742373d..a55a12b7 100644
--- a/doctor_tests/inspector/sample.py
+++ b/doctor_tests/inspector/sample.py
@@ -13,6 +13,7 @@ import json
import time
from threading import Thread
import requests
+import yaml
from doctor_tests.common import utils
from doctor_tests.identity_auth import get_identity_auth
@@ -105,6 +106,39 @@ class SampleInspector(BaseInspector):
if self.conf.inspector.update_neutron_port_dp_status:
thr3.join()
+    def _alarm_data_decoder(self, data):
+        """Turn a trait string that looks like a list or dict into one.
+
+        Strings without '[' or '{' are returned unchanged.
+        """
+        if "[" in data or "{" in data:
+            # string to list or dict removing unicode
+            # SECURITY: the data comes from the HTTP request, so it is parsed
+            # with safe_load; plain yaml.load can construct arbitrary objects
+            # and needs an explicit Loader in recent PyYAML releases.
+            data = yaml.safe_load(data.replace("u'", "'"))
+        return data
+
+    def _alarm_traits_decoder(self, data):
+        """Return the alarm's event traits as a {name: decoded value} dict.
+
+        Element 0 of each trait is used as the name and element 2 as the
+        value, which is passed through ``_alarm_data_decoder``.
+        """
+        decoded = dict()
+        for trait in data['reason_data']['event']['traits']:
+            name = str(trait[0])
+            decoded[name] = self._alarm_data_decoder(str(trait[2]))
+        return decoded
+
+    def maintenance(self, data):
+        """Handle a maintenance alarm received by the inspector.
+
+        Decodes the alarm traits and logs whether automatic fault management
+        is being disabled (state IN_MAINTENANCE) or enabled (state
+        MAINTENANCE_COMPLETE) for the host named in the alarm.
+
+        :raises Exception: if the traits cannot be decoded or the state is
+            neither of the two handled values.
+        """
+        try:
+            payload = self._alarm_traits_decoder(data)
+        except Exception:
+            # log the raw traits so the undecodable value can be seen
+            payload = ({t[0]: t[2] for t in
+                        data['reason_data']['event']['traits']})
+            self.log.error('cannot parse alarm data: %s' % payload)
+            raise Exception('sample inspector cannot parse alarm.'
+                            'Possibly trait data over 256 char')
+        self.log.info('sample inspector received data = %s' % payload)
+
+        state = payload['state']
+        host = payload['host']
+
+        if state == 'IN_MAINTENANCE':
+            self.log.info("sample inspector: disable %s automatic fault "
+                          "management" % host)
+        elif state == 'MAINTENANCE_COMPLETE':
+            self.log.info("sample inspector: enable %s automatic fault "
+                          "management" % host)
+        else:
+            # a bare string cannot be raised; wrap it in an exception
+            raise Exception("sample inspector couldn't handle state: %s"
+                            % state)
+
@utils.run_async
def _disable_compute_host(self, hostname):
self.nova.services.force_down(hostname, 'nova-compute', True)
@@ -173,6 +207,11 @@ class InspectorApp(Thread):
self.inspector.handle_events(events)
return "OK"
+ @app.route('/maintenance', methods=['POST'])
+ def maintenance():
+ # Pass the JSON body of the POSTed maintenance alarm to the inspector.
+ self.inspector.maintenance(request.json)
+ return "OK"
+
@app.route('/events/shutdown', methods=['POST'])
def shutdown():
self.log.info('shutdown inspector app server at %s' % time.time())
diff --git a/doctor_tests/installer/__init__.py b/doctor_tests/installer/__init__.py
index 31fce754..ee44018c 100644
--- a/doctor_tests/installer/__init__.py
+++ b/doctor_tests/installer/__init__.py
@@ -24,6 +24,10 @@ OPTS = [
default='root',
help='the user name for login installer server',
required=True),
+ cfg.StrOpt('key_file',
+ default=os.environ.get('SSH_KEY', None),
+ help='the key for user to login installer server',
+ required=False),
]
diff --git a/doctor_tests/installer/apex.py b/doctor_tests/installer/apex.py
index 1ce3eb65..bfa72d32 100644
--- a/doctor_tests/installer/apex.py
+++ b/doctor_tests/installer/apex.py
@@ -6,29 +6,36 @@
# which accompanies this distribution, and is available at
# http://www.apache.org/licenses/LICENSE-2.0
##############################################################################
+import re
+import time
+
from doctor_tests.common.utils import SSHClient
from doctor_tests.installer.base import BaseInstaller
class ApexInstaller(BaseInstaller):
node_user_name = 'heat-admin'
- cm_set_script = 'set_ceilometer.py'
- cm_restore_script = 'restore_ceilometer.py'
+ cm_set_script = 'set_config.py'
+ cm_set_compute_script = 'set_compute_config.py'
+ cm_restore_script = 'restore_config.py'
+ cm_restore_compute_script = 'restore_compute_config.py'
def __init__(self, conf, log):
super(ApexInstaller, self).__init__(conf, log)
self.client = SSHClient(self.conf.installer.ip,
self.conf.installer.username,
+ key_filename=self.conf.installer.key_file,
look_for_keys=True)
self.key_file = None
self.controllers = list()
+ self.computes = list()
self.controller_clients = list()
+ self.compute_clients = list()
def setup(self):
self.log.info('Setup Apex installer start......')
-
self.key_file = self.get_ssh_key_from_installer()
- self.controllers = self.get_controller_ips()
+ self._get_and_set_ips()
self.create_flavor()
self.set_apply_patches()
self.setup_stunnel()
@@ -42,16 +49,20 @@ class ApexInstaller(BaseInstaller):
key_path = '/home/stack/.ssh/id_rsa'
return self._get_ssh_key(self.client, key_path)
- def get_controller_ips(self):
- self.log.info('Get controller ips from Apex installer......')
-
- command = "source stackrc; " \
- "nova list | grep ' overcloud-controller-[0-9] ' " \
- "| sed -e 's/^.*ctlplane=//' |awk '{print $1}'"
- controllers = self._run_cmd_remote(self.client, command)
- self.log.info('Get controller_ips:%s from Apex installer'
- % controllers)
- return controllers
+    def _get_and_set_ips(self):
+        """Collect controller and compute IPs from the installer's nova list.
+
+        Each 'overcloud-' line of ``nova list`` has the text following
+        'ctlplane=' (up to the next space) taken as its IP, which is appended
+        to ``self.controllers`` or ``self.computes`` depending on the node
+        name found in the line.
+        """
+        self.log.info('Get controller and compute ips from Apex installer'
+                      '......')
+
+        nova_lines = self._run_cmd_remote(
+            self.client, "source stackrc; nova list | grep ' overcloud-'")
+        for entry in nova_lines:
+            after_key = entry.split('ctlplane=', 1)[1]
+            address = after_key.split(" ", 1)[0]
+            if 'overcloud-controller-' in entry:
+                target = self.controllers
+            elif 'overcloud-novacompute-' in entry:
+                target = self.computes
+            else:
+                # neither a controller nor a compute node
+                continue
+            target.append(address)
+        self.log.info('controller_ips:%s' % self.controllers)
+        self.log.info('compute_ips:%s' % self.computes)
def get_host_ip_from_hostname(self, hostname):
self.log.info('Get host ip by hostname=%s from Apex installer......'
@@ -62,12 +73,31 @@ class ApexInstaller(BaseInstaller):
host_ips = self._run_cmd_remote(self.client, command)
return host_ips[0]
+    def get_transport_url(self):
+        """Return nova's transport_url with the host replaced by an IP.
+
+        Greps the ``transport_url`` line from /etc/nova/nova.conf on the
+        first controller over SSH and substitutes that controller's IP for
+        the text between '@' and the last ':' of the value.
+
+        :raises Exception: if the remote command returns a non-zero status.
+        """
+        client = SSHClient(self.controllers[0], self.node_user_name,
+                           key_filename=self.key_file)
+
+        command = 'sudo grep "^transport_url" /etc/nova/nova.conf'
+        ret, url = client.ssh(command)
+        if ret:
+            raise Exception('Exec command to get transport_url from '
+                            'controller(%s) in Apex installer failed, '
+                            'ret=%s, output=%s'
+                            % (self.controllers[0], ret, url))
+        # need to use ip instead of hostname
+        # strip() guards against blanks around '=' or a trailing newline in
+        # the grepped line; whether the SSH helper already strips lines
+        # cannot be seen here.
+        ret = (re.sub("@.*:", "@%s:" % self.controllers[0],
+                      url[0].split("=", 1)[1].strip()))
+        self.log.debug('get_transport_url %s' % ret)
+        return ret
+
def set_apply_patches(self):
self.log.info('Set apply patches start......')
restart_cm_cmd = 'sudo systemctl restart ' \
'openstack-ceilometer-notification.service'
+ if self.conf.test_case != 'fault_management':
+ restart_cm_cmd += ' openstack-nova-scheduler.service'
+
for node_ip in self.controllers:
client = SSHClient(node_ip, self.node_user_name,
key_filename=self.key_file)
@@ -76,13 +106,38 @@ class ApexInstaller(BaseInstaller):
restart_cm_cmd,
self.cm_set_script)
+ if self.conf.test_case != 'fault_management':
+ restart_cm_cmd = 'sudo systemctl restart ' \
+ 'openstack-nova-compute.service'
+ for node_ip in self.computes:
+ client = SSHClient(node_ip, self.node_user_name,
+ key_filename=self.key_file)
+ self.compute_clients.append(client)
+ self._run_apply_patches(client,
+ restart_cm_cmd,
+ self.cm_set_compute_script)
+
+ if self.conf.test_case != 'fault_management':
+ time.sleep(10)
+
def restore_apply_patches(self):
self.log.info('restore apply patches start......')
restart_cm_cmd = 'sudo systemctl restart ' \
'openstack-ceilometer-notification.service'
+ if self.conf.test_case != 'fault_management':
+ restart_cm_cmd += ' openstack-nova-scheduler.service'
+
for client in self.controller_clients:
self._run_apply_patches(client,
restart_cm_cmd,
self.cm_restore_script)
+
+ if self.conf.test_case != 'fault_management':
+ restart_cm_cmd = 'sudo systemctl restart ' \
+ 'openstack-nova-compute.service'
+ for client in self.compute_clients:
+ self._run_apply_patches(client,
+ restart_cm_cmd,
+ self.cm_restore_compute_script)
diff --git a/doctor_tests/installer/base.py b/doctor_tests/installer/base.py
index 76bbeb1e..4eed3f29 100644
--- a/doctor_tests/installer/base.py
+++ b/doctor_tests/installer/base.py
@@ -58,22 +58,33 @@ class BaseInstaller(object):
def setup_stunnel(self):
self.log.info('Setup ssh stunnel in %s installer......'
% self.conf.installer.type)
+ tunnels = [self.conf.consumer.port]
+ if self.conf.test_case == 'maintenance':
+ tunnel_uptime = 1200
+ tunnels += [self.conf.app_manager.port, self.conf.inspector.port]
+ elif self.conf.test_case == 'all':
+ tunnel_uptime = 1800
+ tunnels += [self.conf.app_manager.port, self.conf.inspector.port]
+ else:
+ tunnel_uptime = 600
for node_ip in self.controllers:
- cmd = ("ssh -o UserKnownHostsFile=/dev/null"
- " -o StrictHostKeyChecking=no"
- " -i %s %s@%s -R %s:localhost:%s"
- " sleep 600 > ssh_tunnel.%s.log"
- " 2>&1 < /dev/null &"
- % (self.key_file,
- self.node_user_name,
- node_ip,
- self.conf.consumer.port,
- self.conf.consumer.port,
- node_ip))
- server = subprocess.Popen(cmd, shell=True)
- self.servers.append(server)
- server.communicate()
+ for port in tunnels:
+ self.log.info('tunnel for port %s' % port)
+ cmd = ("ssh -o UserKnownHostsFile=/dev/null"
+ " -o StrictHostKeyChecking=no"
+ " -i %s %s@%s -R %s:localhost:%s"
+ " sleep %s > ssh_tunnel.%s"
+ " 2>&1 < /dev/null "
+ % (self.key_file,
+ self.node_user_name,
+ node_ip,
+ port,
+ port,
+ tunnel_uptime,
+ node_ip))
+ server = subprocess.Popen('exec ' + cmd, shell=True)
+ self.servers.append(server)
def _get_ssh_key(self, client, key_path):
self.log.info('Get SSH keys from %s installer......'
diff --git a/doctor_tests/installer/common/restore_compute_config.py b/doctor_tests/installer/common/restore_compute_config.py
new file mode 100644
index 00000000..0971d12b
--- /dev/null
+++ b/doctor_tests/installer/common/restore_compute_config.py
@@ -0,0 +1,25 @@
+##############################################################################
+# Copyright (c) 2018 Nokia Corporation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+import os
+import shutil
+
+
+def restore_cpu_allocation_ratio():
+    """Put /etc/nova/nova.conf back from its backup and delete the backup.
+
+    Only prints a note when the backup file does not exist.
+    """
+    nova_file_bak = '/etc/nova/nova.bak'
+    nova_file = '/etc/nova/nova.conf'
+
+    if os.path.isfile(nova_file_bak):
+        print('restore: %s' % nova_file)
+        shutil.copyfile(nova_file_bak, nova_file)
+        os.remove(nova_file_bak)
+    else:
+        print('Bak_file:%s does not exist.' % nova_file_bak)
+
+restore_cpu_allocation_ratio()
diff --git a/doctor_tests/installer/common/restore_ceilometer.py b/doctor_tests/installer/common/restore_config.py
index d25b9ede..c1f919c1 100644
--- a/doctor_tests/installer/common/restore_ceilometer.py
+++ b/doctor_tests/installer/common/restore_config.py
@@ -24,4 +24,32 @@ def restore_ep_config():
return
+def restore_ed_config():
+    """Put event_definitions.yaml back from its backup and delete the backup.
+
+    Only prints a note when the backup file does not exist.
+    """
+    ed_file_bak = '/etc/ceilometer/event_definitions.bak'
+    ed_file = '/etc/ceilometer/event_definitions.yaml'
+
+    if os.path.isfile(ed_file_bak):
+        print('restore: %s' % ed_file)
+        shutil.copyfile(ed_file_bak, ed_file)
+        os.remove(ed_file_bak)
+    else:
+        print("Bak_file doesn't exist: %s." % ed_file_bak)
+
+
+def restore_cpu_allocation_ratio():
+    """Put /etc/nova/nova.conf back from its backup and delete the backup.
+
+    Only prints a note when the backup file does not exist.
+    """
+    nova_file_bak = '/etc/nova/nova.bak'
+    nova_file = '/etc/nova/nova.conf'
+
+    if os.path.isfile(nova_file_bak):
+        print('restore: %s' % nova_file)
+        shutil.copyfile(nova_file_bak, nova_file)
+        os.remove(nova_file_bak)
+    else:
+        print('Bak_file:%s does not exist.' % nova_file_bak)
+
restore_ep_config()
+restore_ed_config()
+restore_cpu_allocation_ratio()
diff --git a/doctor_tests/installer/common/set_ceilometer.py b/doctor_tests/installer/common/set_ceilometer.py
deleted file mode 100644
index 4050aaef..00000000
--- a/doctor_tests/installer/common/set_ceilometer.py
+++ /dev/null
@@ -1,45 +0,0 @@
-##############################################################################
-# Copyright (c) 2017 ZTE Corporation and others.
-#
-# All rights reserved. This program and the accompanying materials
-# are made available under the terms of the Apache License, Version 2.0
-# which accompanies this distribution, and is available at
-# http://www.apache.org/licenses/LICENSE-2.0
-##############################################################################
-import os
-import shutil
-import yaml
-
-ep_file = '/etc/ceilometer/event_pipeline.yaml'
-ep_file_bak = '/etc/ceilometer/event_pipeline.yaml.bak'
-event_notifier_topic = 'notifier://?topic=alarm.all'
-
-
-def set_notifier_topic():
- config_modified = False
-
- if not os.path.isfile(ep_file):
- raise Exception("File doesn't exist: %s." % ep_file)
-
- with open(ep_file, 'r') as file:
- config = yaml.safe_load(file)
-
- sinks = config['sinks']
- for sink in sinks:
- if sink['name'] == 'event_sink':
- publishers = sink['publishers']
- if event_notifier_topic not in publishers:
- print('Add event notifier in ceilometer')
- publishers.append(event_notifier_topic)
- config_modified = True
- else:
- print('NOTE: event notifier is configured'
- 'in ceilometer as we needed')
-
- if config_modified:
- shutil.copyfile(ep_file, ep_file_bak)
- with open(ep_file, 'w+') as file:
- file.write(yaml.safe_dump(config))
-
-
-set_notifier_topic()
diff --git a/doctor_tests/installer/common/set_compute_config.py b/doctor_tests/installer/common/set_compute_config.py
new file mode 100644
index 00000000..07db1e16
--- /dev/null
+++ b/doctor_tests/installer/common/set_compute_config.py
@@ -0,0 +1,48 @@
+##############################################################################
+# Copyright (c) 2018 Nokia Corporation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+import os
+import shutil
+
+
+def set_cpu_allocation_ratio(nova_file='/etc/nova/nova.conf',
+                             nova_file_bak='/etc/nova/nova.bak'):
+    """Make sure the nova configuration has ``cpu_allocation_ratio=1.0``.
+
+    An uncommented ``cpu_allocation_ratio`` line with another value is
+    rewritten; when no uncommented line exists the option is added right
+    after the ``[DEFAULT]`` line. The file is copied to ``nova_file_bak``
+    before it is modified and left untouched when no change is needed.
+
+    :param nova_file: configuration file to modify
+    :param nova_file_bak: where the backup copy is written
+    :raises Exception: if ``nova_file`` does not exist
+    """
+    if not os.path.isfile(nova_file):
+        raise Exception("File doesn't exist: %s." % nova_file)
+    # TODO (tojuvone): Unfortunately ConfigParser did not produce working conf
+    with open(nova_file) as fcheck:
+        conf_lines = fcheck.readlines()
+
+    # Set up front so a file that never mentions the option is treated as
+    # "not found" and gets the option added.
+    found = False
+    change = False
+    for car in conf_lines:
+        # commented-out lines start with '#' and never match
+        if car.startswith('cpu_allocation_ratio'):
+            found = True
+            # compare the whole value; a substring test accepts e.g. 11.0
+            if car.partition('=')[2].strip() != '1.0':
+                change = True
+
+    if found and not change:
+        return
+
+    # need to add or change
+    shutil.copyfile(nova_file, nova_file_bak)
+    with open(nova_file_bak) as fin, open(nova_file, "wt") as fout:
+        for line in fin:
+            if change and line.startswith("cpu_allocation_ratio"):
+                # keep the newline so the next option stays on its own line
+                line = "cpu_allocation_ratio=1.0\n"
+            if not found and line.startswith("[DEFAULT]"):
+                line = line.rstrip('\n') + "\ncpu_allocation_ratio=1.0\n"
+            fout.write(line)
+
+set_cpu_allocation_ratio()
diff --git a/doctor_tests/installer/common/set_config.py b/doctor_tests/installer/common/set_config.py
new file mode 100644
index 00000000..42465247
--- /dev/null
+++ b/doctor_tests/installer/common/set_config.py
@@ -0,0 +1,139 @@
+##############################################################################
+# Copyright (c) 2017 ZTE Corporation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+import os
+import shutil
+import yaml
+
+ep_file = '/etc/ceilometer/event_pipeline.yaml'
+ep_file_bak = '/etc/ceilometer/event_pipeline.yaml.bak'
+event_notifier_topic = 'notifier://?topic=alarm.all'
+
+
+def set_notifier_topic():
+    """Add the alarm notifier publisher to ceilometer's event_sink.
+
+    Loads the event pipeline file, appends ``event_notifier_topic`` to the
+    publishers of every sink named 'event_sink' that lacks it, and writes
+    the file back (after copying it to the backup path) if anything was
+    added. Raises Exception when the pipeline file does not exist.
+    """
+    if not os.path.isfile(ep_file):
+        raise Exception("File doesn't exist: %s." % ep_file)
+
+    with open(ep_file, 'r') as ep_stream:
+        pipeline = yaml.safe_load(ep_stream)
+
+    needs_write = False
+    for sink in pipeline['sinks']:
+        if sink['name'] != 'event_sink':
+            continue
+        sink_publishers = sink['publishers']
+        if event_notifier_topic in sink_publishers:
+            print('NOTE: event notifier is configured'
+                  'in ceilometer as we needed')
+        else:
+            print('Add event notifier in ceilometer')
+            sink_publishers.append(event_notifier_topic)
+            needs_write = True
+
+    if needs_write:
+        shutil.copyfile(ep_file, ep_file_bak)
+        with open(ep_file, 'w+') as ep_stream:
+            ep_stream.write(yaml.safe_dump(pipeline))
+
+
+def set_maintenance_event_definitions():
+    """Add the maintenance event types to ceilometer's event definitions.
+
+    Appends definitions for 'maintenance.scheduled' and 'maintenance.host'
+    when they are not present yet and, if anything was appended, rewrites
+    the file after copying it to the backup path. Raises Exception when the
+    definitions file does not exist.
+    """
+    ed_file = '/etc/ceilometer/event_definitions.yaml'
+    ed_file_bak = '/etc/ceilometer/event_definitions.bak'
+
+    if not os.path.isfile(ed_file):
+        raise Exception("File doesn't exist: %s." % ed_file)
+
+    with open(ed_file, 'r') as ed_stream:
+        definitions = yaml.safe_load(ed_stream)
+
+    # event types present before anything is appended
+    known_types = [entry['event_type'] for entry in definitions]
+
+    mscheduled = {
+        'event_type': 'maintenance.scheduled',
+        'traits': {
+            'allowed_actions': {'fields': 'payload.allowed_actions'},
+            'instance_ids': {'fields': 'payload.instance_ids'},
+            'reply_url': {'fields': 'payload.reply_url'},
+            'actions_at': {'fields': 'payload.actions_at',
+                           'type': 'datetime'},
+            'state': {'fields': 'payload.state'},
+            'session_id': {'fields': 'payload.session_id'},
+            'project_id': {'fields': 'payload.project_id'},
+            'metadata': {'fields': 'payload.metadata'}
+        }
+    }
+    mhost = {
+        'event_type': 'maintenance.host',
+        'traits': {
+            'host': {'fields': 'payload.host'},
+            'project_id': {'fields': 'payload.project_id'},
+            'state': {'fields': 'payload.state'},
+            'session_id': {'fields': 'payload.session_id'}
+        }
+    }
+
+    # (definition, message when already present, message when added)
+    wanted = [
+        (mscheduled,
+         'NOTE: maintenance.scheduled allready configured',
+         'NOTE: add maintenance.scheduled to event_definitions.yaml'),
+        (mhost,
+         'NOTE: maintenance.host allready configured',
+         'NOTE: add maintenance.host to event_definitions.yaml'),
+    ]
+
+    appended = False
+    for definition, present_note, add_note in wanted:
+        if definition['event_type'] in known_types:
+            print(present_note)
+        else:
+            print(add_note)
+            definitions.append(definition)
+            appended = True
+
+    if appended:
+        shutil.copyfile(ed_file, ed_file_bak)
+        with open(ed_file, 'w+') as ed_stream:
+            ed_stream.write(yaml.safe_dump(definitions))
+
+
+def set_cpu_allocation_ratio(nova_file='/etc/nova/nova.conf',
+                             nova_file_bak='/etc/nova/nova.bak'):
+    """Make sure the nova configuration has ``cpu_allocation_ratio=1.0``.
+
+    An uncommented ``cpu_allocation_ratio`` line with another value is
+    rewritten; when no uncommented line exists the option is added right
+    after the ``[DEFAULT]`` line. The file is copied to ``nova_file_bak``
+    before it is modified and left untouched when no change is needed.
+
+    :param nova_file: configuration file to modify
+    :param nova_file_bak: where the backup copy is written
+    :raises Exception: if ``nova_file`` does not exist
+    """
+    if not os.path.isfile(nova_file):
+        raise Exception("File doesn't exist: %s." % nova_file)
+    # TODO (tojuvone): Unfortunately ConfigParser did not produce working conf
+    with open(nova_file) as fcheck:
+        conf_lines = fcheck.readlines()
+
+    # Set up front so a file that never mentions the option is treated as
+    # "not found" and gets the option added.
+    found = False
+    change = False
+    for car in conf_lines:
+        # commented-out lines start with '#' and never match
+        if car.startswith('cpu_allocation_ratio'):
+            found = True
+            # compare the whole value; a substring test accepts e.g. 11.0
+            if car.partition('=')[2].strip() != '1.0':
+                change = True
+
+    if found and not change:
+        return
+
+    # need to add or change
+    shutil.copyfile(nova_file, nova_file_bak)
+    with open(nova_file_bak) as fin, open(nova_file, "wt") as fout:
+        for line in fin:
+            if change and line.startswith("cpu_allocation_ratio"):
+                # keep the newline so the next option stays on its own line
+                line = "cpu_allocation_ratio=1.0\n"
+            if not found and line.startswith("[DEFAULT]"):
+                line = line.rstrip('\n') + "\ncpu_allocation_ratio=1.0\n"
+            fout.write(line)
+
+set_notifier_topic()
+set_maintenance_event_definitions()
+set_cpu_allocation_ratio()
diff --git a/doctor_tests/installer/mcp.py b/doctor_tests/installer/mcp.py
index 8ba9f000..e7e41dbe 100644
--- a/doctor_tests/installer/mcp.py
+++ b/doctor_tests/installer/mcp.py
@@ -22,7 +22,8 @@ class McpInstaller(BaseInstaller):
self.key_file = self.get_ssh_key_from_installer()
self.client = SSHClient(self.conf.installer.ip,
self.node_user_name,
- key_filename=self.key_file)
+ key_filename=self.key_file,
+ look_for_keys=True)
self.controllers = list()
self.controller_clients = list()
diff --git a/doctor_tests/main.py b/doctor_tests/main.py
index 61facb61..438d8324 100644
--- a/doctor_tests/main.py
+++ b/doctor_tests/main.py
@@ -10,6 +10,7 @@ import os
from os.path import isfile, join
import sys
import time
+from traceback import format_exc
from doctor_tests import config
from doctor_tests.identity_auth import get_identity_auth
@@ -17,8 +18,9 @@ from doctor_tests.identity_auth import get_session
from doctor_tests.image import Image
from doctor_tests.installer import get_installer
import doctor_tests.logger as doctor_log
-from doctor_tests.os_clients import nova_client
from doctor_tests.scenario.fault_management import FaultManagement
+from doctor_tests.os_clients import nova_client
+from doctor_tests.scenario.maintenance import Maintenance
from doctor_tests.user import User
@@ -67,7 +69,7 @@ class DoctorTest(object):
# injecting host failure...
# NOTE (umar) add INTERFACE_NAME logic to host injection
self.fault_management.start()
- time.sleep(10)
+ time.sleep(30)
# verify the test results
# NOTE (umar) copy remote monitor.log file when monitor=collectd
@@ -92,20 +94,42 @@ class DoctorTest(object):
LOG.info('not enough compute nodes, skipping doctor '
'maintenance test')
return
+ elif self.conf.installer.type != 'apex':
+ LOG.info('not supported installer, skipping doctor '
+ 'maintenance test')
+ return
try:
LOG.info('doctor maintenance test starting.......')
- # TODO (tojuvone) test setup and actual test
+ trasport_url = self.installer.get_transport_url()
+ maintenance = Maintenance(trasport_url, self.conf, LOG)
+ maintenance.setup_maintenance(self.user)
+
+ # wait for aodh alarms are updated in caches for event evaluator,
+ # sleep time should be larger than event_alarm_cache_ttl
+ # (default 60)
+ LOG.info('wait aodh for 120s.......')
+ time.sleep(120)
+
+ session_id = maintenance.start_maintenance()
+ maintenance.wait_maintenance_complete(session_id)
+
+ LOG.info('doctor maintenance complete.......')
+
except Exception as e:
LOG.error('doctor maintenance test failed, Exception=%s' % e)
+ LOG.error(format_exc())
sys.exit(1)
- # TODO (tojuvone) finally: test case specific cleanup
+ finally:
+ maintenance.cleanup_maintenance()
def run(self):
"""run doctor tests"""
try:
LOG.info('doctor test starting.......')
+
# prepare common test env
self.setup()
+
if self.conf.test_case == 'all':
self.test_fault_management()
self.test_maintenance()
diff --git a/doctor_tests/maintenance_hot_tpl.yaml b/doctor_tests/maintenance_hot_tpl.yaml
new file mode 100644
index 00000000..e2e47023
--- /dev/null
+++ b/doctor_tests/maintenance_hot_tpl.yaml
@@ -0,0 +1,119 @@
+---
+# Heat template for the Doctor maintenance test case: a private network
+# with a router, one flavor, a group of HA servers, a group of non-HA
+# servers, a floating IP and two Aodh event alarms.
+heat_template_version: 2017-02-24
+description: Doctor Maintenance test case
+
+# Template inputs. doctor_tests/scenario/maintenance.py passes ext_net,
+# flavor_vcpus, maint_image, nonha_intances and ha_intances; the alarm
+# URLs keep the defaults below unless overridden elsewhere.
+# NOTE(review): 'ha_intances', 'nonha_intances' and 'inpector_alarm_url'
+# are misspelled but referenced by these exact names; a rename has to be
+# done in every place at once.
+parameters:
+ ext_net:
+ type: string
+ default: external
+ flavor_vcpus:
+ type: number
+ default: 24
+ maint_image:
+ type: string
+ default: cirros
+ ha_intances:
+ type: number
+ default: 2
+ nonha_intances:
+ type: number
+ default: 4
+ app_manager_alarm_url:
+ type: string
+ default: http://0.0.0.0:12348/maintenance
+ inpector_alarm_url:
+ type: string
+ default: http://0.0.0.0:12345/maintenance
+
+
+resources:
+# private network, its subnet, and a router with a gateway on ext_net
+ int_net:
+ type: OS::Neutron::Net
+
+ int_subnet:
+ type: OS::Neutron::Subnet
+ properties:
+ network_id: {get_resource: int_net}
+ cidr: "9.9.9.0/24"
+ dns_nameservers: ["8.8.8.8"]
+ ip_version: 4
+
+ int_router:
+ type: OS::Neutron::Router
+ properties:
+ external_gateway_info: {network: {get_param: ext_net}}
+
+ int_interface:
+ type: OS::Neutron::RouterInterface
+ properties:
+ router_id: {get_resource: int_router}
+ subnet: {get_resource: int_subnet}
+
+# flavor shared by all test servers; vcpus come from flavor_vcpus
+ maint_instance_flavor:
+ type: OS::Nova::Flavor
+ properties:
+ name: doctor_maint_flavor
+ ram: 512
+ vcpus: {get_param: flavor_vcpus}
+ disk: 1
+
+# anti-affinity server group used by the HA servers below
+ ha_app_svrgrp:
+ type: OS::Nova::ServerGroup
+ properties:
+ name: doctor_ha_app_group
+ policies: ['anti-affinity']
+
+# floating IP taken from the ext_net pool
+ floating_ip:
+ type: OS::Nova::FloatingIP
+ properties:
+ pool: {get_param: ext_net}
+
+# ha_intances servers named doctor_ha_app_<index>, scheduled with the
+# ha_app_svrgrp group hint
+ multi_ha_instances:
+ type: OS::Heat::ResourceGroup
+ properties:
+ count: {get_param: ha_intances}
+ resource_def:
+ type: OS::Nova::Server
+ properties:
+ name: doctor_ha_app_%index%
+ flavor: {get_resource: maint_instance_flavor}
+ image: {get_param: maint_image}
+ networks:
+ - network: {get_resource: int_net}
+ scheduler_hints:
+ group: {get_resource: ha_app_svrgrp}
+
+# nonha_intances servers named doctor_nonha_app_<index>, no group hint
+ multi_nonha_instances:
+ type: OS::Heat::ResourceGroup
+ properties:
+ count: {get_param: nonha_intances}
+ resource_def:
+ type: OS::Nova::Server
+ properties:
+ name: doctor_nonha_app_%index%
+ flavor: {get_resource: maint_instance_flavor}
+ image: {get_param: maint_image}
+ networks:
+ - network: {get_resource: int_net}
+
+# attach the floating IP to the first HA server (resource.0)
+ association:
+ type: OS::Nova::FloatingIPAssociation
+ properties:
+ floating_ip: {get_resource: floating_ip}
+ server_id: {get_attr: [multi_ha_instances, resource.0]}
+
+# event alarm on "maintenance.scheduled" that calls app_manager_alarm_url
+ app_manager_alarm:
+ type: OS::Aodh::EventAlarm
+ properties:
+ alarm_actions:
+ - {get_param: app_manager_alarm_url}
+ event_type: "maintenance.scheduled"
+ repeat_actions: true
+
+# event alarm on "maintenance.host" that calls inpector_alarm_url
+ inpector_alarm:
+ type: OS::Aodh::EventAlarm
+ properties:
+ alarm_actions:
+ - {get_param: inpector_alarm_url}
+ event_type: "maintenance.host"
+ repeat_actions: true
diff --git a/doctor_tests/os_clients.py b/doctor_tests/os_clients.py
index 640281df..7ab4e9b4 100644
--- a/doctor_tests/os_clients.py
+++ b/doctor_tests/os_clients.py
@@ -11,6 +11,7 @@ from oslo_config import cfg
import aodhclient.client as aodhclient
from congressclient.v1 import client as congressclient
import glanceclient.client as glanceclient
+import heatclient.client as heatclient
from keystoneclient import client as ks_client
from neutronclient.v2_0 import client as neutronclient
import novaclient.client as novaclient
@@ -23,6 +24,7 @@ OPTS = [
cfg.StrOpt('aodh_version', default='2', help='aodh version'),
cfg.StrOpt('vitrage_version', default='1', help='vitrage version'),
cfg.StrOpt('keystone_version', default='v3', help='keystone version'),
+ cfg.StrOpt('heat_version', default='1', help='heat version'),
]
@@ -31,6 +33,11 @@ def glance_client(version, session):
session=session)
+def heat_client(version, session):
+    """build a heat client
+
+    :param version: heat API version handed to heatclient.Client
+    :param session: session handed to heatclient.Client
+    :returns: the object returned by heatclient.Client
+    """
+    client = heatclient.Client(version=version, session=session)
+    return client
+
+
def keystone_client(version, session):
return ks_client.Client(version=version,
session=session)
diff --git a/doctor_tests/scenario/fault_management.py b/doctor_tests/scenario/fault_management.py
index b1fe8099..f8f53e8e 100644
--- a/doctor_tests/scenario/fault_management.py
+++ b/doctor_tests/scenario/fault_management.py
@@ -32,7 +32,7 @@ dev=$(sudo ip a | awk '/ {compute_ip}\//{{print $NF}}')
sleep 1
sudo ip link set $dev down
echo "doctor set link down at" $(date "+%s.%N")
-sleep 10
+sleep 30
sudo ip link set $dev up
sleep 1
"""
diff --git a/doctor_tests/scenario/maintenance.py b/doctor_tests/scenario/maintenance.py
new file mode 100644
index 00000000..54244d79
--- /dev/null
+++ b/doctor_tests/scenario/maintenance.py
@@ -0,0 +1,192 @@
+##############################################################################
+# Copyright (c) 2018 Nokia Corporation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+import datetime
+import json
+import requests
+import time
+
+from doctor_tests.admin_tool import get_admin_tool
+from doctor_tests.app_manager import get_app_manager
+from doctor_tests.common.utils import get_doctor_test_root_dir
+from doctor_tests.identity_auth import get_identity_auth
+from doctor_tests.identity_auth import get_session
+from doctor_tests.inspector import get_inspector
+from doctor_tests.os_clients import keystone_client
+from doctor_tests.os_clients import neutron_client
+from doctor_tests.os_clients import nova_client
+from doctor_tests.stack import Stack
+
+
+class Maintenance(object):
+    """doctor maintenance scenario
+
+    Builds the test stack, starts the admin tool, the app manager and the
+    inspector, and drives one maintenance session through the admin tool
+    HTTP interface.
+    """
+
+    def __init__(self, trasport_url, conf, log):
+        """create the OpenStack clients and the scenario components
+
+        :param trasport_url: handed to get_admin_tool(); the spelling is
+            kept so existing callers keep working
+        :param conf: doctor test configuration
+        :param log: logger shared with the stack and the components
+        """
+        self.conf = conf
+        self.log = log
+        self.keystone = keystone_client(
+            self.conf.keystone_version, get_session())
+        self.nova = nova_client(conf.nova_version, get_session())
+        auth = get_identity_auth(project=self.conf.doctor_project)
+        self.neutron = neutron_client(get_session(auth=auth))
+        self.stack = Stack(self.conf, self.log)
+        self.admin_tool = get_admin_tool(trasport_url, self.conf, self.log)
+        self.app_manager = get_app_manager(self.stack, self.conf, self.log)
+        self.inspector = get_inspector(self.conf, self.log)
+
+    def get_external_network(self):
+        """return the name of the first network flagged router:external
+
+        :raises Exception: when neutron lists no external network
+        """
+        for network in self.neutron.list_networks()['networks']:
+            if network['router:external']:
+                return network['name']
+        raise Exception("external network not defined")
+
+    def _check_hypervisors(self):
+        """verify that all hypervisors are unused and equally sized
+
+        :returns: tuple (number of hypervisors, vcpus of one hypervisor)
+        :raises Exception: when nova lists no hypervisor, or a hypervisor
+            has fewer than 2 vcpus, has vcpus in use, or has another vcpu
+            count than the previous one
+        """
+        hvisors = self.nova.hypervisors.list(detailed=True)
+        self.log.info('checking hypervisors.......')
+        if not hvisors:
+            # without this guard 'vcpus' below would be unbound
+            raise Exception('no hypervisors found')
+        prev_vcpus = 0
+        prev_hostname = ''
+        for hvisor in hvisors:
+            vcpus = hvisor.vcpus
+            vcpus_used = hvisor.vcpus_used
+            hostname = hvisor.hypervisor_hostname
+            if vcpus < 2:
+                raise Exception('not enough vcpus (%d) on %s' %
+                                (vcpus, hostname))
+            if vcpus_used > 0:
+                raise Exception('%d vcpus used on %s'
+                                % (vcpus_used, hostname))
+            if prev_vcpus != 0 and prev_vcpus != vcpus:
+                raise Exception('%d vcpus on %s does not match to '
+                                '%d on %s'
+                                % (vcpus, hostname,
+                                   prev_vcpus, prev_hostname))
+            prev_vcpus = vcpus
+            prev_hostname = hostname
+        return len(hvisors), vcpus
+
+    def setup_maintenance(self, user):
+        """prepare quota, stack and components for the maintenance test
+
+        :param user: object whose update_quota(instances, cores) is called
+            with the amounts the test needs
+        :raises Exception: when the hypervisors do not fit the test or no
+            external network exists
+        """
+        # each hypervisor needs to have same amount of vcpus and they
+        # need to be free before test
+        compute_nodes, vcpus = self._check_hypervisors()
+
+        # maintenance flavor made so that 2 instances take whole node
+        flavor_vcpus = vcpus // 2
+        amount_actstdby_instances = 2
+        amount_noredundancy_instances = 2 * compute_nodes - 2
+        self.log.info('testing %d computes with %d vcpus each'
+                      % (compute_nodes, vcpus))
+        self.log.info('testing %d actstdby and %d noredundancy instances'
+                      % (amount_actstdby_instances,
+                         amount_noredundancy_instances))
+        max_instances = (amount_actstdby_instances +
+                         amount_noredundancy_instances)
+        max_cores = compute_nodes * vcpus
+
+        user.update_quota(max_instances, max_cores)
+
+        test_dir = get_doctor_test_root_dir()
+        template_file = '{0}/{1}'.format(test_dir, 'maintenance_hot_tpl.yaml')
+        files, template = self.stack.get_hot_tpl(template_file)
+
+        ext_net = self.get_external_network()
+
+        # keys must match the parameter names of the template, including
+        # their spelling
+        parameters = {'ext_net': ext_net,
+                      'flavor_vcpus': flavor_vcpus,
+                      'maint_image': self.conf.image_name,
+                      'nonha_intances': amount_noredundancy_instances,
+                      'ha_intances': amount_actstdby_instances}
+
+        self.log.info('creating maintenance stack.......')
+        self.log.info('parameters: %s' % parameters)
+
+        self.stack.create('doctor_test_maintenance',
+                          template,
+                          parameters=parameters,
+                          files=files)
+
+        self.admin_tool.start()
+        self.app_manager.start()
+        self.inspector.start()
+
+    def _admin_tool_request(self, method, data):
+        """send a JSON request to the admin tool maintenance URL
+
+        :param method: requests function to call (requests.post or
+            requests.get)
+        :param data: dict sent JSON encoded as the request body
+        :returns: the response object
+        :raises Exception: with the response text when the status code is
+            not 200
+        """
+        url = 'http://0.0.0.0:%s/maintenance' % self.conf.admin_tool.port
+        headers = {
+            'Content-Type': 'application/json',
+            'Accept': 'application/json'}
+        ret = method(url, data=json.dumps(data), headers=headers)
+        if ret.status_code != 200:
+            raise Exception(ret.text)
+        return ret
+
+    def start_maintenance(self):
+        """ask the admin tool to maintain every hypervisor nova lists
+
+        :returns: the session_id of the admin tool response
+        :raises Exception: when the admin tool does not answer with 200
+        """
+        self.log.info('start maintenance.......')
+        hvisors = self.nova.hypervisors.list(detailed=True)
+        maintenance_hosts = [hvisor.hypervisor_hostname
+                             for hvisor in hvisors]
+
+        # let's start maintenance 20sec from now, so projects will have
+        # time to ACK to it before that
+        maintenance_at = (datetime.datetime.utcnow() +
+                          datetime.timedelta(seconds=20)
+                          ).strftime('%Y-%m-%d %H:%M:%S')
+        data = {'hosts': maintenance_hosts,
+                'state': 'MAINTENANCE',
+                'maintenance_at': maintenance_at,
+                'metadata': {'openstack_version': 'Pike'}}
+
+        ret = self._admin_tool_request(requests.post, data)
+        return ret.json()['session_id']
+
+    def remove_maintenance_session(self, session_id):
+        """ask the admin tool to remove the given maintenance session
+
+        :raises Exception: when the admin tool does not answer with 200
+        """
+        self.log.info('remove maintenance session %s.......' % session_id)
+
+        data = {'state': 'REMOVE_MAINTENANCE_SESSION',
+                'session_id': session_id}
+        self._admin_tool_request(requests.post, data)
+
+    def get_maintenance_state(self, session_id):
+        """return the state the admin tool reports for the session
+
+        :raises Exception: when the admin tool does not answer with 200
+        """
+        data = {'session_id': session_id}
+        ret = self._admin_tool_request(requests.get, data)
+        return ret.json()['state']
+
+    def wait_maintenance_complete(self, session_id):
+        """wait for the session to finish and remove it on success
+
+        Sleeps 600s first, then polls the state every 10s up to 60 times
+        (20min in total). Polling stops as soon as the session is complete
+        or failed.
+
+        :raises Exception: when the session failed or did not complete in
+            time
+        """
+        state = None
+        time.sleep(600)
+        for _ in range(60):
+            time.sleep(10)
+            state = self.get_maintenance_state(session_id)
+            # a failed session will not change any more, so do not keep
+            # polling until the timeout
+            if state in ('MAINTENANCE_COMPLETE', 'MAINTENANCE_FAILED'):
+                break
+        if state == 'MAINTENANCE_COMPLETE':
+            self.log.info('maintenance %s %s' % (session_id, state))
+            self.remove_maintenance_session(session_id)
+        elif state == 'MAINTENANCE_FAILED':
+            raise Exception('maintenance %s failed' % session_id)
+        else:
+            raise Exception('maintenance %s not completed within 20min, status'
+                            ' %s' % (session_id, state))
+
+    def cleanup_maintenance(self):
+        """stop the components and delete the test stack"""
+        try:
+            self.admin_tool.stop()
+            self.app_manager.stop()
+            self.inspector.stop()
+        finally:
+            # delete the stack even when stopping a component raised
+            self.log.info('stack delete start.......')
+            self.stack.delete()
diff --git a/doctor_tests/stack.py b/doctor_tests/stack.py
new file mode 100644
index 00000000..688c2050
--- /dev/null
+++ b/doctor_tests/stack.py
@@ -0,0 +1,106 @@
+##############################################################################
+# Copyright (c) 2018 Nokia Corporation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+import os
+import time
+
+from heatclient.common.template_utils import get_template_contents
+from heatclient import exc as heat_excecption
+
+from doctor_tests.identity_auth import get_identity_auth
+from doctor_tests.identity_auth import get_session
+from doctor_tests.os_clients import heat_client
+
+
+class Stack(object):
+    """wrapper around the heat client handling one stack at a time"""
+
+    def __init__(self, conf, log):
+        """create a heat client authenticated for conf.doctor_project
+
+        :param conf: doctor test configuration
+        :param log: logger used for stack status messages
+        """
+        self.conf = conf
+        self.log = log
+        auth = get_identity_auth(project=self.conf.doctor_project)
+        self.heat = heat_client(self.conf.heat_version,
+                                get_session(auth=auth))
+        self.stack_name = None
+        self.stack_id = None
+        self.template = None
+        self.parameters = {}
+        self.files = {}
+
+    # standard yaml.load will not work for hot tpl because the date format
+    # in heat_template_version is not a string
+    def get_hot_tpl(self, template_file):
+        """load a HOT template with the heat client template loader
+
+        :param template_file: path of the template file
+        :returns: the result of get_template_contents(); callers in this
+            project unpack it as (files, template)
+        :raises Exception: when template_file is not an existing file
+        """
+        if not os.path.isfile(template_file):
+            raise Exception('File(%s) does not exist' % template_file)
+        return get_template_contents(template_file=template_file)
+
+    def _wait_stack_action_complete(self, action):
+        """poll self.stack_name until ``action`` is no longer in progress
+
+        The stack is read every 2s, at most 150 times (5min).
+
+        :param action: 'CREATE', 'UPDATE' or 'DELETE'
+        :raises Exception: when the action failed, timed out, ended in an
+            unexpected status, or the stack cannot be found (a stack that
+            is not found counts as success for 'DELETE')
+        """
+        action_in_progress = '%s_IN_PROGRESS' % action
+        action_complete = '%s_COMPLETE' % action
+        action_failed = '%s_FAILED' % action
+
+        status = action_in_progress
+        stack_retries = 150
+        while status == action_in_progress and stack_retries > 0:
+            time.sleep(2)
+            try:
+                stack = self.heat.stacks.get(self.stack_name)
+            except heat_excecption.HTTPNotFound:
+                if action == 'DELETE':
+                    # Might happen you never get status as stack deleted
+                    status = action_complete
+                    break
+                raise Exception('unable to get stack')
+            status = stack.stack_status
+            stack_retries = stack_retries - 1
+
+        # check the final status before the retry counter, so a result
+        # that arrives on the last poll is not reported as a timeout
+        if status == action_complete:
+            self.log.info('stack %s %s' % (self.stack_name, status))
+        elif status == action_failed:
+            raise Exception("stack %s failed" % action)
+        elif stack_retries == 0:
+            raise Exception("stack %s not completed within 5min, status:"
+                            " %s" % (action, status))
+        else:
+            self.log.error('stack %s %s' % (self.stack_name, status))
+            raise Exception("stack %s unknown result" % action)
+
+    def wait_stack_delete(self):
+        """wait until the DELETE action has finished"""
+        self._wait_stack_action_complete('DELETE')
+
+    def wait_stack_create(self):
+        """wait until the CREATE action has finished"""
+        self._wait_stack_action_complete('CREATE')
+
+    def wait_stack_update(self):
+        """wait until the UPDATE action has finished"""
+        self._wait_stack_action_complete('UPDATE')
+
+    def create(self, stack_name, template, parameters=None, files=None):
+        """create a stack and wait until it is created
+
+        :param stack_name: name of the new stack
+        :param template: template contents
+        :param parameters: template parameters, empty when None
+        :param files: files referenced by the template, empty when None
+        """
+        # fresh dicts per call instead of shared mutable defaults
+        parameters = {} if parameters is None else parameters
+        files = {} if files is None else files
+        self.stack_name = stack_name
+        self.template = template
+        self.parameters = parameters
+        self.files = files
+        stack = self.heat.stacks.create(stack_name=self.stack_name,
+                                        files=files,
+                                        template=template,
+                                        parameters=parameters)
+        self.stack_id = stack['stack']['id']
+        self.wait_stack_create()
+
+    def update(self, stack_name, stack_id, template, parameters=None,
+               files=None):
+        """update a stack and wait until the update has finished
+
+        :param stack_name: name of the stack to update
+        :param stack_id: id of the stack to update
+        :param template: template contents
+        :param parameters: template parameters, empty when None
+        :param files: files referenced by the template, empty when None
+        """
+        parameters = {} if parameters is None else parameters
+        files = {} if files is None else files
+        # the wait below polls self.stack_name, so track the stack that
+        # is actually updated
+        self.stack_name = stack_name
+        self.stack_id = stack_id
+        self.template = template
+        self.parameters = parameters
+        self.files = files
+        self.heat.stacks.update(stack_name=stack_name,
+                                stack_id=stack_id,
+                                files=files,
+                                template=template,
+                                parameters=parameters)
+        self.wait_stack_update()
+
+    def delete(self):
+        """delete the tracked stack, if any, and wait until it is gone"""
+        if self.stack_id is not None:
+            self.heat.stacks.delete(self.stack_name)
+            self.wait_stack_delete()
+            # nothing left to delete on a repeated call
+            self.stack_id = None
+        else:
+            self.log.info('no stack to delete')
diff --git a/doctor_tests/user.py b/doctor_tests/user.py
index fee3e1fb..29aa004b 100644
--- a/doctor_tests/user.py
+++ b/doctor_tests/user.py
@@ -8,12 +8,12 @@
##############################################################################
import os
+from keystoneclient import exceptions as ks_exceptions
from oslo_config import cfg
from doctor_tests.identity_auth import get_session
from doctor_tests.os_clients import keystone_client
from doctor_tests.os_clients import nova_client
-from keystoneclient import exceptions as ks_exceptions
OPTS = [
@@ -53,10 +53,11 @@ class User(object):
def __init__(self, conf, log):
self.conf = conf
self.log = log
+ self.def_quota = None
+ self.restore_def_quota = False
self.keystone = keystone_client(
self.conf.keystone_version, get_session())
- self.nova = \
- nova_client(conf.nova_version, get_session())
+ self.nova = nova_client(conf.nova_version, get_session())
self.users = {}
self.projects = {}
self.roles = {}
@@ -83,10 +84,9 @@ class User(object):
domain=self.conf.doctor_domain_id)}
if self.conf.doctor_project not in self.projects:
self.log.info('create project......')
- test_project = \
- self.keystone.projects.create(
- self.conf.doctor_project,
- self.conf.doctor_domain_id)
+ test_project = self.keystone.projects.create(
+ self.conf.doctor_project,
+ self.conf.doctor_domain_id)
self.projects[test_project.name] = test_project
else:
self.log.info('project %s already created......'
@@ -151,6 +151,13 @@ class User(object):
self.keystone.roles.grant(role, user=user, project=project)
roles_for_user[role_name] = role
+    def _restore_default_quota(self):
+        """write back the 'default' quota class values kept in def_quota
+
+        Does nothing unless update_quota stored def_quota and set
+        restore_def_quota.
+        """
+        if self.def_quota is None or not self.restore_def_quota:
+            return
+        self.log.info('restore default quota......')
+        saved = self.def_quota
+        self.nova.quota_classes.update('default',
+                                       instances=saved.instances,
+                                       cores=saved.cores)
+
def delete(self):
"""delete the test user, project and role"""
self.log.info('user delete start......')
@@ -159,6 +166,8 @@ class User(object):
user = self.users.get(self.conf.doctor_user)
role = self.roles.get(self.conf.doctor_role)
+ self._restore_default_quota()
+
if project:
if 'admin' in self.roles_for_admin:
self.keystone.roles.revoke(
@@ -177,23 +186,45 @@ class User(object):
self.keystone.projects.delete(project)
self.log.info('user delete end......')
-    def update_quota(self):
-        self.log.info('user quota update start......')
+    def update_quota(self, instances=None, cores=None):
+        """raise the default class and doctor user quotas when needed
+
+        :param instances: wanted instance quota; conf.quota_instances is
+            used when None
+        :param cores: wanted core quota; conf.quota_cores is used when None
+        :raises Exception: when the doctor project or user is not known
+        """
+        self.log.info('quota update start......')
         project = self.projects.get(self.conf.doctor_project)
+
         user = self.users.get(self.conf.doctor_user)
+        if instances is not None:
+            quota_instances = instances
+        else:
+            quota_instances = self.conf.quota_instances
+        if cores is not None:
+            quota_cores = cores
+        else:
+            quota_cores = self.conf.quota_cores
+
         if project and user:
+            # default needs to be at least the same as with doctor_user
+            self.log.info('default quota update start......')
+
+            current_def = self.nova.quota_classes.get('default')
+            if self.def_quota is None:
+                # keep the values seen by the first call only; a later
+                # call would otherwise save values this class has already
+                # raised and _restore_default_quota could not put the
+                # original default back
+                self.def_quota = current_def
+            if quota_instances > current_def.instances:
+                self.restore_def_quota = True
+                self.nova.quota_classes.update('default',
+                                               instances=quota_instances)
+            if quota_cores > current_def.cores:
+                self.restore_def_quota = True
+                self.nova.quota_classes.update('default',
+                                               cores=quota_cores)
+            self.log.info('user quota update start......')
             self.quota = self.nova.quotas.get(project.id,
                                               user_id=user.id)
-            if self.conf.quota_instances > self.quota.instances:
-                self.nova.quotas.update(
-                    project.id,
-                    instances=self.conf.quota_instances,
-                    user_id=user.id)
-            if self.conf.quota_cores > self.quota.cores:
+            if quota_instances > self.quota.instances:
+                self.nova.quotas.update(project.id,
+                                        instances=quota_instances,
+                                        user_id=user.id)
+            if quota_cores > self.quota.cores:
                 self.nova.quotas.update(project.id,
-                                        cores=self.conf.quota_cores,
+                                        cores=quota_cores,
                                         user_id=user.id)
-            self.log.info('user quota update end......')
         else:
             raise Exception('No project or role for update quota')
+        self.log.info('quota update end......')