Diffstat (limited to 'doctor_tests')
-rw-r--r--   doctor_tests/admin_tool/__init__.py          |   8
-rw-r--r--   doctor_tests/admin_tool/sample.py             | 185
-rw-r--r--   doctor_tests/app_manager/sample.py            |   7
-rw-r--r--   doctor_tests/installer/apex.py                |  34
-rw-r--r--   doctor_tests/installer/base.py                |  33
-rw-r--r--   doctor_tests/installer/common/set_config.py   |   1
-rw-r--r--   doctor_tests/scenario/maintenance.py          |  68
-rw-r--r--   doctor_tests/stack.py                         |   2
8 files changed, 197 insertions, 141 deletions
diff --git a/doctor_tests/admin_tool/__init__.py b/doctor_tests/admin_tool/__init__.py
index e8b12817..3417a334 100644
--- a/doctor_tests/admin_tool/__init__.py
+++ b/doctor_tests/admin_tool/__init__.py
@@ -8,16 +8,16 @@
 ##############################################################################
 from oslo_config import cfg
 from oslo_utils import importutils
-
+import os
 
 OPTS = [
     cfg.StrOpt('type',
-               default='sample',
-               choices=['sample'],
+               default=os.environ.get('ADMIN_TOOL_TYPE', 'sample'),
+               choices=['sample', 'fenix'],
                help='the component of doctor admin_tool',
                required=True),
     cfg.StrOpt('ip',
-               default='127.0.0.1',
+               default='0.0.0.0',
               help='the ip of admin_tool',
               required=True),
     cfg.IntOpt('port',
diff --git a/doctor_tests/admin_tool/sample.py b/doctor_tests/admin_tool/sample.py
index 892a4c83..a71f43a1 100644
--- a/doctor_tests/admin_tool/sample.py
+++ b/doctor_tests/admin_tool/sample.py
@@ -59,7 +59,7 @@ class AdminMain(Thread):
         self.parent = parent
         self.log = log
         self.conf = conf
-        self.url = 'http://0.0.0.0:%s' % conf.admin_tool.port
+        self.url = 'http://%s:%s' % (conf.admin_tool.ip, conf.admin_tool.port)
         self.projects_state = dict()  # current state for each project
         self.proj_server_actions = dict()  # actions for each project server
         self.projects_servers = dict()  # servers processed in current state
@@ -86,6 +86,7 @@ class AdminMain(Thread):
             driver='messaging',
             topics=['notifications'])
         self.notif_admin = self.notif_admin.prepare(publisher_id='admin_tool')
+        self.stopped = False
         self.log.info('Admin tool session %s initialized' % self.session_id)
 
     def cleanup(self):
@@ -116,14 +117,15 @@ class AdminMain(Thread):
         if self._projects_not_in_wanted_states(wanted_states):
             self.log.error('Admin tool session %s: projects in invalid states '
                            '%s' % (self.session_id, self.projects_state))
-            raise Exception('Admin tool session %s: not all projects in states'
-                            ' %s' % (self.session_id, wanted_states))
+            return False
         else:
             self.log.info('all projects replied')
+            return True
 
     def _project_notify(self, project_id, instance_ids, allowed_actions,
                         actions_at, state, metadata):
-        reply_url = '%s/%s/maintenance' % (self.url, project_id)
+        reply_url = '%s/maintenance/%s/%s' % (self.url, self.session_id,
+                                              project_id)
 
         payload = dict(project_id=project_id,
                        instance_ids=instance_ids,
@@ -148,11 +150,12 @@ class AdminMain(Thread):
 
         self.notif_admin.info({'some': 'context'}, 'maintenance.host', payload)
 
-    def down_scale(self):
+    def in_scale(self):
         for project in self.projects_servers:
-            self.log.info('DOWN_SCALE to project %s' % project)
+            self.log.info('SCALE_IN to project %s' % project)
             self.log.debug('instance_ids %s' % self.projects_servers[project])
-            instance_ids = '%s/%s/maintenance' % (self.url, project)
+            instance_ids = '%s/maintenance/%s/%s' % (self.url, self.session_id,
+                                                     project)
             allowed_actions = []
             wait_seconds = 120
             actions_at = (datetime.datetime.utcnow() +
@@ -163,18 +166,20 @@ class AdminMain(Thread):
 
             self._project_notify(project, instance_ids, allowed_actions,
                                  actions_at, state, metadata)
-        allowed_states = ['ACK_DOWN_SCALE', 'NACK_DOWN_SCALE']
-        self.wait_projects_state(allowed_states, wait_seconds)
-        if self.projects_not_in_state('ACK_DOWN_SCALE'):
-            raise Exception('Admin tool session %s: all states not '
-                            'ACK_DOWN_SCALE %s' %
-                            (self.session_id, self.projects_state))
+        allowed_states = ['ACK_SCALE_IN', 'NACK_SCALE_IN']
+        if not self.wait_projects_state(allowed_states, wait_seconds):
+            self.state = 'MAINTENANCE_FAILED'
+        if self.projects_not_in_state('ACK_SCALE_IN'):
+            self.log.error('%s: all states not ACK_SCALE_IN' %
+                           self.session_id)
+            self.state = 'MAINTENANCE_FAILED'
 
     def maintenance(self):
         for project in self.projects_servers:
             self.log.info('\nMAINTENANCE to project %s\n' % project)
             self.log.debug('instance_ids %s' % self.projects_servers[project])
-            instance_ids = '%s/%s/maintenance' % (self.url, project)
+            instance_ids = '%s/maintenance/%s/%s' % (self.url, self.session_id,
+                                                     project)
             allowed_actions = []
             actions_at = self.maintenance_at
             state = self.state
@@ -190,16 +195,18 @@ class AdminMain(Thread):
                                  allowed_actions, actions_at, state,
                                  metadata)
         allowed_states = ['ACK_MAINTENANCE', 'NACK_MAINTENANCE']
-        self.wait_projects_state(allowed_states, wait_seconds)
+        if not self.wait_projects_state(allowed_states, wait_seconds):
+            self.state = 'MAINTENANCE_FAILED'
         if self.projects_not_in_state('ACK_MAINTENANCE'):
-            raise Exception('Admin tool session %s: all states not '
-                            'ACK_MAINTENANCE %s' %
-                            (self.session_id, self.projects_state))
+            self.log.error('%s: all states not ACK_MAINTENANCE' %
+                           self.session_id)
+            self.state = 'MAINTENANCE_FAILED'
 
     def maintenance_complete(self):
         for project in self.projects_servers:
             self.log.info('MAINTENANCE_COMPLETE to project %s' % project)
-            instance_ids = '%s/%s/maintenance' % (self.url, project)
+            instance_ids = '%s/maintenance/%s/%s' % (self.url, self.session_id,
+                                                     project)
             allowed_actions = []
             wait_seconds = 120
             actions_at = (datetime.datetime.utcnow() +
@@ -212,13 +219,14 @@ class AdminMain(Thread):
                                  metadata)
         allowed_states = ['ACK_MAINTENANCE_COMPLETE',
                           'NACK_MAINTENANCE_COMPLETE']
-        self.wait_projects_state(allowed_states, wait_seconds)
+        if not self.wait_projects_state(allowed_states, wait_seconds):
+            self.state = 'MAINTENANCE_FAILED'
         if self.projects_not_in_state('ACK_MAINTENANCE_COMPLETE'):
-            raise Exception('Admin tool session %s: all states not '
-                            'ACK_MAINTENANCE_COMPLETE %s' %
-                            (self.session_id, self.projects_state))
+            self.log.error('%s: all states not ACK_MAINTENANCE_COMPLETE' %
+                           self.session_id)
+            self.state = 'MAINTENANCE_FAILED'
 
-    def need_down_scale(self, host_servers):
+    def need_in_scale(self, host_servers):
         room_for_instances = 0
         for host in host_servers:
             instances = 0
@@ -267,7 +275,8 @@ class AdminMain(Thread):
             self.projects_servers[project] = projects_servers[project].copy()
             self.log.info('%s to project %s' % (state, project))
             self.project_servers_log_info(project, projects_servers)
-            instance_ids = '%s/%s/maintenance' % (self.url, project)
+            instance_ids = '%s/maintenance/%s/%s' % (self.url, self.session_id,
+                                                     project)
             allowed_actions = ['MIGRATE', 'LIVE_MIGRATE', 'OWN_ACTION']
             wait_seconds = 120
             actions_at = (datetime.datetime.utcnow() +
@@ -278,11 +287,14 @@ class AdminMain(Thread):
                                  allowed_actions, actions_at, state,
                                  metadata)
         allowed_states = [state_ack, state_nack]
-        self.wait_projects_state(allowed_states, wait_seconds)
-        if self.projects_not_in_state(state_ack):
-            raise Exception('Admin tool session %s: all states not %s %s' %
-                            (self.session_id, state_ack, self.projects_state))
-        self.actions_to_have_empty_host(host)
+        if not self.wait_projects_state(allowed_states, wait_seconds):
+            self.state = 'MAINTENANCE_FAILED'
+        elif self.projects_not_in_state(state_ack):
+            self.log.error('%s: all states not %s' %
+                           (self.session_id, state_ack))
+            self.state = 'MAINTENANCE_FAILED'
+        else:
+            self.actions_to_have_empty_host(host)
 
     def notify_action_done(self, project, instance_id):
         instance_ids = instance_id
@@ -463,7 +475,8 @@ class AdminMain(Thread):
             time.sleep(5)
 
     def run(self):
-        while self.state != 'MAINTENANCE_COMPLETE':
+        while (self.state not in ['MAINTENANCE_DONE', 'MAINTENANCE_FAILED'] and
+               not self.stopped):
             self.log.info('--==session %s: processing state %s==--' %
                           (self.session_id, self.state))
             if self.state == 'MAINTENANCE':
@@ -474,7 +487,8 @@ class AdminMain(Thread):
                     raise Exception('all projects do not listen maintenance '
                                     'alarm')
                 self.maintenance()
-
+                if self.state == 'MAINTENANCE_FAILED':
+                    continue
                 maint_at = self.str_to_datetime(self.maintenance_at)
                 if maint_at > datetime.datetime.utcnow():
                     time_now = (datetime.datetime.utcnow().strftime(
@@ -492,14 +506,14 @@ class AdminMain(Thread):
                 # True -> PLANNED_MAINTENANCE
                 # False -> check if we can migrate VMs to get empty host
                 #          True -> PREPARE_MAINTENANCE
-                #          False -> DOWN_SCALE
+                #          False -> SCALE_IN
                 maintenance_empty_hosts = ([h for h in self.hosts
                                             if h not in host_servers])
 
                 if len(maintenance_empty_hosts) == 0:
-                    if self.need_down_scale(host_servers):
+                    if self.need_in_scale(host_servers):
                         self.log.info('Need to down scale')
-                        self.state = 'DOWN_SCALE'
+                        self.state = 'SCALE_IN'
                     else:
                         self.log.info('Free capacity, but need empty host')
                         self.state = 'PREPARE_MAINTENANCE'
@@ -508,14 +522,17 @@ class AdminMain(Thread):
                     self.state = 'PLANNED_MAINTENANCE'
                 self.log.info('--==State change from MAINTENANCE to %s==--'
                               % self.state)
-            elif self.state == 'DOWN_SCALE':
+            elif self.state == 'SCALE_IN':
                 # Test case is hard coded to have all compute capacity used
                 # We need to down scale to have one empty compute host
-                self.down_scale()
+                self.update_server_info()
+                self.in_scale()
+                if self.state == 'MAINTENANCE_FAILED':
+                    continue
                 self.state = 'PREPARE_MAINTENANCE'
                 host_servers = self.update_server_info()
                 self.servers_log_info(host_servers)
-                self.log.info('--==State change from DOWN_SCALE to'
+                self.log.info('--==State change from SCALE_IN to'
                               ' %s==--' % self.state)
 
             elif self.state == 'PREPARE_MAINTENANCE':
@@ -527,7 +544,7 @@ class AdminMain(Thread):
                                             host_servers])
                 if len(maintenance_empty_hosts) == 0:
                     self.log.info('no empty hosts for maintenance')
-                    if self.need_down_scale(host_servers):
+                    if self.need_in_scale(host_servers):
                         raise Exception('Admin tool session %s: Not enough '
                                         'free capacity for maintenance' %
                                         self.session_id)
@@ -535,6 +552,8 @@ class AdminMain(Thread):
                     if host:
                         self.make_compute_host_empty(host, host_servers[host],
                                                      'PREPARE_MAINTENANCE')
+                        if self.state == 'MAINTENANCE_FAILED':
+                            continue
                     else:
                         # We do not currently support another down scale if
                         # first was not enough
@@ -566,6 +585,7 @@ class AdminMain(Thread):
                             maintenance_empty_hosts.append(host)
                 self.log.info('--==Start to maintain empty hosts==--\n%s' %
                               maintenance_empty_hosts)
+                self.update_server_info()
                 for host in maintenance_empty_hosts:
                     # scheduler has problems, let's see if just down scaled
                     # host is really empty
@@ -586,6 +606,8 @@ class AdminMain(Thread):
                     self.log.info('PLANNED_MAINTENANCE host %s' % host)
                     self.make_compute_host_empty(host, host_servers[host],
                                                  'PLANNED_MAINTENANCE')
+                    if self.state == 'MAINTENANCE_FAILED':
+                        continue
                     self.log.info('IN_MAINTENANCE host %s' % host)
                     self._admin_notify(admin_project, host, 'IN_MAINTENANCE',
                                        self.session_id)
@@ -603,14 +625,16 @@ class AdminMain(Thread):
                 self.log.info('Projects still need to up scale back to full '
                               'capcity')
                 self.maintenance_complete()
+                if self.state == 'MAINTENANCE_FAILED':
+                    continue
                 host_servers = self.update_server_info()
                 self.servers_log_info(host_servers)
-                self.state = 'MAINTENANCE_COMPLETE'
+                self.state = 'MAINTENANCE_DONE'
             else:
                 raise Exception('Admin tool session %s: session in invalid '
                                 'state %s' % (self.session_id, self.state))
-        self.log.info('--==Maintenance session %s: '
-                      'MAINTENANCE SESSION COMPLETE==--' % self.session_id)
+        self.log.info('--==Maintenance session %s: %s==--' %
+                      (self.session_id, self.state))
 
     def project_input(self, project_id, data):
         self.log.debug('Admin tool session %s: project %s input' %
@@ -637,7 +661,6 @@ class AdminTool(Thread):
         self.admin_tool = admin_tool
         self.log = log
         self.conf = conf
-        self.port = self.conf.admin_tool.port
         self.maint_sessions = {}
         self.projects = {}
         self.maintenance_hosts = []
@@ -650,63 +673,55 @@ class AdminTool(Thread):
         def admin_maintenance_api_post():
             data = json.loads(request.data.decode('utf8'))
             self.log.info('maintenance message: %s' % data)
-            if 'session_id' in data:
-                if data['state'] == 'REMOVE_MAINTENANCE_SESSION':
-                    session_id = data['session_id']
-                    self.log.info('remove session %s'
-                                  % session_id)
-                    self.maint_sessions[session_id].cleanup()
-                    self.maint_sessions[session_id].stop()
-                    del self.maint_sessions[session_id]
-            else:
-                session_id = str(generate_uuid())
-                self.log.info('creating session: %s' % session_id)
-                self.maint_sessions[session_id] = (
-                    AdminMain(self.trasport_url,
-                              session_id,
-                              data,
-                              self,
-                              self.conf,
-                              self.log))
-                self.maint_sessions[session_id].start()
+            session_id = str(generate_uuid())
+            self.log.info('creating session: %s' % session_id)
+            self.maint_sessions[session_id] = (
+                AdminMain(self.trasport_url,
+                          session_id,
+                          data,
+                          self,
+                          self.conf,
+                          self.log))
+            self.maint_sessions[session_id].start()
             reply = json.dumps({'session_id': session_id,
                                 'state': 'ACK_%s' % data['state']})
             self.log.debug('reply: %s' % reply)
             return reply, 200, None
 
-        @app.route('/maintenance', methods=['GET'])
-        def admin_maintenance_api_get():
-            data = json.loads(request.data.decode('utf8'))
-            self.log.debug('Admin get maintenance: %s' % data)
-            session_id = data['session_id']
+        @app.route('/maintenance/<session_id>', methods=['GET'])
+        def admin_maintenance_api_get(session_id=None):
+            self.log.debug('Admin get maintenance')
             reply = json.dumps({'state': self.maint_sessions[session_id].state})
-            self.log.debug('reply: %s' % reply)
+            self.log.info('reply: %s' % reply)
             return reply, 200, None
 
-        @app.route('/<projet_id>/maintenance', methods=['PUT'])
-        def project_maintenance_api_put(projet_id=None):
+        @app.route('/maintenance/<session_id>/<projet_id>', methods=['PUT'])
+        def project_maintenance_api_put(session_id=None, projet_id=None):
             data = json.loads(request.data.decode('utf8'))
             self.log.debug('%s project put: %s' % (projet_id, data))
-            self.project_input(projet_id, data)
+            self.project_input(session_id, projet_id, data)
             return 'OK'
 
-        @app.route('/<projet_id>/maintenance', methods=['GET'])
-        def project_maintenance_api_get(projet_id=None):
-            data = json.loads(request.data.decode('utf8'))
-            self.log.debug('%s project get %s' % (projet_id, data))
-            instances = self.project_get_instances(projet_id, data)
+        @app.route('/maintenance/<session_id>/<projet_id>', methods=['GET'])
+        def project_maintenance_api_get(session_id=None, projet_id=None):
+            self.log.debug('%s project get %s' % (projet_id, session_id))
+            instances = self.project_get_instances(session_id, projet_id)
             reply = json.dumps({'instance_ids': instances})
             self.log.debug('%s reply: %s' % (projet_id, reply))
             return reply, 200, None
 
+        @app.route('/maintenance/<session_id>', methods=['DELETE'])
+        def remove_session(session_id=None):
+            self.log.info('remove session %s'
+                          % session_id)
+            self.maint_sessions[session_id].cleanup()
+            self.maint_sessions[session_id].stop()
+            del self.maint_sessions[session_id]
+            return 'OK'
+
         @app.route('/shutdown', methods=['POST'])
         def shutdown():
-            for session in self.maint_sessions:
-                self.log.info('shutdown admin tool session %s thread' %
-                              session)
-                self.maint_sessions[session].cleanup()
-                self.maint_sessions[session].stop()
             self.log.info('shutdown admin_tool server at %s' % time.time())
             func = request.environ.get('werkzeug.server.shutdown')
             if func is None:
@@ -714,13 +729,11 @@ class AdminTool(Thread):
             func()
             return 'admin_tool app shutting down...'
 
-        app.run(host='0.0.0.0', port=self.port)
+        app.run(host=self.conf.admin_tool.ip, port=self.conf.admin_tool.port)
 
-    def project_input(self, project_id, data):
-        session_id = data['session_id']
+    def project_input(self, session_id, project_id, data):
         self.maint_sessions[session_id].project_input(project_id, data)
 
-    def project_get_instances(self, project_id, data):
-        session_id = data['session_id']
+    def project_get_instances(self, session_id, project_id):
         return self.maint_sessions[session_id].project_get_instances(
             project_id)
diff --git a/doctor_tests/app_manager/sample.py b/doctor_tests/app_manager/sample.py
index 94926ee2..a7bc4126 100644
--- a/doctor_tests/app_manager/sample.py
+++ b/doctor_tests/app_manager/sample.py
@@ -114,8 +114,7 @@ class AppManager(Thread):
                 for t in data['reason_data']['event']['traits']})
 
     def get_session_instance_ids(self, url, session_id):
-        data = {'session_id': session_id}
-        ret = requests.get(url, data=json.dumps(data), headers=self.headers)
+        ret = requests.get(url, data=None, headers=self.headers)
         if ret.status_code != 200:
             raise Exception(ret.text)
         self.log.info('get_instance_ids %s' % ret.json())
@@ -177,12 +176,12 @@ class AppManager(Thread):
                 reply['instance_ids'] = instance_ids
                 reply_state = 'ACK_MAINTENANCE'
 
-            elif state == 'DOWN_SCALE':
+            elif state == 'SCALE_IN':
                 # scale down 2 isntances that is VCPUS equaling to single
                 # compute node
                 self.scale_instances(-2)
                 reply['instance_ids'] = self.get_instance_ids()
-                reply_state = 'ACK_DOWN_SCALE'
+                reply_state = 'ACK_SCALE_IN'
 
             elif state == 'MAINTENANCE_COMPLETE':
                 # possibly need to upscale
diff --git a/doctor_tests/installer/apex.py b/doctor_tests/installer/apex.py
index 2aa81ff9..3c97378c 100644
--- a/doctor_tests/installer/apex.py
+++ b/doctor_tests/installer/apex.py
@@ -114,6 +114,22 @@ class ApexInstaller(BaseInstaller):
 
     def set_apply_patches(self):
         self.log.info('Set apply patches start......')
+        if self.conf.test_case != 'fault_management':
+            if self.use_containers:
+                restart_cmd = self._set_docker_restart_cmd("nova-compute")
+            else:
+                restart_cmd = 'sudo systemctl restart' \
+                              ' openstack-nova-compute.service'
+            for node_ip in self.computes:
+                client = SSHClient(node_ip, self.node_user_name,
+                                   key_filename=self.key_file)
+                self.compute_clients.append(client)
+                self._run_apply_patches(client,
+                                        restart_cmd,
+                                        [self.nc_set_compute_script],
+                                        python=self.python)
+            time.sleep(10)
+
         set_scripts = [self.cm_set_script]
 
         if self.use_containers:
@@ -147,24 +163,6 @@ class ApexInstaller(BaseInstaller):
                                     set_scripts,
                                     python=self.python)
 
-        if self.conf.test_case != 'fault_management':
-            if self.use_containers:
-                restart_cmd = self._set_docker_restart_cmd("nova-compute")
-            else:
-                restart_cmd = 'sudo systemctl restart' \
-                              ' openstack-nova-compute.service'
-            for node_ip in self.computes:
-                client = SSHClient(node_ip, self.node_user_name,
-                                   key_filename=self.key_file)
-                self.compute_clients.append(client)
-                self._run_apply_patches(client,
-                                        restart_cmd,
-                                        [self.nc_set_compute_script],
-                                        python=self.python)
-
-        if self.conf.test_case != 'fault_management':
-            time.sleep(10)
-
     def restore_apply_patches(self):
         self.log.info('restore apply patches start......')
diff --git a/doctor_tests/installer/base.py b/doctor_tests/installer/base.py
index 30435931..124b1910 100644
--- a/doctor_tests/installer/base.py
+++ b/doctor_tests/installer/base.py
@@ -14,8 +14,9 @@ import pwd
 import six
 import stat
 import subprocess
+import time
 
-from doctor_tests.common.utils import get_doctor_test_root_dir
+from doctor_tests.common import utils
 from doctor_tests.identity_auth import get_session
 from doctor_tests.os_clients import nova_client
@@ -75,7 +76,7 @@ class BaseInstaller(object):
             cmd = ("ssh -o UserKnownHostsFile=/dev/null"
                    " -o StrictHostKeyChecking=no"
                    " -i %s %s@%s -R %s:localhost:%s"
-                   " sleep %s > ssh_tunnel.%s"
+                   " sleep %s > ssh_tunnel.%s.%s"
                    " 2>&1 < /dev/null "
                    % (self.key_file,
                       self.node_user_name,
@@ -83,9 +84,28 @@ class BaseInstaller(object):
                       port,
                       port,
                       tunnel_uptime,
-                      node_ip))
+                      node_ip,
+                      port))
             server = subprocess.Popen('exec ' + cmd, shell=True)
             self.servers.append(server)
+            if self.conf.admin_tool.type == 'fenix':
+                port = self.conf.admin_tool.port
+                self.log.info('tunnel for port %s' % port)
+                cmd = ("ssh -o UserKnownHostsFile=/dev/null"
+                       " -o StrictHostKeyChecking=no"
+                       " -i %s %s@%s -L %s:localhost:%s"
+                       " sleep %s > ssh_tunnel.%s.%s"
+                       " 2>&1 < /dev/null "
+                       % (self.key_file,
+                          self.node_user_name,
+                          node_ip,
+                          port,
+                          port,
+                          tunnel_uptime,
+                          node_ip,
+                          port))
+                server = subprocess.Popen('exec ' + cmd, shell=True)
+                self.servers.append(server)
 
     def _get_ssh_key(self, client, key_path):
         self.log.info('Get SSH keys from %s installer......'
@@ -96,7 +116,8 @@ class BaseInstaller(object):
                       % self.conf.installer.type)
             return self.key_file
 
-        ssh_key = '{0}/{1}'.format(get_doctor_test_root_dir(), 'instack_key')
+        ssh_key = '{0}/{1}'.format(utils.get_doctor_test_root_dir(),
+                                   'instack_key')
         client.scp(key_path, ssh_key, method='get')
         user = getpass.getuser()
         uid = pwd.getpwnam(user).pw_uid
@@ -131,6 +152,7 @@ class BaseInstaller(object):
             ret = False
         return ret
 
+    @utils.run_async
     def _run_apply_patches(self, client, restart_cmd, script_names,
                            python='python3'):
         installer_dir = os.path.dirname(os.path.realpath(__file__))
@@ -146,4 +168,7 @@ class BaseInstaller(object):
                 raise Exception('Do the command in remote'
                                 ' node failed, ret=%s, cmd=%s, output=%s'
                                 % (ret, cmd, output))
+            if 'nova-scheduler' in restart_cmd:
+                # Make sure scheduler has proper cpu_allocation_ratio
+                time.sleep(5)
             client.ssh(restart_cmd)
diff --git a/doctor_tests/installer/common/set_config.py b/doctor_tests/installer/common/set_config.py
index 3dc6cd9a..e66d4c2c 100644
--- a/doctor_tests/installer/common/set_config.py
+++ b/doctor_tests/installer/common/set_config.py
@@ -125,6 +125,7 @@ def set_event_definitions():
             'reply_url': {'fields': 'payload.reply_url'},
             'actions_at': {'fields': 'payload.actions_at',
                            'type': 'datetime'},
+            'reply_at': {'fields': 'payload.reply_at', 'type': 'datetime'},
             'state': {'fields': 'payload.state'},
             'session_id': {'fields': 'payload.session_id'},
             'project_id': {'fields': 'payload.project_id'},
diff --git a/doctor_tests/scenario/maintenance.py b/doctor_tests/scenario/maintenance.py
index 9fcd4128..09795c2a 100644
--- a/doctor_tests/scenario/maintenance.py
+++ b/doctor_tests/scenario/maintenance.py
@@ -34,7 +34,11 @@ class Maintenance(object):
         auth = get_identity_auth(project=self.conf.doctor_project)
         self.neutron = neutron_client(get_session(auth=auth))
         self.stack = Stack(self.conf, self.log)
-        self.admin_tool = get_admin_tool(trasport_url, self.conf, self.log)
+        if self.conf.admin_tool.type == 'sample':
+            self.admin_tool = get_admin_tool(trasport_url, self.conf, self.log)
+            self.endpoint = 'maintenance'
+        else:
+            self.endpoint = 'v1/maintenance'
         self.app_manager = get_app_manager(self.stack, self.conf, self.log)
         self.inspector = get_inspector(self.conf, self.log)
@@ -110,7 +114,11 @@ class Maintenance(object):
                                parameters=parameters,
                                files=files)
 
-        self.admin_tool.start()
+        if self.conf.admin_tool.type == 'sample':
+            self.admin_tool.start()
+        else:
+            # TBD Now we expect Fenix is running in self.conf.admin_tool.port
+            pass
         self.app_manager.start()
         self.inspector.start()
@@ -122,16 +130,21 @@ class Maintenance(object):
             hostname = hvisor.__getattr__('hypervisor_hostname')
             maintenance_hosts.append(hostname)
 
-        url = 'http://0.0.0.0:%s/maintenance' % self.conf.admin_tool.port
+        url = ('http://%s:%s/%s' %
+               (self.conf.admin_tool.ip,
+                self.conf.admin_tool.port,
+                self.endpoint))
+
         # let's start maintenance 20sec from now, so projects will have
         # time to ACK to it before that
         maintenance_at = (datetime.datetime.utcnow() +
-                          datetime.timedelta(seconds=20)
+                          datetime.timedelta(seconds=30)
                           ).strftime('%Y-%m-%d %H:%M:%S')
         data = {'hosts': maintenance_hosts,
                 'state': 'MAINTENANCE',
                 'maintenance_at': maintenance_at,
-                'metadata': {'openstack_version': 'Pike'}}
+                'metadata': {'openstack_version': 'Rocky'},
+                'workflow': 'default'}
         headers = {
             'Content-Type': 'application/json',
             'Accept': 'application/json'}
@@ -143,49 +156,56 @@ class Maintenance(object):
 
     def remove_maintenance_session(self, session_id):
         self.log.info('remove maintenance session %s.......'
                       % session_id)
+        url = ('http://%s:%s/%s/%s' %
+               (self.conf.admin_tool.ip,
+                self.conf.admin_tool.port,
+                self.endpoint,
+                session_id))
 
-        url = 'http://0.0.0.0:%s/maintenance' % self.conf.admin_tool.port
-
-        data = {'state': 'REMOVE_MAINTENANCE_SESSION',
-                'session_id': session_id}
         headers = {
             'Content-Type': 'application/json',
             'Accept': 'application/json'}
-        ret = requests.post(url, data=json.dumps(data), headers=headers)
+        ret = requests.delete(url, data=None, headers=headers)
         if ret.status_code != 200:
             raise Exception(ret.text)
 
     def get_maintenance_state(self, session_id):
-        url = 'http://0.0.0.0:%s/maintenance' % self.conf.admin_tool.port
-        data = {'session_id': session_id}
+        url = ('http://%s:%s/%s/%s' %
+               (self.conf.admin_tool.ip,
+                self.conf.admin_tool.port,
+                self.endpoint,
+                session_id))
+
         headers = {
             'Content-Type': 'application/json',
             'Accept': 'application/json'}
-        ret = requests.get(url, data=json.dumps(data), headers=headers)
+        ret = requests.get(url, data=None, headers=headers)
         if ret.status_code != 200:
             raise Exception(ret.text)
         return ret.json()['state']
 
     def wait_maintenance_complete(self, session_id):
-        retries = 66
+        retries = 90
         state = None
-        time.sleep(540)
-        while state != 'MAINTENANCE_COMPLETE' and retries > 0:
+        time.sleep(300)
+        while (state not in ['MAINTENANCE_DONE', 'MAINTENANCE_FAILED'] and
+               retries > 0):
             time.sleep(10)
             state = self.get_maintenance_state(session_id)
             retries = retries - 1
-        if retries == 0 and state != 'MAINTENANCE_COMPLETE':
-            raise Exception('maintenance %s not completed within 20min, status'
-                            ' %s' % (session_id, state))
-        elif state == 'MAINTENANCE_COMPLETE':
-            self.log.info('maintenance %s %s' % (session_id, state))
-            self.remove_maintenance_session(session_id)
-        elif state == 'MAINTENANCE_FAILED':
+        self.remove_maintenance_session(session_id)
+        self.log.info('maintenance %s ended with state %s' %
+                      (session_id, state))
+        if state == 'MAINTENANCE_FAILED':
             raise Exception('maintenance %s failed' % session_id)
+        elif retries == 0:
+            raise Exception('maintenance %s not completed within 20min' %
+                            session_id)
 
     def cleanup_maintenance(self):
-        self.admin_tool.stop()
+        if self.conf.admin_tool.type == 'sample':
+            self.admin_tool.stop()
         self.app_manager.stop()
         self.inspector.stop()
         self.log.info('stack delete start.......')
diff --git a/doctor_tests/stack.py b/doctor_tests/stack.py
index ee586fa8..8a921beb 100644
--- a/doctor_tests/stack.py
+++ b/doctor_tests/stack.py
@@ -94,7 +94,7 @@ class Stack(object):
             # It might not always work at first
             self.log.info('retry creating maintenance stack.......')
             self.delete()
-            time.sleep(3)
+            time.sleep(5)
             stack = self.heat.stacks.create(stack_name=self.stack_name,
                                             files=files,
                                             template=template,
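
Note: the admin_tool changes above move the session id out of request bodies and into the URL path (POST /maintenance creates a session, GET/DELETE /maintenance/<session_id> read and remove it, PUT /maintenance/<session_id>/<projet_id> carries project replies), and doctor_tests/scenario/maintenance.py drives those endpoints with plain requests calls. A minimal sketch of that client flow follows; the host, port, host list and timestamp below are illustrative placeholders, not values defined by this change, and in the tests the base URL is built from conf.admin_tool.ip/port plus self.endpoint ('maintenance' for the sample tool, 'v1/maintenance' when Fenix is used):

import json
import requests

# Illustrative base URL; the tests derive this from configuration.
base = 'http://127.0.0.1:12347/maintenance'
headers = {'Content-Type': 'application/json', 'Accept': 'application/json'}

# Create a maintenance session (as in start_maintenance()); the reply
# payload carries the generated session id.
data = {'hosts': ['overcloud-novacompute-0'],
        'state': 'MAINTENANCE',
        'maintenance_at': '2019-01-01 12:00:00',
        'metadata': {'openstack_version': 'Rocky'},
        'workflow': 'default'}
session_id = requests.post(base, data=json.dumps(data),
                           headers=headers).json()['session_id']

# Poll the session state and remove the session once the workflow has
# ended (as in wait_maintenance_complete() / remove_maintenance_session()).
state = requests.get('%s/%s' % (base, session_id),
                     headers=headers).json()['state']
if state in ('MAINTENANCE_DONE', 'MAINTENANCE_FAILED'):
    requests.delete('%s/%s' % (base, session_id), headers=headers)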