summaryrefslogtreecommitdiffstats
path: root/src/ceph/qa/tasks/systemd.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/ceph/qa/tasks/systemd.py')
-rw-r--r--src/ceph/qa/tasks/systemd.py142
1 files changed, 0 insertions, 142 deletions
diff --git a/src/ceph/qa/tasks/systemd.py b/src/ceph/qa/tasks/systemd.py
deleted file mode 100644
index 50471db..0000000
--- a/src/ceph/qa/tasks/systemd.py
+++ /dev/null
@@ -1,142 +0,0 @@
-"""
-Systemd test
-"""
-import contextlib
-import logging
-import re
-import time
-
-from cStringIO import StringIO
-from teuthology.orchestra import run
-from teuthology.misc import reconnect, get_first_mon, wait_until_healthy
-
-log = logging.getLogger(__name__)
-
-
-def _unit_status(remote, unit, check_status=False):
-    """
-    Run ``sudo systemctl status <unit>`` on ``remote``, log what it
-    printed and return that text.
-
-    ``check_status`` is handed to ``remote.run`` unchanged; it defaults
-    to False because most callers query a unit they have just stopped.
-    """
-    proc = remote.run(args=['sudo', 'systemctl', 'status', unit],
-                      stdout=StringIO(), check_status=check_status)
-    output = proc.stdout.getvalue()
-    log.info(output)
-    return output
-
-
-def _ceph_processes(remote):
-    """Return the captured ``ps -eaf | grep ceph`` listing of ``remote``."""
-    proc = remote.run(args=['sudo', 'ps', '-eaf', run.Raw('|'),
-                            'grep', 'ceph'], stdout=StringIO())
-    return proc.stdout.getvalue()
-
-
-def _cycle_unit(remote, unit, stopped_msg, failed_msg):
-    """
-    Stop ``unit`` on ``remote``, log ``stopped_msg`` or ``failed_msg``
-    depending on whether its status reads inactive, then start it again.
-    """
-    # Keeps remote.run's default check_status, unlike the post-stop query.
-    remote.run(args=['sudo', 'systemctl', 'status', unit])
-    remote.run(args=['sudo', 'systemctl', 'stop', unit])
-    time.sleep(4)  # immediate check will result in deactivating state
-    if 'Active: inactive' in _unit_status(remote, unit):
-        log.info(stopped_msg)
-    else:
-        log.error(failed_msg)
-    remote.run(args=['sudo', 'systemctl', 'start', unit])
-    time.sleep(4)
-
-
-@contextlib.contextmanager
-def task(ctx, config):
-    """
-    - tasks:
-        ceph-deploy:
-        systemd:
-
-    Test ceph systemd services can start, stop and restart and
-    check for any failed services and report back errors
-
-    Failed state checks are reported with log.error rather than raised.
-    After the per-node checks every node is rebooted and the task waits
-    for the cluster to become healthy before yielding.
-    """
-    for remote, roles in ctx.cluster.remotes.iteritems():
-        remote.run(args=['sudo', 'ps', '-eaf', run.Raw('|'),
-                         'grep', 'ceph'])
-        r = remote.run(args=['sudo', 'systemctl', 'list-units', run.Raw('|'),
-                             'grep', 'ceph'], stdout=StringIO(),
-                       check_status=False)
-        units = r.stdout.getvalue()
-        log.info(units)
-        # str.find() returns -1 (truthy) on a miss, so test with ``in``.
-        if 'failed' in units:
-            log.error("Ceph services in failed state")
-
-        # test overall service stop and start using ceph.target
-        # ceph.target tests are meant for ceph systemd tests
-        # and not actual process testing using 'ps'
-        log.info("Stopping all Ceph services")
-        remote.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'])
-        target_status = _unit_status(remote, 'ceph.target')
-        log.info("Checking process status")
-        log.info(_ceph_processes(remote))
-        # Judge the stop by the systemctl status text, not the ps listing.
-        if 'Active: inactive' in target_status:
-            log.info("Sucessfully stopped all ceph services")
-        else:
-            log.error("Failed to stop ceph services")
-
-        log.info("Starting all Ceph services")
-        remote.run(args=['sudo', 'systemctl', 'start', 'ceph.target'])
-        target_status = _unit_status(remote, 'ceph.target',
-                                     check_status=True)
-        if 'Active: active' in target_status:
-            log.info("Sucessfully started all Ceph services")
-        else:
-            log.error("Failed to start Ceph services")
-        processes = _ceph_processes(remote)
-        log.info(processes)
-        time.sleep(4)
-
-        # test individual services start stop
-        name = remote.shortname
-        # The OSD id is parsed out of the ps listing captured above.
-        m_osd = re.search(r'--id (\d+) --setuser ceph', processes)
-        if m_osd:
-            _cycle_unit(remote,
-                        'ceph-osd@{m}.service'.format(m=m_osd.group(1)),
-                        "Sucessfully stopped single osd ceph service",
-                        "Failed to stop ceph osd services")
-        if 'mon.' + name in roles:
-            _cycle_unit(remote, 'ceph-mon@' + name + '.service',
-                        "Sucessfully stopped single mon ceph service",
-                        "Failed to stop ceph mon service")
-        if 'mgr.' + name in roles:
-            _cycle_unit(remote, 'ceph-mgr@' + name + '.service',
-                        "Sucessfully stopped single ceph mgr service",
-                        "Failed to stop ceph mgr service")
-        if 'mds.' + name in roles:
-            _cycle_unit(remote, 'ceph-mds@' + name + '.service',
-                        "Sucessfully stopped single ceph mds service",
-                        "Failed to stop ceph mds service")
-
-    # reboot all nodes and verify the systemd units restart
-    # workunit that runs would fail if any of the systemd unit doesnt start
-    ctx.cluster.run(args='sudo reboot', wait=False, check_status=False)
-    # avoid immediate reconnect
-    time.sleep(120)
-    reconnect(ctx, 480)  # reconnect all nodes
-    # for debug info
-    ctx.cluster.run(args=['sudo', 'ps', '-eaf', run.Raw('|'),
-                          'grep', 'ceph'])
-    # wait for HEALTH_OK
-    mon = get_first_mon(ctx, config)
-    (mon_remote,) = ctx.cluster.only(mon).remotes.iterkeys()
-    wait_until_healthy(ctx, mon_remote, use_sudo=True)
-    yield