From 7da45d65be36d36b880cc55c5036e96c24b53f00 Mon Sep 17 00:00:00 2001 From: Qiaowei Ren Date: Thu, 1 Mar 2018 14:38:11 +0800 Subject: remove ceph code This patch removes initial ceph code, due to license issue. Change-Id: I092d44f601cdf34aed92300fe13214925563081c Signed-off-by: Qiaowei Ren --- src/ceph/qa/tasks/mon_thrash.py | 343 ---------------------------------------- 1 file changed, 343 deletions(-) delete mode 100644 src/ceph/qa/tasks/mon_thrash.py (limited to 'src/ceph/qa/tasks/mon_thrash.py') diff --git a/src/ceph/qa/tasks/mon_thrash.py b/src/ceph/qa/tasks/mon_thrash.py deleted file mode 100644 index 0754bcd..0000000 --- a/src/ceph/qa/tasks/mon_thrash.py +++ /dev/null @@ -1,343 +0,0 @@ -""" -Monitor thrash -""" -import logging -import contextlib -import ceph_manager -import random -import time -import gevent -import json -import math -from teuthology import misc as teuthology - -log = logging.getLogger(__name__) - -def _get_mons(ctx): - """ - Get monitor names from the context value. - """ - mons = [f[len('mon.'):] for f in teuthology.get_mon_names(ctx)] - return mons - -class MonitorThrasher: - """ - How it works:: - - - pick a monitor - - kill it - - wait for quorum to be formed - - sleep for 'revive_delay' seconds - - revive monitor - - wait for quorum to be formed - - sleep for 'thrash_delay' seconds - - Options:: - - seed Seed to use on the RNG to reproduce a previous - behaviour (default: None; i.e., not set) - revive_delay Number of seconds to wait before reviving - the monitor (default: 10) - thrash_delay Number of seconds to wait in-between - test iterations (default: 0) - thrash_store Thrash monitor store before killing the monitor being thrashed (default: False) - thrash_store_probability Probability of thrashing a monitor's store - (default: 50) - thrash_many Thrash multiple monitors instead of just one. If - 'maintain-quorum' is set to False, then we will - thrash up to as many monitors as there are - available. (default: False) - maintain_quorum Always maintain quorum, taking care on how many - monitors we kill during the thrashing. If we - happen to only have one or two monitors configured, - if this option is set to True, then we won't run - this task as we cannot guarantee maintenance of - quorum. Setting it to false however would allow the - task to run with as many as just one single monitor. - (default: True) - freeze_mon_probability: how often to freeze the mon instead of killing it, - in % (default: 0) - freeze_mon_duration: how many seconds to freeze the mon (default: 15) - scrub Scrub after each iteration (default: True) - - Note: if 'store-thrash' is set to True, then 'maintain-quorum' must also - be set to True. - - For example:: - - tasks: - - ceph: - - mon_thrash: - revive_delay: 20 - thrash_delay: 1 - thrash_store: true - thrash_store_probability: 40 - seed: 31337 - maintain_quorum: true - thrash_many: true - - ceph-fuse: - - workunit: - clients: - all: - - mon/workloadgen.sh - """ - def __init__(self, ctx, manager, config, logger): - self.ctx = ctx - self.manager = manager - self.manager.wait_for_clean() - - self.stopping = False - self.logger = logger - self.config = config - - if self.config is None: - self.config = dict() - - """ Test reproducibility """ - self.random_seed = self.config.get('seed', None) - - if self.random_seed is None: - self.random_seed = int(time.time()) - - self.rng = random.Random() - self.rng.seed(int(self.random_seed)) - - """ Monitor thrashing """ - self.revive_delay = float(self.config.get('revive_delay', 10.0)) - self.thrash_delay = float(self.config.get('thrash_delay', 0.0)) - - self.thrash_many = self.config.get('thrash_many', False) - self.maintain_quorum = self.config.get('maintain_quorum', True) - - self.scrub = self.config.get('scrub', True) - - self.freeze_mon_probability = float(self.config.get('freeze_mon_probability', 10)) - self.freeze_mon_duration = float(self.config.get('freeze_mon_duration', 15.0)) - - assert self.max_killable() > 0, \ - 'Unable to kill at least one monitor with the current config.' - - """ Store thrashing """ - self.store_thrash = self.config.get('store_thrash', False) - self.store_thrash_probability = int( - self.config.get('store_thrash_probability', 50)) - if self.store_thrash: - assert self.store_thrash_probability > 0, \ - 'store_thrash is set, probability must be > 0' - assert self.maintain_quorum, \ - 'store_thrash = true must imply maintain_quorum = true' - - self.thread = gevent.spawn(self.do_thrash) - - def log(self, x): - """ - locally log info messages - """ - self.logger.info(x) - - def do_join(self): - """ - Break out of this processes thrashing loop. - """ - self.stopping = True - self.thread.get() - - def should_thrash_store(self): - """ - If allowed, indicate that we should thrash a certain percentage of - the time as determined by the store_thrash_probability value. - """ - if not self.store_thrash: - return False - return self.rng.randrange(0, 101) < self.store_thrash_probability - - def thrash_store(self, mon): - """ - Thrash the monitor specified. - :param mon: monitor to thrash - """ - addr = self.ctx.ceph['ceph'].conf['mon.%s' % mon]['mon addr'] - self.log('thrashing mon.{id}@{addr} store'.format(id=mon, addr=addr)) - out = self.manager.raw_cluster_cmd('-m', addr, 'sync', 'force') - j = json.loads(out) - assert j['ret'] == 0, \ - 'error forcing store sync on mon.{id}:\n{ret}'.format( - id=mon,ret=out) - - def should_freeze_mon(self): - """ - Indicate that we should freeze a certain percentago of the time - as determined by the freeze_mon_probability value. - """ - return self.rng.randrange(0, 101) < self.freeze_mon_probability - - def freeze_mon(self, mon): - """ - Send STOP signal to freeze the monitor. - """ - log.info('Sending STOP to mon %s', mon) - self.manager.signal_mon(mon, 19) # STOP - - def unfreeze_mon(self, mon): - """ - Send CONT signal to unfreeze the monitor. - """ - log.info('Sending CONT to mon %s', mon) - self.manager.signal_mon(mon, 18) # CONT - - def kill_mon(self, mon): - """ - Kill the monitor specified - """ - self.log('killing mon.{id}'.format(id=mon)) - self.manager.kill_mon(mon) - - def revive_mon(self, mon): - """ - Revive the monitor specified - """ - self.log('killing mon.{id}'.format(id=mon)) - self.log('reviving mon.{id}'.format(id=mon)) - self.manager.revive_mon(mon) - - def max_killable(self): - """ - Return the maximum number of monitors we can kill. - """ - m = len(_get_mons(self.ctx)) - if self.maintain_quorum: - return max(math.ceil(m/2.0)-1, 0) - else: - return m - - def do_thrash(self): - """ - Cotinuously loop and thrash the monitors. - """ - self.log('start thrashing') - self.log('seed: {s}, revive delay: {r}, thrash delay: {t} '\ - 'thrash many: {tm}, maintain quorum: {mq} '\ - 'store thrash: {st}, probability: {stp} '\ - 'freeze mon: prob {fp} duration {fd}'.format( - s=self.random_seed,r=self.revive_delay,t=self.thrash_delay, - tm=self.thrash_many, mq=self.maintain_quorum, - st=self.store_thrash,stp=self.store_thrash_probability, - fp=self.freeze_mon_probability,fd=self.freeze_mon_duration, - )) - - while not self.stopping: - mons = _get_mons(self.ctx) - self.manager.wait_for_mon_quorum_size(len(mons)) - self.log('making sure all monitors are in the quorum') - for m in mons: - s = self.manager.get_mon_status(m) - assert s['state'] == 'leader' or s['state'] == 'peon' - assert len(s['quorum']) == len(mons) - - kill_up_to = self.rng.randrange(1, self.max_killable()+1) - mons_to_kill = self.rng.sample(mons, kill_up_to) - self.log('monitors to thrash: {m}'.format(m=mons_to_kill)) - - mons_to_freeze = [] - for mon in mons: - if mon in mons_to_kill: - continue - if self.should_freeze_mon(): - mons_to_freeze.append(mon) - self.log('monitors to freeze: {m}'.format(m=mons_to_freeze)) - - for mon in mons_to_kill: - self.log('thrashing mon.{m}'.format(m=mon)) - - """ we only thrash stores if we are maintaining quorum """ - if self.should_thrash_store() and self.maintain_quorum: - self.thrash_store(mon) - - self.kill_mon(mon) - - if mons_to_freeze: - for mon in mons_to_freeze: - self.freeze_mon(mon) - self.log('waiting for {delay} secs to unfreeze mons'.format( - delay=self.freeze_mon_duration)) - time.sleep(self.freeze_mon_duration) - for mon in mons_to_freeze: - self.unfreeze_mon(mon) - - if self.maintain_quorum: - self.manager.wait_for_mon_quorum_size(len(mons)-len(mons_to_kill)) - for m in mons: - if m in mons_to_kill: - continue - s = self.manager.get_mon_status(m) - assert s['state'] == 'leader' or s['state'] == 'peon' - assert len(s['quorum']) == len(mons)-len(mons_to_kill) - - self.log('waiting for {delay} secs before reviving monitors'.format( - delay=self.revive_delay)) - time.sleep(self.revive_delay) - - for mon in mons_to_kill: - self.revive_mon(mon) - # do more freezes - if mons_to_freeze: - for mon in mons_to_freeze: - self.freeze_mon(mon) - self.log('waiting for {delay} secs to unfreeze mons'.format( - delay=self.freeze_mon_duration)) - time.sleep(self.freeze_mon_duration) - for mon in mons_to_freeze: - self.unfreeze_mon(mon) - - self.manager.wait_for_mon_quorum_size(len(mons)) - for m in mons: - s = self.manager.get_mon_status(m) - assert s['state'] == 'leader' or s['state'] == 'peon' - assert len(s['quorum']) == len(mons) - - if self.scrub: - self.log('triggering scrub') - try: - self.manager.raw_cluster_cmd('scrub') - except Exception: - log.exception("Saw exception while triggering scrub") - - if self.thrash_delay > 0.0: - self.log('waiting for {delay} secs before continuing thrashing'.format( - delay=self.thrash_delay)) - time.sleep(self.thrash_delay) - -@contextlib.contextmanager -def task(ctx, config): - """ - Stress test the monitor by thrashing them while another task/workunit - is running. - - Please refer to MonitorThrasher class for further information on the - available options. - """ - if config is None: - config = {} - assert isinstance(config, dict), \ - 'mon_thrash task only accepts a dict for configuration' - assert len(_get_mons(ctx)) > 2, \ - 'mon_thrash task requires at least 3 monitors' - log.info('Beginning mon_thrash...') - first_mon = teuthology.get_first_mon(ctx, config) - (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() - manager = ceph_manager.CephManager( - mon, - ctx=ctx, - logger=log.getChild('ceph_manager'), - ) - thrash_proc = MonitorThrasher(ctx, - manager, config, - logger=log.getChild('mon_thrasher')) - try: - log.debug('Yielding') - yield - finally: - log.info('joining mon_thrasher') - thrash_proc.do_join() - mons = _get_mons(ctx) - manager.wait_for_mon_quorum_size(len(mons)) -- cgit 1.2.3-korg