path: root/src/ceph/qa/tasks/thrashosds.py
author    Qiaowei Ren <qiaowei.ren@intel.com>    2018-01-04 13:43:33 +0800
committer Qiaowei Ren <qiaowei.ren@intel.com>    2018-01-05 11:59:39 +0800
commit    812ff6ca9fcd3e629e49d4328905f33eee8ca3f5 (patch)
tree      04ece7b4da00d9d2f98093774594f4057ae561d4 /src/ceph/qa/tasks/thrashosds.py
parent    15280273faafb77777eab341909a3f495cf248d9 (diff)
initial code repo
This patch creates the initial code repo. For ceph, the luminous stable release will be used as the base code, and subsequent changes and optimizations for ceph will be added on top of it. For opensds, any changes can currently be upstreamed into the original opensds repo (https://github.com/opensds/opensds), so stor4nfv will directly clone the opensds code to deploy the stor4nfv environment. The scripts for deployment based on ceph and opensds will be put into the 'ci' directory.

Change-Id: I46a32218884c75dda2936337604ff03c554648e4
Signed-off-by: Qiaowei Ren <qiaowei.ren@intel.com>
Diffstat (limited to 'src/ceph/qa/tasks/thrashosds.py')
-rw-r--r--   src/ceph/qa/tasks/thrashosds.py   204
1 file changed, 204 insertions(+), 0 deletions(-)
diff --git a/src/ceph/qa/tasks/thrashosds.py b/src/ceph/qa/tasks/thrashosds.py
new file mode 100644
index 0000000..420b735
--- /dev/null
+++ b/src/ceph/qa/tasks/thrashosds.py
@@ -0,0 +1,204 @@
+"""
+Thrash -- Simulate random osd failures.
+"""
+import contextlib
+import logging
+import ceph_manager
+from teuthology import misc as teuthology
+
+
+log = logging.getLogger(__name__)
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ "Thrash" the OSDs by randomly marking them out/down (and then back
+ in) until the task is ended. This loops, and every op_delay
+ seconds it randomly chooses to add or remove an OSD (even odds)
+ unless there are fewer than min_out OSDs out of the cluster, or
+ more than min_in OSDs in the cluster.
+
+ All commands are run on mon0, and thrashing stops when __exit__ is called.
+
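+ A rough sketch of the decision loop (illustrative only -- the real logic
+ lives in ceph_manager.Thrasher, and the helper names below are invented
+ for the sketch):
+
+     while not stopping:
+         sleep(op_delay)
+         if len(out_osds) < min_out:
+             mark_an_osd_out()       # forced: too few OSDs are out
+         elif len(in_osds) <= min_in:
+             mark_an_osd_in()        # forced: too few OSDs are in
+         elif random() < 0.5:        # otherwise, even odds
+             mark_an_osd_out()
+         else:
+             mark_an_osd_in()
+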
+ The config is optional, and is a dict containing some or all of:
+
+ cluster: (default 'ceph') the name of the cluster to thrash
+
+ min_in: (default 4) the minimum number of OSDs to keep in the
+ cluster
+
+ min_out: (default 0) the minimum number of OSDs to keep out of the
+ cluster
+
+ op_delay: (5) the length of time to sleep between changing an
+ OSD's status
+
+ min_dead: (0) minimum number of osds to leave down/dead.
+
+ max_dead: (0) maximum number of osds to leave down/dead before waiting
+ for clean. This should probably be num_replicas - 1.
+
+ clean_interval: (60) the approximate length of time to loop before
+ waiting until the cluster goes clean. (In reality this is used
+ to probabilistically choose when to wait, and the method used
+ makes it closer to -- but not identical to -- the half-life.)
+
+ scrub_interval: (-1) the approximate length of time to loop before
+ waiting until a scrub is performed while cleaning. (In reality
+ this is used to probabilistically choose when to wait, and it
+ only applies to the cases where cleaning is being performed).
+ -1 is used to indicate that no scrubbing will be done.
+
+ chance_down: (0.4) the probability that the thrasher will mark an
+ OSD down rather than marking it out. (The thrasher will not
+ consider that OSD out of the cluster, since presently an OSD
+ wrongly marked down will mark itself back up again.) This value
+ can be either an integer percentage (e.g., 75) or a float probability
+ (e.g., 0.75).
+
+ chance_test_min_size: (0) chance to run test_pool_min_size,
+ which:
+ - kills all but one osd
+ - waits
+ - kills that osd
+ - revives all other osds
+ - verifies that the osds fully recover
+
+ timeout: (360) the number of seconds to wait for the cluster
+ to become clean after each cluster change. If this doesn't
+ happen within the timeout, an exception will be raised.
+
+ revive_timeout: (150) number of seconds to wait for an osd asok to
+ appear after attempting to revive the osd
+
+ thrash_primary_affinity: (true) randomly adjust primary-affinity
+
+ chance_pgnum_grow: (0) chance to increase a pool's pg_num
+ chance_pgpnum_fix: (0) chance to adjust a pool's pgp_num to match its pg_num
+ pool_grow_by: (10) amount to increase pg_num by
+ max_pgs_per_pool_osd: (1200) don't expand pools past this size per osd
+
+ pause_short: (3) duration of short pause
+ pause_long: (80) duration of long pause
+ pause_check_after: (50) assert that a paused osd is marked down after this long
+ chance_inject_pause_short: (1) chance of injecting short stall
+ chance_inject_pause_long: (0) chance of injecting long stall
+
+ clean_wait: (0) duration to wait before resuming thrashing once clean
+
+ sighup_delay: (0.1) duration to delay between sending signal.SIGHUP to a
+ random live osd
+
+ powercycle: (false) whether to power cycle the node instead
+ of just the osd process. Note that this assumes that a single
+ osd is the only important process on the node.
+
+ bdev_inject_crash: (0) seconds to delay while inducing a synthetic crash.
+ The delay lets the BlockDevice "accept" more aio operations while blocking
+ any flush, and then it eventually crashes (losing some or all I/Os). If 0,
+ no bdev failure injection is enabled.
+
+ bdev_inject_crash_probability: (.5) probability of doing a bdev failure
+ injection crash vs a normal OSD kill.
+
+ chance_test_backfill_full: (0) chance to simulate full disks stopping
+ backfill
+
+ chance_test_map_discontinuity: (0) chance to test map discontinuity
+ map_discontinuity_sleep_time: (40) time to wait for map trims
+
+ ceph_objectstore_tool: (true) whether to export/import a pg while an osd is down
+ chance_move_pg: (1.0) chance of moving a pg if more than 1 osd is down (default 100%)
+
+ optrack_toggle_delay: (2.0) duration to delay between toggling op tracker
+ enablement to all osds
+
+ dump_ops_enable: (true) continuously dump ops on all live osds
+
+ noscrub_toggle_delay: (2.0) duration to delay between toggling noscrub
+
+ disable_objectstore_tool_tests: (false) disable ceph_objectstore_tool based
+ tests
+
+ chance_thrash_cluster_full: .05
+
+ chance_thrash_pg_upmap: 1.0
+ chance_thrash_pg_upmap_items: 1.0
+
+ example:
+
+ tasks:
+ - ceph:
+ - thrashosds:
+ cluster: ceph
+ chance_down: 10
+ op_delay: 3
+ min_in: 1
+ timeout: 600
+ - interactive:
+ """
+ if config is None:
+ config = {}
+ assert isinstance(config, dict), \
+ 'thrashosds task only accepts a dict for configuration'
+ # add default value for sighup_delay
+ config['sighup_delay'] = config.get('sighup_delay', 0.1)
+ # add default value for optrack_toggle_delay
+ config['optrack_toggle_delay'] = config.get('optrack_toggle_delay', 2.0)
+ # add default value for dump_ops_enable
+ config['dump_ops_enable'] = config.get('dump_ops_enable', "true")
+ # add default value for noscrub_toggle_delay
+ config['noscrub_toggle_delay'] = config.get('noscrub_toggle_delay', 2.0)
+ # add default value for random_eio
+ config['random_eio'] = config.get('random_eio', 0.0)
+
+ log.info("config is {config}".format(config=str(config)))
+
+ overrides = ctx.config.get('overrides', {})
+ log.info("overrides is {overrides}".format(overrides=str(overrides)))
+ teuthology.deep_merge(config, overrides.get('thrashosds', {}))
+ cluster = config.get('cluster', 'ceph')
+
+ log.info("config is {config}".format(config=str(config)))
+
+ if 'powercycle' in config:
+
+ # sync everyone first to avoid collateral damage to / etc.
+ log.info('Doing preliminary sync to avoid collateral damage...')
+ ctx.cluster.run(args=['sync'])
+
+ if 'ipmi_user' in ctx.teuthology_config:
+ for remote in ctx.cluster.remotes.keys():
+ log.debug('checking console status of %s' % remote.shortname)
+ if not remote.console.check_status():
+ log.warn('Failed to get console status for %s',
+ remote.shortname)
+
+ # check that all osd remotes have a valid console
+ osds = ctx.cluster.only(teuthology.is_type('osd', cluster))
+ for remote in osds.remotes.keys():
+ if not remote.console.has_ipmi_credentials:
+ raise Exception(
+ 'IPMI console required for powercycling, '
+ 'but not available on osd role: {r}'.format(
+ r=remote.name))
+
+ cluster_manager = ctx.managers[cluster]
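+ # Hand the failure-injection settings to the CephManager so that its osd
+ # kill/revive helpers can tell whether an osd was powercycled or hit with
+ # a bdev crash rather than a plain process kill.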
+ for f in ['powercycle', 'bdev_inject_crash']:
+ if config.get(f):
+ cluster_manager.config[f] = config.get(f)
+
+ log.info('Beginning thrashosds...')
+ thrash_proc = ceph_manager.Thrasher(
+ cluster_manager,
+ config,
+ logger=log.getChild('thrasher')
+ )
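+ # Thrashing runs in the background while any nested tasks (e.g. a
+ # workload) execute; the finally block joins the thrasher and then waits
+ # for the cluster to become healthy again before the task exits.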
+ try:
+ yield
+ finally:
+ log.info('joining thrashosds')
+ thrash_proc.do_join()
+ cluster_manager.wait_for_all_osds_up()
+ cluster_manager.flush_all_pg_stats()
+ cluster_manager.wait_for_recovery(config.get('timeout', 360))