summaryrefslogtreecommitdiffstats
path: root/src/ceph/qa/tasks/cephfs/test_damage.py
diff options
context:
space:
mode:
authorQiaowei Ren <qiaowei.ren@intel.com>2018-01-04 13:43:33 +0800
committerQiaowei Ren <qiaowei.ren@intel.com>2018-01-05 11:59:39 +0800
commit812ff6ca9fcd3e629e49d4328905f33eee8ca3f5 (patch)
tree04ece7b4da00d9d2f98093774594f4057ae561d4 /src/ceph/qa/tasks/cephfs/test_damage.py
parent15280273faafb77777eab341909a3f495cf248d9 (diff)
initial code repo
This patch creates initial code repo. For ceph, luminous stable release will be used for base code, and next changes and optimization for ceph will be added to it. For opensds, currently any changes can be upstreamed into original opensds repo (https://github.com/opensds/opensds), and so stor4nfv will directly clone opensds code to deploy stor4nfv environment. And the scripts for deployment based on ceph and opensds will be put into 'ci' directory. Change-Id: I46a32218884c75dda2936337604ff03c554648e4 Signed-off-by: Qiaowei Ren <qiaowei.ren@intel.com>
Diffstat (limited to 'src/ceph/qa/tasks/cephfs/test_damage.py')
-rw-r--r--src/ceph/qa/tasks/cephfs/test_damage.py548
1 files changed, 548 insertions, 0 deletions
diff --git a/src/ceph/qa/tasks/cephfs/test_damage.py b/src/ceph/qa/tasks/cephfs/test_damage.py
new file mode 100644
index 0000000..380b49c
--- /dev/null
+++ b/src/ceph/qa/tasks/cephfs/test_damage.py
@@ -0,0 +1,548 @@
+import json
+import logging
+import errno
+import re
+from teuthology.contextutil import MaxWhileTries
+from teuthology.exceptions import CommandFailedError
+from teuthology.orchestra.run import wait
+from tasks.cephfs.fuse_mount import FuseMount
+from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology
+
+DAMAGED_ON_START = "damaged_on_start"
+DAMAGED_ON_LS = "damaged_on_ls"
+CRASHED = "server crashed"
+NO_DAMAGE = "no damage"
+FAILED_CLIENT = "client failed"
+FAILED_SERVER = "server failed"
+
+# An EIO in response to a stat from the client
+EIO_ON_LS = "eio"
+
+# An EIO, but nothing in damage table (not ever what we expect)
+EIO_NO_DAMAGE = "eio without damage entry"
+
+
+log = logging.getLogger(__name__)
+
+
+class TestDamage(CephFSTestCase):
+ def _simple_workload_write(self):
+ self.mount_a.run_shell(["mkdir", "subdir"])
+ self.mount_a.write_n_mb("subdir/sixmegs", 6)
+ return self.mount_a.stat("subdir/sixmegs")
+
+ def is_marked_damaged(self, rank):
+ mds_map = self.fs.get_mds_map()
+ return rank in mds_map['damaged']
+
+ @for_teuthology #459s
+ def test_object_deletion(self):
+ """
+ That the MDS has a clean 'damaged' response to loss of any single metadata object
+ """
+
+ self._simple_workload_write()
+
+ # Hmm, actually it would be nice to permute whether the metadata pool
+ # state contains sessions or not, but for the moment close this session
+ # to avoid waiting through reconnect on every MDS start.
+ self.mount_a.umount_wait()
+ for mds_name in self.fs.get_active_names():
+ self.fs.mds_asok(["flush", "journal"], mds_name)
+
+ self.fs.mds_stop()
+ self.fs.mds_fail()
+
+ self.fs.rados(['export', '/tmp/metadata.bin'])
+
+ def is_ignored(obj_id, dentry=None):
+ """
+ A filter to avoid redundantly mutating many similar objects (e.g.
+ stray dirfrags) or similar dentries (e.g. stray dir dentries)
+ """
+ if re.match("60.\.00000000", obj_id) and obj_id != "600.00000000":
+ return True
+
+ if dentry and obj_id == "100.00000000":
+ if re.match("stray.+_head", dentry) and dentry != "stray0_head":
+ return True
+
+ return False
+
+ def get_path(obj_id, dentry=None):
+ """
+ What filesystem path does this object or dentry correspond to? i.e.
+ what should I poke to see EIO after damaging it?
+ """
+
+ if obj_id == "1.00000000" and dentry == "subdir_head":
+ return "./subdir"
+ elif obj_id == "10000000000.00000000" and dentry == "sixmegs_head":
+ return "./subdir/sixmegs"
+
+ # None means ls will do an "ls -R" in hope of seeing some errors
+ return None
+
+ objects = self.fs.rados(["ls"]).split("\n")
+ objects = [o for o in objects if not is_ignored(o)]
+
+ # Find all objects with an OMAP header
+ omap_header_objs = []
+ for o in objects:
+ header = self.fs.rados(["getomapheader", o])
+ # The rados CLI wraps the header output in a hex-printed style
+ header_bytes = int(re.match("header \((.+) bytes\)", header).group(1))
+ if header_bytes > 0:
+ omap_header_objs.append(o)
+
+ # Find all OMAP key/vals
+ omap_keys = []
+ for o in objects:
+ keys_str = self.fs.rados(["listomapkeys", o])
+ if keys_str:
+ for key in keys_str.split("\n"):
+ if not is_ignored(o, key):
+ omap_keys.append((o, key))
+
+ # Find objects that have data in their bodies
+ data_objects = []
+ for obj_id in objects:
+ stat_out = self.fs.rados(["stat", obj_id])
+ size = int(re.match(".+, size (.+)$", stat_out).group(1))
+ if size > 0:
+ data_objects.append(obj_id)
+
+ # Define the various forms of damage we will inflict
+ class MetadataMutation(object):
+ def __init__(self, obj_id_, desc_, mutate_fn_, expectation_, ls_path=None):
+ self.obj_id = obj_id_
+ self.desc = desc_
+ self.mutate_fn = mutate_fn_
+ self.expectation = expectation_
+ if ls_path is None:
+ self.ls_path = "."
+ else:
+ self.ls_path = ls_path
+
+ def __eq__(self, other):
+ return self.desc == other.desc
+
+ def __hash__(self):
+ return hash(self.desc)
+
+ junk = "deadbeef" * 10
+ mutations = []
+
+ # Removals
+ for obj_id in objects:
+ if obj_id in [
+ # JournalPointers are auto-replaced if missing (same path as upgrade)
+ "400.00000000",
+ # Missing dirfrags for non-system dirs result in empty directory
+ "10000000000.00000000",
+ # PurgeQueue is auto-created if not found on startup
+ "500.00000000"
+ ]:
+ expectation = NO_DAMAGE
+ else:
+ expectation = DAMAGED_ON_START
+
+ log.info("Expectation on rm '{0}' will be '{1}'".format(
+ obj_id, expectation
+ ))
+
+ mutations.append(MetadataMutation(
+ obj_id,
+ "Delete {0}".format(obj_id),
+ lambda o=obj_id: self.fs.rados(["rm", o]),
+ expectation
+ ))
+
+ # Blatant corruptions
+ mutations.extend([
+ MetadataMutation(
+ o,
+ "Corrupt {0}".format(o),
+ lambda o=o: self.fs.rados(["put", o, "-"], stdin_data=junk),
+ DAMAGED_ON_START
+ ) for o in data_objects
+ ])
+
+ # Truncations
+ for obj_id in data_objects:
+ if obj_id == "500.00000000":
+ # The PurgeQueue is allowed to be empty: Journaler interprets
+ # an empty header object as an empty journal.
+ expectation = NO_DAMAGE
+ else:
+ expectation = DAMAGED_ON_START
+
+ mutations.append(
+ MetadataMutation(
+ o,
+ "Truncate {0}".format(o),
+ lambda o=o: self.fs.rados(["truncate", o, "0"]),
+ DAMAGED_ON_START
+ ))
+
+ # OMAP value corruptions
+ for o, k in omap_keys:
+ if o.startswith("100."):
+ # Anything in rank 0's 'mydir'
+ expectation = DAMAGED_ON_START
+ else:
+ expectation = EIO_ON_LS
+
+ mutations.append(
+ MetadataMutation(
+ o,
+ "Corrupt omap key {0}:{1}".format(o, k),
+ lambda o=o,k=k: self.fs.rados(["setomapval", o, k, junk]),
+ expectation,
+ get_path(o, k)
+ )
+ )
+
+ # OMAP header corruptions
+ for obj_id in omap_header_objs:
+ if re.match("60.\.00000000", obj_id) \
+ or obj_id in ["1.00000000", "100.00000000", "mds0_sessionmap"]:
+ expectation = DAMAGED_ON_START
+ else:
+ expectation = NO_DAMAGE
+
+ log.info("Expectation on corrupt header '{0}' will be '{1}'".format(
+ obj_id, expectation
+ ))
+
+ mutations.append(
+ MetadataMutation(
+ obj_id,
+ "Corrupt omap header on {0}".format(obj_id),
+ lambda o=obj_id: self.fs.rados(["setomapheader", o, junk]),
+ expectation
+ )
+ )
+
+ results = {}
+
+ for mutation in mutations:
+ log.info("Applying mutation '{0}'".format(mutation.desc))
+
+ # Reset MDS state
+ self.mount_a.umount_wait(force=True)
+ self.fs.mds_stop()
+ self.fs.mds_fail()
+ self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0')
+
+ # Reset RADOS pool state
+ self.fs.rados(['import', '/tmp/metadata.bin'])
+
+ # Inject the mutation
+ mutation.mutate_fn()
+
+ # Try starting the MDS
+ self.fs.mds_restart()
+
+ # How long we'll wait between starting a daemon and expecting
+ # it to make it through startup, and potentially declare itself
+ # damaged to the mon cluster.
+ startup_timeout = 60
+
+ if mutation.expectation not in (EIO_ON_LS, DAMAGED_ON_LS, NO_DAMAGE):
+ if mutation.expectation == DAMAGED_ON_START:
+ # The MDS may pass through active before making it to damaged
+ try:
+ self.wait_until_true(lambda: self.is_marked_damaged(0), startup_timeout)
+ except RuntimeError:
+ pass
+
+ # Wait for MDS to either come up or go into damaged state
+ try:
+ self.wait_until_true(lambda: self.is_marked_damaged(0) or self.fs.are_daemons_healthy(), startup_timeout)
+ except RuntimeError:
+ crashed = False
+ # Didn't make it to healthy or damaged, did it crash?
+ for daemon_id, daemon in self.fs.mds_daemons.items():
+ if daemon.proc and daemon.proc.finished:
+ crashed = True
+ log.error("Daemon {0} crashed!".format(daemon_id))
+ daemon.proc = None # So that subsequent stop() doesn't raise error
+ if not crashed:
+ # Didn't go health, didn't go damaged, didn't crash, so what?
+ raise
+ else:
+ log.info("Result: Mutation '{0}' led to crash".format(mutation.desc))
+ results[mutation] = CRASHED
+ continue
+ if self.is_marked_damaged(0):
+ log.info("Result: Mutation '{0}' led to DAMAGED state".format(mutation.desc))
+ results[mutation] = DAMAGED_ON_START
+ continue
+ else:
+ log.info("Mutation '{0}' did not prevent MDS startup, attempting ls...".format(mutation.desc))
+ else:
+ try:
+ self.wait_until_true(self.fs.are_daemons_healthy, 60)
+ except RuntimeError:
+ log.info("Result: Mutation '{0}' should have left us healthy, actually not.".format(mutation.desc))
+ if self.is_marked_damaged(0):
+ results[mutation] = DAMAGED_ON_START
+ else:
+ results[mutation] = FAILED_SERVER
+ continue
+ log.info("Daemons came up after mutation '{0}', proceeding to ls".format(mutation.desc))
+
+ # MDS is up, should go damaged on ls or client mount
+ self.mount_a.mount()
+ self.mount_a.wait_until_mounted()
+ if mutation.ls_path == ".":
+ proc = self.mount_a.run_shell(["ls", "-R", mutation.ls_path], wait=False)
+ else:
+ proc = self.mount_a.stat(mutation.ls_path, wait=False)
+
+ if mutation.expectation == DAMAGED_ON_LS:
+ try:
+ self.wait_until_true(lambda: self.is_marked_damaged(0), 60)
+ log.info("Result: Mutation '{0}' led to DAMAGED state after ls".format(mutation.desc))
+ results[mutation] = DAMAGED_ON_LS
+ except RuntimeError:
+ if self.fs.are_daemons_healthy():
+ log.error("Result: Failed to go damaged on mutation '{0}', actually went active".format(
+ mutation.desc))
+ results[mutation] = NO_DAMAGE
+ else:
+ log.error("Result: Failed to go damaged on mutation '{0}'".format(mutation.desc))
+ results[mutation] = FAILED_SERVER
+
+ else:
+ try:
+ wait([proc], 20)
+ log.info("Result: Mutation '{0}' did not caused DAMAGED state".format(mutation.desc))
+ results[mutation] = NO_DAMAGE
+ except MaxWhileTries:
+ log.info("Result: Failed to complete client IO on mutation '{0}'".format(mutation.desc))
+ results[mutation] = FAILED_CLIENT
+ except CommandFailedError as e:
+ if e.exitstatus == errno.EIO:
+ log.info("Result: EIO on client")
+ results[mutation] = EIO_ON_LS
+ else:
+ log.info("Result: unexpected error {0} on client".format(e))
+ results[mutation] = FAILED_CLIENT
+
+ if mutation.expectation == EIO_ON_LS:
+ # EIOs mean something handled by DamageTable: assert that it has
+ # been populated
+ damage = json.loads(
+ self.fs.mon_manager.raw_cluster_cmd(
+ 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), "damage", "ls", '--format=json-pretty'))
+ if len(damage) == 0:
+ results[mutation] = EIO_NO_DAMAGE
+
+ failures = [(mutation, result) for (mutation, result) in results.items() if mutation.expectation != result]
+ if failures:
+ log.error("{0} mutations had unexpected outcomes:".format(len(failures)))
+ for mutation, result in failures:
+ log.error(" Expected '{0}' actually '{1}' from '{2}'".format(
+ mutation.expectation, result, mutation.desc
+ ))
+ raise RuntimeError("{0} mutations had unexpected outcomes".format(len(failures)))
+ else:
+ log.info("All {0} mutations had expected outcomes".format(len(mutations)))
+
+ def test_damaged_dentry(self):
+ # Damage to dentrys is interesting because it leaves the
+ # directory's `complete` flag in a subtle state where
+ # we have marked the dir complete in order that folks
+ # can access it, but in actual fact there is a dentry
+ # missing
+ self.mount_a.run_shell(["mkdir", "subdir/"])
+
+ self.mount_a.run_shell(["touch", "subdir/file_undamaged"])
+ self.mount_a.run_shell(["touch", "subdir/file_to_be_damaged"])
+
+ subdir_ino = self.mount_a.path_to_ino("subdir")
+
+ self.mount_a.umount_wait()
+ for mds_name in self.fs.get_active_names():
+ self.fs.mds_asok(["flush", "journal"], mds_name)
+
+ self.fs.mds_stop()
+ self.fs.mds_fail()
+
+ # Corrupt a dentry
+ junk = "deadbeef" * 10
+ dirfrag_obj = "{0:x}.00000000".format(subdir_ino)
+ self.fs.rados(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk])
+
+ # Start up and try to list it
+ self.fs.mds_restart()
+ self.fs.wait_for_daemons()
+
+ self.mount_a.mount()
+ self.mount_a.wait_until_mounted()
+ dentries = self.mount_a.ls("subdir/")
+
+ # The damaged guy should have disappeared
+ self.assertEqual(dentries, ["file_undamaged"])
+
+ # I should get ENOENT if I try and read it normally, because
+ # the dir is considered complete
+ try:
+ self.mount_a.stat("subdir/file_to_be_damaged", wait=True)
+ except CommandFailedError as e:
+ self.assertEqual(e.exitstatus, errno.ENOENT)
+ else:
+ raise AssertionError("Expected ENOENT")
+
+ # The fact that there is damaged should have bee recorded
+ damage = json.loads(
+ self.fs.mon_manager.raw_cluster_cmd(
+ 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
+ "damage", "ls", '--format=json-pretty'))
+ self.assertEqual(len(damage), 1)
+ damage_id = damage[0]['id']
+
+ # If I try to create a dentry with the same name as the damaged guy
+ # then that should be forbidden
+ try:
+ self.mount_a.touch("subdir/file_to_be_damaged")
+ except CommandFailedError as e:
+ self.assertEqual(e.exitstatus, errno.EIO)
+ else:
+ raise AssertionError("Expected EIO")
+
+ # Attempting that touch will clear the client's complete flag, now
+ # when I stat it I'll get EIO instead of ENOENT
+ try:
+ self.mount_a.stat("subdir/file_to_be_damaged", wait=True)
+ except CommandFailedError as e:
+ if isinstance(self.mount_a, FuseMount):
+ self.assertEqual(e.exitstatus, errno.EIO)
+ else:
+ # Kernel client handles this case differently
+ self.assertEqual(e.exitstatus, errno.ENOENT)
+ else:
+ raise AssertionError("Expected EIO")
+
+ nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files")
+ self.assertEqual(nfiles, "2")
+
+ self.mount_a.umount_wait()
+
+ # Now repair the stats
+ scrub_json = self.fs.mds_asok(["scrub_path", "/subdir", "repair"])
+ log.info(json.dumps(scrub_json, indent=2))
+
+ self.assertEqual(scrub_json["passed_validation"], False)
+ self.assertEqual(scrub_json["raw_stats"]["checked"], True)
+ self.assertEqual(scrub_json["raw_stats"]["passed"], False)
+
+ # Check that the file count is now correct
+ self.mount_a.mount()
+ self.mount_a.wait_until_mounted()
+ nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files")
+ self.assertEqual(nfiles, "1")
+
+ # Clean up the omap object
+ self.fs.rados(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk])
+
+ # Clean up the damagetable entry
+ self.fs.mon_manager.raw_cluster_cmd(
+ 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
+ "damage", "rm", "{did}".format(did=damage_id))
+
+ # Now I should be able to create a file with the same name as the
+ # damaged guy if I want.
+ self.mount_a.touch("subdir/file_to_be_damaged")
+
+ def test_open_ino_errors(self):
+ """
+ That errors encountered during opening inos are properly propagated
+ """
+
+ self.mount_a.run_shell(["mkdir", "dir1"])
+ self.mount_a.run_shell(["touch", "dir1/file1"])
+ self.mount_a.run_shell(["mkdir", "dir2"])
+ self.mount_a.run_shell(["touch", "dir2/file2"])
+ self.mount_a.run_shell(["mkdir", "testdir"])
+ self.mount_a.run_shell(["ln", "dir1/file1", "testdir/hardlink1"])
+ self.mount_a.run_shell(["ln", "dir2/file2", "testdir/hardlink2"])
+
+ file1_ino = self.mount_a.path_to_ino("dir1/file1")
+ file2_ino = self.mount_a.path_to_ino("dir2/file2")
+ dir2_ino = self.mount_a.path_to_ino("dir2")
+
+ # Ensure everything is written to backing store
+ self.mount_a.umount_wait()
+ self.fs.mds_asok(["flush", "journal"])
+
+ # Drop everything from the MDS cache
+ self.mds_cluster.mds_stop()
+ self.fs.journal_tool(['journal', 'reset'])
+ self.mds_cluster.mds_fail_restart()
+ self.fs.wait_for_daemons()
+
+ self.mount_a.mount()
+
+ # Case 1: un-decodeable backtrace
+
+ # Validate that the backtrace is present and decodable
+ self.fs.read_backtrace(file1_ino)
+ # Go corrupt the backtrace of alpha/target (used for resolving
+ # bravo/hardlink).
+ self.fs._write_data_xattr(file1_ino, "parent", "rhubarb")
+
+ # Check that touching the hardlink gives EIO
+ ran = self.mount_a.run_shell(["stat", "testdir/hardlink1"], wait=False)
+ try:
+ ran.wait()
+ except CommandFailedError:
+ self.assertTrue("Input/output error" in ran.stderr.getvalue())
+
+ # Check that an entry is created in the damage table
+ damage = json.loads(
+ self.fs.mon_manager.raw_cluster_cmd(
+ 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
+ "damage", "ls", '--format=json-pretty'))
+ self.assertEqual(len(damage), 1)
+ self.assertEqual(damage[0]['damage_type'], "backtrace")
+ self.assertEqual(damage[0]['ino'], file1_ino)
+
+ self.fs.mon_manager.raw_cluster_cmd(
+ 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
+ "damage", "rm", str(damage[0]['id']))
+
+
+ # Case 2: missing dirfrag for the target inode
+
+ self.fs.rados(["rm", "{0:x}.00000000".format(dir2_ino)])
+
+ # Check that touching the hardlink gives EIO
+ ran = self.mount_a.run_shell(["stat", "testdir/hardlink2"], wait=False)
+ try:
+ ran.wait()
+ except CommandFailedError:
+ self.assertTrue("Input/output error" in ran.stderr.getvalue())
+
+ # Check that an entry is created in the damage table
+ damage = json.loads(
+ self.fs.mon_manager.raw_cluster_cmd(
+ 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
+ "damage", "ls", '--format=json-pretty'))
+ self.assertEqual(len(damage), 2)
+ if damage[0]['damage_type'] == "backtrace" :
+ self.assertEqual(damage[0]['ino'], file2_ino)
+ self.assertEqual(damage[1]['damage_type'], "dir_frag")
+ self.assertEqual(damage[1]['ino'], dir2_ino)
+ else:
+ self.assertEqual(damage[0]['damage_type'], "dir_frag")
+ self.assertEqual(damage[0]['ino'], dir2_ino)
+ self.assertEqual(damage[1]['damage_type'], "backtrace")
+ self.assertEqual(damage[1]['ino'], file2_ino)
+
+ for entry in damage:
+ self.fs.mon_manager.raw_cluster_cmd(
+ 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
+ "damage", "rm", str(entry['id']))