Diffstat (limited to 'src/ceph/qa/tasks/cephfs/test_journal_repair.py')
-rw-r--r-- | src/ceph/qa/tasks/cephfs/test_journal_repair.py | 443 |
1 file changed, 0 insertions, 443 deletions
diff --git a/src/ceph/qa/tasks/cephfs/test_journal_repair.py b/src/ceph/qa/tasks/cephfs/test_journal_repair.py
deleted file mode 100644
index 62cbbb0..0000000
--- a/src/ceph/qa/tasks/cephfs/test_journal_repair.py
+++ /dev/null
@@ -1,443 +0,0 @@
-
-"""
-Test our tools for recovering the content of damaged journals
-"""
-
-import json
-import logging
-from textwrap import dedent
-import time
-
-from teuthology.exceptions import CommandFailedError, ConnectionLostError
-from tasks.cephfs.filesystem import ObjectNotFound, ROOT_INO
-from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology
-from tasks.workunit import task as workunit
-
-log = logging.getLogger(__name__)
-
-
-class TestJournalRepair(CephFSTestCase):
-    MDSS_REQUIRED = 2
-
-    def test_inject_to_empty(self):
-        """
-        That when some dentries are in the journal but nothing is in
-        the backing store, we correctly populate the backing store
-        from the journalled dentries.
-        """
-
-        # Inject metadata operations
-        self.mount_a.run_shell(["touch", "rootfile"])
-        self.mount_a.run_shell(["mkdir", "subdir"])
-        self.mount_a.run_shell(["touch", "subdir/subdirfile"])
-        # There are several different paths for handling hardlinks, depending
-        # on whether an existing dentry (being overwritten) is also a hardlink
-        self.mount_a.run_shell(["mkdir", "linkdir"])
-
-        # Test inode -> remote transition for a dentry
-        self.mount_a.run_shell(["touch", "linkdir/link0"])
-        self.mount_a.run_shell(["rm", "-f", "linkdir/link0"])
-        self.mount_a.run_shell(["ln", "subdir/subdirfile", "linkdir/link0"])
-
-        # Test nothing -> remote transition
-        self.mount_a.run_shell(["ln", "subdir/subdirfile", "linkdir/link1"])
-
-        # Test remote -> inode transition
-        self.mount_a.run_shell(["ln", "subdir/subdirfile", "linkdir/link2"])
-        self.mount_a.run_shell(["rm", "-f", "linkdir/link2"])
-        self.mount_a.run_shell(["touch", "linkdir/link2"])
-
-        # Test remote -> different remote transition
-        self.mount_a.run_shell(["ln", "subdir/subdirfile", "linkdir/link3"])
-        self.mount_a.run_shell(["rm", "-f", "linkdir/link3"])
-        self.mount_a.run_shell(["ln", "rootfile", "linkdir/link3"])
-
-        # Test an empty directory
-        self.mount_a.run_shell(["mkdir", "subdir/subsubdir"])
-        self.mount_a.run_shell(["sync"])
-
-        # Before we unmount, make a note of the inode numbers; later we will
-        # check that they match what we recover from the journal
-        rootfile_ino = self.mount_a.path_to_ino("rootfile")
-        subdir_ino = self.mount_a.path_to_ino("subdir")
-        linkdir_ino = self.mount_a.path_to_ino("linkdir")
-        subdirfile_ino = self.mount_a.path_to_ino("subdir/subdirfile")
-        subsubdir_ino = self.mount_a.path_to_ino("subdir/subsubdir")
-
-        self.mount_a.umount_wait()
-
-        # Stop the MDS
-        self.fs.mds_stop()
-        self.fs.mds_fail()
-
-        # Now, the journal should contain the operations, but the backing
-        # store shouldn't
-        with self.assertRaises(ObjectNotFound):
-            self.fs.list_dirfrag(subdir_ino)
-        self.assertEqual(self.fs.list_dirfrag(ROOT_INO), [])
-
-        # Execute the dentry recovery; this should populate the backing store
-        self.fs.journal_tool(['event', 'recover_dentries', 'list'])
-
-        # Dentries in ROOT_INO are present
-        self.assertEqual(sorted(self.fs.list_dirfrag(ROOT_INO)),
-                         sorted(['rootfile_head', 'subdir_head', 'linkdir_head']))
-        self.assertEqual(self.fs.list_dirfrag(subdir_ino),
-                         ['subdirfile_head', 'subsubdir_head'])
-        self.assertEqual(sorted(self.fs.list_dirfrag(linkdir_ino)),
-                         sorted(['link0_head', 'link1_head', 'link2_head', 'link3_head']))
-
-        # Now check the MDS can read what we wrote: truncate the journal
-        # and start the mds.
-        self.fs.journal_tool(['journal', 'reset'])
-        self.fs.mds_fail_restart()
-        self.fs.wait_for_daemons()
-
-        # List files
-        self.mount_a.mount()
-        self.mount_a.wait_until_mounted()
-
-        # First ls -R to populate MDCache, such that hardlinks will
-        # resolve properly (recover_dentries does not create backtraces,
-        # so ordinarily hardlinks to inodes that happen not to have backtraces
-        # will be invisible in readdir).
-        # FIXME: hook in forward scrub here to regenerate backtraces
-        proc = self.mount_a.run_shell(['ls', '-R'])
-        self.mount_a.umount_wait()  # remount to clear client cache before our second ls
-        self.mount_a.mount()
-        self.mount_a.wait_until_mounted()
-
-        proc = self.mount_a.run_shell(['ls', '-R'])
-        self.assertEqual(proc.stdout.getvalue().strip(),
-                         dedent("""
-                         .:
-                         linkdir
-                         rootfile
-                         subdir
-
-                         ./linkdir:
-                         link0
-                         link1
-                         link2
-                         link3
-
-                         ./subdir:
-                         subdirfile
-                         subsubdir
-
-                         ./subdir/subsubdir:
-                         """).strip())
-
-        # Check the correct inos were preserved by path
-        self.assertEqual(rootfile_ino, self.mount_a.path_to_ino("rootfile"))
-        self.assertEqual(subdir_ino, self.mount_a.path_to_ino("subdir"))
-        self.assertEqual(subdirfile_ino, self.mount_a.path_to_ino("subdir/subdirfile"))
-        self.assertEqual(subsubdir_ino, self.mount_a.path_to_ino("subdir/subsubdir"))
-
-        # Check that the hard link handling came out correctly
-        self.assertEqual(self.mount_a.path_to_ino("linkdir/link0"), subdirfile_ino)
-        self.assertEqual(self.mount_a.path_to_ino("linkdir/link1"), subdirfile_ino)
-        self.assertNotEqual(self.mount_a.path_to_ino("linkdir/link2"), subdirfile_ino)
-        self.assertEqual(self.mount_a.path_to_ino("linkdir/link3"), rootfile_ino)
-
-        # Create a new file, ensure it is not issued the same ino as one of the
-        # recovered ones
-        self.mount_a.run_shell(["touch", "afterwards"])
-        new_ino = self.mount_a.path_to_ino("afterwards")
-        self.assertNotIn(new_ino, [rootfile_ino, subdir_ino, subdirfile_ino])
-
-        # Check that we can do metadata ops in the recovered directory
-        self.mount_a.run_shell(["touch", "subdir/subsubdir/subsubdirfile"])
-
-    @for_teuthology  # 308s
-    def test_reset(self):
-        """
-        That after forcibly modifying the backing store, we can get back into
-        a good state by resetting the MDSMap.
-
-        The scenario is that we have two active MDSs, and we lose the journals.  Once
-        we have completely lost confidence in the integrity of the metadata, we want to
-        return the system to a single-MDS state and then scrub to recover what we
-        can.
-        """
-
-        # Set max_mds to 2
-        self.fs.set_max_mds(2)
-
-        # See that we have two active MDSs
-        self.wait_until_equal(lambda: len(self.fs.get_active_names()), 2, 30,
-                              reject_fn=lambda v: v > 2 or v < 1)
-        active_mds_names = self.fs.get_active_names()
-
-        # Switch off any unneeded MDS daemons
-        for unneeded_mds in set(self.mds_cluster.mds_ids) - set(active_mds_names):
-            self.mds_cluster.mds_stop(unneeded_mds)
-            self.mds_cluster.mds_fail(unneeded_mds)
-
-        # Create a dir on each rank
-        self.mount_a.run_shell(["mkdir", "alpha"])
-        self.mount_a.run_shell(["mkdir", "bravo"])
-        self.mount_a.setfattr("alpha/", "ceph.dir.pin", "0")
-        self.mount_a.setfattr("bravo/", "ceph.dir.pin", "1")
-
-        def subtrees_assigned():
-            got_subtrees = self.fs.mds_asok(["get", "subtrees"], mds_id=active_mds_names[0])
-
-            for s in got_subtrees:
-                if s['dir']['path'] == '/bravo':
-                    if s['auth_first'] == 1:
-                        return True
-                    else:
-                        # Should not happen
-                        raise RuntimeError("/bravo is a subtree but not on rank 1!")
-
-            return False
-
-        # Ensure the pinning has taken effect and the /bravo dir is now
-        # migrated to rank 1.
-        self.wait_until_true(subtrees_assigned, 30)
-
-        # Do some IO (this should be split across ranks according to
-        # the rank-pinned dirs)
-        self.mount_a.create_n_files("alpha/file", 1000)
-        self.mount_a.create_n_files("bravo/file", 1000)
-
-        # Flush the journals so that we have some backing store data
-        # belonging to one MDS, and some to the other MDS.
-        for mds_name in active_mds_names:
-            self.fs.mds_asok(["flush", "journal"], mds_name)
-
-        # Stop (hard) the second MDS daemon
-        self.fs.mds_stop(active_mds_names[1])
-
-        # Wipe out the tables for MDS rank 1 so that it is broken and can't start
-        # (this is the simulated failure that we will demonstrate the disaster
-        # recovery tools can get us back from)
-        self.fs.erase_metadata_objects(prefix="mds1_")
-
-        # Try to access files from the client
-        blocked_ls = self.mount_a.run_shell(["ls", "-R"], wait=False)
-
-        # Check that this "ls -R" blocked rather than completing: this indicates
-        # it got stuck trying to access subtrees which were on the now-dead MDS.
-        log.info("Sleeping to check ls is blocked...")
-        time.sleep(60)
-        self.assertFalse(blocked_ls.finished)
-
-        # This mount is now useless because it will depend on MDS rank 1, and MDS rank 1
-        # is not coming back.  Kill it.
-        log.info("Killing mount, it's blocked on the MDS we killed")
-        self.mount_a.kill()
-        self.mount_a.kill_cleanup()
-        try:
-            # Now that the mount is dead, the ls -R should error out.
-            blocked_ls.wait()
-        except (CommandFailedError, ConnectionLostError):
-            # The ConnectionLostError case is for the kernel client, where
-            # killing the mount also means killing the node.
-            pass
-
-        # See that the second MDS will crash when it starts and tries to
-        # acquire rank 1
-        damaged_id = active_mds_names[1]
-        self.fs.mds_restart(damaged_id)
-
-        # The daemon taking the damaged rank should start up, then
-        # restart back into standby after asking the mon to mark the rank
-        # damaged.
-        def is_marked_damaged():
-            mds_map = self.fs.get_mds_map()
-            return 1 in mds_map['damaged']
-
-        self.wait_until_true(is_marked_damaged, 60)
-
-        def get_state():
-            info = self.mds_cluster.get_mds_info(damaged_id)
-            return info['state'] if info is not None else None
-
-        self.wait_until_equal(
-            get_state,
-            "up:standby",
-            timeout=60)
-
-        self.fs.mds_stop(damaged_id)
-        self.fs.mds_fail(damaged_id)
-
-        # Now give up and go through a disaster recovery procedure
-        self.fs.mds_stop(active_mds_names[0])
-        self.fs.mds_fail(active_mds_names[0])
-        # Invoke recover_dentries quietly, because otherwise the log spews millions of lines
-        self.fs.journal_tool(["event", "recover_dentries", "summary"], rank=0, quiet=True)
-        self.fs.journal_tool(["event", "recover_dentries", "summary"], rank=1, quiet=True)
-        self.fs.table_tool(["0", "reset", "session"])
-        self.fs.journal_tool(["journal", "reset"], rank=0)
-        self.fs.erase_mds_objects(1)
-        self.fs.mon_manager.raw_cluster_cmd('fs', 'reset', self.fs.name,
-                                            '--yes-i-really-mean-it')
-
-        # Bring an MDS back online, mount a client, and see that we can walk the full
-        # filesystem tree again
-        self.fs.mds_fail_restart(active_mds_names[0])
-        self.wait_until_equal(lambda: self.fs.get_active_names(), [active_mds_names[0]], 30,
-                              reject_fn=lambda v: len(v) > 1)
-        self.mount_a.mount()
-        self.mount_a.run_shell(["ls", "-R"], wait=True)
-
-    def test_table_tool(self):
-        active_mdss = self.fs.get_active_names()
-        self.assertEqual(len(active_mdss), 1)
-        mds_name = active_mdss[0]
-
-        self.mount_a.run_shell(["touch", "foo"])
-        self.fs.mds_asok(["flush", "journal"], mds_name)
-
-        log.info(self.fs.table_tool(["all", "show", "inode"]))
-        log.info(self.fs.table_tool(["all", "show", "snap"]))
-        log.info(self.fs.table_tool(["all", "show", "session"]))
-
-        # Inode table should always be the same because initial state
-        # and choice of inode are deterministic.
-        # Should see one inode consumed
-        self.assertEqual(
-            json.loads(self.fs.table_tool(["all", "show", "inode"])),
-            {"0": {
-                "data": {
-                    "version": 2,
-                    "inotable": {
-                        "projected_free": [
-                            {"start": 1099511628777,
-                             "len": 1099511626775}],
-                        "free": [
-                            {"start": 1099511628777,
-                             "len": 1099511626775}]}},
-                "result": 0}}
-        )
-
-        # Should see one session
-        session_data = json.loads(self.fs.table_tool(
-            ["all", "show", "session"]))
-        self.assertEqual(len(session_data["0"]["data"]["Sessions"]), 1)
-        self.assertEqual(session_data["0"]["result"], 0)
-
-        # Should see no snaps
-        self.assertEqual(
-            json.loads(self.fs.table_tool(["all", "show", "snap"])),
-            {"version": 0,
-             "snapserver": {"last_snap": 1,
-                            "pending_noop": [],
-                            "snaps": [],
-                            "need_to_purge": {},
-                            "pending_update": [],
-                            "pending_destroy": []},
-             "result": 0}
-        )
-
-        # Reset everything
-        for table in ["session", "inode", "snap"]:
-            self.fs.table_tool(["all", "reset", table])
-
-        log.info(self.fs.table_tool(["all", "show", "inode"]))
-        log.info(self.fs.table_tool(["all", "show", "snap"]))
-        log.info(self.fs.table_tool(["all", "show", "session"]))
-
-        # Should see 0 sessions
-        session_data = json.loads(self.fs.table_tool(
-            ["all", "show", "session"]))
-        self.assertEqual(len(session_data["0"]["data"]["Sessions"]), 0)
-        self.assertEqual(session_data["0"]["result"], 0)
-
-        # Should see the entire inode range now marked free
-        self.assertEqual(
-            json.loads(self.fs.table_tool(["all", "show", "inode"])),
-            {"0": {"data": {"version": 1,
-                            "inotable": {"projected_free": [
-                                {"start": 1099511627776,
-                                 "len": 1099511627776}],
-                                "free": [
-                                    {"start": 1099511627776,
-                                     "len": 1099511627776}]}},
-                   "result": 0}}
-        )
-
-        # Should see no snaps
-        self.assertEqual(
-            json.loads(self.fs.table_tool(["all", "show", "snap"])),
-            {"version": 1,
-             "snapserver": {"last_snap": 1,
-                            "pending_noop": [],
-                            "snaps": [],
-                            "need_to_purge": {},
-                            "pending_update": [],
-                            "pending_destroy": []},
-             "result": 0}
-        )
-
-    def test_table_tool_take_inos(self):
-        initial_range_start = 1099511627776
-        initial_range_len = 1099511627776
-        # Initially a completely clear range
-        self.assertEqual(
-            json.loads(self.fs.table_tool(["all", "show", "inode"])),
-            {"0": {"data": {"version": 0,
-                            "inotable": {"projected_free": [
                                {"start": initial_range_start,
-                                 "len": initial_range_len}],
-                                "free": [
-                                    {"start": initial_range_start,
-                                     "len": initial_range_len}]}},
-                   "result": 0}}
-        )
-
-        # Remove some
-        self.assertEqual(
-            json.loads(self.fs.table_tool(["all", "take_inos", "{0}".format(initial_range_start + 100)])),
-            {"0": {"data": {"version": 1,
-                            "inotable": {"projected_free": [
-                                {"start": initial_range_start + 101,
-                                 "len": initial_range_len - 101}],
-                                "free": [
-                                    {"start": initial_range_start + 101,
-                                     "len": initial_range_len - 101}]}},
-                   "result": 0}}
-        )
-
-    @for_teuthology  # Hack: "for_teuthology" because .sh doesn't work outside teuth
-    def test_journal_smoke(self):
-        workunit(self.ctx, {
-            'clients': {
-                "client.{0}".format(self.mount_a.client_id): [
-                    "fs/misc/trivial_sync.sh"],
-            },
-            "timeout": "1h"
-        })
-
-        for mount in self.mounts:
-            mount.umount_wait()
-
-        self.fs.mds_stop()
-        self.fs.mds_fail()
-
-        # journal tool smoke
-        workunit(self.ctx, {
-            'clients': {
-                "client.{0}".format(self.mount_a.client_id): [
-                    "suites/cephfs_journal_tool_smoke.sh"],
-            },
-            "timeout": "1h"
-        })
-
-        self.fs.mds_restart()
-        self.fs.wait_for_daemons()
-
-        self.mount_a.mount()
-
-        # trivial sync on mount a
-        workunit(self.ctx, {
-            'clients': {
-                "client.{0}".format(self.mount_a.client_id): [
-                    "fs/misc/trivial_sync.sh"],
-            },
-            "timeout": "1h"
-        })