Diffstat (limited to 'src/ceph/qa/tasks/cephfs/test_journal_repair.py')
-rw-r--r--  src/ceph/qa/tasks/cephfs/test_journal_repair.py  443
1 file changed, 0 insertions(+), 443 deletions(-)
diff --git a/src/ceph/qa/tasks/cephfs/test_journal_repair.py b/src/ceph/qa/tasks/cephfs/test_journal_repair.py
deleted file mode 100644
index 62cbbb0..0000000
--- a/src/ceph/qa/tasks/cephfs/test_journal_repair.py
+++ /dev/null
@@ -1,443 +0,0 @@
-
-"""
-Test our tools for recovering the content of damaged journals
-"""
-
-import json
-import logging
-from textwrap import dedent
-import time
-
-from teuthology.exceptions import CommandFailedError, ConnectionLostError
-from tasks.cephfs.filesystem import ObjectNotFound, ROOT_INO
-from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology
-from tasks.workunit import task as workunit
-
-log = logging.getLogger(__name__)
-
-
-class TestJournalRepair(CephFSTestCase):
- MDSS_REQUIRED = 2
-
- def test_inject_to_empty(self):
- """
-        That when some dentries are in the journal but nothing is in
- the backing store, we correctly populate the backing store
- from the journalled dentries.
- """
-
- # Inject metadata operations
- self.mount_a.run_shell(["touch", "rootfile"])
- self.mount_a.run_shell(["mkdir", "subdir"])
- self.mount_a.run_shell(["touch", "subdir/subdirfile"])
- # There are several different paths for handling hardlinks, depending
- # on whether an existing dentry (being overwritten) is also a hardlink
- self.mount_a.run_shell(["mkdir", "linkdir"])
-
- # Test inode -> remote transition for a dentry
- self.mount_a.run_shell(["touch", "linkdir/link0"])
- self.mount_a.run_shell(["rm", "-f", "linkdir/link0"])
- self.mount_a.run_shell(["ln", "subdir/subdirfile", "linkdir/link0"])
-
- # Test nothing -> remote transition
- self.mount_a.run_shell(["ln", "subdir/subdirfile", "linkdir/link1"])
-
- # Test remote -> inode transition
- self.mount_a.run_shell(["ln", "subdir/subdirfile", "linkdir/link2"])
- self.mount_a.run_shell(["rm", "-f", "linkdir/link2"])
- self.mount_a.run_shell(["touch", "linkdir/link2"])
-
- # Test remote -> diff remote transition
- self.mount_a.run_shell(["ln", "subdir/subdirfile", "linkdir/link3"])
- self.mount_a.run_shell(["rm", "-f", "linkdir/link3"])
- self.mount_a.run_shell(["ln", "rootfile", "linkdir/link3"])
-
- # Test an empty directory
- self.mount_a.run_shell(["mkdir", "subdir/subsubdir"])
- self.mount_a.run_shell(["sync"])
-
-        # Before we unmount, make a note of the inode numbers; later we will
- # check that they match what we recover from the journal
- rootfile_ino = self.mount_a.path_to_ino("rootfile")
- subdir_ino = self.mount_a.path_to_ino("subdir")
- linkdir_ino = self.mount_a.path_to_ino("linkdir")
- subdirfile_ino = self.mount_a.path_to_ino("subdir/subdirfile")
- subsubdir_ino = self.mount_a.path_to_ino("subdir/subsubdir")
-
- self.mount_a.umount_wait()
-
- # Stop the MDS
- self.fs.mds_stop()
- self.fs.mds_fail()
-
- # Now, the journal should contain the operations, but the backing
- # store shouldn't
- with self.assertRaises(ObjectNotFound):
- self.fs.list_dirfrag(subdir_ino)
- self.assertEqual(self.fs.list_dirfrag(ROOT_INO), [])
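-        # (dirfrags are stored as per-directory objects in the metadata pool:
-        # subdir's object has not been written yet, while root's exists but
-        # contains no dentries)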
-
-        # Execute the dentry recovery; this should populate the backing store
- self.fs.journal_tool(['event', 'recover_dentries', 'list'])
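-        # (journal_tool wraps the cephfs-journal-tool CLI; recover_dentries
-        # replays the dentry events from the journal into the dirfrag objects
-        # in the backing store)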
-
- # Dentries in ROOT_INO are present
- self.assertEqual(sorted(self.fs.list_dirfrag(ROOT_INO)), sorted(['rootfile_head', 'subdir_head', 'linkdir_head']))
- self.assertEqual(self.fs.list_dirfrag(subdir_ino), ['subdirfile_head', 'subsubdir_head'])
- self.assertEqual(sorted(self.fs.list_dirfrag(linkdir_ino)),
- sorted(['link0_head', 'link1_head', 'link2_head', 'link3_head']))
-
- # Now check the MDS can read what we wrote: truncate the journal
- # and start the mds.
- self.fs.journal_tool(['journal', 'reset'])
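-        # ('journal reset' discards the now-replayed events, leaving an empty
-        # journal for the restarted MDS to append to)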
- self.fs.mds_fail_restart()
- self.fs.wait_for_daemons()
-
- # List files
- self.mount_a.mount()
- self.mount_a.wait_until_mounted()
-
- # First ls -R to populate MDCache, such that hardlinks will
- # resolve properly (recover_dentries does not create backtraces,
- # so ordinarily hardlinks to inodes that happen not to have backtraces
- # will be invisible in readdir).
- # FIXME: hook in forward scrub here to regenerate backtraces
-        self.mount_a.run_shell(['ls', '-R'])
- self.mount_a.umount_wait() # remount to clear client cache before our second ls
- self.mount_a.mount()
- self.mount_a.wait_until_mounted()
-
- proc = self.mount_a.run_shell(['ls', '-R'])
- self.assertEqual(proc.stdout.getvalue().strip(),
- dedent("""
- .:
- linkdir
- rootfile
- subdir
-
- ./linkdir:
- link0
- link1
- link2
- link3
-
- ./subdir:
- subdirfile
- subsubdir
-
- ./subdir/subsubdir:
- """).strip())
-
- # Check the correct inos were preserved by path
- self.assertEqual(rootfile_ino, self.mount_a.path_to_ino("rootfile"))
- self.assertEqual(subdir_ino, self.mount_a.path_to_ino("subdir"))
- self.assertEqual(subdirfile_ino, self.mount_a.path_to_ino("subdir/subdirfile"))
- self.assertEqual(subsubdir_ino, self.mount_a.path_to_ino("subdir/subsubdir"))
-
- # Check that the hard link handling came out correctly
- self.assertEqual(self.mount_a.path_to_ino("linkdir/link0"), subdirfile_ino)
- self.assertEqual(self.mount_a.path_to_ino("linkdir/link1"), subdirfile_ino)
- self.assertNotEqual(self.mount_a.path_to_ino("linkdir/link2"), subdirfile_ino)
- self.assertEqual(self.mount_a.path_to_ino("linkdir/link3"), rootfile_ino)
-
- # Create a new file, ensure it is not issued the same ino as one of the
- # recovered ones
- self.mount_a.run_shell(["touch", "afterwards"])
- new_ino = self.mount_a.path_to_ino("afterwards")
-        self.assertNotIn(new_ino, [rootfile_ino, subdir_ino, linkdir_ino,
-                                   subdirfile_ino, subsubdir_ino])
-
- # Check that we can do metadata ops in the recovered directory
- self.mount_a.run_shell(["touch", "subdir/subsubdir/subsubdirfile"])
-
- @for_teuthology # 308s
- def test_reset(self):
- """
- That after forcibly modifying the backing store, we can get back into
- a good state by resetting the MDSMap.
-
- The scenario is that we have two active MDSs, and we lose the journals. Once
- we have completely lost confidence in the integrity of the metadata, we want to
-        return the system to a single-MDS state and then scrub to recover what
-        we can.
- """
-
- # Set max_mds to 2
- self.fs.set_max_mds(2)
-
- # See that we have two active MDSs
- self.wait_until_equal(lambda: len(self.fs.get_active_names()), 2, 30,
- reject_fn=lambda v: v > 2 or v < 1)
- active_mds_names = self.fs.get_active_names()
-
- # Switch off any unneeded MDS daemons
- for unneeded_mds in set(self.mds_cluster.mds_ids) - set(active_mds_names):
- self.mds_cluster.mds_stop(unneeded_mds)
- self.mds_cluster.mds_fail(unneeded_mds)
-
- # Create a dir on each rank
- self.mount_a.run_shell(["mkdir", "alpha"])
- self.mount_a.run_shell(["mkdir", "bravo"])
- self.mount_a.setfattr("alpha/", "ceph.dir.pin", "0")
- self.mount_a.setfattr("bravo/", "ceph.dir.pin", "1")
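-        # (setting ceph.dir.pin to N pins a subtree to MDS rank N, so alpha
-        # stays on rank 0 and bravo should migrate to rank 1; checked below)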
-
- def subtrees_assigned():
- got_subtrees = self.fs.mds_asok(["get", "subtrees"], mds_id=active_mds_names[0])
-
- for s in got_subtrees:
- if s['dir']['path'] == '/bravo':
- if s['auth_first'] == 1:
- return True
- else:
- # Should not happen
- raise RuntimeError("/bravo is subtree but not rank 1!")
-
- return False
-
- # Ensure the pinning has taken effect and the /bravo dir is now
- # migrated to rank 1.
- self.wait_until_true(subtrees_assigned, 30)
-
- # Do some IO (this should be split across ranks according to
- # the rank-pinned dirs)
- self.mount_a.create_n_files("alpha/file", 1000)
- self.mount_a.create_n_files("bravo/file", 1000)
-
- # Flush the journals so that we have some backing store data
- # belonging to one MDS, and some to the other MDS.
- for mds_name in active_mds_names:
- self.fs.mds_asok(["flush", "journal"], mds_name)
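-        # (the "flush journal" asok command forces each rank to write its
-        # journalled metadata into the backing store and trim its journal)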
-
- # Stop (hard) the second MDS daemon
- self.fs.mds_stop(active_mds_names[1])
-
- # Wipe out the tables for MDS rank 1 so that it is broken and can't start
- # (this is the simulated failure that we will demonstrate that the disaster
- # recovery tools can get us back from)
- self.fs.erase_metadata_objects(prefix="mds1_")
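-        # (rank 1's table objects in the metadata pool carry an "mds1_" prefix,
-        # e.g. mds1_inotable and mds1_sessionmap)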
-
- # Try to access files from the client
- blocked_ls = self.mount_a.run_shell(["ls", "-R"], wait=False)
-
- # Check that this "ls -R" blocked rather than completing: indicates
- # it got stuck trying to access subtrees which were on the now-dead MDS.
- log.info("Sleeping to check ls is blocked...")
- time.sleep(60)
- self.assertFalse(blocked_ls.finished)
-
- # This mount is now useless because it will depend on MDS rank 1, and MDS rank 1
- # is not coming back. Kill it.
- log.info("Killing mount, it's blocked on the MDS we killed")
- self.mount_a.kill()
- self.mount_a.kill_cleanup()
- try:
- # Now that the mount is dead, the ls -R should error out.
- blocked_ls.wait()
- except (CommandFailedError, ConnectionLostError):
-            # The ConnectionLostError case is for the kernel client, where
- # killing the mount also means killing the node.
- pass
-
-        # See that the second MDS fails to reacquire rank 1 when it restarts:
-        # the rank gets marked damaged instead
- damaged_id = active_mds_names[1]
- self.fs.mds_restart(damaged_id)
-
- # The daemon taking the damaged rank should start starting, then
- # restart back into standby after asking the mon to mark the rank
- # damaged.
- def is_marked_damaged():
- mds_map = self.fs.get_mds_map()
- return 1 in mds_map['damaged']
-
- self.wait_until_true(is_marked_damaged, 60)
-
- def get_state():
- info = self.mds_cluster.get_mds_info(damaged_id)
- return info['state'] if info is not None else None
-
- self.wait_until_equal(
- get_state,
- "up:standby",
- timeout=60)
-
- self.fs.mds_stop(damaged_id)
- self.fs.mds_fail(damaged_id)
-
- # Now give up and go through a disaster recovery procedure
- self.fs.mds_stop(active_mds_names[0])
- self.fs.mds_fail(active_mds_names[0])
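-        # The procedure: replay both ranks' journals into the backing store,
-        # reset rank 0's session table and journal, erase rank 1's metadata
-        # objects entirely, then use 'fs reset' to shrink the filesystem back
-        # to a single rank.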
-        # Invoke recover_dentries quietly, because otherwise the log spews millions of lines
- self.fs.journal_tool(["event", "recover_dentries", "summary"], rank=0, quiet=True)
- self.fs.journal_tool(["event", "recover_dentries", "summary"], rank=1, quiet=True)
- self.fs.table_tool(["0", "reset", "session"])
- self.fs.journal_tool(["journal", "reset"], rank=0)
- self.fs.erase_mds_objects(1)
- self.fs.mon_manager.raw_cluster_cmd('fs', 'reset', self.fs.name,
- '--yes-i-really-mean-it')
-
- # Bring an MDS back online, mount a client, and see that we can walk the full
- # filesystem tree again
- self.fs.mds_fail_restart(active_mds_names[0])
- self.wait_until_equal(lambda: self.fs.get_active_names(), [active_mds_names[0]], 30,
- reject_fn=lambda v: len(v) > 1)
- self.mount_a.mount()
- self.mount_a.run_shell(["ls", "-R"], wait=True)
-
- def test_table_tool(self):
- active_mdss = self.fs.get_active_names()
- self.assertEqual(len(active_mdss), 1)
- mds_name = active_mdss[0]
-
- self.mount_a.run_shell(["touch", "foo"])
- self.fs.mds_asok(["flush", "journal"], mds_name)
-
- log.info(self.fs.table_tool(["all", "show", "inode"]))
- log.info(self.fs.table_tool(["all", "show", "snap"]))
- log.info(self.fs.table_tool(["all", "show", "session"]))
-
- # Inode table should always be the same because initial state
- # and choice of inode are deterministic.
-        # Should see the session's preallocated inos plus the ino used for
-        # 'foo' taken out of the free range (hence free starts 1001 past the
-        # base of the range)
- self.assertEqual(
- json.loads(self.fs.table_tool(["all", "show", "inode"])),
- {"0": {
- "data": {
- "version": 2,
- "inotable": {
- "projected_free": [
- {"start": 1099511628777,
- "len": 1099511626775}],
- "free": [
- {"start": 1099511628777,
- "len": 1099511626775}]}},
- "result": 0}}
-
- )
-
- # Should see one session
- session_data = json.loads(self.fs.table_tool(
- ["all", "show", "session"]))
- self.assertEqual(len(session_data["0"]["data"]["Sessions"]), 1)
- self.assertEqual(session_data["0"]["result"], 0)
-
- # Should see no snaps
- self.assertEqual(
- json.loads(self.fs.table_tool(["all", "show", "snap"])),
- {"version": 0,
- "snapserver": {"last_snap": 1,
- "pending_noop": [],
- "snaps": [],
- "need_to_purge": {},
- "pending_update": [],
- "pending_destroy": []},
- "result": 0}
- )
-
- # Reset everything
- for table in ["session", "inode", "snap"]:
- self.fs.table_tool(["all", "reset", table])
-
- log.info(self.fs.table_tool(["all", "show", "inode"]))
- log.info(self.fs.table_tool(["all", "show", "snap"]))
- log.info(self.fs.table_tool(["all", "show", "session"]))
-
- # Should see 0 sessions
- session_data = json.loads(self.fs.table_tool(
- ["all", "show", "session"]))
- self.assertEqual(len(session_data["0"]["data"]["Sessions"]), 0)
- self.assertEqual(session_data["0"]["result"], 0)
-
- # Should see entire inode range now marked free
- self.assertEqual(
- json.loads(self.fs.table_tool(["all", "show", "inode"])),
- {"0": {"data": {"version": 1,
- "inotable": {"projected_free": [
- {"start": 1099511627776,
- "len": 1099511627776}],
- "free": [
- {"start": 1099511627776,
- "len": 1099511627776}]}},
- "result": 0}}
- )
-
- # Should see no snaps
- self.assertEqual(
- json.loads(self.fs.table_tool(["all", "show", "snap"])),
- {"version": 1,
- "snapserver": {"last_snap": 1,
- "pending_noop": [],
- "snaps": [],
- "need_to_purge": {},
- "pending_update": [],
- "pending_destroy": []},
- "result": 0}
- )
-
- def test_table_tool_take_inos(self):
- initial_range_start = 1099511627776
- initial_range_len = 1099511627776
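-        # (2**40: ino numbers below this are reserved for the MDS itself, so
-        # the client-allocatable range managed by the inotable starts here)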
- # Initially a completely clear range
- self.assertEqual(
- json.loads(self.fs.table_tool(["all", "show", "inode"])),
- {"0": {"data": {"version": 0,
- "inotable": {"projected_free": [
- {"start": initial_range_start,
- "len": initial_range_len}],
- "free": [
- {"start": initial_range_start,
- "len": initial_range_len}]}},
- "result": 0}}
- )
-
- # Remove some
- self.assertEqual(
- json.loads(self.fs.table_tool(["all", "take_inos", "{0}".format(initial_range_start + 100)])),
- {"0": {"data": {"version": 1,
- "inotable": {"projected_free": [
- {"start": initial_range_start + 101,
- "len": initial_range_len - 101}],
- "free": [
- {"start": initial_range_start + 101,
- "len": initial_range_len - 101}]}},
- "result": 0}}
- )
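-        # take_inos marks every ino up to and including the given value as
-        # used, hence the free range now starts at initial_range_start + 101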
-
- @for_teuthology # Hack: "for_teuthology" because .sh doesn't work outside teuth
- def test_journal_smoke(self):
- workunit(self.ctx, {
- 'clients': {
- "client.{0}".format(self.mount_a.client_id): [
- "fs/misc/trivial_sync.sh"],
- },
- "timeout": "1h"
- })
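-        # (trivial_sync does a little file I/O and syncs, so the journal now
-        # has some events for the smoke test to operate on)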
-
- for mount in self.mounts:
- mount.umount_wait()
-
- self.fs.mds_stop()
- self.fs.mds_fail()
-
- # journal tool smoke
- workunit(self.ctx, {
- 'clients': {
- "client.{0}".format(self.mount_a.client_id): [
- "suites/cephfs_journal_tool_smoke.sh"],
- },
- "timeout": "1h"
- })
-
-
- self.fs.mds_restart()
- self.fs.wait_for_daemons()
-
- self.mount_a.mount()
-
-        # trivial sync on mount a
- workunit(self.ctx, {
- 'clients': {
- "client.{0}".format(self.mount_a.client_id): [
- "fs/misc/trivial_sync.sh"],
- },
- "timeout": "1h"
- })
-