Diffstat (limited to 'src/ceph/qa/tasks/cephfs')
-rw-r--r--  src/ceph/qa/tasks/cephfs/__init__.py  0
-rw-r--r--  src/ceph/qa/tasks/cephfs/cephfs_test_case.py  315
-rw-r--r--  src/ceph/qa/tasks/cephfs/filesystem.py  1213
-rw-r--r--  src/ceph/qa/tasks/cephfs/fuse_mount.py  428
-rw-r--r--  src/ceph/qa/tasks/cephfs/kernel_mount.py  267
-rw-r--r--  src/ceph/qa/tasks/cephfs/mount.py  627
-rw-r--r--  src/ceph/qa/tasks/cephfs/test_auto_repair.py  90
-rw-r--r--  src/ceph/qa/tasks/cephfs/test_backtrace.py  78
-rw-r--r--  src/ceph/qa/tasks/cephfs/test_cap_flush.py  64
-rw-r--r--  src/ceph/qa/tasks/cephfs/test_client_limits.py  239
-rw-r--r--  src/ceph/qa/tasks/cephfs/test_client_recovery.py  474
-rw-r--r--  src/ceph/qa/tasks/cephfs/test_config_commands.py  63
-rw-r--r--  src/ceph/qa/tasks/cephfs/test_damage.py  548
-rw-r--r--  src/ceph/qa/tasks/cephfs/test_data_scan.py  600
-rw-r--r--  src/ceph/qa/tasks/cephfs/test_dump_tree.py  66
-rw-r--r--  src/ceph/qa/tasks/cephfs/test_exports.py  107
-rw-r--r--  src/ceph/qa/tasks/cephfs/test_failover.py  645
-rw-r--r--  src/ceph/qa/tasks/cephfs/test_flush.py  113
-rw-r--r--  src/ceph/qa/tasks/cephfs/test_forward_scrub.py  291
-rw-r--r--  src/ceph/qa/tasks/cephfs/test_fragment.py  232
-rw-r--r--  src/ceph/qa/tasks/cephfs/test_full.py  414
-rw-r--r--  src/ceph/qa/tasks/cephfs/test_journal_migration.py  118
-rw-r--r--  src/ceph/qa/tasks/cephfs/test_journal_repair.py  443
-rw-r--r--  src/ceph/qa/tasks/cephfs/test_mantle.py  109
-rw-r--r--  src/ceph/qa/tasks/cephfs/test_misc.py  149
-rw-r--r--  src/ceph/qa/tasks/cephfs/test_pool_perm.py  113
-rw-r--r--  src/ceph/qa/tasks/cephfs/test_quota.py  106
-rw-r--r--  src/ceph/qa/tasks/cephfs/test_readahead.py  31
-rw-r--r--  src/ceph/qa/tasks/cephfs/test_recovery_pool.py  220
-rw-r--r--  src/ceph/qa/tasks/cephfs/test_scrub_checks.py  245
-rw-r--r--  src/ceph/qa/tasks/cephfs/test_sessionmap.py  235
-rw-r--r--  src/ceph/qa/tasks/cephfs/test_strays.py  1049
-rw-r--r--  src/ceph/qa/tasks/cephfs/test_volume_client.py  1016
33 files changed, 0 insertions, 10708 deletions
diff --git a/src/ceph/qa/tasks/cephfs/__init__.py b/src/ceph/qa/tasks/cephfs/__init__.py
deleted file mode 100644
index e69de29..0000000
--- a/src/ceph/qa/tasks/cephfs/__init__.py
+++ /dev/null
diff --git a/src/ceph/qa/tasks/cephfs/cephfs_test_case.py b/src/ceph/qa/tasks/cephfs/cephfs_test_case.py
deleted file mode 100644
index 801d0d3..0000000
--- a/src/ceph/qa/tasks/cephfs/cephfs_test_case.py
+++ /dev/null
@@ -1,315 +0,0 @@
-import json
-import logging
-from unittest import case
-from tasks.ceph_test_case import CephTestCase
-import os
-import re
-from StringIO import StringIO
-
-from tasks.cephfs.fuse_mount import FuseMount
-
-from teuthology.orchestra import run
-from teuthology.orchestra.run import CommandFailedError
-
-
-log = logging.getLogger(__name__)
-
-
-def for_teuthology(f):
- """
- Decorator that adds an "is_for_teuthology" attribute to the wrapped function
- """
- f.is_for_teuthology = True
- return f
-
-
-def needs_trimming(f):
- """
- Mark fn as requiring a client capable of trimming its cache (i.e. for ceph-fuse
- this means it needs to be able to run as root, currently)
- """
- f.needs_trimming = True
- return f
-
-
-class CephFSTestCase(CephTestCase):
- """
- Test case for Ceph FS; requires the caller to populate the Filesystem and Mounts
- into the fs, mount_a and mount_b class attributes (setting mount_b is optional).
-
- Handles resetting the cluster under test between tests.
- """
-
- # FIXME weird explicit naming
- mount_a = None
- mount_b = None
- recovery_mount = None
-
- # Declarative test requirements: subclasses should override these to indicate
- # their special needs. If not met, tests will be skipped.
- CLIENTS_REQUIRED = 1
- MDSS_REQUIRED = 1
- REQUIRE_KCLIENT_REMOTE = False
- REQUIRE_ONE_CLIENT_REMOTE = False
- REQUIRE_MEMSTORE = False
-
- # Whether to create the default filesystem during setUp
- REQUIRE_FILESYSTEM = True
-
- # requires REQUIRE_FILESYSTEM = True
- REQUIRE_RECOVERY_FILESYSTEM = False
-
- LOAD_SETTINGS = []
-
- def setUp(self):
- super(CephFSTestCase, self).setUp()
-
- if len(self.mds_cluster.mds_ids) < self.MDSS_REQUIRED:
- raise case.SkipTest("Only have {0} MDSs, require {1}".format(
- len(self.mds_cluster.mds_ids), self.MDSS_REQUIRED
- ))
-
- if len(self.mounts) < self.CLIENTS_REQUIRED:
- raise case.SkipTest("Only have {0} clients, require {1}".format(
- len(self.mounts), self.CLIENTS_REQUIRED
- ))
-
- if self.REQUIRE_KCLIENT_REMOTE:
- if not isinstance(self.mounts[0], FuseMount) or not isinstance(self.mounts[1], FuseMount):
- # kclient kill() power cycles nodes, so requires clients to each be on
- # their own node
- if self.mounts[0].client_remote.hostname == self.mounts[1].client_remote.hostname:
- raise case.SkipTest("kclient clients must be on separate nodes")
-
- if self.REQUIRE_ONE_CLIENT_REMOTE:
- if self.mounts[0].client_remote.hostname in self.mds_cluster.get_mds_hostnames():
- raise case.SkipTest("Require first client to be on separate server from MDSs")
-
- if self.REQUIRE_MEMSTORE:
- objectstore = self.mds_cluster.get_config("osd_objectstore", "osd")
- if objectstore != "memstore":
- # You certainly *could* run this on a real OSD, but you don't want to sit
- # here for hours waiting for the test to fill up a 1TB drive!
- raise case.SkipTest("Require `memstore` OSD backend to simulate full drives")
-
- # Create friendly mount_a, mount_b attrs
- for i in range(0, self.CLIENTS_REQUIRED):
- setattr(self, "mount_{0}".format(chr(ord('a') + i)), self.mounts[i])
-
- self.mds_cluster.clear_firewall()
-
- # Unmount all clients, we are about to blow away the filesystem
- for mount in self.mounts:
- if mount.is_mounted():
- mount.umount_wait(force=True)
-
- # To avoid any issues with e.g. unlink bugs, we destroy and recreate
- # the filesystem rather than just doing a rm -rf of files
- self.mds_cluster.mds_stop()
- self.mds_cluster.mds_fail()
- self.mds_cluster.delete_all_filesystems()
- self.fs = None # is now invalid!
- self.recovery_fs = None
-
- # In case the previous filesystem had filled up the RADOS cluster, wait for the
- # 'full' flag to clear.
- osd_mon_report_interval_max = int(self.mds_cluster.get_config("osd_mon_report_interval_max", service_type='osd'))
- self.wait_until_true(lambda: not self.mds_cluster.is_full(),
- timeout=osd_mon_report_interval_max * 5)
-
- # In case anything is in the OSD blacklist, clear it out. This is to avoid
- # the OSD map changing in the background (due to blacklist expiry) while tests run.
- try:
- self.mds_cluster.mon_manager.raw_cluster_cmd("osd", "blacklist", "clear")
- except CommandFailedError:
- # Fallback for older Ceph cluster
- blacklist = json.loads(self.mds_cluster.mon_manager.raw_cluster_cmd("osd",
- "dump", "--format=json-pretty"))['blacklist']
- log.info("Removing {0} blacklist entries".format(len(blacklist)))
- for addr, blacklisted_at in blacklist.items():
- self.mds_cluster.mon_manager.raw_cluster_cmd("osd", "blacklist", "rm", addr)
-
- client_mount_ids = [m.client_id for m in self.mounts]
- # In case the test changes the IDs of clients, stash them so that we can
- # reset in tearDown
- self._original_client_ids = client_mount_ids
- log.info(client_mount_ids)
-
- # In case there were any extra auth identities around from a previous
- # test, delete them
- for entry in self.auth_list():
- ent_type, ent_id = entry['entity'].split(".")
- if ent_type == "client" and ent_id not in client_mount_ids and ent_id != "admin":
- self.mds_cluster.mon_manager.raw_cluster_cmd("auth", "del", entry['entity'])
-
- if self.REQUIRE_FILESYSTEM:
- self.fs = self.mds_cluster.newfs(create=True)
- self.fs.mds_restart()
-
- # In case some test messed with auth caps, reset them
- for client_id in client_mount_ids:
- self.mds_cluster.mon_manager.raw_cluster_cmd_result(
- 'auth', 'caps', "client.{0}".format(client_id),
- 'mds', 'allow',
- 'mon', 'allow r',
- 'osd', 'allow rw pool={0}'.format(self.fs.get_data_pool_name()))
-
- # wait for mds restart to complete...
- self.fs.wait_for_daemons()
-
- # Mount the requested number of clients
- for i in range(0, self.CLIENTS_REQUIRED):
- self.mounts[i].mount()
- self.mounts[i].wait_until_mounted()
-
- if self.REQUIRE_RECOVERY_FILESYSTEM:
- if not self.REQUIRE_FILESYSTEM:
- raise case.SkipTest("Recovery filesystem requires a primary filesystem as well")
- self.fs.mon_manager.raw_cluster_cmd('fs', 'flag', 'set',
- 'enable_multiple', 'true',
- '--yes-i-really-mean-it')
- self.recovery_fs = self.mds_cluster.newfs(name="recovery_fs", create=False)
- self.recovery_fs.set_metadata_overlay(True)
- self.recovery_fs.set_data_pool_name(self.fs.get_data_pool_name())
- self.recovery_fs.create()
- self.recovery_fs.getinfo(refresh=True)
- self.recovery_fs.mds_restart()
- self.recovery_fs.wait_for_daemons()
-
- # Load any config settings of interest
- for setting in self.LOAD_SETTINGS:
- setattr(self, setting, float(self.fs.mds_asok(
- ['config', 'get', setting], self.mds_cluster.mds_ids[0]
- )[setting]))
-
- self.configs_set = set()
-
- def tearDown(self):
- super(CephFSTestCase, self).tearDown()
-
- self.mds_cluster.clear_firewall()
- for m in self.mounts:
- m.teardown()
-
- for i, m in enumerate(self.mounts):
- m.client_id = self._original_client_ids[i]
-
- for subsys, key in self.configs_set:
- self.mds_cluster.clear_ceph_conf(subsys, key)
-
- def set_conf(self, subsys, key, value):
- self.configs_set.add((subsys, key))
- self.mds_cluster.set_ceph_conf(subsys, key, value)
-
- def auth_list(self):
- """
- Convenience wrapper on "ceph auth ls"
- """
- return json.loads(self.mds_cluster.mon_manager.raw_cluster_cmd(
- "auth", "ls", "--format=json-pretty"
- ))['auth_dump']
-
- def assert_session_count(self, expected, ls_data=None, mds_id=None):
- if ls_data is None:
- ls_data = self.fs.mds_asok(['session', 'ls'], mds_id=mds_id)
-
- alive_count = len([s for s in ls_data if s['state'] != 'killing'])
-
- self.assertEqual(expected, alive_count, "Expected {0} sessions, found {1}".format(
- expected, alive_count
- ))
-
- def assert_session_state(self, client_id, expected_state):
- self.assertEqual(
- self._session_by_id(
- self.fs.mds_asok(['session', 'ls'])).get(client_id, {'state': None})['state'],
- expected_state)
-
- def get_session_data(self, client_id):
- return self._session_by_id(client_id)
-
- def _session_list(self):
- ls_data = self.fs.mds_asok(['session', 'ls'])
- ls_data = [s for s in ls_data if s['state'] not in ['stale', 'closed']]
- return ls_data
-
- def get_session(self, client_id, session_ls=None):
- if session_ls is None:
- session_ls = self.fs.mds_asok(['session', 'ls'])
-
- return self._session_by_id(session_ls)[client_id]
-
- def _session_by_id(self, session_ls):
- return dict([(s['id'], s) for s in session_ls])
-
- def wait_for_daemon_start(self, daemon_ids=None):
- """
- Wait until all the daemons appear in the FSMap, either assigned
- MDS ranks or in the list of standbys
- """
- def get_daemon_names():
- return [info['name'] for info in self.mds_cluster.status().get_all()]
-
- if daemon_ids is None:
- daemon_ids = self.mds_cluster.mds_ids
-
- try:
- self.wait_until_true(
- lambda: set(daemon_ids) & set(get_daemon_names()) == set(daemon_ids),
- timeout=30
- )
- except RuntimeError:
- log.warn("Timeout waiting for daemons {0}, while we have {1}".format(
- daemon_ids, get_daemon_names()
- ))
- raise
-
- def assert_mds_crash(self, daemon_id):
- """
- Assert that a particular MDS daemon crashes (block until
- it does).
- """
- try:
- self.mds_cluster.mds_daemons[daemon_id].proc.wait()
- except CommandFailedError as e:
- log.info("MDS '{0}' crashed with status {1} as expected".format(daemon_id, e.exitstatus))
- self.mds_cluster.mds_daemons[daemon_id].proc = None
-
- # Go remove the coredump from the crash, otherwise teuthology.internal.coredump will
- # catch it later and treat it as a failure.
- p = self.mds_cluster.mds_daemons[daemon_id].remote.run(args=[
- "sudo", "sysctl", "-n", "kernel.core_pattern"], stdout=StringIO())
- core_pattern = p.stdout.getvalue().strip()
- if os.path.dirname(core_pattern): # Non-default core_pattern with a directory in it
- # We have seen a core_pattern that looks like it's from teuthology's coredump
- # task, so proceed to clear out the core file
- log.info("Clearing core from pattern: {0}".format(core_pattern))
-
- # Determine the PID of the crashed MDS by inspecting the MDSMap; it had
- # to talk to the mons to get assigned a rank to reach the point of crashing
- addr = self.mds_cluster.mon_manager.get_mds_status(daemon_id)['addr']
- pid_str = addr.split("/")[1]
- log.info("Determined crasher PID was {0}".format(pid_str))
-
- # Substitute PID into core_pattern to get a glob
- core_glob = core_pattern.replace("%p", pid_str)
- core_glob = re.sub("%[a-z]", "*", core_glob) # Match all for all other % tokens
-
- # Verify that we see the expected single coredump matching the expected pattern
- ls_proc = self.mds_cluster.mds_daemons[daemon_id].remote.run(args=[
- "sudo", "ls", run.Raw(core_glob)
- ], stdout=StringIO())
- cores = [f for f in ls_proc.stdout.getvalue().strip().split("\n") if f]
- log.info("Enumerated cores: {0}".format(cores))
- self.assertEqual(len(cores), 1)
-
- log.info("Found core file {0}, deleting it".format(cores[0]))
-
- self.mds_cluster.mds_daemons[daemon_id].remote.run(args=[
- "sudo", "rm", "-f", cores[0]
- ])
- else:
- log.info("No core_pattern directory set, nothing to clear (internal.coredump not enabled?)")
-
- else:
- raise AssertionError("MDS daemon '{0}' did not crash as expected".format(daemon_id))
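For orientation, here is a minimal sketch (not part of the deleted file) of how a test module builds on CephFSTestCase; the TestExample class and its test method are hypothetical, while the decorator, class attributes and helper methods are the ones defined above:

    from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology

    class TestExample(CephFSTestCase):
        # Declarative requirements, checked in setUp(); if unmet, the test is skipped.
        CLIENTS_REQUIRED = 2
        MDSS_REQUIRED = 1

        @for_teuthology  # long-running: only scheduled in full teuthology runs
        def test_session_count_drops_on_unmount(self):
            # setUp() has mounted both clients and populated mount_a / mount_b
            self.assert_session_count(2)
            self.mount_b.umount_wait()
            self.assert_session_count(1)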
diff --git a/src/ceph/qa/tasks/cephfs/filesystem.py b/src/ceph/qa/tasks/cephfs/filesystem.py
deleted file mode 100644
index 9638fd5..0000000
--- a/src/ceph/qa/tasks/cephfs/filesystem.py
+++ /dev/null
@@ -1,1213 +0,0 @@
-
-from StringIO import StringIO
-import json
-import logging
-from gevent import Greenlet
-import os
-import time
-import datetime
-import re
-import errno
-import random
-
-from teuthology.exceptions import CommandFailedError
-from teuthology import misc
-from teuthology.nuke import clear_firewall
-from teuthology.parallel import parallel
-from tasks.ceph_manager import write_conf
-from tasks import ceph_manager
-
-
-log = logging.getLogger(__name__)
-
-
-DAEMON_WAIT_TIMEOUT = 120
-ROOT_INO = 1
-
-
-class ObjectNotFound(Exception):
- def __init__(self, object_name):
- self._object_name = object_name
-
- def __str__(self):
- return "Object not found: '{0}'".format(self._object_name)
-
-class FSStatus(object):
- """
- Operations on a snapshot of the FSMap.
- """
- def __init__(self, mon_manager):
- self.mon = mon_manager
- self.map = json.loads(self.mon.raw_cluster_cmd("fs", "dump", "--format=json"))
-
- def __str__(self):
- return json.dumps(self.map, indent = 2, sort_keys = True)
-
- # Expose the fsmap for manual inspection.
- def __getitem__(self, key):
- """
- Get a field from the fsmap.
- """
- return self.map[key]
-
- def get_filesystems(self):
- """
- Iterator for all filesystems.
- """
- for fs in self.map['filesystems']:
- yield fs
-
- def get_all(self):
- """
- Iterator for all the mds_info components in the FSMap.
- """
- for info in self.get_standbys():
- yield info
- for fs in self.map['filesystems']:
- for info in fs['mdsmap']['info'].values():
- yield info
-
- def get_standbys(self):
- """
- Iterator for all standbys.
- """
- for info in self.map['standbys']:
- yield info
-
- def get_fsmap(self, fscid):
- """
- Get the fsmap for the given FSCID.
- """
- for fs in self.map['filesystems']:
- if fscid is None or fs['id'] == fscid:
- return fs
- raise RuntimeError("FSCID {0} not in map".format(fscid))
-
- def get_fsmap_byname(self, name):
- """
- Get the fsmap for the given file system name.
- """
- for fs in self.map['filesystems']:
- if name is None or fs['mdsmap']['fs_name'] == name:
- return fs
- raise RuntimeError("FS {0} not in map".format(name))
-
- def get_replays(self, fscid):
- """
- Get the standby:replay MDS for the given FSCID.
- """
- fs = self.get_fsmap(fscid)
- for info in fs['mdsmap']['info'].values():
- if info['state'] == 'up:standby-replay':
- yield info
-
- def get_ranks(self, fscid):
- """
- Get the ranks for the given FSCID.
- """
- fs = self.get_fsmap(fscid)
- for info in fs['mdsmap']['info'].values():
- if info['rank'] >= 0:
- yield info
-
- def get_rank(self, fscid, rank):
- """
- Get the rank for the given FSCID.
- """
- for info in self.get_ranks(fscid):
- if info['rank'] == rank:
- return info
- raise RuntimeError("FSCID {0} has no rank {1}".format(fscid, rank))
-
- def get_mds(self, name):
- """
- Get the info for the given MDS name.
- """
- for info in self.get_all():
- if info['name'] == name:
- return info
- return None
-
- def get_mds_addr(self, name):
- """
- Return the instance addr as a string, like "10.214.133.138:6807\/10825"
- """
- info = self.get_mds(name)
- if info:
- return info['addr']
- else:
- log.warn(json.dumps(list(self.get_all()), indent=2)) # dump for debugging
- raise RuntimeError("MDS id '{0}' not found in map".format(name))
-
-class CephCluster(object):
- @property
- def admin_remote(self):
- first_mon = misc.get_first_mon(self._ctx, None)
- (result,) = self._ctx.cluster.only(first_mon).remotes.iterkeys()
- return result
-
- def __init__(self, ctx):
- self._ctx = ctx
- self.mon_manager = ceph_manager.CephManager(self.admin_remote, ctx=ctx, logger=log.getChild('ceph_manager'))
-
- def get_config(self, key, service_type=None):
- """
- Get config from mon by default, or a specific service if caller asks for it
- """
- if service_type is None:
- service_type = 'mon'
-
- service_id = sorted(misc.all_roles_of_type(self._ctx.cluster, service_type))[0]
- return self.json_asok(['config', 'get', key], service_type, service_id)[key]
-
- def set_ceph_conf(self, subsys, key, value):
- if subsys not in self._ctx.ceph['ceph'].conf:
- self._ctx.ceph['ceph'].conf[subsys] = {}
- self._ctx.ceph['ceph'].conf[subsys][key] = value
- write_conf(self._ctx) # XXX because we don't have the ceph task's config object, if they
- # used a different config path this won't work.
-
- def clear_ceph_conf(self, subsys, key):
- del self._ctx.ceph['ceph'].conf[subsys][key]
- write_conf(self._ctx)
-
- def json_asok(self, command, service_type, service_id):
- proc = self.mon_manager.admin_socket(service_type, service_id, command)
- response_data = proc.stdout.getvalue()
- log.info("_json_asok output: {0}".format(response_data))
- if response_data.strip():
- return json.loads(response_data)
- else:
- return None
-
-
-class MDSCluster(CephCluster):
- """
- Collective operations on all the MDS daemons in the Ceph cluster. These
- daemons may be in use by various Filesystems.
-
- For the benefit of pre-multi-filesystem tests, this class is also
- a parent of Filesystem. The correct way to use MDSCluster going forward is
- as a separate instance outside of your (multiple) Filesystem instances.
- """
- def __init__(self, ctx):
- super(MDSCluster, self).__init__(ctx)
-
- self.mds_ids = list(misc.all_roles_of_type(ctx.cluster, 'mds'))
-
- if len(self.mds_ids) == 0:
- raise RuntimeError("This task requires at least one MDS")
-
- if hasattr(self._ctx, "daemons"):
- # Presence of 'daemons' attribute implies ceph task rather than ceph_deploy task
- self.mds_daemons = dict([(mds_id, self._ctx.daemons.get_daemon('mds', mds_id)) for mds_id in self.mds_ids])
-
- def _one_or_all(self, mds_id, cb, in_parallel=True):
- """
- Call a callback for a single named MDS, or for all.
-
- Note that the parallelism here isn't for performance, it's to avoid being overly kind
- to the cluster by waiting a graceful ssh-latency of time between doing things, and to
- avoid being overly kind by executing them in a particular order. However, some actions
- don't cope with being done in parallel, so it's optional (`in_parallel`)
-
- :param mds_id: MDS daemon name, or None
- :param cb: Callback taking single argument of MDS daemon name
- :param in_parallel: whether to invoke callbacks concurrently (else one after the other)
- """
- if mds_id is None:
- if in_parallel:
- with parallel() as p:
- for mds_id in self.mds_ids:
- p.spawn(cb, mds_id)
- else:
- for mds_id in self.mds_ids:
- cb(mds_id)
- else:
- cb(mds_id)
-
- def get_config(self, key, service_type=None):
- """
- get_config specialization of service_type="mds"
- """
- if service_type != "mds":
- return super(MDSCluster, self).get_config(key, service_type)
-
- # Some tests stop MDS daemons, don't send commands to a dead one:
- service_id = random.sample(filter(lambda i: self.mds_daemons[i].running(), self.mds_daemons), 1)[0]
- return self.json_asok(['config', 'get', key], service_type, service_id)[key]
-
- def mds_stop(self, mds_id=None):
- """
- Stop the MDS daemon process(es). If it held a rank, that rank
- will eventually go laggy.
- """
- self._one_or_all(mds_id, lambda id_: self.mds_daemons[id_].stop())
-
- def mds_fail(self, mds_id=None):
- """
- Inform MDSMonitor of the death of the daemon process(es). If it held
- a rank, that rank will be relinquished.
- """
- self._one_or_all(mds_id, lambda id_: self.mon_manager.raw_cluster_cmd("mds", "fail", id_))
-
- def mds_restart(self, mds_id=None):
- self._one_or_all(mds_id, lambda id_: self.mds_daemons[id_].restart())
-
- def mds_fail_restart(self, mds_id=None):
- """
- Variation on restart that includes marking MDSs as failed, so that doing this
- operation followed by waiting for healthy daemon states guarantees that they
- have gone down and come up, rather than potentially seeing the healthy states
- that existed before the restart.
- """
- def _fail_restart(id_):
- self.mds_daemons[id_].stop()
- self.mon_manager.raw_cluster_cmd("mds", "fail", id_)
- self.mds_daemons[id_].restart()
-
- self._one_or_all(mds_id, _fail_restart)
-
- def newfs(self, name='cephfs', create=True):
- return Filesystem(self._ctx, name=name, create=create)
-
- def status(self):
- return FSStatus(self.mon_manager)
-
- def delete_all_filesystems(self):
- """
- Remove all filesystems that exist, and any pools in use by them.
- """
- pools = json.loads(self.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['pools']
- pool_id_name = {}
- for pool in pools:
- pool_id_name[pool['pool']] = pool['pool_name']
-
- # mark cluster down for each fs to prevent churn during deletion
- status = self.status()
- for fs in status.get_filesystems():
- self.mon_manager.raw_cluster_cmd("fs", "set", fs['mdsmap']['fs_name'], "cluster_down", "true")
-
- # get a new copy as actives may have since changed
- status = self.status()
- for fs in status.get_filesystems():
- mdsmap = fs['mdsmap']
- metadata_pool = pool_id_name[mdsmap['metadata_pool']]
-
- for gid in mdsmap['up'].values():
- self.mon_manager.raw_cluster_cmd('mds', 'fail', gid.__str__())
-
- self.mon_manager.raw_cluster_cmd('fs', 'rm', mdsmap['fs_name'], '--yes-i-really-mean-it')
- self.mon_manager.raw_cluster_cmd('osd', 'pool', 'delete',
- metadata_pool, metadata_pool,
- '--yes-i-really-really-mean-it')
- for data_pool in mdsmap['data_pools']:
- data_pool = pool_id_name[data_pool]
- try:
- self.mon_manager.raw_cluster_cmd('osd', 'pool', 'delete',
- data_pool, data_pool,
- '--yes-i-really-really-mean-it')
- except CommandFailedError as e:
- if e.exitstatus == 16: # EBUSY, this data pool is used
- pass # by two metadata pools, let the 2nd
- else: # pass delete it
- raise
-
- def get_standby_daemons(self):
- return set([s['name'] for s in self.status().get_standbys()])
-
- def get_mds_hostnames(self):
- result = set()
- for mds_id in self.mds_ids:
- mds_remote = self.mon_manager.find_remote('mds', mds_id)
- result.add(mds_remote.hostname)
-
- return list(result)
-
- def set_clients_block(self, blocked, mds_id=None):
- """
- Block (using iptables) client communications to this MDS. Be careful: if
- other services are running on this MDS, or other MDSs try to talk to this
- MDS, their communications may also be blocked as collateral damage.
-
- :param mds_id: Optional ID of MDS to block; defaults to all
- :return:
- """
- da_flag = "-A" if blocked else "-D"
-
- def set_block(_mds_id):
- remote = self.mon_manager.find_remote('mds', _mds_id)
- status = self.status()
-
- addr = status.get_mds_addr(_mds_id)
- ip_str, port_str, inst_str = re.match("(.+):(.+)/(.+)", addr).groups()
-
- remote.run(
- args=["sudo", "iptables", da_flag, "OUTPUT", "-p", "tcp", "--sport", port_str, "-j", "REJECT", "-m",
- "comment", "--comment", "teuthology"])
- remote.run(
- args=["sudo", "iptables", da_flag, "INPUT", "-p", "tcp", "--dport", port_str, "-j", "REJECT", "-m",
- "comment", "--comment", "teuthology"])
-
- self._one_or_all(mds_id, set_block, in_parallel=False)
-
- def clear_firewall(self):
- clear_firewall(self._ctx)
-
- def get_mds_info(self, mds_id):
- return FSStatus(self.mon_manager).get_mds(mds_id)
-
- def is_full(self):
- flags = json.loads(self.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['flags']
- return 'full' in flags
-
- def is_pool_full(self, pool_name):
- pools = json.loads(self.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['pools']
- for pool in pools:
- if pool['pool_name'] == pool_name:
- return 'full' in pool['flags_names'].split(",")
-
- raise RuntimeError("Pool not found '{0}'".format(pool_name))
-
-class Filesystem(MDSCluster):
- """
- This object is for driving a CephFS filesystem. The MDS daemons driven by
- MDSCluster may be shared with other Filesystems.
- """
- def __init__(self, ctx, fscid=None, name=None, create=False,
- ec_profile=None):
- super(Filesystem, self).__init__(ctx)
-
- self.name = name
- self.ec_profile = ec_profile
- self.id = None
- self.metadata_pool_name = None
- self.metadata_overlay = False
- self.data_pool_name = None
- self.data_pools = None
-
- client_list = list(misc.all_roles_of_type(self._ctx.cluster, 'client'))
- self.client_id = client_list[0]
- self.client_remote = list(misc.get_clients(ctx=ctx, roles=["client.{0}".format(self.client_id)]))[0][1]
-
- if name is not None:
- if fscid is not None:
- raise RuntimeError("cannot specify fscid when creating fs")
- if create and not self.legacy_configured():
- self.create()
- else:
- if fscid is not None:
- self.id = fscid
- self.getinfo(refresh = True)
-
- # Stash a reference to the first created filesystem on ctx, so
- # that if someone drops to the interactive shell they can easily
- # poke our methods.
- if not hasattr(self._ctx, "filesystem"):
- self._ctx.filesystem = self
-
- def getinfo(self, refresh = False):
- status = self.status()
- if self.id is not None:
- fsmap = status.get_fsmap(self.id)
- elif self.name is not None:
- fsmap = status.get_fsmap_byname(self.name)
- else:
- fss = [fs for fs in status.get_filesystems()]
- if len(fss) == 1:
- fsmap = fss[0]
- elif len(fss) == 0:
- raise RuntimeError("no file system available")
- else:
- raise RuntimeError("more than one file system available")
- self.id = fsmap['id']
- self.name = fsmap['mdsmap']['fs_name']
- self.get_pool_names(status = status, refresh = refresh)
- return status
-
- def set_metadata_overlay(self, overlay):
- if self.id is not None:
- raise RuntimeError("cannot specify fscid when configuring overlay")
- self.metadata_overlay = overlay
-
- def deactivate(self, rank):
- if rank < 0:
- raise RuntimeError("invalid rank")
- elif rank == 0:
- raise RuntimeError("cannot deactivate rank 0")
- self.mon_manager.raw_cluster_cmd("mds", "deactivate", "%d:%d" % (self.id, rank))
-
- def set_max_mds(self, max_mds):
- self.mon_manager.raw_cluster_cmd("fs", "set", self.name, "max_mds", "%d" % max_mds)
-
- def set_allow_dirfrags(self, yes):
- self.mon_manager.raw_cluster_cmd("fs", "set", self.name, "allow_dirfrags", str(yes).lower(), '--yes-i-really-mean-it')
-
- def get_pgs_per_fs_pool(self):
- """
- Calculate how many PGs to use when creating a pool, in order to avoid raising any
- health warnings about mon_pg_warn_min_per_osd
-
- :return: an integer number of PGs
- """
- pg_warn_min_per_osd = int(self.get_config('mon_pg_warn_min_per_osd'))
- osd_count = len(list(misc.all_roles_of_type(self._ctx.cluster, 'osd')))
- return pg_warn_min_per_osd * osd_count
-
- def create(self):
- if self.name is None:
- self.name = "cephfs"
- if self.metadata_pool_name is None:
- self.metadata_pool_name = "{0}_metadata".format(self.name)
- if self.data_pool_name is None:
- data_pool_name = "{0}_data".format(self.name)
- else:
- data_pool_name = self.data_pool_name
-
- log.info("Creating filesystem '{0}'".format(self.name))
-
- pgs_per_fs_pool = self.get_pgs_per_fs_pool()
-
- self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create',
- self.metadata_pool_name, pgs_per_fs_pool.__str__())
- if self.metadata_overlay:
- self.mon_manager.raw_cluster_cmd('fs', 'new',
- self.name, self.metadata_pool_name, data_pool_name,
- '--allow-dangerous-metadata-overlay')
- else:
- if self.ec_profile:
- log.info("EC profile is %s", self.ec_profile)
- cmd = ['osd', 'erasure-code-profile', 'set', data_pool_name]
- cmd.extend(self.ec_profile)
- self.mon_manager.raw_cluster_cmd(*cmd)
- self.mon_manager.raw_cluster_cmd(
- 'osd', 'pool', 'create',
- data_pool_name, pgs_per_fs_pool.__str__(), 'erasure',
- data_pool_name)
- self.mon_manager.raw_cluster_cmd(
- 'osd', 'pool', 'set',
- data_pool_name, 'allow_ec_overwrites', 'true')
- else:
- self.mon_manager.raw_cluster_cmd(
- 'osd', 'pool', 'create',
- data_pool_name, pgs_per_fs_pool.__str__())
- self.mon_manager.raw_cluster_cmd('fs', 'new',
- self.name, self.metadata_pool_name, data_pool_name)
- self.check_pool_application(self.metadata_pool_name)
- self.check_pool_application(data_pool_name)
- # Turn off spurious standby count warnings from modifying max_mds in tests.
- try:
- self.mon_manager.raw_cluster_cmd('fs', 'set', self.name, 'standby_count_wanted', '0')
- except CommandFailedError as e:
- if e.exitstatus == 22:
- # standby_count_wanted not available prior to luminous (upgrade tests would fail otherwise)
- pass
- else:
- raise
-
- self.getinfo(refresh = True)
-
-
- def check_pool_application(self, pool_name):
- osd_map = self.mon_manager.get_osd_dump_json()
- for pool in osd_map['pools']:
- if pool['pool_name'] == pool_name:
- if "application_metadata" in pool:
- if not "cephfs" in pool['application_metadata']:
- raise RuntimeError("Pool %p does not name cephfs as application!".\
- format(pool_name))
-
-
- def __del__(self):
- if getattr(self._ctx, "filesystem", None) == self:
- delattr(self._ctx, "filesystem")
-
- def exists(self):
- """
- Whether a filesystem exists in the mon's filesystem list
- """
- fs_list = json.loads(self.mon_manager.raw_cluster_cmd('fs', 'ls', '--format=json-pretty'))
- return self.name in [fs['name'] for fs in fs_list]
-
- def legacy_configured(self):
- """
- Check if a legacy (i.e. pre "fs new") filesystem configuration is present. If this is
- the case, the caller should avoid using Filesystem.create
- """
- try:
- out_text = self.mon_manager.raw_cluster_cmd('--format=json-pretty', 'osd', 'lspools')
- pools = json.loads(out_text)
- metadata_pool_exists = 'metadata' in [p['poolname'] for p in pools]
- if metadata_pool_exists:
- self.metadata_pool_name = 'metadata'
- except CommandFailedError as e:
- # For use in upgrade tests, Ceph cuttlefish and earlier don't support
- # structured output (--format) from the CLI.
- if e.exitstatus == 22:
- metadata_pool_exists = True
- else:
- raise
-
- return metadata_pool_exists
-
- def _df(self):
- return json.loads(self.mon_manager.raw_cluster_cmd("df", "--format=json-pretty"))
-
- def get_mds_map(self):
- return self.status().get_fsmap(self.id)['mdsmap']
-
- def add_data_pool(self, name):
- self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', name, self.get_pgs_per_fs_pool().__str__())
- self.mon_manager.raw_cluster_cmd('fs', 'add_data_pool', self.name, name)
- self.get_pool_names(refresh = True)
- for poolid, fs_name in self.data_pools.items():
- if name == fs_name:
- return poolid
- raise RuntimeError("could not get just created pool '{0}'".format(name))
-
- def get_pool_names(self, refresh = False, status = None):
- if refresh or self.metadata_pool_name is None or self.data_pools is None:
- if status is None:
- status = self.status()
- fsmap = status.get_fsmap(self.id)
-
- osd_map = self.mon_manager.get_osd_dump_json()
- id_to_name = {}
- for p in osd_map['pools']:
- id_to_name[p['pool']] = p['pool_name']
-
- self.metadata_pool_name = id_to_name[fsmap['mdsmap']['metadata_pool']]
- self.data_pools = {}
- for data_pool in fsmap['mdsmap']['data_pools']:
- self.data_pools[data_pool] = id_to_name[data_pool]
-
- def get_data_pool_name(self, refresh = False):
- if refresh or self.data_pools is None:
- self.get_pool_names(refresh = True)
- assert(len(self.data_pools) == 1)
- return self.data_pools.values()[0]
-
- def get_data_pool_id(self, refresh = False):
- """
- Don't call this if you have multiple data pools
- :return: integer
- """
- if refresh or self.data_pools is None:
- self.get_pool_names(refresh = True)
- assert(len(self.data_pools) == 1)
- return self.data_pools.keys()[0]
-
- def get_data_pool_names(self, refresh = False):
- if refresh or self.data_pools is None:
- self.get_pool_names(refresh = True)
- return self.data_pools.values()
-
- def get_metadata_pool_name(self):
- return self.metadata_pool_name
-
- def set_data_pool_name(self, name):
- if self.id is not None:
- raise RuntimeError("can't set filesystem name if its fscid is set")
- self.data_pool_name = name
-
- def get_namespace_id(self):
- return self.id
-
- def get_pool_df(self, pool_name):
- """
- Return a dict like:
- {u'bytes_used': 0, u'max_avail': 83848701, u'objects': 0, u'kb_used': 0}
- """
- for pool_df in self._df()['pools']:
- if pool_df['name'] == pool_name:
- return pool_df['stats']
-
- raise RuntimeError("Pool name '{0}' not found".format(pool_name))
-
- def get_usage(self):
- return self._df()['stats']['total_used_bytes']
-
- def are_daemons_healthy(self):
- """
- Return true if all daemons are in one of the states active, standby or
- standby-replay, and at least max_mds daemons are in 'active'.
-
- Unlike most of Filesystem, this function is tolerant of new-style `fs`
- commands being missing, because we are part of the ceph installation
- process during upgrade suites, so must fall back to old style commands
- when we get an EINVAL on a new style command.
-
- :return:
- """
-
- active_count = 0
- try:
- mds_map = self.get_mds_map()
- except CommandFailedError as cfe:
- # Old version, fall back to non-multi-fs commands
- if cfe.exitstatus == errno.EINVAL:
- mds_map = json.loads(
- self.mon_manager.raw_cluster_cmd('mds', 'dump', '--format=json'))
- else:
- raise
-
- log.info("are_daemons_healthy: mds map: {0}".format(mds_map))
-
- for mds_id, mds_status in mds_map['info'].items():
- if mds_status['state'] not in ["up:active", "up:standby", "up:standby-replay"]:
- log.warning("Unhealthy mds state {0}:{1}".format(mds_id, mds_status['state']))
- return False
- elif mds_status['state'] == 'up:active':
- active_count += 1
-
- log.info("are_daemons_healthy: {0}/{1}".format(
- active_count, mds_map['max_mds']
- ))
-
- if active_count >= mds_map['max_mds']:
- # The MDSMap says these guys are active, but let's check they really are
- for mds_id, mds_status in mds_map['info'].items():
- if mds_status['state'] == 'up:active':
- try:
- daemon_status = self.mds_asok(["status"], mds_id=mds_status['name'])
- except CommandFailedError as cfe:
- if cfe.exitstatus == errno.EINVAL:
- # Old version, can't do this check
- continue
- else:
- # MDS not even running
- return False
-
- if daemon_status['state'] != 'up:active':
- # MDS hasn't taken the latest map yet
- return False
-
- return True
- else:
- return False
-
- def get_daemon_names(self, state=None):
- """
- Return MDS daemon names of those daemons in the given state
- :param state:
- :return:
- """
- status = self.get_mds_map()
- result = []
- for mds_status in sorted(status['info'].values(), lambda a, b: cmp(a['rank'], b['rank'])):
- if mds_status['state'] == state or state is None:
- result.append(mds_status['name'])
-
- return result
-
- def get_active_names(self):
- """
- Return MDS daemon names of those daemons holding ranks
- in state up:active
-
- :return: list of strings like ['a', 'b'], sorted by rank
- """
- return self.get_daemon_names("up:active")
-
- def get_all_mds_rank(self):
- status = self.get_mds_map()
- result = []
- for mds_status in sorted(status['info'].values(), lambda a, b: cmp(a['rank'], b['rank'])):
- if mds_status['rank'] != -1 and mds_status['state'] != 'up:standby-replay':
- result.append(mds_status['rank'])
-
- return result
-
- def get_rank_names(self):
- """
- Return MDS daemon names of those daemons holding a rank,
- sorted by rank. This includes e.g. up:replay/reconnect
- as well as active, but does not include standby or
- standby-replay.
- """
- status = self.get_mds_map()
- result = []
- for mds_status in sorted(status['info'].values(), lambda a, b: cmp(a['rank'], b['rank'])):
- if mds_status['rank'] != -1 and mds_status['state'] != 'up:standby-replay':
- result.append(mds_status['name'])
-
- return result
-
- def wait_for_daemons(self, timeout=None):
- """
- Wait until all daemons are healthy
- :return:
- """
-
- if timeout is None:
- timeout = DAEMON_WAIT_TIMEOUT
-
- elapsed = 0
- while True:
- if self.are_daemons_healthy():
- return
- else:
- time.sleep(1)
- elapsed += 1
-
- if elapsed > timeout:
- raise RuntimeError("Timed out waiting for MDS daemons to become healthy")
-
- def get_lone_mds_id(self):
- """
- Get a single MDS ID: the only one if there is only one
- configured, else the only one currently holding a rank,
- else raise an error.
- """
- if len(self.mds_ids) != 1:
- alive = self.get_rank_names()
- if len(alive) == 1:
- return alive[0]
- else:
- raise ValueError("Explicit MDS argument required when multiple MDSs in use")
- else:
- return self.mds_ids[0]
-
- def recreate(self):
- log.info("Creating new filesystem")
- self.delete_all_filesystems()
- self.id = None
- self.create()
-
- def put_metadata_object_raw(self, object_id, infile):
- """
- Save an object to the metadata pool
- """
- temp_bin_path = infile
- self.client_remote.run(args=[
- 'sudo', os.path.join(self._prefix, 'rados'), '-p', self.metadata_pool_name, 'put', object_id, temp_bin_path
- ])
-
- def get_metadata_object_raw(self, object_id):
- """
- Retrieve an object from the metadata pool and store it in a file.
- """
- temp_bin_path = '/tmp/' + object_id + '.bin'
-
- self.client_remote.run(args=[
- 'sudo', os.path.join(self._prefix, 'rados'), '-p', self.metadata_pool_name, 'get', object_id, temp_bin_path
- ])
-
- return temp_bin_path
-
- def get_metadata_object(self, object_type, object_id):
- """
- Retrieve an object from the metadata pool, pass it through
- ceph-dencoder to dump it to JSON, and return the decoded object.
- """
- temp_bin_path = '/tmp/out.bin'
-
- self.client_remote.run(args=[
- 'sudo', os.path.join(self._prefix, 'rados'), '-p', self.metadata_pool_name, 'get', object_id, temp_bin_path
- ])
-
- stdout = StringIO()
- self.client_remote.run(args=[
- 'sudo', os.path.join(self._prefix, 'ceph-dencoder'), 'type', object_type, 'import', temp_bin_path, 'decode', 'dump_json'
- ], stdout=stdout)
- dump_json = stdout.getvalue().strip()
- try:
- dump = json.loads(dump_json)
- except (TypeError, ValueError):
- log.error("Failed to decode JSON: '{0}'".format(dump_json))
- raise
-
- return dump
-
- def get_journal_version(self):
- """
- Read the JournalPointer and Journal::Header objects to learn the version of
- encoding in use.
- """
- journal_pointer_object = '400.00000000'
- journal_pointer_dump = self.get_metadata_object("JournalPointer", journal_pointer_object)
- journal_ino = journal_pointer_dump['journal_pointer']['front']
-
- journal_header_object = "{0:x}.00000000".format(journal_ino)
- journal_header_dump = self.get_metadata_object('Journaler::Header', journal_header_object)
-
- version = journal_header_dump['journal_header']['stream_format']
- log.info("Read journal version {0}".format(version))
-
- return version
-
- def mds_asok(self, command, mds_id=None):
- if mds_id is None:
- mds_id = self.get_lone_mds_id()
-
- return self.json_asok(command, 'mds', mds_id)
-
- def read_cache(self, path, depth=None):
- cmd = ["dump", "tree", path]
- if depth is not None:
- cmd.append(depth.__str__())
- result = self.mds_asok(cmd)
- if len(result) == 0:
- raise RuntimeError("Path not found in cache: {0}".format(path))
-
- return result
-
- def wait_for_state(self, goal_state, reject=None, timeout=None, mds_id=None, rank=None):
- """
- Block until the MDS reaches a particular state, or a failure condition
- is met.
-
- When there are multiple MDSs, succeed when exactly one MDS is in the
- goal state, or fail when any MDS is in the reject state.
-
- :param goal_state: Return once the MDS is in this state
- :param reject: Fail if the MDS enters this state before the goal state
- :param timeout: Fail if this many seconds pass before reaching goal
- :return: number of seconds waited, rounded down to integer
- """
-
- started_at = time.time()
- while True:
- status = self.status()
- if rank is not None:
- mds_info = status.get_rank(self.id, rank)
- current_state = mds_info['state'] if mds_info else None
- log.info("Looked up MDS state for mds.{0}: {1}".format(rank, current_state))
- elif mds_id is not None:
- # mds_info is None if no daemon with this ID exists in the map
- mds_info = status.get_mds(mds_id)
- current_state = mds_info['state'] if mds_info else None
- log.info("Looked up MDS state for {0}: {1}".format(mds_id, current_state))
- else:
- # In general, look for a single MDS
- states = [m['state'] for m in status.get_ranks(self.id)]
- if [s for s in states if s == goal_state] == [goal_state]:
- current_state = goal_state
- elif reject in states:
- current_state = reject
- else:
- current_state = None
- log.info("mapped states {0} to {1}".format(states, current_state))
-
- elapsed = time.time() - started_at
- if current_state == goal_state:
- log.info("reached state '{0}' in {1}s".format(current_state, elapsed))
- return elapsed
- elif reject is not None and current_state == reject:
- raise RuntimeError("MDS in reject state {0}".format(current_state))
- elif timeout is not None and elapsed > timeout:
- log.error("MDS status at timeout: {0}".format(status.get_fsmap(self.id)))
- raise RuntimeError(
- "Reached timeout after {0} seconds waiting for state {1}, while in state {2}".format(
- elapsed, goal_state, current_state
- ))
- else:
- time.sleep(1)
-
- def _read_data_xattr(self, ino_no, xattr_name, type, pool):
- mds_id = self.mds_ids[0]
- remote = self.mds_daemons[mds_id].remote
- if pool is None:
- pool = self.get_data_pool_name()
-
- obj_name = "{0:x}.00000000".format(ino_no)
-
- args = [
- os.path.join(self._prefix, "rados"), "-p", pool, "getxattr", obj_name, xattr_name
- ]
- try:
- proc = remote.run(
- args=args,
- stdout=StringIO())
- except CommandFailedError as e:
- log.error(e.__str__())
- raise ObjectNotFound(obj_name)
-
- data = proc.stdout.getvalue()
-
- p = remote.run(
- args=[os.path.join(self._prefix, "ceph-dencoder"), "type", type, "import", "-", "decode", "dump_json"],
- stdout=StringIO(),
- stdin=data
- )
-
- return json.loads(p.stdout.getvalue().strip())
-
- def _write_data_xattr(self, ino_no, xattr_name, data, pool=None):
- """
- Write to an xattr of the 0th data object of an inode. Will
- succeed whether the object and/or xattr already exist or not.
-
- :param ino_no: integer inode number
- :param xattr_name: string name of the xattr
- :param data: byte array data to write to the xattr
- :param pool: name of data pool or None to use primary data pool
- :return: None
- """
- remote = self.mds_daemons[self.mds_ids[0]].remote
- if pool is None:
- pool = self.get_data_pool_name()
-
- obj_name = "{0:x}.00000000".format(ino_no)
- args = [
- os.path.join(self._prefix, "rados"), "-p", pool, "setxattr",
- obj_name, xattr_name, data
- ]
- remote.run(
- args=args,
- stdout=StringIO())
-
- def read_backtrace(self, ino_no, pool=None):
- """
- Read the backtrace from the data pool, return a dict in the format
- given by inode_backtrace_t::dump, which is something like:
-
- ::
-
- rados -p cephfs_data getxattr 10000000002.00000000 parent > out.bin
- ceph-dencoder type inode_backtrace_t import out.bin decode dump_json
-
- { "ino": 1099511627778,
- "ancestors": [
- { "dirino": 1,
- "dname": "blah",
- "version": 11}],
- "pool": 1,
- "old_pools": []}
-
- :param pool: name of pool to read backtrace from. If omitted, FS must have only
- one data pool and that will be used.
- """
- return self._read_data_xattr(ino_no, "parent", "inode_backtrace_t", pool)
-
- def read_layout(self, ino_no, pool=None):
- """
- Read 'layout' xattr of an inode and parse the result, returning a dict like:
- ::
- {
- "stripe_unit": 4194304,
- "stripe_count": 1,
- "object_size": 4194304,
- "pool_id": 1,
- "pool_ns": "",
- }
-
- :param pool: name of pool to read backtrace from. If omitted, FS must have only
- one data pool and that will be used.
- """
- return self._read_data_xattr(ino_no, "layout", "file_layout_t", pool)
-
- def _enumerate_data_objects(self, ino, size):
- """
- Get the list of expected data objects for a range, and the list of objects
- that really exist.
-
- :return a tuple of two lists of strings (expected, actual)
- """
- stripe_size = 1024 * 1024 * 4
-
- size = max(stripe_size, size)
-
- want_objects = [
- "{0:x}.{1:08x}".format(ino, n)
- for n in range(0, ((size - 1) / stripe_size) + 1)
- ]
-
- exist_objects = self.rados(["ls"], pool=self.get_data_pool_name()).split("\n")
-
- return want_objects, exist_objects
-
- def data_objects_present(self, ino, size):
- """
- Check that *all* the expected data objects for an inode are present in the data pool
- """
-
- want_objects, exist_objects = self._enumerate_data_objects(ino, size)
- missing = set(want_objects) - set(exist_objects)
-
- if missing:
- log.info("Objects missing (ino {0}, size {1}): {2}".format(
- ino, size, missing
- ))
- return False
- else:
- log.info("All objects for ino {0} size {1} found".format(ino, size))
- return True
-
- def data_objects_absent(self, ino, size):
- want_objects, exist_objects = self._enumerate_data_objects(ino, size)
- present = set(want_objects) & set(exist_objects)
-
- if present:
- log.info("Objects not absent (ino {0}, size {1}): {2}".format(
- ino, size, present
- ))
- return False
- else:
- log.info("All objects for ino {0} size {1} are absent".format(ino, size))
- return True
-
- def dirfrag_exists(self, ino, frag):
- try:
- self.rados(["stat", "{0:x}.{1:08x}".format(ino, frag)])
- except CommandFailedError as e:
- return False
- else:
- return True
-
- def rados(self, args, pool=None, namespace=None, stdin_data=None):
- """
- Call into the `rados` CLI from an MDS
- """
-
- if pool is None:
- pool = self.get_metadata_pool_name()
-
- # It doesn't matter which MDS we use to run rados commands; they all
- # have access to the pools
- mds_id = self.mds_ids[0]
- remote = self.mds_daemons[mds_id].remote
-
- # NB we could alternatively use librados pybindings for this, but it's a one-liner
- # using the `rados` CLI
- args = ([os.path.join(self._prefix, "rados"), "-p", pool] +
- (["--namespace", namespace] if namespace else []) +
- args)
- p = remote.run(
- args=args,
- stdin=stdin_data,
- stdout=StringIO())
- return p.stdout.getvalue().strip()
-
- def list_dirfrag(self, dir_ino):
- """
- Read the named object and return the list of omap keys
-
- :return a list of 0 or more strings
- """
-
- dirfrag_obj_name = "{0:x}.00000000".format(dir_ino)
-
- try:
- key_list_str = self.rados(["listomapkeys", dirfrag_obj_name])
- except CommandFailedError as e:
- log.error(e.__str__())
- raise ObjectNotFound(dirfrag_obj_name)
-
- return key_list_str.split("\n") if key_list_str else []
-
- def erase_metadata_objects(self, prefix):
- """
- For all objects in the metadata pool matching the prefix,
- erase them.
-
- This is O(N) in the number of objects in the pool, so only suitable
- for use on toy test filesystems.
- """
- all_objects = self.rados(["ls"]).split("\n")
- matching_objects = [o for o in all_objects if o.startswith(prefix)]
- for o in matching_objects:
- self.rados(["rm", o])
-
- def erase_mds_objects(self, rank):
- """
- Erase all the per-MDS objects for a particular rank. This includes
- inotable, sessiontable, journal
- """
-
- def obj_prefix(multiplier):
- """
- MDS object naming convention: e.g. rank 1's
- journal is at 201.***
- """
- return "%x." % (multiplier * 0x100 + rank)
-
- # MDS_INO_LOG_OFFSET
- self.erase_metadata_objects(obj_prefix(2))
- # MDS_INO_LOG_BACKUP_OFFSET
- self.erase_metadata_objects(obj_prefix(3))
- # MDS_INO_LOG_POINTER_OFFSET
- self.erase_metadata_objects(obj_prefix(4))
- # MDSTables & SessionMap
- self.erase_metadata_objects("mds{rank:d}_".format(rank=rank))
-
- @property
- def _prefix(self):
- """
- Override this to set a different path prefix for the Ceph binaries (by default they are found on $PATH).
- """
- return ""
-
- def _run_tool(self, tool, args, rank=None, quiet=False):
- # Tests frequently have [client] configuration that jacks up
- # the objecter log level (unlikely to be interesting here)
- # and does not set the mds log level (very interesting here)
- if quiet:
- base_args = [os.path.join(self._prefix, tool), '--debug-mds=1', '--debug-objecter=1']
- else:
- base_args = [os.path.join(self._prefix, tool), '--debug-mds=4', '--debug-objecter=1']
-
- if rank is not None:
- base_args.extend(["--rank", "%d" % rank])
-
- t1 = datetime.datetime.now()
- r = self.tool_remote.run(
- args=base_args + args,
- stdout=StringIO()).stdout.getvalue().strip()
- duration = datetime.datetime.now() - t1
- log.info("Ran {0} in time {1}, result:\n{2}".format(
- base_args + args, duration, r
- ))
- return r
-
- @property
- def tool_remote(self):
- """
- An arbitrary remote to use when invoking recovery tools. Use an MDS host because
- it'll definitely have keys with perms to access cephfs metadata pool. This is public
- so that tests can use this remote to go get locally written output files from the tools.
- """
- mds_id = self.mds_ids[0]
- return self.mds_daemons[mds_id].remote
-
- def journal_tool(self, args, rank=None, quiet=False):
- """
- Invoke cephfs-journal-tool with the passed arguments, and return its stdout
- """
- return self._run_tool("cephfs-journal-tool", args, rank, quiet)
-
- def table_tool(self, args, quiet=False):
- """
- Invoke cephfs-table-tool with the passed arguments, and return its stdout
- """
- return self._run_tool("cephfs-table-tool", args, None, quiet)
-
- def data_scan(self, args, quiet=False, worker_count=1):
- """
- Invoke cephfs-data-scan with the passed arguments, and return its stdout
-
- :param worker_count: if greater than 1, multiple workers will be run
- in parallel and the return value will be None
- """
-
- workers = []
-
- for n in range(0, worker_count):
- if worker_count > 1:
- # The first token of the data-scan args is a command; the remaining tokens
- # are its arguments. Insert the worker arguments right after the command.
- cmd = args[0]
- worker_args = [cmd] + ["--worker_n", n.__str__(), "--worker_m", worker_count.__str__()] + args[1:]
- else:
- worker_args = args
-
- workers.append(Greenlet.spawn(lambda wargs=worker_args:
- self._run_tool("cephfs-data-scan", wargs, None, quiet)))
-
- for w in workers:
- w.get()
-
- if worker_count == 1:
- return workers[0].value
- else:
- return None
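For reference, a condensed sketch (not taken from the deleted file) of how the Filesystem helpers above are typically driven from a test; ctx is the teuthology context object and the inode number is purely illustrative:

    from tasks.cephfs.filesystem import Filesystem

    fs = Filesystem(ctx, name="cephfs", create=True)   # creates the pools and runs "fs new"
    fs.wait_for_daemons()                              # block until the MDS map reports healthy daemons
    version = fs.get_journal_version()                 # decode the journal header via ceph-dencoder
    backtrace = fs.read_backtrace(0x10000000002)       # decode the "parent" xattr of that inode
    fs.journal_tool(["journal", "inspect"], rank=0)    # run cephfs-journal-tool against rank 0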
diff --git a/src/ceph/qa/tasks/cephfs/fuse_mount.py b/src/ceph/qa/tasks/cephfs/fuse_mount.py
deleted file mode 100644
index 8d8410c..0000000
--- a/src/ceph/qa/tasks/cephfs/fuse_mount.py
+++ /dev/null
@@ -1,428 +0,0 @@
-
-from StringIO import StringIO
-import json
-import time
-import logging
-from textwrap import dedent
-
-from teuthology import misc
-from teuthology.contextutil import MaxWhileTries
-from teuthology.orchestra import run
-from teuthology.orchestra.run import CommandFailedError
-from .mount import CephFSMount
-
-log = logging.getLogger(__name__)
-
-
-class FuseMount(CephFSMount):
- def __init__(self, client_config, test_dir, client_id, client_remote):
- super(FuseMount, self).__init__(test_dir, client_id, client_remote)
-
- self.client_config = client_config if client_config else {}
- self.fuse_daemon = None
- self._fuse_conn = None
-
- def mount(self, mount_path=None, mount_fs_name=None):
- try:
- return self._mount(mount_path, mount_fs_name)
- except RuntimeError:
- # Catch exceptions raised by the mount() logic (i.e. not remote command
- # failures) and ensure the mount is not left half-up.
- # Otherwise we might leave a zombie mount point that anyone
- # traversing cephtest/ gets hung up on.
- log.warn("Trying to clean up after failed mount")
- self.umount_wait(force=True)
- raise
-
- def _mount(self, mount_path, mount_fs_name):
- log.info("Client client.%s config is %s" % (self.client_id, self.client_config))
-
- daemon_signal = 'kill'
- if self.client_config.get('coverage') or self.client_config.get('valgrind') is not None:
- daemon_signal = 'term'
-
- log.info('Mounting ceph-fuse client.{id} at {remote} {mnt}...'.format(
- id=self.client_id, remote=self.client_remote, mnt=self.mountpoint))
-
- self.client_remote.run(
- args=[
- 'mkdir',
- '--',
- self.mountpoint,
- ],
- )
-
- run_cmd = [
- 'sudo',
- 'adjust-ulimits',
- 'ceph-coverage',
- '{tdir}/archive/coverage'.format(tdir=self.test_dir),
- 'daemon-helper',
- daemon_signal,
- ]
-
- fuse_cmd = ['ceph-fuse', "-f"]
-
- if mount_path is not None:
- fuse_cmd += ["--client_mountpoint={0}".format(mount_path)]
-
- if mount_fs_name is not None:
- fuse_cmd += ["--client_mds_namespace={0}".format(mount_fs_name)]
-
- fuse_cmd += [
- '--name', 'client.{id}'.format(id=self.client_id),
- # TODO ceph-fuse doesn't understand dash dash '--',
- self.mountpoint,
- ]
-
- if self.client_config.get('valgrind') is not None:
- run_cmd = misc.get_valgrind_args(
- self.test_dir,
- 'client.{id}'.format(id=self.client_id),
- run_cmd,
- self.client_config.get('valgrind'),
- )
-
- run_cmd.extend(fuse_cmd)
-
- def list_connections():
- self.client_remote.run(
- args=["sudo", "mount", "-t", "fusectl", "/sys/fs/fuse/connections", "/sys/fs/fuse/connections"],
- check_status=False
- )
- p = self.client_remote.run(
- args=["ls", "/sys/fs/fuse/connections"],
- stdout=StringIO(),
- check_status=False
- )
- if p.exitstatus != 0:
- return []
-
- ls_str = p.stdout.getvalue().strip()
- if ls_str:
- return [int(n) for n in ls_str.split("\n")]
- else:
- return []
-
- # Before starting ceph-fuse process, note the contents of
- # /sys/fs/fuse/connections
- pre_mount_conns = list_connections()
- log.info("Pre-mount connections: {0}".format(pre_mount_conns))
-
- proc = self.client_remote.run(
- args=run_cmd,
- logger=log.getChild('ceph-fuse.{id}'.format(id=self.client_id)),
- stdin=run.PIPE,
- wait=False,
- )
- self.fuse_daemon = proc
-
- # Wait for the connection reference to appear in /sys
- mount_wait = self.client_config.get('mount_wait', 0)
- if mount_wait > 0:
- log.info("Fuse mount waits {0} seconds before checking /sys/".format(mount_wait))
- time.sleep(mount_wait)
- timeout = int(self.client_config.get('mount_timeout', 30))
- waited = 0
-
- post_mount_conns = list_connections()
- while len(post_mount_conns) <= len(pre_mount_conns):
- if self.fuse_daemon.finished:
- # Did mount fail? Raise the CommandFailedError instead of
- # hitting the "failed to populate /sys/" timeout
- self.fuse_daemon.wait()
- time.sleep(1)
- waited += 1
- if waited > timeout:
- raise RuntimeError("Fuse mount failed to populate /sys/ after {0} seconds".format(
- waited
- ))
- else:
- post_mount_conns = list_connections()
-
- log.info("Post-mount connections: {0}".format(post_mount_conns))
-
- # Record our fuse connection number so that we can use it when
- # forcing an unmount
- new_conns = list(set(post_mount_conns) - set(pre_mount_conns))
- if len(new_conns) == 0:
- raise RuntimeError("New fuse connection directory not found ({0})".format(new_conns))
- elif len(new_conns) > 1:
- raise RuntimeError("Unexpectedly numerous fuse connections {0}".format(new_conns))
- else:
- self._fuse_conn = new_conns[0]
-
- def is_mounted(self):
- proc = self.client_remote.run(
- args=[
- 'stat',
- '--file-system',
- '--printf=%T\n',
- '--',
- self.mountpoint,
- ],
- stdout=StringIO(),
- stderr=StringIO(),
- wait=False
- )
- try:
- proc.wait()
- except CommandFailedError:
- if ("endpoint is not connected" in proc.stderr.getvalue()
- or "Software caused connection abort" in proc.stderr.getvalue()):
- # This happens if fuse is killed without unmount
- log.warn("Found stale mount point at {0}".format(self.mountpoint))
- return True
- else:
- # This happens if the mount directory doesn't exist
- log.info('mount point does not exist: %s', self.mountpoint)
- return False
-
- fstype = proc.stdout.getvalue().rstrip('\n')
- if fstype == 'fuseblk':
- log.info('ceph-fuse is mounted on %s', self.mountpoint)
- return True
- else:
- log.debug('ceph-fuse not mounted, got fs type {fstype!r}'.format(
- fstype=fstype))
- return False
-
- def wait_until_mounted(self):
- """
- Check to make sure that fuse is mounted on mountpoint. If not,
- sleep for 5 seconds and check again.
- """
-
- while not self.is_mounted():
- # Even if it's not mounted, it should at least
- # be running: catch simple failures where it has terminated.
- assert not self.fuse_daemon.poll()
-
- time.sleep(5)
-
- # Now that we're mounted, set permissions so that the rest of the test will have
- # unrestricted access to the filesystem mount.
- self.client_remote.run(
- args=['sudo', 'chmod', '1777', self.mountpoint])
-
- def _mountpoint_exists(self):
- return self.client_remote.run(args=["ls", "-d", self.mountpoint], check_status=False).exitstatus == 0
-
- def umount(self):
- try:
- log.info('Running fusermount -u on {name}...'.format(name=self.client_remote.name))
- self.client_remote.run(
- args=[
- 'sudo',
- 'fusermount',
- '-u',
- self.mountpoint,
- ],
- )
- except run.CommandFailedError:
- log.info('Failed to unmount ceph-fuse on {name}, aborting...'.format(name=self.client_remote.name))
-
- self.client_remote.run(args=[
- 'sudo',
- run.Raw('PATH=/usr/sbin:$PATH'),
- 'lsof',
- run.Raw(';'),
- 'ps',
- 'auxf',
- ])
-
- # abort the fuse mount, killing all hung processes
- if self._fuse_conn:
- self.run_python(dedent("""
- import os
- path = "/sys/fs/fuse/connections/{0}/abort"
- if os.path.exists(path):
- open(path, "w").write("1")
- """).format(self._fuse_conn))
- self._fuse_conn = None
-
- stderr = StringIO()
- try:
- # make sure it's unmounted
- self.client_remote.run(
- args=[
- 'sudo',
- 'umount',
- '-l',
- '-f',
- self.mountpoint,
- ],
- stderr=stderr
- )
- except CommandFailedError:
- if self.is_mounted():
- raise
-
- assert not self.is_mounted()
- self._fuse_conn = None
-
- def umount_wait(self, force=False, require_clean=False):
- """
- :param force: Complete cleanly even if the MDS is offline
- """
- if force:
- assert not require_clean # mutually exclusive
-
- # When we expect to be forcing, kill the ceph-fuse process directly.
- # This should avoid hitting the more aggressive fallback killing
- # in umount() which can affect other mounts too.
- self.fuse_daemon.stdin.close()
-
- # However, we will still hit the aggressive wait if there is an ongoing
- # mount -o remount (especially if the remount is stuck because MDSs
- # are unavailable)
-
- self.umount()
-
- try:
- if self.fuse_daemon:
- # Permit a timeout, so that we do not block forever
- run.wait([self.fuse_daemon], 900)
- except MaxWhileTries:
- log.error("process failed to terminate after unmount. This probably"
- "indicates a bug within ceph-fuse.")
- raise
- except CommandFailedError:
- if require_clean:
- raise
-
- self.cleanup()
-
- def cleanup(self):
- """
- Remove the mount point.
-
- Prerequisite: the client is not mounted.
- """
- stderr = StringIO()
- try:
- self.client_remote.run(
- args=[
- 'rmdir',
- '--',
- self.mountpoint,
- ],
- stderr=stderr
- )
- except CommandFailedError:
- if "No such file or directory" in stderr.getvalue():
- pass
- else:
- raise
-
- def kill(self):
- """
- Terminate the client without removing the mount point.
- """
- self.fuse_daemon.stdin.close()
- try:
- self.fuse_daemon.wait()
- except CommandFailedError:
- pass
-
- def kill_cleanup(self):
- """
- Follow up ``kill`` to get to a clean unmounted state.
- """
- self.umount()
- self.cleanup()
-
- def teardown(self):
- """
- Whatever the state of the mount, get it gone.
- """
- super(FuseMount, self).teardown()
-
- self.umount()
-
- if self.fuse_daemon and not self.fuse_daemon.finished:
- self.fuse_daemon.stdin.close()
- try:
- self.fuse_daemon.wait()
- except CommandFailedError:
- pass
-
- # Indiscriminate, unlike the touchier cleanup()
- self.client_remote.run(
- args=[
- 'rm',
- '-rf',
- self.mountpoint,
- ],
- )
-
- def _asok_path(self):
- return "/var/run/ceph/ceph-client.{0}.*.asok".format(self.client_id)
-
- @property
- def _prefix(self):
- return ""
-
- def admin_socket(self, args):
- pyscript = """
-import glob
-import re
-import os
-import subprocess
-
-def find_socket(client_name):
- asok_path = "{asok_path}"
- files = glob.glob(asok_path)
-
- # Given a non-glob path, it better be there
- if "*" not in asok_path:
- assert(len(files) == 1)
- return files[0]
-
- for f in files:
- pid = re.match(".*\.(\d+)\.asok$", f).group(1)
- if os.path.exists("/proc/{{0}}".format(pid)):
- return f
- raise RuntimeError("Client socket {{0}} not found".format(client_name))
-
-print find_socket("{client_name}")
-""".format(
- asok_path=self._asok_path(),
- client_name="client.{0}".format(self.client_id))
-
- # Find the admin socket
- p = self.client_remote.run(args=[
- 'python', '-c', pyscript
- ], stdout=StringIO())
- asok_path = p.stdout.getvalue().strip()
- log.info("Found client admin socket at {0}".format(asok_path))
-
- # Query client ID from admin socket
- p = self.client_remote.run(
- args=['sudo', self._prefix + 'ceph', '--admin-daemon', asok_path] + args,
- stdout=StringIO())
- return json.loads(p.stdout.getvalue())
-
- def get_global_id(self):
- """
- Look up the CephFS client ID for this mount
- """
-
- return self.admin_socket(['mds_sessions'])['id']
-
- def get_osd_epoch(self):
- """
- Return 2-tuple of osd_epoch, osd_epoch_barrier
- """
- status = self.admin_socket(['status'])
- return status['osd_epoch'], status['osd_epoch_barrier']
-
- def get_dentry_count(self):
- """
- Return 2-tuple of dentry_count, dentry_pinned_count
- """
- status = self.admin_socket(['status'])
- return status['dentry_count'], status['dentry_pinned_count']
-
- def set_cache_size(self, size):
- return self.admin_socket(['config', 'set', 'client_cache_size', str(size)])
diff --git a/src/ceph/qa/tasks/cephfs/kernel_mount.py b/src/ceph/qa/tasks/cephfs/kernel_mount.py
deleted file mode 100644
index bfa1ac6..0000000
--- a/src/ceph/qa/tasks/cephfs/kernel_mount.py
+++ /dev/null
@@ -1,267 +0,0 @@
-from StringIO import StringIO
-import json
-import logging
-from textwrap import dedent
-from teuthology.orchestra.run import CommandFailedError
-from teuthology import misc
-
-from teuthology.orchestra import remote as orchestra_remote
-from teuthology.orchestra import run
-from teuthology.contextutil import MaxWhileTries
-from .mount import CephFSMount
-
-log = logging.getLogger(__name__)
-
-
-UMOUNT_TIMEOUT = 300
-
-
-class KernelMount(CephFSMount):
- def __init__(self, mons, test_dir, client_id, client_remote,
- ipmi_user, ipmi_password, ipmi_domain):
- super(KernelMount, self).__init__(test_dir, client_id, client_remote)
- self.mons = mons
-
- self.mounted = False
- self.ipmi_user = ipmi_user
- self.ipmi_password = ipmi_password
- self.ipmi_domain = ipmi_domain
-
- def write_secret_file(self, remote, role, keyring, filename):
- """
- Stash the keyring in the filename specified.
- """
- remote.run(
- args=[
- 'adjust-ulimits',
- 'ceph-coverage',
- '{tdir}/archive/coverage'.format(tdir=self.test_dir),
- 'ceph-authtool',
- '--name={role}'.format(role=role),
- '--print-key',
- keyring,
- run.Raw('>'),
- filename,
- ],
- )
-
- def mount(self, mount_path=None, mount_fs_name=None):
- log.info('Mounting kclient client.{id} at {remote} {mnt}...'.format(
- id=self.client_id, remote=self.client_remote, mnt=self.mountpoint))
-
- keyring = self.get_keyring_path()
- secret = '{tdir}/ceph.data/client.{id}.secret'.format(tdir=self.test_dir, id=self.client_id)
- self.write_secret_file(self.client_remote, 'client.{id}'.format(id=self.client_id),
- keyring, secret)
-
- self.client_remote.run(
- args=[
- 'mkdir',
- '--',
- self.mountpoint,
- ],
- )
-
- if mount_path is None:
- mount_path = "/"
-
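- # norequire_active_mds: don't fail the mount just because no MDS is active yet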
- opts = 'name={id},secretfile={secret},norequire_active_mds'.format(id=self.client_id,
- secret=secret)
-
- if mount_fs_name is not None:
- opts += ",mds_namespace={0}".format(mount_fs_name)
-
- self.client_remote.run(
- args=[
- 'sudo',
- 'adjust-ulimits',
- 'ceph-coverage',
- '{tdir}/archive/coverage'.format(tdir=self.test_dir),
- '/sbin/mount.ceph',
- '{mons}:{mount_path}'.format(mons=','.join(self.mons), mount_path=mount_path),
- self.mountpoint,
- '-v',
- '-o',
- opts
- ],
- )
-
- self.client_remote.run(
- args=['sudo', 'chmod', '1777', self.mountpoint])
-
- self.mounted = True
-
- def umount(self, force=False):
- log.debug('Unmounting client client.{id}...'.format(id=self.client_id))
-
- cmd=['sudo', 'umount', self.mountpoint]
- if force:
- cmd.append('-f')
-
- try:
- self.client_remote.run(args=cmd)
- except Exception as e:
- self.client_remote.run(args=[
- 'sudo',
- run.Raw('PATH=/usr/sbin:$PATH'),
- 'lsof',
- run.Raw(';'),
- 'ps', 'auxf',
- ])
- raise e
-
- rproc = self.client_remote.run(
- args=[
- 'rmdir',
- '--',
- self.mountpoint,
- ],
- wait=False
- )
- run.wait([rproc], UMOUNT_TIMEOUT)
- self.mounted = False
-
- def cleanup(self):
- pass
-
- def umount_wait(self, force=False, require_clean=False):
- """
- Unlike the fuse client, the kernel client's umount is immediate
- """
- if not self.is_mounted():
- return
-
- try:
- self.umount(force)
- except (CommandFailedError, MaxWhileTries):
- if not force:
- raise
-
- self.kill()
- self.kill_cleanup()
-
- self.mounted = False
-
- def is_mounted(self):
- return self.mounted
-
- def wait_until_mounted(self):
- """
- Unlike the fuse client, the kernel client is up and running as soon
- as the initial mount() function returns.
- """
- assert self.mounted
-
- def teardown(self):
- super(KernelMount, self).teardown()
- if self.mounted:
- self.umount()
-
- def kill(self):
- """
- The Ceph kernel client doesn't have a mechanism to kill itself (doing
- that inside the kernel would be weird anyway), so we reboot the whole node
- to get the same effect.
-
- We use IPMI to reboot, because we don't want the client to send any
- releases of capabilities.
- """
-
- con = orchestra_remote.getRemoteConsole(self.client_remote.hostname,
- self.ipmi_user,
- self.ipmi_password,
- self.ipmi_domain)
- con.power_off()
-
- self.mounted = False
-
- def kill_cleanup(self):
- assert not self.mounted
-
- con = orchestra_remote.getRemoteConsole(self.client_remote.hostname,
- self.ipmi_user,
- self.ipmi_password,
- self.ipmi_domain)
- con.power_on()
-
- # Wait for node to come back up after reboot
- misc.reconnect(None, 300, [self.client_remote])
-
- # Remove mount directory
- self.client_remote.run(
- args=[
- 'rmdir',
- '--',
- self.mountpoint,
- ],
- )
-
- def _find_debug_dir(self):
- """
- Find the debugfs folder for this mount
- """
- pyscript = dedent("""
- import glob
- import os
- import json
-
- def get_id_to_dir():
- result = {}
- for dir in glob.glob("/sys/kernel/debug/ceph/*"):
- mds_sessions_lines = open(os.path.join(dir, "mds_sessions")).readlines()
- client_id = mds_sessions_lines[1].split()[1].strip('"')
-
- result[client_id] = dir
- return result
-
- print json.dumps(get_id_to_dir())
- """)
-
- p = self.client_remote.run(args=[
- 'sudo', 'python', '-c', pyscript
- ], stdout=StringIO())
- client_id_to_dir = json.loads(p.stdout.getvalue())
-
- try:
- return client_id_to_dir[self.client_id]
- except KeyError:
- log.error("Client id '{0}' debug dir not found (clients seen were: {1})".format(
- self.client_id, ",".join(client_id_to_dir.keys())
- ))
- raise
-
- def _read_debug_file(self, filename):
- debug_dir = self._find_debug_dir()
-
- pyscript = dedent("""
- import os
-
- print open(os.path.join("{debug_dir}", "{filename}")).read()
- """).format(debug_dir=debug_dir, filename=filename)
-
- p = self.client_remote.run(args=[
- 'sudo', 'python', '-c', pyscript
- ], stdout=StringIO())
- return p.stdout.getvalue()
-
- def get_global_id(self):
- """
- Look up the CephFS client ID for this mount, using debugfs.
- """
-
- assert self.mounted
-
- mds_sessions = self._read_debug_file("mds_sessions")
- lines = mds_sessions.split("\n")
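- # the first line of the debugfs mds_sessions file is expected to be of the form "global_id <id>"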
- return int(lines[0].split()[1])
-
- def get_osd_epoch(self):
- """
- Return 2-tuple of osd_epoch, osd_epoch_barrier
- """
- osd_map = self._read_debug_file("osdmap")
- lines = osd_map.split("\n")
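- # the first line of the debugfs osdmap file is expected to look like "epoch <N> barrier <M> ...",
- # so the epoch is token 1 and the barrier is token 3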
- first_line_tokens = lines[0].split()
- epoch, barrier = int(first_line_tokens[1]), int(first_line_tokens[3])
-
- return epoch, barrier
diff --git a/src/ceph/qa/tasks/cephfs/mount.py b/src/ceph/qa/tasks/cephfs/mount.py
deleted file mode 100644
index 4f96e6c..0000000
--- a/src/ceph/qa/tasks/cephfs/mount.py
+++ /dev/null
@@ -1,627 +0,0 @@
-from contextlib import contextmanager
-import json
-import logging
-import datetime
-import time
-from textwrap import dedent
-import os
-from StringIO import StringIO
-from teuthology.orchestra import run
-from teuthology.orchestra.run import CommandFailedError, ConnectionLostError
-
-log = logging.getLogger(__name__)
-
-
-class CephFSMount(object):
- def __init__(self, test_dir, client_id, client_remote):
- """
- :param test_dir: Global teuthology test dir
- :param client_id: Client ID, the 'foo' in client.foo
- :param client_remote: Remote instance for the host where client will run
- """
-
- self.test_dir = test_dir
- self.client_id = client_id
- self.client_remote = client_remote
- self.mountpoint_dir_name = 'mnt.{id}'.format(id=self.client_id)
-
- self.test_files = ['a', 'b', 'c']
-
- self.background_procs = []
-
- @property
- def mountpoint(self):
- return os.path.join(
- self.test_dir, '{dir_name}'.format(dir_name=self.mountpoint_dir_name))
-
- def is_mounted(self):
- raise NotImplementedError()
-
- def mount(self, mount_path=None, mount_fs_name=None):
- raise NotImplementedError()
-
- def umount(self):
- raise NotImplementedError()
-
- def umount_wait(self, force=False, require_clean=False):
- """
-
- :param force: Expect that the mount will not shutdown cleanly: kill
- it hard.
- :param require_clean: Wait for the Ceph client associated with the
- mount (e.g. ceph-fuse) to terminate, and
- raise if it doesn't do so cleanly.
- :return:
- """
- raise NotImplementedError()
-
- def kill_cleanup(self):
- raise NotImplementedError()
-
- def kill(self):
- raise NotImplementedError()
-
- def cleanup(self):
- raise NotImplementedError()
-
- def wait_until_mounted(self):
- raise NotImplementedError()
-
- def get_keyring_path(self):
- return '/etc/ceph/ceph.client.{id}.keyring'.format(id=self.client_id)
-
- @property
- def config_path(self):
- """
- Path to ceph.conf: override this if you're not a normal systemwide ceph install
- :return: string
- """
- return "/etc/ceph/ceph.conf"
-
- @contextmanager
- def mounted(self):
- """
- A context manager, from an initially unmounted state, to mount
- this, yield, and then unmount and clean up.
- """
- self.mount()
- self.wait_until_mounted()
- try:
- yield
- finally:
- self.umount_wait()
-
- def create_files(self):
- assert(self.is_mounted())
-
- for suffix in self.test_files:
- log.info("Creating file {0}".format(suffix))
- self.client_remote.run(args=[
- 'sudo', 'touch', os.path.join(self.mountpoint, suffix)
- ])
-
- def check_files(self):
- assert(self.is_mounted())
-
- for suffix in self.test_files:
- log.info("Checking file {0}".format(suffix))
- r = self.client_remote.run(args=[
- 'sudo', 'ls', os.path.join(self.mountpoint, suffix)
- ], check_status=False)
- if r.exitstatus != 0:
- raise RuntimeError("Expected file {0} not found".format(suffix))
-
- def create_destroy(self):
- assert(self.is_mounted())
-
- filename = "{0} {1}".format(datetime.datetime.now(), self.client_id)
- log.debug("Creating test file {0}".format(filename))
- self.client_remote.run(args=[
- 'sudo', 'touch', os.path.join(self.mountpoint, filename)
- ])
- log.debug("Deleting test file {0}".format(filename))
- self.client_remote.run(args=[
- 'sudo', 'rm', '-f', os.path.join(self.mountpoint, filename)
- ])
-
- def _run_python(self, pyscript):
- return self.client_remote.run(args=[
- 'sudo', 'adjust-ulimits', 'daemon-helper', 'kill', 'python', '-c', pyscript
- ], wait=False, stdin=run.PIPE, stdout=StringIO())
-
- def run_python(self, pyscript):
- p = self._run_python(pyscript)
- p.wait()
- return p.stdout.getvalue().strip()
-
- def run_shell(self, args, wait=True):
- args = ["cd", self.mountpoint, run.Raw('&&'), "sudo"] + args
- return self.client_remote.run(args=args, stdout=StringIO(),
- stderr=StringIO(), wait=wait)
-
- def open_no_data(self, basename):
- """
- A pure metadata operation
- """
- assert(self.is_mounted())
-
- path = os.path.join(self.mountpoint, basename)
-
- p = self._run_python(dedent(
- """
- f = open("{path}", 'w')
- """.format(path=path)
- ))
- p.wait()
-
- def open_background(self, basename="background_file"):
- """
- Open a file for writing, then block such that the client
- will hold a capability.
-
- Don't return until the remote process has got as far as opening
- the file, then return the RemoteProcess instance.
- """
- assert(self.is_mounted())
-
- path = os.path.join(self.mountpoint, basename)
-
- pyscript = dedent("""
- import time
-
- f = open("{path}", 'w')
- f.write('content')
- f.flush()
- f.write('content2')
- while True:
- time.sleep(1)
- """).format(path=path)
-
- rproc = self._run_python(pyscript)
- self.background_procs.append(rproc)
-
- # This wait would not be sufficient if the file had already
- # existed, but it's simple and in practice users of open_background
- # are not using it on existing files.
- self.wait_for_visible(basename)
-
- return rproc
-
- def wait_for_visible(self, basename="background_file", timeout=30):
- i = 0
- while i < timeout:
- r = self.client_remote.run(args=[
- 'sudo', 'ls', os.path.join(self.mountpoint, basename)
- ], check_status=False)
- if r.exitstatus == 0:
- log.debug("File {0} became visible from {1} after {2}s".format(
- basename, self.client_id, i))
- return
- else:
- time.sleep(1)
- i += 1
-
- raise RuntimeError("Timed out after {0}s waiting for {1} to become visible from {2}".format(
- i, basename, self.client_id))
-
- def lock_background(self, basename="background_file", do_flock=True):
- """
- Open and lock files for writing, and hold the locks in a background process
- """
- assert(self.is_mounted())
-
- path = os.path.join(self.mountpoint, basename)
-
- script_builder = """
- import time
- import fcntl
- import struct"""
- if do_flock:
- script_builder += """
- f1 = open("{path}-1", 'w')
- fcntl.flock(f1, fcntl.LOCK_EX | fcntl.LOCK_NB)"""
- script_builder += """
- f2 = open("{path}-2", 'w')
- lockdata = struct.pack('hhllhh', fcntl.F_WRLCK, 0, 0, 0, 0, 0)
- fcntl.fcntl(f2, fcntl.F_SETLK, lockdata)
- while True:
- time.sleep(1)
- """
-
- pyscript = dedent(script_builder).format(path=path)
-
- log.info("lock_background file {0}".format(basename))
- rproc = self._run_python(pyscript)
- self.background_procs.append(rproc)
- return rproc
-
- def lock_and_release(self, basename="background_file"):
- assert(self.is_mounted())
-
- path = os.path.join(self.mountpoint, basename)
-
- script = """
- import time
- import fcntl
- import struct
- f1 = open("{path}-1", 'w')
- fcntl.flock(f1, fcntl.LOCK_EX)
- f2 = open("{path}-2", 'w')
- lockdata = struct.pack('hhllhh', fcntl.F_WRLCK, 0, 0, 0, 0, 0)
- fcntl.fcntl(f2, fcntl.F_SETLK, lockdata)
- """
- pyscript = dedent(script).format(path=path)
-
- log.info("lock_and_release file {0}".format(basename))
- return self._run_python(pyscript)
-
- def check_filelock(self, basename="background_file", do_flock=True):
- assert(self.is_mounted())
-
- path = os.path.join(self.mountpoint, basename)
-
- script_builder = """
- import fcntl
- import errno
- import struct"""
- if do_flock:
- script_builder += """
- f1 = open("{path}-1", 'r')
- try:
- fcntl.flock(f1, fcntl.LOCK_EX | fcntl.LOCK_NB)
- except IOError, e:
- if e.errno == errno.EAGAIN:
- pass
- else:
- raise RuntimeError("flock on file {path}-1 not found")"""
- script_builder += """
- f2 = open("{path}-2", 'r')
- try:
- lockdata = struct.pack('hhllhh', fcntl.F_WRLCK, 0, 0, 0, 0, 0)
- fcntl.fcntl(f2, fcntl.F_SETLK, lockdata)
- except IOError, e:
- if e.errno == errno.EAGAIN:
- pass
- else:
- raise RuntimeError("posix lock on file {path}-2 not found")
- """
- pyscript = dedent(script_builder).format(path=path)
-
- log.info("check lock on file {0}".format(basename))
- self.client_remote.run(args=[
- 'sudo', 'python', '-c', pyscript
- ])
-
- def write_background(self, basename="background_file", loop=False):
- """
- Open a file for writing. If loop is False, write once and return;
- otherwise keep writing until the process is killed.
- :param basename: name of the file to create under the mountpoint
- :return: the RemoteProcess running the writer
- """
- assert(self.is_mounted())
-
- path = os.path.join(self.mountpoint, basename)
-
- pyscript = dedent("""
- import os
- import time
-
- fd = os.open("{path}", os.O_RDWR | os.O_CREAT, 0644)
- try:
- while True:
- os.write(fd, 'content')
- time.sleep(1)
- if not {loop}:
- break
- except IOError, e:
- pass
- os.close(fd)
- """).format(path=path, loop=str(loop))
-
- rproc = self._run_python(pyscript)
- self.background_procs.append(rproc)
- return rproc
-
- def write_n_mb(self, filename, n_mb, seek=0, wait=True):
- """
- Write the requested number of megabytes to a file
- """
- assert(self.is_mounted())
-
- return self.run_shell(["dd", "if=/dev/urandom", "of={0}".format(filename),
- "bs=1M", "conv=fdatasync",
- "count={0}".format(n_mb),
- "seek={0}".format(seek)
- ], wait=wait)
-
- def write_test_pattern(self, filename, size):
- log.info("Writing {0} bytes to {1}".format(size, filename))
- return self.run_python(dedent("""
- import zlib
- path = "{path}"
- f = open(path, 'w')
- for i in range(0, {size}):
- val = zlib.crc32("%s" % i) & 7
- f.write(chr(val))
- f.close()
- """.format(
- path=os.path.join(self.mountpoint, filename),
- size=size
- )))
-
- def validate_test_pattern(self, filename, size):
- log.info("Validating {0} bytes from {1}".format(size, filename))
- return self.run_python(dedent("""
- import zlib
- path = "{path}"
- f = open(path, 'r')
- bytes = f.read()
- f.close()
- if len(bytes) != {size}:
- raise RuntimeError("Bad length {{0}} vs. expected {{1}}".format(
- len(bytes), {size}
- ))
- for i, b in enumerate(bytes):
- val = zlib.crc32("%s" % i) & 7
- if b != chr(val):
- raise RuntimeError("Bad data at offset {{0}}".format(i))
- """.format(
- path=os.path.join(self.mountpoint, filename),
- size=size
- )))
-
- def open_n_background(self, fs_path, count):
- """
- Open N files for writing, hold them open in a background process
-
- :param fs_path: Path relative to CephFS root, e.g. "foo/bar"
- :return: a RemoteProcess
- """
- assert(self.is_mounted())
-
- abs_path = os.path.join(self.mountpoint, fs_path)
-
- pyscript = dedent("""
- import sys
- import time
- import os
-
- n = {count}
- abs_path = "{abs_path}"
-
- if not os.path.exists(os.path.dirname(abs_path)):
- os.makedirs(os.path.dirname(abs_path))
-
- handles = []
- for i in range(0, n):
- fname = "{{0}}_{{1}}".format(abs_path, i)
- handles.append(open(fname, 'w'))
-
- while True:
- time.sleep(1)
- """).format(abs_path=abs_path, count=count)
-
- rproc = self._run_python(pyscript)
- self.background_procs.append(rproc)
- return rproc
-
- def create_n_files(self, fs_path, count, sync=False):
- assert(self.is_mounted())
-
- abs_path = os.path.join(self.mountpoint, fs_path)
-
- pyscript = dedent("""
- import sys
- import time
- import os
-
- n = {count}
- abs_path = "{abs_path}"
-
- if not os.path.exists(os.path.dirname(abs_path)):
- os.makedirs(os.path.dirname(abs_path))
-
- for i in range(0, n):
- fname = "{{0}}_{{1}}".format(abs_path, i)
- h = open(fname, 'w')
- h.write('content')
- if {sync}:
- h.flush()
- os.fsync(h.fileno())
- h.close()
- """).format(abs_path=abs_path, count=count, sync=str(sync))
-
- self.run_python(pyscript)
-
- def teardown(self):
- for p in self.background_procs:
- log.info("Terminating background process")
- self._kill_background(p)
-
- self.background_procs = []
-
- def _kill_background(self, p):
- if p.stdin:
- p.stdin.close()
- try:
- p.wait()
- except (CommandFailedError, ConnectionLostError):
- pass
-
- def kill_background(self, p):
- """
- For a process that was returned by one of the _background member functions,
- kill it hard.
- """
- self._kill_background(p)
- self.background_procs.remove(p)
-
- def get_global_id(self):
- raise NotImplementedError()
-
- def get_osd_epoch(self):
- raise NotImplementedError()
-
- def stat(self, fs_path, wait=True):
- """
- stat a file, and return the result as a dictionary like this:
- {
- "st_ctime": 1414161137.0,
- "st_mtime": 1414161137.0,
- "st_nlink": 33,
- "st_gid": 0,
- "st_dev": 16777218,
- "st_size": 1190,
- "st_ino": 2,
- "st_uid": 0,
- "st_mode": 16877,
- "st_atime": 1431520593.0
- }
-
- Raises exception on absent file.
- """
- abs_path = os.path.join(self.mountpoint, fs_path)
-
- pyscript = dedent("""
- import os
- import stat
- import json
- import sys
-
- try:
- s = os.stat("{path}")
- except OSError as e:
- sys.exit(e.errno)
-
- attrs = ["st_mode", "st_ino", "st_dev", "st_nlink", "st_uid", "st_gid", "st_size", "st_atime", "st_mtime", "st_ctime"]
- print json.dumps(
- dict([(a, getattr(s, a)) for a in attrs]),
- indent=2)
- """).format(path=abs_path)
- proc = self._run_python(pyscript)
- if wait:
- proc.wait()
- return json.loads(proc.stdout.getvalue().strip())
- else:
- return proc
-
- def touch(self, fs_path):
- """
- Create a dentry if it doesn't already exist. This python
- implementation exists because the usual command line tool doesn't
- pass through error codes like EIO.
-
- :param fs_path:
- :return:
- """
- abs_path = os.path.join(self.mountpoint, fs_path)
- pyscript = dedent("""
- import sys
- import errno
-
- try:
- f = open("{path}", "w")
- f.close()
- except IOError as e:
- sys.exit(errno.EIO)
- """).format(path=abs_path)
- proc = self._run_python(pyscript)
- proc.wait()
-
- def path_to_ino(self, fs_path, follow_symlinks=True):
- abs_path = os.path.join(self.mountpoint, fs_path)
-
- if follow_symlinks:
- pyscript = dedent("""
- import os
- import stat
-
- print os.stat("{path}").st_ino
- """).format(path=abs_path)
- else:
- pyscript = dedent("""
- import os
- import stat
-
- print os.lstat("{path}").st_ino
- """).format(path=abs_path)
-
- proc = self._run_python(pyscript)
- proc.wait()
- return int(proc.stdout.getvalue().strip())
-
- def path_to_nlink(self, fs_path):
- abs_path = os.path.join(self.mountpoint, fs_path)
-
- pyscript = dedent("""
- import os
- import stat
-
- print os.stat("{path}").st_nlink
- """).format(path=abs_path)
-
- proc = self._run_python(pyscript)
- proc.wait()
- return int(proc.stdout.getvalue().strip())
-
- def ls(self, path=None):
- """
- Wrap ls: return a list of strings
- """
- cmd = ["ls"]
- if path:
- cmd.append(path)
-
- ls_text = self.run_shell(cmd).stdout.getvalue().strip()
-
- if ls_text:
- return ls_text.split("\n")
- else:
- # Special case because otherwise split on empty string
- # gives you [''] instead of []
- return []
-
- def setfattr(self, path, key, val):
- """
- Wrap setfattr.
-
- :param path: relative to mount point
- :param key: xattr name
- :param val: xattr value
- :return: None
- """
- self.run_shell(["setfattr", "-n", key, "-v", val, path])
-
- def getfattr(self, path, attr):
- """
- Wrap getfattr: return the values of a named xattr on one file, or
- None if the attribute is not found.
-
- :return: a string
- """
- p = self.run_shell(["getfattr", "--only-values", "-n", attr, path], wait=False)
- try:
- p.wait()
- except CommandFailedError as e:
- if e.exitstatus == 1 and "No such attribute" in p.stderr.getvalue():
- return None
- else:
- raise
-
- return p.stdout.getvalue()
-
- def df(self):
- """
- Wrap df: return a dict of usage fields in bytes
- """
-
- p = self.run_shell(["df", "-B1", "."])
- lines = p.stdout.getvalue().strip().split("\n")
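- # the second line of 'df -B1' output is the data row: filesystem, total, used and available, in bytes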
- fs, total, used, avail = lines[1].split()[:4]
- log.warn(lines)
-
- return {
- "total": int(total),
- "used": int(used),
- "available": int(avail)
- }
diff --git a/src/ceph/qa/tasks/cephfs/test_auto_repair.py b/src/ceph/qa/tasks/cephfs/test_auto_repair.py
deleted file mode 100644
index c0aa2e4..0000000
--- a/src/ceph/qa/tasks/cephfs/test_auto_repair.py
+++ /dev/null
@@ -1,90 +0,0 @@
-
-"""
-Exercise the MDS's auto repair functions
-"""
-
-import logging
-import time
-
-from teuthology.orchestra.run import CommandFailedError
-from tasks.cephfs.cephfs_test_case import CephFSTestCase
-
-
-log = logging.getLogger(__name__)
-
-
-# Arbitrary timeouts for operations involving restarting
-# an MDS or waiting for it to come up
-MDS_RESTART_GRACE = 60
-
-
-class TestMDSAutoRepair(CephFSTestCase):
- def test_backtrace_repair(self):
- """
- MDS should verify/fix backtrace on fetch dirfrag
- """
-
- self.mount_a.run_shell(["mkdir", "testdir1"])
- self.mount_a.run_shell(["touch", "testdir1/testfile"])
- dir_objname = "{:x}.00000000".format(self.mount_a.path_to_ino("testdir1"))
-
- # drop inode caps
- self.mount_a.umount_wait()
-
- # flush journal entries to dirfrag objects, and expire journal
- self.fs.mds_asok(['flush', 'journal'])
-
- # Restart the MDS to drop the metadata cache (because we expired the journal,
- # nothing gets replayed into cache on restart)
- self.fs.mds_stop()
- self.fs.mds_fail_restart()
- self.fs.wait_for_daemons()
-
- # remove testdir1's backtrace
- self.fs.rados(["rmxattr", dir_objname, "parent"])
-
- # readdir (fetch dirfrag) should fix testdir1's backtrace
- self.mount_a.mount()
- self.mount_a.wait_until_mounted()
- self.mount_a.run_shell(["ls", "testdir1"])
-
- # flush journal entries to dirfrag objects
- self.fs.mds_asok(['flush', 'journal'])
-
- # check if backtrace exists
- self.fs.rados(["getxattr", dir_objname, "parent"])
-
- def test_mds_readonly(self):
- """
- Test that the MDS behaves correctly when it is read-only
- """
- # operations should succeed while the MDS is not read-only
- self.mount_a.run_shell(["touch", "test_file1"])
- writer = self.mount_a.write_background(loop=True)
-
- time.sleep(10)
- self.assertFalse(writer.finished)
-
- # force MDS to read-only mode
- self.fs.mds_asok(['force_readonly'])
- time.sleep(10)
-
- # touching test file should fail
- try:
- self.mount_a.run_shell(["touch", "test_file1"])
- except CommandFailedError:
- pass
- else:
- self.assertTrue(False)
-
- # background writer also should fail
- self.assertTrue(writer.finished)
-
- # The MDS should report its readonly health state to the mon
- self.wait_for_health("MDS_READ_ONLY", timeout=30)
-
- # restart mds to make it writable
- self.fs.mds_fail_restart()
- self.fs.wait_for_daemons()
-
- self.wait_for_health_clear(timeout=30)
diff --git a/src/ceph/qa/tasks/cephfs/test_backtrace.py b/src/ceph/qa/tasks/cephfs/test_backtrace.py
deleted file mode 100644
index af246a1..0000000
--- a/src/ceph/qa/tasks/cephfs/test_backtrace.py
+++ /dev/null
@@ -1,78 +0,0 @@
-
-from tasks.cephfs.cephfs_test_case import CephFSTestCase
-
-
-class TestBacktrace(CephFSTestCase):
- def test_backtrace(self):
- """
- That the 'parent' and 'layout' xattrs on the head objects of files
- are updated correctly.
- """
-
- old_data_pool_name = self.fs.get_data_pool_name()
- old_pool_id = self.fs.get_data_pool_id()
-
- # Create a file for subsequent checks
- self.mount_a.run_shell(["mkdir", "parent_a"])
- self.mount_a.run_shell(["touch", "parent_a/alpha"])
- file_ino = self.mount_a.path_to_ino("parent_a/alpha")
-
- # That backtrace and layout are written after initial flush
- self.fs.mds_asok(["flush", "journal"])
- backtrace = self.fs.read_backtrace(file_ino)
- self.assertEqual(['alpha', 'parent_a'], [a['dname'] for a in backtrace['ancestors']])
- layout = self.fs.read_layout(file_ino)
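- # expect the default file layout: 4 MiB (4194304 byte) stripe unit and object size, stripe count 1, original pool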
- self.assertDictEqual(layout, {
- "stripe_unit": 4194304,
- "stripe_count": 1,
- "object_size": 4194304,
- "pool_id": old_pool_id,
- "pool_ns": "",
- })
- self.assertEqual(backtrace['pool'], old_pool_id)
-
- # That backtrace is written after parentage changes
- self.mount_a.run_shell(["mkdir", "parent_b"])
- self.mount_a.run_shell(["mv", "parent_a/alpha", "parent_b/alpha"])
-
- self.fs.mds_asok(["flush", "journal"])
- backtrace = self.fs.read_backtrace(file_ino)
- self.assertEqual(['alpha', 'parent_b'], [a['dname'] for a in backtrace['ancestors']])
-
- # Create a new data pool
- new_pool_name = "data_new"
- new_pool_id = self.fs.add_data_pool(new_pool_name)
-
- # That an object which has switched pools gets its backtrace updated
- self.mount_a.setfattr("./parent_b/alpha",
- "ceph.file.layout.pool", new_pool_name)
- self.fs.mds_asok(["flush", "journal"])
- backtrace_old_pool = self.fs.read_backtrace(file_ino, pool=old_data_pool_name)
- self.assertEqual(backtrace_old_pool['pool'], new_pool_id)
- backtrace_new_pool = self.fs.read_backtrace(file_ino, pool=new_pool_name)
- self.assertEqual(backtrace_new_pool['pool'], new_pool_id)
- new_pool_layout = self.fs.read_layout(file_ino, pool=new_pool_name)
- self.assertEqual(new_pool_layout['pool_id'], new_pool_id)
- self.assertEqual(new_pool_layout['pool_ns'], '')
-
- # That subsequent linkage changes are only written to new pool backtrace
- self.mount_a.run_shell(["mkdir", "parent_c"])
- self.mount_a.run_shell(["mv", "parent_b/alpha", "parent_c/alpha"])
- self.fs.mds_asok(["flush", "journal"])
- backtrace_old_pool = self.fs.read_backtrace(file_ino, pool=old_data_pool_name)
- self.assertEqual(['alpha', 'parent_b'], [a['dname'] for a in backtrace_old_pool['ancestors']])
- backtrace_new_pool = self.fs.read_backtrace(file_ino, pool=new_pool_name)
- self.assertEqual(['alpha', 'parent_c'], [a['dname'] for a in backtrace_new_pool['ancestors']])
-
- # That layout is written to new pool after change to other field in layout
- self.mount_a.setfattr("./parent_c/alpha",
- "ceph.file.layout.object_size", "8388608")
-
- self.fs.mds_asok(["flush", "journal"])
- new_pool_layout = self.fs.read_layout(file_ino, pool=new_pool_name)
- self.assertEqual(new_pool_layout['object_size'], 8388608)
-
- # ...but not to the old pool: the old pool's backtrace points to the new pool, and that's enough,
- # we don't update the layout in all the old pools whenever it changes
- old_pool_layout = self.fs.read_layout(file_ino, pool=old_data_pool_name)
- self.assertEqual(old_pool_layout['object_size'], 4194304)
diff --git a/src/ceph/qa/tasks/cephfs/test_cap_flush.py b/src/ceph/qa/tasks/cephfs/test_cap_flush.py
deleted file mode 100644
index 1cd102f..0000000
--- a/src/ceph/qa/tasks/cephfs/test_cap_flush.py
+++ /dev/null
@@ -1,64 +0,0 @@
-
-import os
-import time
-from textwrap import dedent
-from unittest import SkipTest
-from tasks.cephfs.fuse_mount import FuseMount
-from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology
-
-class TestCapFlush(CephFSTestCase):
- @for_teuthology
- def test_replay_create(self):
- """
- MDS starts to handle client caps when it enters clientreplay stage.
- When handling a client cap in the clientreplay stage, it's possible that
- the corresponding inode does not exist because the client request which
- creates the inode hasn't been replayed yet.
- """
-
- if not isinstance(self.mount_a, FuseMount):
- raise SkipTest("Require FUSE client to inject client release failure")
-
- dir_path = os.path.join(self.mount_a.mountpoint, "testdir")
- py_script = dedent("""
- import os
- os.mkdir("{0}")
- fd = os.open("{0}", os.O_RDONLY)
- os.fchmod(fd, 0777)
- os.fsync(fd)
- """).format(dir_path)
- self.mount_a.run_python(py_script)
-
- self.fs.mds_asok(["flush", "journal"])
-
- # with journaling paused, the client will only get unsafe (early) replies
- self.fs.mds_asok(["config", "set", "mds_log_pause", "1"])
-
- file_name = "testfile"
- file_path = dir_path + "/" + file_name
-
- # Create a file and modify its mode. ceph-fuse will mark Ax cap dirty
- py_script = dedent("""
- import os
- os.chdir("{0}")
- os.setgid(65534)
- os.setuid(65534)
- fd = os.open("{1}", os.O_CREAT | os.O_RDWR, 0644)
- os.fchmod(fd, 0640)
- """).format(dir_path, file_name)
- self.mount_a.run_python(py_script)
-
- # Modify the file mode as a different user. ceph-fuse will send a setattr request
- self.mount_a.run_shell(["chmod", "600", file_path], wait=False)
-
- time.sleep(10)
-
- # Restart mds. Client will re-send the unsafe request and cap flush
- self.fs.mds_stop()
- self.fs.mds_fail_restart()
- self.fs.wait_for_daemons()
-
- mode = self.mount_a.run_shell(['stat', '-c' '%a', file_path]).stdout.getvalue().strip()
- # If the cap flush got dropped, the mode would be 0644.
- # (the Ax cap stays dirty, which prevents the setattr reply from updating the file mode)
- self.assertEqual(mode, "600")
diff --git a/src/ceph/qa/tasks/cephfs/test_client_limits.py b/src/ceph/qa/tasks/cephfs/test_client_limits.py
deleted file mode 100644
index cb5e3a4..0000000
--- a/src/ceph/qa/tasks/cephfs/test_client_limits.py
+++ /dev/null
@@ -1,239 +0,0 @@
-
-"""
-Exercise the MDS's behaviour when clients and the MDCache reach or
-exceed the limits of how many caps/inodes they should hold.
-"""
-
-import logging
-from textwrap import dedent
-from unittest import SkipTest
-from teuthology.orchestra.run import CommandFailedError
-from tasks.cephfs.cephfs_test_case import CephFSTestCase, needs_trimming
-from tasks.cephfs.fuse_mount import FuseMount
-import os
-
-
-log = logging.getLogger(__name__)
-
-
-# Arbitrary timeouts for operations involving restarting
-# an MDS or waiting for it to come up
-MDS_RESTART_GRACE = 60
-
-# Hardcoded values from Server::recall_client_state
-CAP_RECALL_RATIO = 0.8
-CAP_RECALL_MIN = 100
-
-
-class TestClientLimits(CephFSTestCase):
- REQUIRE_KCLIENT_REMOTE = True
- CLIENTS_REQUIRED = 2
-
- def _test_client_pin(self, use_subdir, open_files):
- """
- When a client pins an inode in its cache, for example because the file is held open,
- it should reject requests from the MDS to trim these caps. The MDS should complain
- to the user that it is unable to enforce its cache size limits because of this
- objectionable client.
-
- :param use_subdir: whether to put test files in a subdir or use root
- """
-
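- # size the MDS cache at half the number of files the client will hold open, so the pinned caps exceed the cache limit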
- cache_size = open_files/2
-
- self.set_conf('mds', 'mds cache size', cache_size)
- self.fs.mds_fail_restart()
- self.fs.wait_for_daemons()
-
- mds_min_caps_per_client = int(self.fs.get_config("mds_min_caps_per_client"))
- self.assertTrue(open_files >= mds_min_caps_per_client)
- mds_max_ratio_caps_per_client = float(self.fs.get_config("mds_max_ratio_caps_per_client"))
-
- mount_a_client_id = self.mount_a.get_global_id()
- path = "subdir/mount_a" if use_subdir else "mount_a"
- open_proc = self.mount_a.open_n_background(path, open_files)
-
- # Client should now hold:
- # `open_files` caps for the open files
- # 1 cap for root
- # 1 cap for subdir
- self.wait_until_equal(lambda: self.get_session(mount_a_client_id)['num_caps'],
- open_files + (2 if use_subdir else 1),
- timeout=600,
- reject_fn=lambda x: x > open_files + 2)
-
- # MDS should not be happy about that, as the client is failing to comply
- # with the SESSION_RECALL messages it is being sent
- mds_recall_state_timeout = float(self.fs.get_config("mds_recall_state_timeout"))
- self.wait_for_health("MDS_CLIENT_RECALL", mds_recall_state_timeout+10)
-
- # We can also test that the MDS health warning for oversized
- # cache is functioning as intended.
- self.wait_for_health("MDS_CACHE_OVERSIZED",
- mds_recall_state_timeout + 10)
-
- # When the client closes the files, it should retain only as many caps as allowed
- # under the SESSION_RECALL policy
- log.info("Terminating process holding files open")
- open_proc.stdin.close()
- try:
- open_proc.wait()
- except CommandFailedError:
- # We killed it, so it raises an error
- pass
-
- # The remaining caps should comply with the numbers sent from MDS in SESSION_RECALL message,
- # which depend on the caps outstanding, cache size and overall ratio
- recall_expected_value = int((1.0-mds_max_ratio_caps_per_client)*(open_files+2))
- def expected_caps():
- num_caps = self.get_session(mount_a_client_id)['num_caps']
- if num_caps < mds_min_caps_per_client:
- raise RuntimeError("client caps fell below min!")
- elif num_caps == mds_min_caps_per_client:
- return True
- elif recall_expected_value*.95 <= num_caps <= recall_expected_value*1.05:
- return True
- else:
- return False
-
- self.wait_until_true(expected_caps, timeout=60)
-
- @needs_trimming
- def test_client_pin_root(self):
- self._test_client_pin(False, 400)
-
- @needs_trimming
- def test_client_pin(self):
- self._test_client_pin(True, 800)
-
- @needs_trimming
- def test_client_pin_mincaps(self):
- self._test_client_pin(True, 200)
-
- def test_client_release_bug(self):
- """
- When a client has a bug (which we will simulate) preventing it from releasing caps,
- the MDS should notice that releases are not being sent promptly, and generate a health
- metric to that effect.
- """
-
- # The debug hook to inject the failure only exists in the fuse client
- if not isinstance(self.mount_a, FuseMount):
- raise SkipTest("Require FUSE client to inject client release failure")
-
- self.set_conf('client.{0}'.format(self.mount_a.client_id), 'client inject release failure', 'true')
- self.mount_a.teardown()
- self.mount_a.mount()
- self.mount_a.wait_until_mounted()
- mount_a_client_id = self.mount_a.get_global_id()
-
- # Client A creates a file. It will hold the write caps on the file, and later (simulated bug) fail
- # to comply with the MDSs request to release that cap
- self.mount_a.run_shell(["touch", "file1"])
-
- # Client B tries to stat the file that client A created
- rproc = self.mount_b.write_background("file1")
-
- # After mds_revoke_cap_timeout, we should see a health warning (extra lag from
- # MDS beacon period)
- mds_revoke_cap_timeout = float(self.fs.get_config("mds_revoke_cap_timeout"))
- self.wait_for_health("MDS_CLIENT_LATE_RELEASE", mds_revoke_cap_timeout + 10)
-
- # Client B should still be stuck
- self.assertFalse(rproc.finished)
-
- # Kill client A
- self.mount_a.kill()
- self.mount_a.kill_cleanup()
-
- # Client B should complete
- self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])
- rproc.wait()
-
- def test_client_oldest_tid(self):
- """
- When a client does not advance its oldest tid, the MDS should notice that
- and generate health warnings.
- """
-
- # num of requests client issues
- max_requests = 1000
-
- # The debug hook to inject the failure only exists in the fuse client
- if not isinstance(self.mount_a, FuseMount):
- raise SkipTest("Require FUSE client to inject client release failure")
-
- self.set_conf('client', 'client inject fixed oldest tid', 'true')
- self.mount_a.teardown()
- self.mount_a.mount()
- self.mount_a.wait_until_mounted()
-
- self.fs.mds_asok(['config', 'set', 'mds_max_completed_requests', '{0}'.format(max_requests)])
-
- # Create lots of files
- self.mount_a.create_n_files("testdir/file1", max_requests + 100)
-
- # Create a few files synchronously. This makes sure previous requests are completed
- self.mount_a.create_n_files("testdir/file2", 5, True)
-
- # Wait for the health warnings. Assume mds can handle 10 request per second at least
- self.wait_for_health("MDS_CLIENT_OLDEST_TID", max_requests / 10)
-
- def _test_client_cache_size(self, mount_subdir):
- """
- Check that the client invalidates the kernel dcache according to its cache size config
- """
-
- # Only the fuse client exposes the cache size knob and dentry counts via its admin socket
- if not isinstance(self.mount_a, FuseMount):
- raise SkipTest("Require FUSE client to drive the client cache size")
-
- if mount_subdir:
- # fuse assigns a fixed inode number (1) to the root inode. But when mounting
- # a subdir, the actual inode number of the root is not 1. This mismatch
- # confuses fuse_lowlevel_notify_inval_entry() when invalidating dentries
- # in the root directory.
- self.mount_a.run_shell(["mkdir", "subdir"])
- self.mount_a.umount_wait()
- self.set_conf('client', 'client mountpoint', '/subdir')
- self.mount_a.mount()
- self.mount_a.wait_until_mounted()
- root_ino = self.mount_a.path_to_ino(".")
- self.assertEqual(root_ino, 1)
-
- dir_path = os.path.join(self.mount_a.mountpoint, "testdir")
-
- mkdir_script = dedent("""
- import os
- os.mkdir("{path}")
- for n in range(0, {num_dirs}):
- os.mkdir("{path}/dir{{0}}".format(n))
- """)
-
- num_dirs = 1000
- self.mount_a.run_python(mkdir_script.format(path=dir_path, num_dirs=num_dirs))
- self.mount_a.run_shell(["sync"])
-
- dentry_count, dentry_pinned_count = self.mount_a.get_dentry_count()
- self.assertGreaterEqual(dentry_count, num_dirs)
- self.assertGreaterEqual(dentry_pinned_count, num_dirs)
-
- cache_size = num_dirs / 10
- self.mount_a.set_cache_size(cache_size)
-
- def trimmed():
- dentry_count, dentry_pinned_count = self.mount_a.get_dentry_count()
- log.info("waiting, dentry_count, dentry_pinned_count: {0}, {1}".format(
- dentry_count, dentry_pinned_count
- ))
- if dentry_count > cache_size or dentry_pinned_count > cache_size:
- return False
-
- return True
-
- self.wait_until_true(trimmed, 30)
-
- @needs_trimming
- def test_client_cache_size(self):
- self._test_client_cache_size(False)
- self._test_client_cache_size(True)
diff --git a/src/ceph/qa/tasks/cephfs/test_client_recovery.py b/src/ceph/qa/tasks/cephfs/test_client_recovery.py
deleted file mode 100644
index fd58c14..0000000
--- a/src/ceph/qa/tasks/cephfs/test_client_recovery.py
+++ /dev/null
@@ -1,474 +0,0 @@
-
-"""
-Teuthology task for exercising CephFS client recovery
-"""
-
-import logging
-from textwrap import dedent
-import time
-import distutils.version as version
-import re
-import os
-
-from teuthology.orchestra.run import CommandFailedError, ConnectionLostError
-from tasks.cephfs.cephfs_test_case import CephFSTestCase
-from teuthology.packaging import get_package_version
-
-
-log = logging.getLogger(__name__)
-
-
-# Arbitrary timeouts for operations involving restarting
-# an MDS or waiting for it to come up
-MDS_RESTART_GRACE = 60
-
-
-class TestClientNetworkRecovery(CephFSTestCase):
- REQUIRE_KCLIENT_REMOTE = True
- REQUIRE_ONE_CLIENT_REMOTE = True
- CLIENTS_REQUIRED = 2
-
- LOAD_SETTINGS = ["mds_session_timeout", "mds_reconnect_timeout", "ms_max_backoff"]
-
- # Environment references
- mds_session_timeout = None
- mds_reconnect_timeout = None
- ms_max_backoff = None
-
- def test_network_death(self):
- """
- Simulate software freeze or temporary network failure.
-
- Check that the client blocks I/O during failure, and completes
- I/O after failure.
- """
-
- # We only need one client
- self.mount_b.umount_wait()
-
- # Initially our one client session should be visible
- client_id = self.mount_a.get_global_id()
- ls_data = self._session_list()
- self.assert_session_count(1, ls_data)
- self.assertEqual(ls_data[0]['id'], client_id)
- self.assert_session_state(client_id, "open")
-
- # ...and capable of doing I/O without blocking
- self.mount_a.create_files()
-
- # ...but if we turn off the network
- self.fs.set_clients_block(True)
-
- # ...and try and start an I/O
- write_blocked = self.mount_a.write_background()
-
- # ...then it should block
- self.assertFalse(write_blocked.finished)
- self.assert_session_state(client_id, "open")
- time.sleep(self.mds_session_timeout * 1.5) # Long enough for MDS to consider session stale
- self.assertFalse(write_blocked.finished)
- self.assert_session_state(client_id, "stale")
-
- # ...until we re-enable I/O
- self.fs.set_clients_block(False)
-
- # ...when it should complete promptly
- a = time.time()
- self.wait_until_true(lambda: write_blocked.finished, self.ms_max_backoff * 2)
- write_blocked.wait() # Already know we're finished, wait() to raise exception on errors
- recovery_time = time.time() - a
- log.info("recovery time: {0}".format(recovery_time))
- self.assert_session_state(client_id, "open")
-
-
-class TestClientRecovery(CephFSTestCase):
- REQUIRE_KCLIENT_REMOTE = True
- CLIENTS_REQUIRED = 2
-
- LOAD_SETTINGS = ["mds_session_timeout", "mds_reconnect_timeout", "ms_max_backoff"]
-
- # Environment references
- mds_session_timeout = None
- mds_reconnect_timeout = None
- ms_max_backoff = None
-
- def test_basic(self):
- # Check that two clients come up healthy and see each others' files
- # =====================================================
- self.mount_a.create_files()
- self.mount_a.check_files()
- self.mount_a.umount_wait()
-
- self.mount_b.check_files()
-
- self.mount_a.mount()
- self.mount_a.wait_until_mounted()
-
- # Check that the admin socket interface is correctly reporting
- # two sessions
- # =====================================================
- ls_data = self._session_list()
- self.assert_session_count(2, ls_data)
-
- self.assertSetEqual(
- set([l['id'] for l in ls_data]),
- {self.mount_a.get_global_id(), self.mount_b.get_global_id()}
- )
-
- def test_restart(self):
- # Check that after an MDS restart both clients reconnect and continue
- # to handle I/O
- # =====================================================
- self.fs.mds_fail_restart()
- self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE)
-
- self.mount_a.create_destroy()
- self.mount_b.create_destroy()
-
- def _session_num_caps(self, client_id):
- ls_data = self.fs.mds_asok(['session', 'ls'])
- return int(self._session_by_id(ls_data).get(client_id, {'num_caps': None})['num_caps'])
-
- def test_reconnect_timeout(self):
- # Reconnect timeout
- # =================
- # Check that if I stop an MDS and a client goes away, the MDS waits
- # for the reconnect period
- self.fs.mds_stop()
- self.fs.mds_fail()
-
- mount_a_client_id = self.mount_a.get_global_id()
- self.mount_a.umount_wait(force=True)
-
- self.fs.mds_restart()
-
- self.fs.wait_for_state('up:reconnect', reject='up:active', timeout=MDS_RESTART_GRACE)
- # Check that the MDS locally reports its state correctly
- status = self.fs.mds_asok(['status'])
- self.assertIn("reconnect_status", status)
-
- ls_data = self._session_list()
- self.assert_session_count(2, ls_data)
-
- # The session for the dead client should have the 'reconnect' flag set
- self.assertTrue(self.get_session(mount_a_client_id)['reconnecting'])
-
- # Wait for the reconnect state to clear, this should take the
- # reconnect timeout period.
- in_reconnect_for = self.fs.wait_for_state('up:active', timeout=self.mds_reconnect_timeout * 2)
- # Check that the period we waited to enter active is within a factor
- # of two of the reconnect timeout.
- self.assertGreater(in_reconnect_for, self.mds_reconnect_timeout / 2,
- "Should have been in reconnect phase for {0} but only took {1}".format(
- self.mds_reconnect_timeout, in_reconnect_for
- ))
-
- self.assert_session_count(1)
-
- # Check that the client that timed out during reconnect can
- # mount again and do I/O
- self.mount_a.mount()
- self.mount_a.wait_until_mounted()
- self.mount_a.create_destroy()
-
- self.assert_session_count(2)
-
- def test_reconnect_eviction(self):
- # Eviction during reconnect
- # =========================
- mount_a_client_id = self.mount_a.get_global_id()
-
- self.fs.mds_stop()
- self.fs.mds_fail()
-
- # The mount goes away while the MDS is offline
- self.mount_a.kill()
-
- self.fs.mds_restart()
-
- # Enter reconnect phase
- self.fs.wait_for_state('up:reconnect', reject='up:active', timeout=MDS_RESTART_GRACE)
- self.assert_session_count(2)
-
- # Evict the stuck client
- self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])
- self.assert_session_count(1)
-
- # Observe that we proceed to active phase without waiting full reconnect timeout
- evict_til_active = self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE)
- # Once we evict the troublemaker, the reconnect phase should complete
- # in well under the reconnect timeout.
- self.assertLess(evict_til_active, self.mds_reconnect_timeout * 0.5,
- "reconnect did not complete soon enough after eviction, took {0}".format(
- evict_til_active
- ))
-
- # We killed earlier so must clean up before trying to use again
- self.mount_a.kill_cleanup()
-
- # Bring the client back
- self.mount_a.mount()
- self.mount_a.wait_until_mounted()
- self.mount_a.create_destroy()
-
- def test_stale_caps(self):
- # Capability release from stale session
- # =====================================
- cap_holder = self.mount_a.open_background()
-
- # Wait for the file to be visible from another client, indicating
- # that mount_a has completed its network ops
- self.mount_b.wait_for_visible()
-
- # Simulate client death
- self.mount_a.kill()
-
- try:
- # Now, after mds_session_timeout seconds, the waiter should
- # complete their operation when the MDS marks the holder's
- # session stale.
- cap_waiter = self.mount_b.write_background()
- a = time.time()
- cap_waiter.wait()
- b = time.time()
-
- # Should have succeeded
- self.assertEqual(cap_waiter.exitstatus, 0)
-
- cap_waited = b - a
- log.info("cap_waiter waited {0}s".format(cap_waited))
- self.assertTrue(self.mds_session_timeout / 2.0 <= cap_waited <= self.mds_session_timeout * 2.0,
- "Capability handover took {0}, expected approx {1}".format(
- cap_waited, self.mds_session_timeout
- ))
-
- cap_holder.stdin.close()
- try:
- cap_holder.wait()
- except (CommandFailedError, ConnectionLostError):
- # We killed it (and possibly its node), so it raises an error
- pass
- finally:
- # teardown() doesn't quite handle this case cleanly, so help it out
- self.mount_a.kill_cleanup()
-
- self.mount_a.mount()
- self.mount_a.wait_until_mounted()
-
- def test_evicted_caps(self):
- # Eviction while holding a capability
- # ===================================
-
- # Take out a write capability on a file on client A,
- # and then immediately kill it.
- cap_holder = self.mount_a.open_background()
- mount_a_client_id = self.mount_a.get_global_id()
-
- # Wait for the file to be visible from another client, indicating
- # that mount_a has completed its network ops
- self.mount_b.wait_for_visible()
-
- # Simulate client death
- self.mount_a.kill()
-
- try:
- # The waiter should get stuck waiting for the capability
- # held on the MDS by the now-dead client A
- cap_waiter = self.mount_b.write_background()
- time.sleep(5)
- self.assertFalse(cap_waiter.finished)
-
- self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])
- # Now, because I evicted the old holder of the capability, it should
- # immediately get handed over to the waiter
- a = time.time()
- cap_waiter.wait()
- b = time.time()
- cap_waited = b - a
- log.info("cap_waiter waited {0}s".format(cap_waited))
- # This is the check that it happened 'now' rather than waiting
- # for the session timeout
- self.assertLess(cap_waited, self.mds_session_timeout / 2.0,
- "Capability handover took {0}, expected less than {1}".format(
- cap_waited, self.mds_session_timeout / 2.0
- ))
-
- cap_holder.stdin.close()
- try:
- cap_holder.wait()
- except (CommandFailedError, ConnectionLostError):
- # We killed it (and possibly its node), so it raises an error
- pass
- finally:
- self.mount_a.kill_cleanup()
-
- self.mount_a.mount()
- self.mount_a.wait_until_mounted()
-
- def test_trim_caps(self):
- # Trim capability when reconnecting MDS
- # ===================================
-
- count = 500
- # Create lots of files
- for i in range(count):
- self.mount_a.run_shell(["touch", "f{0}".format(i)])
-
- # Populate mount_b's cache
- self.mount_b.run_shell(["ls", "-l"])
-
- client_id = self.mount_b.get_global_id()
- num_caps = self._session_num_caps(client_id)
- self.assertGreaterEqual(num_caps, count)
-
- # Restart MDS. client should trim its cache when reconnecting to the MDS
- self.fs.mds_fail_restart()
- self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE)
-
- num_caps = self._session_num_caps(client_id)
- self.assertLess(num_caps, count,
- "should have less than {0} capabilities, have {1}".format(
- count, num_caps
- ))
-
- def _is_flockable(self):
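- # ceph-fuse only supports flock when built with libfuse >= 2.9, so check the fuse package version on both clients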
- a_version_str = get_package_version(self.mount_a.client_remote, "fuse")
- b_version_str = get_package_version(self.mount_b.client_remote, "fuse")
- flock_version_str = "2.9"
-
- version_regex = re.compile(r"[0-9\.]+")
- a_result = version_regex.match(a_version_str)
- self.assertTrue(a_result)
- b_result = version_regex.match(b_version_str)
- self.assertTrue(b_result)
- a_version = version.StrictVersion(a_result.group())
- b_version = version.StrictVersion(b_result.group())
- flock_version=version.StrictVersion(flock_version_str)
-
- if (a_version >= flock_version and b_version >= flock_version):
- log.info("flock locks are available")
- return True
- else:
- log.info("not testing flock locks, machines have versions {av} and {bv}".format(
- av=a_version_str,bv=b_version_str))
- return False
-
- def test_filelock(self):
- """
- Check that file lock doesn't get lost after an MDS restart
- """
-
- flockable = self._is_flockable()
- lock_holder = self.mount_a.lock_background(do_flock=flockable)
-
- self.mount_b.wait_for_visible("background_file-2")
- self.mount_b.check_filelock(do_flock=flockable)
-
- self.fs.mds_fail_restart()
- self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE)
-
- self.mount_b.check_filelock(do_flock=flockable)
-
- # Tear down the background process
- lock_holder.stdin.close()
- try:
- lock_holder.wait()
- except (CommandFailedError, ConnectionLostError):
- # We killed it, so it raises an error
- pass
-
- def test_filelock_eviction(self):
- """
- Check that file lock held by evicted client is given to
- waiting client.
- """
- if not self._is_flockable():
- self.skipTest("flock is not available")
-
- lock_holder = self.mount_a.lock_background()
- self.mount_b.wait_for_visible("background_file-2")
- self.mount_b.check_filelock()
-
- lock_taker = self.mount_b.lock_and_release()
- # Check the taker is waiting (doesn't get it immediately)
- time.sleep(2)
- self.assertFalse(lock_holder.finished)
- self.assertFalse(lock_taker.finished)
-
- try:
- mount_a_client_id = self.mount_a.get_global_id()
- self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])
-
- # Evicting mount_a should let mount_b's attempt to take the lock
- # succeed
- self.wait_until_true(lambda: lock_taker.finished, timeout=10)
- finally:
- # teardown() doesn't quite handle this case cleanly, so help it out
- self.mount_a.kill()
- self.mount_a.kill_cleanup()
-
- # Bring the client back
- self.mount_a.mount()
- self.mount_a.wait_until_mounted()
-
- def test_dir_fsync(self):
- self._test_fsync(True)
-
- def test_create_fsync(self):
- self._test_fsync(False)
-
- def _test_fsync(self, dirfsync):
- """
- That calls to fsync guarantee visibility of metadata to another
- client immediately after the fsyncing client dies.
- """
-
- # Leave this guy out until he's needed
- self.mount_b.umount_wait()
-
- # Create dir + child dentry on client A, and fsync the dir
- path = os.path.join(self.mount_a.mountpoint, "subdir")
- self.mount_a.run_python(
- dedent("""
- import os
- import time
-
- path = "{path}"
-
- print "Starting creation..."
- start = time.time()
-
- os.mkdir(path)
- dfd = os.open(path, os.O_DIRECTORY)
-
- fd = open(os.path.join(path, "childfile"), "w")
- print "Finished creation in {{0}}s".format(time.time() - start)
-
- print "Starting fsync..."
- start = time.time()
- if {dirfsync}:
- os.fsync(dfd)
- else:
- os.fsync(fd)
- print "Finished fsync in {{0}}s".format(time.time() - start)
- """.format(path=path,dirfsync=str(dirfsync)))
- )
-
- # Immediately kill the MDS and then client A
- self.fs.mds_stop()
- self.fs.mds_fail()
- self.mount_a.kill()
- self.mount_a.kill_cleanup()
-
- # Restart the MDS. Wait for it to come up, it'll have to time out in clientreplay
- self.fs.mds_restart()
- log.info("Waiting for reconnect...")
- self.fs.wait_for_state("up:reconnect")
- log.info("Waiting for active...")
- self.fs.wait_for_state("up:active", timeout=MDS_RESTART_GRACE + self.mds_reconnect_timeout)
- log.info("Reached active...")
-
- # Is the child dentry visible from mount B?
- self.mount_b.mount()
- self.mount_b.wait_until_mounted()
- self.mount_b.run_shell(["ls", "subdir/childfile"])
diff --git a/src/ceph/qa/tasks/cephfs/test_config_commands.py b/src/ceph/qa/tasks/cephfs/test_config_commands.py
deleted file mode 100644
index ce0619f..0000000
--- a/src/ceph/qa/tasks/cephfs/test_config_commands.py
+++ /dev/null
@@ -1,63 +0,0 @@
-
-from unittest import case
-from tasks.cephfs.cephfs_test_case import CephFSTestCase
-from tasks.cephfs.fuse_mount import FuseMount
-
-
-class TestConfigCommands(CephFSTestCase):
- """
- Test that daemons and clients respond to the otherwise rarely-used
- runtime config modification operations.
- """
-
- CLIENTS_REQUIRED = 1
- MDSS_REQUIRED = 1
-
- def test_client_config(self):
- """
- That I can successfully issue asok "config set" commands
-
- :return:
- """
-
- if not isinstance(self.mount_a, FuseMount):
- raise case.SkipTest("Test only applies to FUSE clients")
-
- test_key = "client_cache_size"
- test_val = "123"
- self.mount_a.admin_socket(['config', 'set', test_key, test_val])
- out = self.mount_a.admin_socket(['config', 'get', test_key])
- self.assertEqual(out[test_key], test_val)
-
- self.mount_a.write_n_mb("file.bin", 1);
-
- # Implicitly asserting that things don't have lockdep error in shutdown
- self.mount_a.umount_wait(require_clean=True)
- self.fs.mds_stop()
-
- def test_mds_config_asok(self):
- test_key = "mds_max_purge_ops"
- test_val = "123"
- self.fs.mds_asok(['config', 'set', test_key, test_val])
- out = self.fs.mds_asok(['config', 'get', test_key])
- self.assertEqual(out[test_key], test_val)
-
- # Implicitly asserting that things don't have lockdep error in shutdown
- self.mount_a.umount_wait(require_clean=True)
- self.fs.mds_stop()
-
- def test_mds_config_tell(self):
- test_key = "mds_max_purge_ops"
- test_val = "123"
-
- mds_id = self.fs.get_lone_mds_id()
- self.fs.mon_manager.raw_cluster_cmd("tell", "mds.{0}".format(mds_id), "injectargs",
- "--{0}={1}".format(test_key, test_val))
-
- # Read it back with asok because there is no `tell` equivalent
- out = self.fs.mds_asok(['config', 'get', test_key])
- self.assertEqual(out[test_key], test_val)
-
- # Implicitly asserting that things don't have lockdep error in shutdown
- self.mount_a.umount_wait(require_clean=True)
- self.fs.mds_stop()
diff --git a/src/ceph/qa/tasks/cephfs/test_damage.py b/src/ceph/qa/tasks/cephfs/test_damage.py
deleted file mode 100644
index 380b49c..0000000
--- a/src/ceph/qa/tasks/cephfs/test_damage.py
+++ /dev/null
@@ -1,548 +0,0 @@
-import json
-import logging
-import errno
-import re
-from teuthology.contextutil import MaxWhileTries
-from teuthology.exceptions import CommandFailedError
-from teuthology.orchestra.run import wait
-from tasks.cephfs.fuse_mount import FuseMount
-from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology
-
-DAMAGED_ON_START = "damaged_on_start"
-DAMAGED_ON_LS = "damaged_on_ls"
-CRASHED = "server crashed"
-NO_DAMAGE = "no damage"
-FAILED_CLIENT = "client failed"
-FAILED_SERVER = "server failed"
-
-# An EIO in response to a stat from the client
-EIO_ON_LS = "eio"
-
-# An EIO, but nothing in damage table (not ever what we expect)
-EIO_NO_DAMAGE = "eio without damage entry"
-
-
-log = logging.getLogger(__name__)
-
-
-class TestDamage(CephFSTestCase):
- def _simple_workload_write(self):
- self.mount_a.run_shell(["mkdir", "subdir"])
- self.mount_a.write_n_mb("subdir/sixmegs", 6)
- return self.mount_a.stat("subdir/sixmegs")
-
- def is_marked_damaged(self, rank):
- mds_map = self.fs.get_mds_map()
- return rank in mds_map['damaged']
-
- @for_teuthology #459s
- def test_object_deletion(self):
- """
- That the MDS has a clean 'damaged' response to loss of any single metadata object
- """
-
- self._simple_workload_write()
-
- # Hmm, actually it would be nice to permute whether the metadata pool
- # state contains sessions or not, but for the moment close this session
- # to avoid waiting through reconnect on every MDS start.
- self.mount_a.umount_wait()
- for mds_name in self.fs.get_active_names():
- self.fs.mds_asok(["flush", "journal"], mds_name)
-
- self.fs.mds_stop()
- self.fs.mds_fail()
-
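- # Take a pristine copy of the metadata pool so every mutation below can start from the same state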
- self.fs.rados(['export', '/tmp/metadata.bin'])
-
- def is_ignored(obj_id, dentry=None):
- """
- A filter to avoid redundantly mutating many similar objects (e.g.
- stray dirfrags) or similar dentries (e.g. stray dir dentries)
- """
- if re.match("60.\.00000000", obj_id) and obj_id != "600.00000000":
- return True
-
- if dentry and obj_id == "100.00000000":
- if re.match("stray.+_head", dentry) and dentry != "stray0_head":
- return True
-
- return False
-
- def get_path(obj_id, dentry=None):
- """
- What filesystem path does this object or dentry correspond to? i.e.
- what should I poke to see EIO after damaging it?
- """
-
- if obj_id == "1.00000000" and dentry == "subdir_head":
- return "./subdir"
- elif obj_id == "10000000000.00000000" and dentry == "sixmegs_head":
- return "./subdir/sixmegs"
-
- # None means ls will do an "ls -R" in hope of seeing some errors
- return None
-
- objects = self.fs.rados(["ls"]).split("\n")
- objects = [o for o in objects if not is_ignored(o)]
-
- # Find all objects with an OMAP header
- omap_header_objs = []
- for o in objects:
- header = self.fs.rados(["getomapheader", o])
- # The rados CLI wraps the header output in a hex-printed style
- header_bytes = int(re.match("header \((.+) bytes\)", header).group(1))
- if header_bytes > 0:
- omap_header_objs.append(o)
-
- # Find all OMAP key/vals
- omap_keys = []
- for o in objects:
- keys_str = self.fs.rados(["listomapkeys", o])
- if keys_str:
- for key in keys_str.split("\n"):
- if not is_ignored(o, key):
- omap_keys.append((o, key))
-
- # Find objects that have data in their bodies
- data_objects = []
- for obj_id in objects:
- stat_out = self.fs.rados(["stat", obj_id])
- size = int(re.match(".+, size (.+)$", stat_out).group(1))
- if size > 0:
- data_objects.append(obj_id)
-
- # Define the various forms of damage we will inflict
- class MetadataMutation(object):
- def __init__(self, obj_id_, desc_, mutate_fn_, expectation_, ls_path=None):
- self.obj_id = obj_id_
- self.desc = desc_
- self.mutate_fn = mutate_fn_
- self.expectation = expectation_
- if ls_path is None:
- self.ls_path = "."
- else:
- self.ls_path = ls_path
-
- def __eq__(self, other):
- return self.desc == other.desc
-
- def __hash__(self):
- return hash(self.desc)
-
- junk = "deadbeef" * 10
- mutations = []
-
- # Removals
- for obj_id in objects:
- if obj_id in [
- # JournalPointers are auto-replaced if missing (same path as upgrade)
- "400.00000000",
- # Missing dirfrags for non-system dirs result in empty directory
- "10000000000.00000000",
- # PurgeQueue is auto-created if not found on startup
- "500.00000000"
- ]:
- expectation = NO_DAMAGE
- else:
- expectation = DAMAGED_ON_START
-
- log.info("Expectation on rm '{0}' will be '{1}'".format(
- obj_id, expectation
- ))
-
- mutations.append(MetadataMutation(
- obj_id,
- "Delete {0}".format(obj_id),
- lambda o=obj_id: self.fs.rados(["rm", o]),
- expectation
- ))
-
- # Blatant corruptions
- mutations.extend([
- MetadataMutation(
- o,
- "Corrupt {0}".format(o),
- lambda o=o: self.fs.rados(["put", o, "-"], stdin_data=junk),
- DAMAGED_ON_START
- ) for o in data_objects
- ])
-
- # Truncations
- for obj_id in data_objects:
- if obj_id == "500.00000000":
- # The PurgeQueue is allowed to be empty: Journaler interprets
- # an empty header object as an empty journal.
- expectation = NO_DAMAGE
- else:
- expectation = DAMAGED_ON_START
-
- mutations.append(
- MetadataMutation(
- obj_id,
- "Truncate {0}".format(obj_id),
- lambda o=obj_id: self.fs.rados(["truncate", o, "0"]),
- expectation
- ))
-
- # OMAP value corruptions
- for o, k in omap_keys:
- if o.startswith("100."):
- # Anything in rank 0's 'mydir'
- expectation = DAMAGED_ON_START
- else:
- expectation = EIO_ON_LS
-
- mutations.append(
- MetadataMutation(
- o,
- "Corrupt omap key {0}:{1}".format(o, k),
- lambda o=o,k=k: self.fs.rados(["setomapval", o, k, junk]),
- expectation,
- get_path(o, k)
- )
- )
-
- # OMAP header corruptions
- for obj_id in omap_header_objs:
- if re.match("60.\.00000000", obj_id) \
- or obj_id in ["1.00000000", "100.00000000", "mds0_sessionmap"]:
- expectation = DAMAGED_ON_START
- else:
- expectation = NO_DAMAGE
-
- log.info("Expectation on corrupt header '{0}' will be '{1}'".format(
- obj_id, expectation
- ))
-
- mutations.append(
- MetadataMutation(
- obj_id,
- "Corrupt omap header on {0}".format(obj_id),
- lambda o=obj_id: self.fs.rados(["setomapheader", o, junk]),
- expectation
- )
- )
-
- results = {}
-
- for mutation in mutations:
- log.info("Applying mutation '{0}'".format(mutation.desc))
-
- # Reset MDS state
- self.mount_a.umount_wait(force=True)
- self.fs.mds_stop()
- self.fs.mds_fail()
- self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0')
-
- # Reset RADOS pool state
- self.fs.rados(['import', '/tmp/metadata.bin'])
-
- # Inject the mutation
- mutation.mutate_fn()
-
- # Try starting the MDS
- self.fs.mds_restart()
-
- # How long we'll wait between starting a daemon and expecting
- # it to make it through startup, and potentially declare itself
- # damaged to the mon cluster.
- startup_timeout = 60
-
- if mutation.expectation not in (EIO_ON_LS, DAMAGED_ON_LS, NO_DAMAGE):
- if mutation.expectation == DAMAGED_ON_START:
- # The MDS may pass through active before making it to damaged
- try:
- self.wait_until_true(lambda: self.is_marked_damaged(0), startup_timeout)
- except RuntimeError:
- pass
-
- # Wait for MDS to either come up or go into damaged state
- try:
- self.wait_until_true(lambda: self.is_marked_damaged(0) or self.fs.are_daemons_healthy(), startup_timeout)
- except RuntimeError:
- crashed = False
- # Didn't make it to healthy or damaged, did it crash?
- for daemon_id, daemon in self.fs.mds_daemons.items():
- if daemon.proc and daemon.proc.finished:
- crashed = True
- log.error("Daemon {0} crashed!".format(daemon_id))
- daemon.proc = None # So that subsequent stop() doesn't raise error
- if not crashed:
- # Didn't go healthy, didn't go damaged, didn't crash: re-raise the timeout
- raise
- else:
- log.info("Result: Mutation '{0}' led to crash".format(mutation.desc))
- results[mutation] = CRASHED
- continue
- if self.is_marked_damaged(0):
- log.info("Result: Mutation '{0}' led to DAMAGED state".format(mutation.desc))
- results[mutation] = DAMAGED_ON_START
- continue
- else:
- log.info("Mutation '{0}' did not prevent MDS startup, attempting ls...".format(mutation.desc))
- else:
- try:
- self.wait_until_true(self.fs.are_daemons_healthy, 60)
- except RuntimeError:
- log.info("Result: Mutation '{0}' should have left us healthy, actually not.".format(mutation.desc))
- if self.is_marked_damaged(0):
- results[mutation] = DAMAGED_ON_START
- else:
- results[mutation] = FAILED_SERVER
- continue
- log.info("Daemons came up after mutation '{0}', proceeding to ls".format(mutation.desc))
-
- # MDS is up, should go damaged on ls or client mount
- self.mount_a.mount()
- self.mount_a.wait_until_mounted()
- if mutation.ls_path == ".":
- proc = self.mount_a.run_shell(["ls", "-R", mutation.ls_path], wait=False)
- else:
- proc = self.mount_a.stat(mutation.ls_path, wait=False)
-
- if mutation.expectation == DAMAGED_ON_LS:
- try:
- self.wait_until_true(lambda: self.is_marked_damaged(0), 60)
- log.info("Result: Mutation '{0}' led to DAMAGED state after ls".format(mutation.desc))
- results[mutation] = DAMAGED_ON_LS
- except RuntimeError:
- if self.fs.are_daemons_healthy():
- log.error("Result: Failed to go damaged on mutation '{0}', actually went active".format(
- mutation.desc))
- results[mutation] = NO_DAMAGE
- else:
- log.error("Result: Failed to go damaged on mutation '{0}'".format(mutation.desc))
- results[mutation] = FAILED_SERVER
-
- else:
- try:
- wait([proc], 20)
- log.info("Result: Mutation '{0}' did not caused DAMAGED state".format(mutation.desc))
- results[mutation] = NO_DAMAGE
- except MaxWhileTries:
- log.info("Result: Failed to complete client IO on mutation '{0}'".format(mutation.desc))
- results[mutation] = FAILED_CLIENT
- except CommandFailedError as e:
- if e.exitstatus == errno.EIO:
- log.info("Result: EIO on client")
- results[mutation] = EIO_ON_LS
- else:
- log.info("Result: unexpected error {0} on client".format(e))
- results[mutation] = FAILED_CLIENT
-
- if mutation.expectation == EIO_ON_LS:
- # EIOs mean something handled by DamageTable: assert that it has
- # been populated
- damage = json.loads(
- self.fs.mon_manager.raw_cluster_cmd(
- 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), "damage", "ls", '--format=json-pretty'))
- if len(damage) == 0:
- results[mutation] = EIO_NO_DAMAGE
-
- failures = [(mutation, result) for (mutation, result) in results.items() if mutation.expectation != result]
- if failures:
- log.error("{0} mutations had unexpected outcomes:".format(len(failures)))
- for mutation, result in failures:
- log.error(" Expected '{0}' actually '{1}' from '{2}'".format(
- mutation.expectation, result, mutation.desc
- ))
- raise RuntimeError("{0} mutations had unexpected outcomes".format(len(failures)))
- else:
- log.info("All {0} mutations had expected outcomes".format(len(mutations)))
-
- def test_damaged_dentry(self):
- # Damage to dentries is interesting because it leaves the
- # directory's `complete` flag in a subtle state where
- # we have marked the dir complete in order that folks
- # can access it, but in actual fact there is a dentry
- # missing
- self.mount_a.run_shell(["mkdir", "subdir/"])
-
- self.mount_a.run_shell(["touch", "subdir/file_undamaged"])
- self.mount_a.run_shell(["touch", "subdir/file_to_be_damaged"])
-
- subdir_ino = self.mount_a.path_to_ino("subdir")
-
- self.mount_a.umount_wait()
- for mds_name in self.fs.get_active_names():
- self.fs.mds_asok(["flush", "journal"], mds_name)
-
- self.fs.mds_stop()
- self.fs.mds_fail()
-
- # Corrupt a dentry
- junk = "deadbeef" * 10
- dirfrag_obj = "{0:x}.00000000".format(subdir_ino)
- self.fs.rados(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk])
-
- # Start up and try to list it
- self.fs.mds_restart()
- self.fs.wait_for_daemons()
-
- self.mount_a.mount()
- self.mount_a.wait_until_mounted()
- dentries = self.mount_a.ls("subdir/")
-
- # The damaged guy should have disappeared
- self.assertEqual(dentries, ["file_undamaged"])
-
- # I should get ENOENT if I try and read it normally, because
- # the dir is considered complete
- try:
- self.mount_a.stat("subdir/file_to_be_damaged", wait=True)
- except CommandFailedError as e:
- self.assertEqual(e.exitstatus, errno.ENOENT)
- else:
- raise AssertionError("Expected ENOENT")
-
- # The fact that there is damage should have been recorded
- damage = json.loads(
- self.fs.mon_manager.raw_cluster_cmd(
- 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
- "damage", "ls", '--format=json-pretty'))
- self.assertEqual(len(damage), 1)
- damage_id = damage[0]['id']
-
- # If I try to create a dentry with the same name as the damaged guy
- # then that should be forbidden
- try:
- self.mount_a.touch("subdir/file_to_be_damaged")
- except CommandFailedError as e:
- self.assertEqual(e.exitstatus, errno.EIO)
- else:
- raise AssertionError("Expected EIO")
-
- # Attempting that touch will clear the client's complete flag, now
- # when I stat it I'll get EIO instead of ENOENT
- try:
- self.mount_a.stat("subdir/file_to_be_damaged", wait=True)
- except CommandFailedError as e:
- if isinstance(self.mount_a, FuseMount):
- self.assertEqual(e.exitstatus, errno.EIO)
- else:
- # Kernel client handles this case differently
- self.assertEqual(e.exitstatus, errno.ENOENT)
- else:
- raise AssertionError("Expected EIO")
-
- nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files")
- self.assertEqual(nfiles, "2")
-
- self.mount_a.umount_wait()
-
- # Now repair the stats
- scrub_json = self.fs.mds_asok(["scrub_path", "/subdir", "repair"])
- log.info(json.dumps(scrub_json, indent=2))
-
- self.assertEqual(scrub_json["passed_validation"], False)
- self.assertEqual(scrub_json["raw_stats"]["checked"], True)
- self.assertEqual(scrub_json["raw_stats"]["passed"], False)
-
- # Check that the file count is now correct
- self.mount_a.mount()
- self.mount_a.wait_until_mounted()
- nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files")
- self.assertEqual(nfiles, "1")
-
- # Clean up the omap object
- self.fs.rados(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk])
-
- # Clean up the damagetable entry
- self.fs.mon_manager.raw_cluster_cmd(
- 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
- "damage", "rm", "{did}".format(did=damage_id))
-
- # Now I should be able to create a file with the same name as the
- # damaged guy if I want.
- self.mount_a.touch("subdir/file_to_be_damaged")
-
- def test_open_ino_errors(self):
- """
- That errors encountered during opening inos are properly propagated
- """
-
- self.mount_a.run_shell(["mkdir", "dir1"])
- self.mount_a.run_shell(["touch", "dir1/file1"])
- self.mount_a.run_shell(["mkdir", "dir2"])
- self.mount_a.run_shell(["touch", "dir2/file2"])
- self.mount_a.run_shell(["mkdir", "testdir"])
- self.mount_a.run_shell(["ln", "dir1/file1", "testdir/hardlink1"])
- self.mount_a.run_shell(["ln", "dir2/file2", "testdir/hardlink2"])
-
- file1_ino = self.mount_a.path_to_ino("dir1/file1")
- file2_ino = self.mount_a.path_to_ino("dir2/file2")
- dir2_ino = self.mount_a.path_to_ino("dir2")
-
- # Ensure everything is written to backing store
- self.mount_a.umount_wait()
- self.fs.mds_asok(["flush", "journal"])
-
- # Drop everything from the MDS cache
- self.mds_cluster.mds_stop()
- self.fs.journal_tool(['journal', 'reset'])
- self.mds_cluster.mds_fail_restart()
- self.fs.wait_for_daemons()
-
- self.mount_a.mount()
-
- # Case 1: un-decodeable backtrace
-
- # Validate that the backtrace is present and decodable
- self.fs.read_backtrace(file1_ino)
- # Corrupt the backtrace of dir1/file1 (used when resolving
- # testdir/hardlink1).
- self.fs._write_data_xattr(file1_ino, "parent", "rhubarb")
-
- # Check that touching the hardlink gives EIO
- ran = self.mount_a.run_shell(["stat", "testdir/hardlink1"], wait=False)
- try:
- ran.wait()
- except CommandFailedError:
- self.assertTrue("Input/output error" in ran.stderr.getvalue())
-
- # Check that an entry is created in the damage table
- damage = json.loads(
- self.fs.mon_manager.raw_cluster_cmd(
- 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
- "damage", "ls", '--format=json-pretty'))
- self.assertEqual(len(damage), 1)
- self.assertEqual(damage[0]['damage_type'], "backtrace")
- self.assertEqual(damage[0]['ino'], file1_ino)
-
- self.fs.mon_manager.raw_cluster_cmd(
- 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
- "damage", "rm", str(damage[0]['id']))
-
-
- # Case 2: missing dirfrag for the target inode
-
- self.fs.rados(["rm", "{0:x}.00000000".format(dir2_ino)])
-
- # Check that touching the hardlink gives EIO
- ran = self.mount_a.run_shell(["stat", "testdir/hardlink2"], wait=False)
- try:
- ran.wait()
- except CommandFailedError:
- self.assertTrue("Input/output error" in ran.stderr.getvalue())
-
- # Check that an entry is created in the damage table
- damage = json.loads(
- self.fs.mon_manager.raw_cluster_cmd(
- 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
- "damage", "ls", '--format=json-pretty'))
- self.assertEqual(len(damage), 2)
- if damage[0]['damage_type'] == "backtrace" :
- self.assertEqual(damage[0]['ino'], file2_ino)
- self.assertEqual(damage[1]['damage_type'], "dir_frag")
- self.assertEqual(damage[1]['ino'], dir2_ino)
- else:
- self.assertEqual(damage[0]['damage_type'], "dir_frag")
- self.assertEqual(damage[0]['ino'], dir2_ino)
- self.assertEqual(damage[1]['damage_type'], "backtrace")
- self.assertEqual(damage[1]['ino'], file2_ino)
-
- for entry in damage:
- self.fs.mon_manager.raw_cluster_cmd(
- 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
- "damage", "rm", str(entry['id']))
diff --git a/src/ceph/qa/tasks/cephfs/test_data_scan.py b/src/ceph/qa/tasks/cephfs/test_data_scan.py
deleted file mode 100644
index a2d3157..0000000
--- a/src/ceph/qa/tasks/cephfs/test_data_scan.py
+++ /dev/null
@@ -1,600 +0,0 @@
-
-"""
-Test our tools for recovering metadata from the data pool
-"""
-import json
-
-import logging
-import os
-from textwrap import dedent
-import traceback
-from collections import namedtuple, defaultdict
-
-from teuthology.orchestra.run import CommandFailedError
-from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology
-
-log = logging.getLogger(__name__)
-
-
-ValidationError = namedtuple("ValidationError", ["exception", "backtrace"])
-
-
-class Workload(object):
- def __init__(self, filesystem, mount):
- self._mount = mount
- self._filesystem = filesystem
- self._initial_state = None
-
- # Accumulate backtraces for every failed validation, and return them. Backtraces
- # are rather verbose, but we only see them when something breaks, and they
- # let us see which check failed without having to decorate each check with
- # a string
- self._errors = []
-
- def assert_equal(self, a, b):
- try:
- if a != b:
- raise AssertionError("{0} != {1}".format(a, b))
- except AssertionError as e:
- self._errors.append(
- ValidationError(e, traceback.format_exc(3))
- )
-
- def write(self):
- """
- Write the workload files to the mount
- """
- raise NotImplementedError()
-
- def validate(self):
- """
- Read from the mount and validate that the workload files are present (i.e. have
- survived or been reconstructed from the test scenario)
- """
- raise NotImplementedError()
-
- def damage(self):
- """
- Damage the filesystem pools in ways that will be interesting to recover from. By
- default just wipe everything in the metadata pool
- """
- # Delete every object in the metadata pool
- objects = self._filesystem.rados(["ls"]).split("\n")
- for o in objects:
- self._filesystem.rados(["rm", o])
-
- def flush(self):
- """
- Called after client unmount, after write: flush whatever you want
- """
- self._filesystem.mds_asok(["flush", "journal"])
-
-
-class SimpleWorkload(Workload):
- """
- Single file, single directory, check that it gets recovered and so does its size
- """
- def write(self):
- self._mount.run_shell(["mkdir", "subdir"])
- self._mount.write_n_mb("subdir/sixmegs", 6)
- self._initial_state = self._mount.stat("subdir/sixmegs")
-
- def validate(self):
- self._mount.run_shell(["ls", "subdir"])
- st = self._mount.stat("subdir/sixmegs")
- self.assert_equal(st['st_size'], self._initial_state['st_size'])
- return self._errors
-
-
-class MovedFile(Workload):
- def write(self):
- # Create a file whose backtrace disagrees with its eventual position
- # in the metadata. We will see that it gets reconstructed in its
- # original position according to its backtrace.
- self._mount.run_shell(["mkdir", "subdir_alpha"])
- self._mount.run_shell(["mkdir", "subdir_bravo"])
- self._mount.write_n_mb("subdir_alpha/sixmegs", 6)
- self._filesystem.mds_asok(["flush", "journal"])
- self._mount.run_shell(["mv", "subdir_alpha/sixmegs", "subdir_bravo/sixmegs"])
- self._initial_state = self._mount.stat("subdir_bravo/sixmegs")
-
- def flush(self):
- pass
-
- def validate(self):
- self.assert_equal(self._mount.ls(), ["subdir_alpha"])
- st = self._mount.stat("subdir_alpha/sixmegs")
- self.assert_equal(st['st_size'], self._initial_state['st_size'])
- return self._errors
-
-
-class BacktracelessFile(Workload):
- def write(self):
- self._mount.run_shell(["mkdir", "subdir"])
- self._mount.write_n_mb("subdir/sixmegs", 6)
- self._initial_state = self._mount.stat("subdir/sixmegs")
-
- def flush(self):
- # Never flush metadata, so backtrace won't be written
- pass
-
- def validate(self):
- ino_name = "%x" % self._initial_state["st_ino"]
-
- # The inode should be linked into lost+found because we had no path for it
- self.assert_equal(self._mount.ls(), ["lost+found"])
- self.assert_equal(self._mount.ls("lost+found"), [ino_name])
- st = self._mount.stat("lost+found/{ino_name}".format(ino_name=ino_name))
-
- # We might not have got the name or path, but we should still get the size
- self.assert_equal(st['st_size'], self._initial_state['st_size'])
-
- return self._errors
-
-
-class StripedStashedLayout(Workload):
- def __init__(self, fs, m):
- super(StripedStashedLayout, self).__init__(fs, m)
-
- # Nice small stripes so we can quickly do our writes+validates
- self.sc = 4
- self.ss = 65536
- self.os = 262144
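- # sc = stripe_count, ss = stripe_unit, os = object_size (used in the layout set by write())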
-
- self.interesting_sizes = [
- # Exactly stripe_count objects will exist
- self.os * self.sc,
- # Fewer than stripe_count objects will exist
- self.os * self.sc / 2,
- self.os * (self.sc - 1) + self.os / 2,
- self.os * (self.sc - 1) + self.os / 2 - 1,
- self.os * (self.sc + 1) + self.os / 2,
- self.os * (self.sc + 1) + self.os / 2 + 1,
- # More than stripe_count objects will exist
- self.os * self.sc + self.os * self.sc / 2
- ]
-
- def write(self):
- # Create a dir with a striped layout set on it
- self._mount.run_shell(["mkdir", "stripey"])
-
- self._mount.setfattr("./stripey", "ceph.dir.layout",
- "stripe_unit={ss} stripe_count={sc} object_size={os} pool={pool}".format(
- ss=self.ss, os=self.os, sc=self.sc,
- pool=self._filesystem.get_data_pool_name()
- ))
-
- # Write files, then flush metadata so that its layout gets written into an xattr
- for i, n_bytes in enumerate(self.interesting_sizes):
- self._mount.write_test_pattern("stripey/flushed_file_{0}".format(i), n_bytes)
- # This is really just validating the validator
- self._mount.validate_test_pattern("stripey/flushed_file_{0}".format(i), n_bytes)
- self._filesystem.mds_asok(["flush", "journal"])
-
- # Write another file in the same way, but this time don't flush the metadata,
- # so that it won't have the layout xattr
- self._mount.write_test_pattern("stripey/unflushed_file", 1024 * 512)
- self._mount.validate_test_pattern("stripey/unflushed_file", 1024 * 512)
-
- self._initial_state = {
- "unflushed_ino": self._mount.path_to_ino("stripey/unflushed_file")
- }
-
- def flush(self):
- # Pass because we already selectively flushed during write
- pass
-
- def validate(self):
- # The first files should have been recovered into their original
- # locations with the correct layout: read back correct data
- for i, n_bytes in enumerate(self.interesting_sizes):
- try:
- self._mount.validate_test_pattern("stripey/flushed_file_{0}".format(i), n_bytes)
- except CommandFailedError as e:
- self._errors.append(
- ValidationError("File {0} (size {1}): {2}".format(i, n_bytes, e), traceback.format_exc(3))
- )
-
- # The unflushed file should have been recovered into lost+found without
- # the correct layout: read back junk
- ino_name = "%x" % self._initial_state["unflushed_ino"]
- self.assert_equal(self._mount.ls("lost+found"), [ino_name])
- try:
- self._mount.validate_test_pattern(os.path.join("lost+found", ino_name), 1024 * 512)
- except CommandFailedError:
- pass
- else:
- self._errors.append(
- ValidationError("Unexpectedly valid data in unflushed striped file", "")
- )
-
- return self._errors
-
-
-class ManyFilesWorkload(Workload):
- def __init__(self, filesystem, mount, file_count):
- super(ManyFilesWorkload, self).__init__(filesystem, mount)
- self.file_count = file_count
-
- def write(self):
- self._mount.run_shell(["mkdir", "subdir"])
- for n in range(0, self.file_count):
- self._mount.write_test_pattern("subdir/{0}".format(n), 6 * 1024 * 1024)
-
- def validate(self):
- for n in range(0, self.file_count):
- try:
- self._mount.validate_test_pattern("subdir/{0}".format(n), 6 * 1024 * 1024)
- except CommandFailedError as e:
- self._errors.append(
- ValidationError("File {0}: {1}".format(n, e), traceback.format_exc(3))
- )
-
- return self._errors
-
-
-class MovedDir(Workload):
- def write(self):
- # Create a nested dir that we will then move. Two files with two different
- # backtraces referring to the moved dir, claiming two different locations for
- # it. We will see that only one backtrace wins and the dir ends up with
- # single linkage.
- self._mount.run_shell(["mkdir", "-p", "grandmother/parent"])
- self._mount.write_n_mb("grandmother/parent/orig_pos_file", 1)
- self._filesystem.mds_asok(["flush", "journal"])
- self._mount.run_shell(["mkdir", "grandfather"])
- self._mount.run_shell(["mv", "grandmother/parent", "grandfather"])
- self._mount.write_n_mb("grandfather/parent/new_pos_file", 2)
- self._filesystem.mds_asok(["flush", "journal"])
-
- self._initial_state = (
- self._mount.stat("grandfather/parent/orig_pos_file"),
- self._mount.stat("grandfather/parent/new_pos_file")
- )
-
- def validate(self):
- root_files = self._mount.ls()
- self.assert_equal(len(root_files), 1)
- self.assert_equal(root_files[0] in ["grandfather", "grandmother"], True)
- winner = root_files[0]
- st_opf = self._mount.stat("{0}/parent/orig_pos_file".format(winner))
- st_npf = self._mount.stat("{0}/parent/new_pos_file".format(winner))
-
- self.assert_equal(st_opf['st_size'], self._initial_state[0]['st_size'])
- self.assert_equal(st_npf['st_size'], self._initial_state[1]['st_size'])
-
-
-class MissingZerothObject(Workload):
- def write(self):
- self._mount.run_shell(["mkdir", "subdir"])
- self._mount.write_n_mb("subdir/sixmegs", 6)
- self._initial_state = self._mount.stat("subdir/sixmegs")
-
- def damage(self):
- super(MissingZerothObject, self).damage()
- zeroth_id = "{0:x}.00000000".format(self._initial_state['st_ino'])
- self._filesystem.rados(["rm", zeroth_id], pool=self._filesystem.get_data_pool_name())
-
- def validate(self):
- st = self._mount.stat("lost+found/{0:x}".format(self._initial_state['st_ino']))
- self.assert_equal(st['st_size'], self._initial_state['st_size'])
-
-
-class NonDefaultLayout(Workload):
- """
- Check that the reconstruction copes with files that have a different
- object size in their layout
- """
- def write(self):
- self._mount.run_shell(["touch", "datafile"])
- self._mount.setfattr("./datafile", "ceph.file.layout.object_size", "8388608")
- self._mount.run_shell(["dd", "if=/dev/urandom", "of=./datafile", "bs=1M", "count=32"])
- self._initial_state = self._mount.stat("datafile")
-
- def validate(self):
- # Check we got the layout reconstructed properly
- object_size = int(self._mount.getfattr(
- "./datafile", "ceph.file.layout.object_size"))
- self.assert_equal(object_size, 8388608)
-
- # Check we got the file size reconstructed properly
- st = self._mount.stat("datafile")
- self.assert_equal(st['st_size'], self._initial_state['st_size'])
-
-
-class TestDataScan(CephFSTestCase):
- MDSS_REQUIRED = 2
-
- def is_marked_damaged(self, rank):
- mds_map = self.fs.get_mds_map()
- return rank in mds_map['damaged']
-
- def _rebuild_metadata(self, workload, workers=1):
- """
- That when all objects in metadata pool are removed, we can rebuild a metadata pool
- based on the contents of a data pool, and a client can see and read our files.
- """
-
- # First, inject some files
-
- workload.write()
-
- # Unmount the client and flush the journal: the tool should also cope with
- # situations where there is dirty metadata, but we'll test that separately
- self.mount_a.umount_wait()
- workload.flush()
-
- # Stop the MDS
- self.fs.mds_stop()
- self.fs.mds_fail()
-
- # After recovery, we need the MDS to not be strict about stats (in production these options
- # are off by default, but in QA we need to explicitly disable them)
- self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
- self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)
-
- # Apply any data damage the workload wants
- workload.damage()
-
- # Reset the MDS map in case multiple ranks were in play: recovery procedure
- # only understands how to rebuild metadata under rank 0
- self.fs.mon_manager.raw_cluster_cmd('fs', 'reset', self.fs.name,
- '--yes-i-really-mean-it')
-
- self.fs.mds_restart()
-
- def get_state(mds_id):
- info = self.mds_cluster.get_mds_info(mds_id)
- return info['state'] if info is not None else None
-
- self.wait_until_true(lambda: self.is_marked_damaged(0), 60)
- for mds_id in self.fs.mds_ids:
- self.wait_until_equal(
- lambda: get_state(mds_id),
- "up:standby",
- timeout=60)
-
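- # Reset the session, snap and inode tables so the rebuilt metadata starts from a clean slate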
- self.fs.table_tool([self.fs.name + ":0", "reset", "session"])
- self.fs.table_tool([self.fs.name + ":0", "reset", "snap"])
- self.fs.table_tool([self.fs.name + ":0", "reset", "inode"])
-
- # Run the recovery procedure
- if False:
- with self.assertRaises(CommandFailedError):
- # Normal reset should fail when no objects are present, we'll use --force instead
- self.fs.journal_tool(["journal", "reset"])
-
- self.fs.journal_tool(["journal", "reset", "--force"])
- self.fs.data_scan(["init"])
- self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()], worker_count=workers)
- self.fs.data_scan(["scan_inodes", self.fs.get_data_pool_name()], worker_count=workers)
-
- # Mark the MDS repaired
- self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0')
-
- # Start the MDS
- self.fs.mds_restart()
- self.fs.wait_for_daemons()
- log.info(str(self.mds_cluster.status()))
-
- # Mount a client
- self.mount_a.mount()
- self.mount_a.wait_until_mounted()
-
- # See that the files are present and correct
- errors = workload.validate()
- if errors:
- log.error("Validation errors found: {0}".format(len(errors)))
- for e in errors:
- log.error(e.exception)
- log.error(e.backtrace)
- raise AssertionError("Validation failed, first error: {0}\n{1}".format(
- errors[0].exception, errors[0].backtrace
- ))
-
- def test_rebuild_simple(self):
- self._rebuild_metadata(SimpleWorkload(self.fs, self.mount_a))
-
- def test_rebuild_moved_file(self):
- self._rebuild_metadata(MovedFile(self.fs, self.mount_a))
-
- def test_rebuild_backtraceless(self):
- self._rebuild_metadata(BacktracelessFile(self.fs, self.mount_a))
-
- def test_rebuild_moved_dir(self):
- self._rebuild_metadata(MovedDir(self.fs, self.mount_a))
-
- def test_rebuild_missing_zeroth(self):
- self._rebuild_metadata(MissingZerothObject(self.fs, self.mount_a))
-
- def test_rebuild_nondefault_layout(self):
- self._rebuild_metadata(NonDefaultLayout(self.fs, self.mount_a))
-
- def test_stashed_layout(self):
- self._rebuild_metadata(StripedStashedLayout(self.fs, self.mount_a))
-
- def _dirfrag_keys(self, object_id):
- keys_str = self.fs.rados(["listomapkeys", object_id])
- if keys_str:
- return keys_str.split("\n")
- else:
- return []
-
- def test_fragmented_injection(self):
- """
- That when injecting a dentry into a fragmented directory, we put it in the right fragment.
- """
-
- self.fs.set_allow_dirfrags(True)
-
- file_count = 100
- file_names = ["%s" % n for n in range(0, file_count)]
-
- # Create a directory of `file_count` files, each named after its
- # decimal number and containing the string of its decimal number
- self.mount_a.run_python(dedent("""
- import os
- path = os.path.join("{path}", "subdir")
- os.mkdir(path)
- for n in range(0, {file_count}):
- open(os.path.join(path, "%s" % n), 'w').write("%s" % n)
- """.format(
- path=self.mount_a.mountpoint,
- file_count=file_count
- )))
-
- dir_ino = self.mount_a.path_to_ino("subdir")
-
- # Only one MDS should be active!
- self.assertEqual(len(self.fs.get_active_names()), 1)
-
- # Ensure that one directory is fragmented
- mds_id = self.fs.get_active_names()[0]
- self.fs.mds_asok(["dirfrag", "split", "/subdir", "0/0", "1"], mds_id)
-
- # Flush journal and stop MDS
- self.mount_a.umount_wait()
- self.fs.mds_asok(["flush", "journal"], mds_id)
- self.fs.mds_stop()
- self.fs.mds_fail()
-
- # Pick a dentry and wipe out its key
- # Because I did a 1 bit split, I know one frag will be named <inode>.01000000
- frag_obj_id = "{0:x}.01000000".format(dir_ino)
- keys = self._dirfrag_keys(frag_obj_id)
- victim_key = keys[7] # arbitrary choice
- log.info("victim_key={0}".format(victim_key))
- victim_dentry = victim_key.split("_head")[0]
- self.fs.rados(["rmomapkey", frag_obj_id, victim_key])
-
- # Start filesystem back up, observe that the file appears to be gone in an `ls`
- self.fs.mds_restart()
- self.fs.wait_for_daemons()
- self.mount_a.mount()
- self.mount_a.wait_until_mounted()
- files = self.mount_a.run_shell(["ls", "subdir/"]).stdout.getvalue().strip().split("\n")
- self.assertListEqual(sorted(files), sorted(list(set(file_names) - set([victim_dentry]))))
-
- # Stop the filesystem
- self.mount_a.umount_wait()
- self.fs.mds_stop()
- self.fs.mds_fail()
-
- # Run data-scan, observe that it inserts our dentry back into the correct fragment
- # by checking the omap now has the dentry's key again
- self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()])
- self.fs.data_scan(["scan_inodes", self.fs.get_data_pool_name()])
- self.assertIn(victim_key, self._dirfrag_keys(frag_obj_id))
-
- # Start the filesystem and check that the dentry we deleted is now once again visible
- # and points to the correct file data.
- self.fs.mds_restart()
- self.fs.wait_for_daemons()
- self.mount_a.mount()
- self.mount_a.wait_until_mounted()
- out = self.mount_a.run_shell(["cat", "subdir/{0}".format(victim_dentry)]).stdout.getvalue().strip()
- self.assertEqual(out, victim_dentry)
-
- # Finally, close the loop by checking our injected dentry survives a merge
- mds_id = self.fs.get_active_names()[0]
- self.mount_a.ls("subdir") # Do an ls to ensure both frags are in cache so the merge will work
- self.fs.mds_asok(["dirfrag", "merge", "/subdir", "0/0"], mds_id)
- self.fs.mds_asok(["flush", "journal"], mds_id)
- frag_obj_id = "{0:x}.00000000".format(dir_ino)
- keys = self._dirfrag_keys(frag_obj_id)
- self.assertListEqual(sorted(keys), sorted(["%s_head" % f for f in file_names]))
-
- @for_teuthology
- def test_parallel_execution(self):
- self._rebuild_metadata(ManyFilesWorkload(self.fs, self.mount_a, 25), workers=7)
-
- def test_pg_files(self):
- """
- That the pg files command tells us which files are associated with
- a particular PG
- """
- file_count = 20
- self.mount_a.run_shell(["mkdir", "mydir"])
- self.mount_a.create_n_files("mydir/myfile", file_count)
-
- # Some files elsewhere in the system that we will ignore
- # to check that the tool is filtering properly
- self.mount_a.run_shell(["mkdir", "otherdir"])
- self.mount_a.create_n_files("otherdir/otherfile", file_count)
-
- pgs_to_files = defaultdict(list)
- # Rough (slow) reimplementation of the logic
- for i in range(0, file_count):
- file_path = "mydir/myfile_{0}".format(i)
- ino = self.mount_a.path_to_ino(file_path)
- obj = "{0:x}.{1:08x}".format(ino, 0)
- pgid = json.loads(self.fs.mon_manager.raw_cluster_cmd(
- "osd", "map", self.fs.get_data_pool_name(), obj,
- "--format=json-pretty"
- ))['pgid']
- pgs_to_files[pgid].append(file_path)
- log.info("{0}: {1}".format(file_path, pgid))
-
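- # Now ask the tool for each data-pool PG and compare against the mapping computed above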
- pg_count = self.fs.get_pgs_per_fs_pool()
- for pg_n in range(0, pg_count):
- pg_str = "{0}.{1}".format(self.fs.get_data_pool_id(), pg_n)
- out = self.fs.data_scan(["pg_files", "mydir", pg_str])
- lines = [l for l in out.split("\n") if l]
- log.info("{0}: {1}".format(pg_str, lines))
- self.assertSetEqual(set(lines), set(pgs_to_files[pg_str]))
-
- def test_scan_links(self):
- """
- The scan_links command fixes linkage errors
- """
- self.mount_a.run_shell(["mkdir", "testdir1"])
- self.mount_a.run_shell(["mkdir", "testdir2"])
- dir1_ino = self.mount_a.path_to_ino("testdir1")
- dir2_ino = self.mount_a.path_to_ino("testdir2")
- dirfrag1_oid = "{0:x}.00000000".format(dir1_ino)
- dirfrag2_oid = "{0:x}.00000000".format(dir2_ino)
-
- self.mount_a.run_shell(["touch", "testdir1/file1"])
- self.mount_a.run_shell(["ln", "testdir1/file1", "testdir1/link1"])
- self.mount_a.run_shell(["ln", "testdir1/file1", "testdir2/link2"])
-
- mds_id = self.fs.get_active_names()[0]
- self.fs.mds_asok(["flush", "journal"], mds_id)
-
- dirfrag1_keys = self._dirfrag_keys(dirfrag1_oid)
-
- # introduce duplicated primary link
- file1_key = "file1_head"
- self.assertIn(file1_key, dirfrag1_keys)
- file1_omap_data = self.fs.rados(["getomapval", dirfrag1_oid, file1_key, '-'])
- self.fs.rados(["setomapval", dirfrag2_oid, file1_key], stdin_data=file1_omap_data)
- self.assertIn(file1_key, self._dirfrag_keys(dirfrag2_oid))
-
- # remove a remote link, make inode link count incorrect
- link1_key = 'link1_head'
- self.assertIn(link1_key, dirfrag1_keys)
- self.fs.rados(["rmomapkey", dirfrag1_oid, link1_key])
-
- # increase good primary link's version
- self.mount_a.run_shell(["touch", "testdir1/file1"])
- self.mount_a.umount_wait()
-
- self.fs.mds_asok(["flush", "journal"], mds_id)
- self.fs.mds_stop()
- self.fs.mds_fail()
-
- # repair linkage errors
- self.fs.data_scan(["scan_links"])
-
- # primary link in testdir2 was deleted?
- self.assertNotIn(file1_key, self._dirfrag_keys(dirfrag2_oid))
-
- self.fs.mds_restart()
- self.fs.wait_for_daemons()
-
- self.mount_a.mount()
- self.mount_a.wait_until_mounted()
-
- # link count was adjusted?
- file1_nlink = self.mount_a.path_to_nlink("testdir1/file1")
- self.assertEqual(file1_nlink, 2)
diff --git a/src/ceph/qa/tasks/cephfs/test_dump_tree.py b/src/ceph/qa/tasks/cephfs/test_dump_tree.py
deleted file mode 100644
index 6d943f9..0000000
--- a/src/ceph/qa/tasks/cephfs/test_dump_tree.py
+++ /dev/null
@@ -1,66 +0,0 @@
-from tasks.cephfs.cephfs_test_case import CephFSTestCase
-import random
-import os
-
-class TestDumpTree(CephFSTestCase):
- def get_paths_to_ino(self):
- inos = {}
- p = self.mount_a.run_shell(["find", "./"])
- paths = p.stdout.getvalue().strip().split()
- for path in paths:
- inos[path] = self.mount_a.path_to_ino(path, False)
-
- return inos
-
- def populate(self):
- self.mount_a.run_shell(["git", "clone",
- "https://github.com/ceph/ceph-qa-suite"])
-
- def test_basic(self):
- self.mount_a.run_shell(["mkdir", "parent"])
- self.mount_a.run_shell(["mkdir", "parent/child"])
- self.mount_a.run_shell(["touch", "parent/child/file"])
- self.mount_a.run_shell(["mkdir", "parent/child/grandchild"])
- self.mount_a.run_shell(["touch", "parent/child/grandchild/file"])
-
- inos = self.get_paths_to_ino()
- tree = self.fs.mds_asok(["dump", "tree", "/parent/child", "1"])
-
- target_inos = [inos["./parent/child"], inos["./parent/child/file"],
- inos["./parent/child/grandchild"]]
-
- for ino in tree:
- del target_inos[target_inos.index(ino['ino'])] # don't catch!
-
- assert(len(target_inos) == 0)
-
- def test_random(self):
- random.seed(0)
-
- self.populate()
- inos = self.get_paths_to_ino()
- target = random.choice(inos.keys())
-
- if target != "./":
- target = os.path.dirname(target)
-
- subtree = [path for path in inos.keys() if path.startswith(target)]
- target_inos = [inos[path] for path in subtree]
- tree = self.fs.mds_asok(["dump", "tree", target[1:]])
-
- for ino in tree:
- del target_inos[target_inos.index(ino['ino'])] # don't catch!
-
- assert(len(target_inos) == 0)
-
- target_depth = target.count('/')
- maxdepth = max([path.count('/') for path in subtree]) - target_depth
- depth = random.randint(0, maxdepth)
- target_inos = [inos[path] for path in subtree \
- if path.count('/') <= depth + target_depth]
- tree = self.fs.mds_asok(["dump", "tree", target[1:], str(depth)])
-
- for ino in tree:
- del target_inos[target_inos.index(ino['ino'])] # don't catch!
-
- assert(len(target_inos) == 0)
diff --git a/src/ceph/qa/tasks/cephfs/test_exports.py b/src/ceph/qa/tasks/cephfs/test_exports.py
deleted file mode 100644
index 913999d..0000000
--- a/src/ceph/qa/tasks/cephfs/test_exports.py
+++ /dev/null
@@ -1,107 +0,0 @@
-import logging
-import time
-from tasks.cephfs.fuse_mount import FuseMount
-from tasks.cephfs.cephfs_test_case import CephFSTestCase
-
-log = logging.getLogger(__name__)
-
-class TestExports(CephFSTestCase):
- MDSS_REQUIRED = 2
-
- def _wait_subtrees(self, status, rank, test):
- timeout = 30
- pause = 2
- test = sorted(test)
- for i in range(timeout/pause):
- subtrees = self.fs.mds_asok(["get", "subtrees"], mds_id=status.get_rank(self.fs.id, rank)['name'])
- subtrees = filter(lambda s: s['dir']['path'].startswith('/'), subtrees)
- filtered = sorted([(s['dir']['path'], s['auth_first']) for s in subtrees])
- log.info("%s =?= %s", filtered, test)
- if filtered == test:
- # Confirm export_pin in output is correct:
- for s in subtrees:
- self.assertTrue(s['export_pin'] == s['auth_first'])
- return subtrees
- time.sleep(pause)
- raise RuntimeError("rank {0} failed to reach desired subtree state", rank)
-
- def test_export_pin(self):
- self.fs.set_max_mds(2)
- self.fs.wait_for_daemons()
-
- status = self.fs.status()
-
- self.mount_a.run_shell(["mkdir", "-p", "1/2/3"])
- self._wait_subtrees(status, 0, [])
-
- # NOP
- self.mount_a.setfattr("1", "ceph.dir.pin", "-1")
- self._wait_subtrees(status, 0, [])
-
- # NOP (rank < -1)
- self.mount_a.setfattr("1", "ceph.dir.pin", "-2341")
- self._wait_subtrees(status, 0, [])
-
- # pin /1 to rank 1
- self.mount_a.setfattr("1", "ceph.dir.pin", "1")
- self._wait_subtrees(status, 1, [('/1', 1)])
-
- # Check export_targets is set properly
- status = self.fs.status()
- log.info(status)
- r0 = status.get_rank(self.fs.id, 0)
- self.assertTrue(sorted(r0['export_targets']) == [1])
-
- # redundant pin /1/2 to rank 1
- self.mount_a.setfattr("1/2", "ceph.dir.pin", "1")
- self._wait_subtrees(status, 1, [('/1', 1), ('/1/2', 1)])
-
- # change pin /1/2 to rank 0
- self.mount_a.setfattr("1/2", "ceph.dir.pin", "0")
- self._wait_subtrees(status, 1, [('/1', 1), ('/1/2', 0)])
- self._wait_subtrees(status, 0, [('/1', 1), ('/1/2', 0)])
-
- # change pin /1/2/3 to (presently) non-existent rank 2
- self.mount_a.setfattr("1/2/3", "ceph.dir.pin", "2")
- self._wait_subtrees(status, 0, [('/1', 1), ('/1/2', 0)])
- self._wait_subtrees(status, 1, [('/1', 1), ('/1/2', 0)])
-
- # change pin /1/2 back to rank 1
- self.mount_a.setfattr("1/2", "ceph.dir.pin", "1")
- self._wait_subtrees(status, 1, [('/1', 1), ('/1/2', 1)])
-
- # add another directory pinned to 1
- self.mount_a.run_shell(["mkdir", "-p", "1/4/5"])
- self.mount_a.setfattr("1/4/5", "ceph.dir.pin", "1")
- self._wait_subtrees(status, 1, [('/1', 1), ('/1/2', 1), ('/1/4/5', 1)])
-
- # change pin /1 to 0
- self.mount_a.setfattr("1", "ceph.dir.pin", "0")
- self._wait_subtrees(status, 0, [('/1', 0), ('/1/2', 1), ('/1/4/5', 1)])
-
- # change pin /1/2 to default (-1); does the subtree root properly respect its parent's pin?
- self.mount_a.setfattr("1/2", "ceph.dir.pin", "-1")
- self._wait_subtrees(status, 0, [('/1', 0), ('/1/4/5', 1)])
-
- if len(list(status.get_standbys())):
- self.fs.set_max_mds(3)
- self.fs.wait_for_state('up:active', rank=2)
- self._wait_subtrees(status, 0, [('/1', 0), ('/1/4/5', 1), ('/1/2/3', 2)])
-
- # Check export_targets is set properly
- status = self.fs.status()
- log.info(status)
- r0 = status.get_rank(self.fs.id, 0)
- self.assertTrue(sorted(r0['export_targets']) == [1,2])
- r1 = status.get_rank(self.fs.id, 1)
- self.assertTrue(sorted(r1['export_targets']) == [0])
- r2 = status.get_rank(self.fs.id, 2)
- self.assertTrue(sorted(r2['export_targets']) == [])
-
- # Test rename
- self.mount_a.run_shell(["mkdir", "-p", "a/b", "aa/bb"])
- self.mount_a.setfattr("a", "ceph.dir.pin", "1")
- self.mount_a.setfattr("aa/bb", "ceph.dir.pin", "0")
- self._wait_subtrees(status, 0, [('/1', 0), ('/1/4/5', 1), ('/1/2/3', 2), ('/a', 1), ('/aa/bb', 0)])
- self.mount_a.run_shell(["mv", "aa", "a/b/"])
- self._wait_subtrees(status, 0, [('/1', 0), ('/1/4/5', 1), ('/1/2/3', 2), ('/a', 1), ('/a/b/aa/bb', 0)])
diff --git a/src/ceph/qa/tasks/cephfs/test_failover.py b/src/ceph/qa/tasks/cephfs/test_failover.py
deleted file mode 100644
index 9d3392c..0000000
--- a/src/ceph/qa/tasks/cephfs/test_failover.py
+++ /dev/null
@@ -1,645 +0,0 @@
-import json
-import logging
-from unittest import case, SkipTest
-
-from cephfs_test_case import CephFSTestCase
-from teuthology.exceptions import CommandFailedError
-from teuthology import misc as teuthology
-from tasks.cephfs.fuse_mount import FuseMount
-
-log = logging.getLogger(__name__)
-
-
-class TestFailover(CephFSTestCase):
- CLIENTS_REQUIRED = 1
- MDSS_REQUIRED = 2
-
- def test_simple(self):
- """
- That when the active MDS is killed, a standby MDS is promoted into
- its rank after the grace period.
-
- This is just a simple unit test, the harder cases are covered
- in thrashing tests.
- """
-
- # Need all my standbys up as well as the active daemons
- self.wait_for_daemon_start()
-
- (original_active, ) = self.fs.get_active_names()
- original_standbys = self.mds_cluster.get_standby_daemons()
-
- # Kill the rank 0 daemon's physical process
- self.fs.mds_stop(original_active)
-
- grace = float(self.fs.get_config("mds_beacon_grace", service_type="mon"))
-
- # Wait until the monitor promotes his replacement
- def promoted():
- active = self.fs.get_active_names()
- return active and active[0] in original_standbys
-
- log.info("Waiting for promotion of one of the original standbys {0}".format(
- original_standbys))
- self.wait_until_true(
- promoted,
- timeout=grace*2)
-
- # Start the original rank 0 daemon up again, see that he becomes a standby
- self.fs.mds_restart(original_active)
- self.wait_until_true(
- lambda: original_active in self.mds_cluster.get_standby_daemons(),
- timeout=60 # Approximately long enough for MDS to start and mon to notice
- )
-
- def test_client_abort(self):
- """
- That a client will respect fuse_require_active_mds and error out
- when the cluster appears to be unavailable.
- """
-
- if not isinstance(self.mount_a, FuseMount):
- raise SkipTest("Requires FUSE client to inject client metadata")
-
- require_active = self.fs.get_config("fuse_require_active_mds", service_type="mon").lower() == "true"
- if not require_active:
- raise case.SkipTest("fuse_require_active_mds is not set")
-
- grace = float(self.fs.get_config("mds_beacon_grace", service_type="mon"))
-
- # Check it's not laggy to begin with
- (original_active, ) = self.fs.get_active_names()
- self.assertNotIn("laggy_since", self.fs.mon_manager.get_mds_status(original_active))
-
- self.mounts[0].umount_wait()
-
- # Control: that we can mount and unmount usually, while the cluster is healthy
- self.mounts[0].mount()
- self.mounts[0].wait_until_mounted()
- self.mounts[0].umount_wait()
-
- # Stop the daemon processes
- self.fs.mds_stop()
-
- # Wait for everyone to go laggy
- def laggy():
- mdsmap = self.fs.get_mds_map()
- for info in mdsmap['info'].values():
- if "laggy_since" not in info:
- return False
-
- return True
-
- self.wait_until_true(laggy, grace * 2)
- with self.assertRaises(CommandFailedError):
- self.mounts[0].mount()
-
- def test_standby_count_wanted(self):
- """
- That cluster health warnings are generated by insufficient standbys available.
- """
-
- # Need all my standbys up as well as the active daemons
- self.wait_for_daemon_start()
-
- grace = float(self.fs.get_config("mds_beacon_grace", service_type="mon"))
-
- standbys = self.mds_cluster.get_standby_daemons()
- self.assertGreaterEqual(len(standbys), 1)
- self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', str(len(standbys)))
-
- # Kill a standby and check for warning
- victim = standbys.pop()
- self.fs.mds_stop(victim)
- log.info("waiting for insufficient standby daemon warning")
- self.wait_for_health("MDS_INSUFFICIENT_STANDBY", grace*2)
-
- # restart the standby, see that he becomes a standby, check health clears
- self.fs.mds_restart(victim)
- self.wait_until_true(
- lambda: victim in self.mds_cluster.get_standby_daemons(),
- timeout=60 # Approximately long enough for MDS to start and mon to notice
- )
- self.wait_for_health_clear(timeout=30)
-
- # Set it one greater than standbys ever seen
- standbys = self.mds_cluster.get_standby_daemons()
- self.assertGreaterEqual(len(standbys), 1)
- self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', str(len(standbys)+1))
- log.info("waiting for insufficient standby daemon warning")
- self.wait_for_health("MDS_INSUFFICIENT_STANDBY", grace*2)
-
- # Set it to 0
- self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', '0')
- self.wait_for_health_clear(timeout=30)
-
-
-
-
-class TestStandbyReplay(CephFSTestCase):
- MDSS_REQUIRED = 4
- REQUIRE_FILESYSTEM = False
-
- def set_standby_for(self, leader, follower, replay):
- self.set_conf("mds.{0}".format(follower), "mds_standby_for_name", leader)
- if replay:
- self.set_conf("mds.{0}".format(follower), "mds_standby_replay", "true")
-
- def get_info_by_name(self, mds_name):
- status = self.mds_cluster.status()
- info = status.get_mds(mds_name)
- if info is None:
- log.warn(str(status))
- raise RuntimeError("MDS '{0}' not found".format(mds_name))
- else:
- return info
-
- def test_standby_replay_unused(self):
- # Pick out exactly 3 daemons to be run during test
- use_daemons = sorted(self.mds_cluster.mds_ids[0:3])
- mds_a, mds_b, mds_c = use_daemons
- log.info("Using MDS daemons: {0}".format(use_daemons))
-
- # B and C should both follow A, but only one will
- # really get into standby replay state.
- self.set_standby_for(mds_a, mds_b, True)
- self.set_standby_for(mds_a, mds_c, True)
-
- # Create FS and start A
- fs_a = self.mds_cluster.newfs("alpha")
- self.mds_cluster.mds_restart(mds_a)
- fs_a.wait_for_daemons()
- self.assertEqual(fs_a.get_active_names(), [mds_a])
-
- # Start B, he should go into standby replay
- self.mds_cluster.mds_restart(mds_b)
- self.wait_for_daemon_start([mds_b])
- info_b = self.get_info_by_name(mds_b)
- self.assertEqual(info_b['state'], "up:standby-replay")
- self.assertEqual(info_b['standby_for_name'], mds_a)
- self.assertEqual(info_b['rank'], 0)
-
- # Start C, he should go into standby (*not* replay)
- self.mds_cluster.mds_restart(mds_c)
- self.wait_for_daemon_start([mds_c])
- info_c = self.get_info_by_name(mds_c)
- self.assertEqual(info_c['state'], "up:standby")
- self.assertEqual(info_c['standby_for_name'], mds_a)
- self.assertEqual(info_c['rank'], -1)
-
- # Kill B, C should go into standby replay
- self.mds_cluster.mds_stop(mds_b)
- self.mds_cluster.mds_fail(mds_b)
- self.wait_until_equal(
- lambda: self.get_info_by_name(mds_c)['state'],
- "up:standby-replay",
- 60)
- info_c = self.get_info_by_name(mds_c)
- self.assertEqual(info_c['state'], "up:standby-replay")
- self.assertEqual(info_c['standby_for_name'], mds_a)
- self.assertEqual(info_c['rank'], 0)
-
- def test_standby_failure(self):
- """
- That the failure of a standby-replay daemon happens cleanly
- and doesn't interrupt anything else.
- """
- # Pick out exactly 2 daemons to be run during test
- use_daemons = sorted(self.mds_cluster.mds_ids[0:2])
- mds_a, mds_b = use_daemons
- log.info("Using MDS daemons: {0}".format(use_daemons))
-
- # Configure the two MDSs to be standby for each other (only mds_b as standby-replay)
- self.set_standby_for(mds_a, mds_b, True)
- self.set_standby_for(mds_b, mds_a, False)
-
- # Create FS alpha and get mds_a to come up as active
- fs_a = self.mds_cluster.newfs("alpha")
- self.mds_cluster.mds_restart(mds_a)
- fs_a.wait_for_daemons()
- self.assertEqual(fs_a.get_active_names(), [mds_a])
-
- # Start the standbys
- self.mds_cluster.mds_restart(mds_b)
- self.wait_for_daemon_start([mds_b])
-
- # See the standby come up as the correct rank
- info_b = self.get_info_by_name(mds_b)
- self.assertEqual(info_b['state'], "up:standby-replay")
- self.assertEqual(info_b['standby_for_name'], mds_a)
- self.assertEqual(info_b['rank'], 0)
-
- # Kill the standby
- self.mds_cluster.mds_stop(mds_b)
- self.mds_cluster.mds_fail(mds_b)
-
- # See that the standby is gone and the active remains
- self.assertEqual(fs_a.get_active_names(), [mds_a])
- mds_map = fs_a.get_mds_map()
- self.assertEqual(len(mds_map['info']), 1)
- self.assertEqual(mds_map['failed'], [])
- self.assertEqual(mds_map['damaged'], [])
- self.assertEqual(mds_map['stopped'], [])
-
- def test_rank_stopped(self):
- """
- That when a rank is STOPPED, standby replays for
- that rank get torn down
- """
- # Pick out exactly 2 daemons to be run during test
- use_daemons = sorted(self.mds_cluster.mds_ids[0:4])
- mds_a, mds_b, mds_a_s, mds_b_s = use_daemons
- log.info("Using MDS daemons: {0}".format(use_daemons))
-
- # a and b both get a standby
- self.set_standby_for(mds_a, mds_a_s, True)
- self.set_standby_for(mds_b, mds_b_s, True)
-
- # Create FS alpha and get mds_a to come up as active
- fs_a = self.mds_cluster.newfs("alpha")
- fs_a.set_max_mds(2)
-
- self.mds_cluster.mds_restart(mds_a)
- self.wait_until_equal(lambda: fs_a.get_active_names(), [mds_a], 30)
- self.mds_cluster.mds_restart(mds_b)
- fs_a.wait_for_daemons()
- self.assertEqual(sorted(fs_a.get_active_names()), [mds_a, mds_b])
-
- # Start the standbys
- self.mds_cluster.mds_restart(mds_b_s)
- self.wait_for_daemon_start([mds_b_s])
- self.mds_cluster.mds_restart(mds_a_s)
- self.wait_for_daemon_start([mds_a_s])
- info_b_s = self.get_info_by_name(mds_b_s)
- self.assertEqual(info_b_s['state'], "up:standby-replay")
- info_a_s = self.get_info_by_name(mds_a_s)
- self.assertEqual(info_a_s['state'], "up:standby-replay")
-
- # Shrink the cluster
- fs_a.set_max_mds(1)
- fs_a.mon_manager.raw_cluster_cmd("mds", "stop", "{0}:1".format(fs_a.name))
- self.wait_until_equal(
- lambda: fs_a.get_active_names(), [mds_a],
- 60
- )
-
- # Both 'b' and 'b_s' should go back to being standbys
- self.wait_until_equal(
- lambda: self.mds_cluster.get_standby_daemons(), {mds_b, mds_b_s},
- 60
- )
-
-
-class TestMultiFilesystems(CephFSTestCase):
- CLIENTS_REQUIRED = 2
- MDSS_REQUIRED = 4
-
- # We'll create our own filesystems and start our own daemons
- REQUIRE_FILESYSTEM = False
-
- def setUp(self):
- super(TestMultiFilesystems, self).setUp()
- self.mds_cluster.mon_manager.raw_cluster_cmd("fs", "flag", "set",
- "enable_multiple", "true",
- "--yes-i-really-mean-it")
-
- def _setup_two(self):
- fs_a = self.mds_cluster.newfs("alpha")
- fs_b = self.mds_cluster.newfs("bravo")
-
- self.mds_cluster.mds_restart()
-
- # Wait for both filesystems to go healthy
- fs_a.wait_for_daemons()
- fs_b.wait_for_daemons()
-
- # Reconfigure client auth caps
- for mount in self.mounts:
- self.mds_cluster.mon_manager.raw_cluster_cmd_result(
- 'auth', 'caps', "client.{0}".format(mount.client_id),
- 'mds', 'allow',
- 'mon', 'allow r',
- 'osd', 'allow rw pool={0}, allow rw pool={1}'.format(
- fs_a.get_data_pool_name(), fs_b.get_data_pool_name()))
-
- return fs_a, fs_b
-
- def test_clients(self):
- fs_a, fs_b = self._setup_two()
-
- # Mount a client on fs_a
- self.mount_a.mount(mount_fs_name=fs_a.name)
- self.mount_a.write_n_mb("pad.bin", 1)
- self.mount_a.write_n_mb("test.bin", 2)
- a_created_ino = self.mount_a.path_to_ino("test.bin")
- self.mount_a.create_files()
-
- # Mount a client on fs_b
- self.mount_b.mount(mount_fs_name=fs_b.name)
- self.mount_b.write_n_mb("test.bin", 1)
- b_created_ino = self.mount_b.path_to_ino("test.bin")
- self.mount_b.create_files()
-
- # Check that a non-default filesystem mount survives an MDS
- # failover (i.e. that map subscription is continuous, not
- # just the first time), reproduces #16022
- old_fs_b_mds = fs_b.get_active_names()[0]
- self.mds_cluster.mds_stop(old_fs_b_mds)
- self.mds_cluster.mds_fail(old_fs_b_mds)
- fs_b.wait_for_daemons()
- background = self.mount_b.write_background()
- # Raise exception if the write doesn't finish (i.e. if client
- # has not kept up with MDS failure)
- try:
- self.wait_until_true(lambda: background.finished, timeout=30)
- except RuntimeError:
- # The mount is stuck, we'll have to force it to fail cleanly
- background.stdin.close()
- self.mount_b.umount_wait(force=True)
- raise
-
- self.mount_a.umount_wait()
- self.mount_b.umount_wait()
-
- # See that the client's files went into the correct pool
- self.assertTrue(fs_a.data_objects_present(a_created_ino, 1024 * 1024))
- self.assertTrue(fs_b.data_objects_present(b_created_ino, 1024 * 1024))
-
- def test_standby(self):
- fs_a, fs_b = self._setup_two()
-
- # Assert that the remaining two MDS daemons are now standbys
- a_daemons = fs_a.get_active_names()
- b_daemons = fs_b.get_active_names()
- self.assertEqual(len(a_daemons), 1)
- self.assertEqual(len(b_daemons), 1)
- original_a = a_daemons[0]
- original_b = b_daemons[0]
- expect_standby_daemons = set(self.mds_cluster.mds_ids) - (set(a_daemons) | set(b_daemons))
-
- # Need all my standbys up as well as the active daemons
- self.wait_for_daemon_start()
- self.assertEqual(expect_standby_daemons, self.mds_cluster.get_standby_daemons())
-
- # Kill fs_a's active MDS, see a standby take over
- self.mds_cluster.mds_stop(original_a)
- self.mds_cluster.mon_manager.raw_cluster_cmd("mds", "fail", original_a)
- self.wait_until_equal(lambda: len(fs_a.get_active_names()), 1, 30,
- reject_fn=lambda v: v > 1)
- # Assert that it's a *different* daemon that has now appeared in the map for fs_a
- self.assertNotEqual(fs_a.get_active_names()[0], original_a)
-
- # Kill fs_b's active MDS, see a standby take over
- self.mds_cluster.mds_stop(original_b)
- self.mds_cluster.mon_manager.raw_cluster_cmd("mds", "fail", original_b)
- self.wait_until_equal(lambda: len(fs_b.get_active_names()), 1, 30,
- reject_fn=lambda v: v > 1)
- # Assert that it's a *different* daemon that has now appeared in the map for fs_b
- self.assertNotEqual(fs_b.get_active_names()[0], original_b)
-
- # Both of the original active daemons should be gone, and all standbys used up
- self.assertEqual(self.mds_cluster.get_standby_daemons(), set())
-
- # Restart the ones I killed, see them reappear as standbys
- self.mds_cluster.mds_restart(original_a)
- self.mds_cluster.mds_restart(original_b)
- self.wait_until_true(
- lambda: {original_a, original_b} == self.mds_cluster.get_standby_daemons(),
- timeout=30
- )
-
- def test_grow_shrink(self):
- # Usual setup...
- fs_a, fs_b = self._setup_two()
-
- # Increase max_mds on fs_b, see a standby take up the role
- fs_b.set_max_mds(2)
- self.wait_until_equal(lambda: len(fs_b.get_active_names()), 2, 30,
- reject_fn=lambda v: v > 2 or v < 1)
-
- # Increase max_mds on fs_a, see a standby take up the role
- fs_a.set_max_mds(2)
- self.wait_until_equal(lambda: len(fs_a.get_active_names()), 2, 30,
- reject_fn=lambda v: v > 2 or v < 1)
-
- # Shrink fs_b back to 1, see a daemon go back to standby
- fs_b.set_max_mds(1)
- fs_b.deactivate(1)
- self.wait_until_equal(lambda: len(fs_b.get_active_names()), 1, 30,
- reject_fn=lambda v: v > 2 or v < 1)
-
- # Grow fs_a up to 3, see the former fs_b daemon join it.
- fs_a.set_max_mds(3)
- self.wait_until_equal(lambda: len(fs_a.get_active_names()), 3, 60,
- reject_fn=lambda v: v > 3 or v < 2)
-
- def test_standby_for_name(self):
- # Pick out exactly 4 daemons to be run during test
- use_daemons = sorted(self.mds_cluster.mds_ids[0:4])
- mds_a, mds_b, mds_c, mds_d = use_daemons
- log.info("Using MDS daemons: {0}".format(use_daemons))
-
- def set_standby_for(leader, follower, replay):
- self.set_conf("mds.{0}".format(follower), "mds_standby_for_name", leader)
- if replay:
- self.set_conf("mds.{0}".format(follower), "mds_standby_replay", "true")
-
- # Configure two pairs of MDSs that are standby for each other
- set_standby_for(mds_a, mds_b, True)
- set_standby_for(mds_b, mds_a, False)
- set_standby_for(mds_c, mds_d, True)
- set_standby_for(mds_d, mds_c, False)
-
- # Create FS alpha and get mds_a to come up as active
- fs_a = self.mds_cluster.newfs("alpha")
- self.mds_cluster.mds_restart(mds_a)
- fs_a.wait_for_daemons()
- self.assertEqual(fs_a.get_active_names(), [mds_a])
-
- # Create FS bravo and get mds_c to come up as active
- fs_b = self.mds_cluster.newfs("bravo")
- self.mds_cluster.mds_restart(mds_c)
- fs_b.wait_for_daemons()
- self.assertEqual(fs_b.get_active_names(), [mds_c])
-
- # Start the standbys
- self.mds_cluster.mds_restart(mds_b)
- self.mds_cluster.mds_restart(mds_d)
- self.wait_for_daemon_start([mds_b, mds_d])
-
- def get_info_by_name(fs, mds_name):
- mds_map = fs.get_mds_map()
- for gid_str, info in mds_map['info'].items():
- if info['name'] == mds_name:
- return info
-
- log.warn(json.dumps(mds_map, indent=2))
- raise RuntimeError("MDS '{0}' not found in filesystem MDSMap".format(mds_name))
-
- # See both standbys come up as standby replay for the correct ranks
- # mds_b should be in filesystem alpha following mds_a
- info_b = get_info_by_name(fs_a, mds_b)
- self.assertEqual(info_b['state'], "up:standby-replay")
- self.assertEqual(info_b['standby_for_name'], mds_a)
- self.assertEqual(info_b['rank'], 0)
- # mds_d should be in filesystem bravo following mds_c
- info_d = get_info_by_name(fs_b, mds_d)
- self.assertEqual(info_d['state'], "up:standby-replay")
- self.assertEqual(info_d['standby_for_name'], mds_c)
- self.assertEqual(info_d['rank'], 0)
-
- # Kill both active daemons
- self.mds_cluster.mds_stop(mds_a)
- self.mds_cluster.mds_fail(mds_a)
- self.mds_cluster.mds_stop(mds_c)
- self.mds_cluster.mds_fail(mds_c)
-
- # Wait for standbys to take over
- fs_a.wait_for_daemons()
- self.assertEqual(fs_a.get_active_names(), [mds_b])
- fs_b.wait_for_daemons()
- self.assertEqual(fs_b.get_active_names(), [mds_d])
-
- # Start the original active daemons up again
- self.mds_cluster.mds_restart(mds_a)
- self.mds_cluster.mds_restart(mds_c)
- self.wait_for_daemon_start([mds_a, mds_c])
-
- self.assertEqual(set(self.mds_cluster.get_standby_daemons()),
- {mds_a, mds_c})
-
- def test_standby_for_rank(self):
- use_daemons = sorted(self.mds_cluster.mds_ids[0:4])
- mds_a, mds_b, mds_c, mds_d = use_daemons
- log.info("Using MDS daemons: {0}".format(use_daemons))
-
- def set_standby_for(leader_rank, leader_fs, follower_id):
- self.set_conf("mds.{0}".format(follower_id),
- "mds_standby_for_rank", leader_rank)
-
- fscid = leader_fs.get_namespace_id()
- self.set_conf("mds.{0}".format(follower_id),
- "mds_standby_for_fscid", fscid)
-
- fs_a = self.mds_cluster.newfs("alpha")
- fs_b = self.mds_cluster.newfs("bravo")
- set_standby_for(0, fs_a, mds_a)
- set_standby_for(0, fs_a, mds_b)
- set_standby_for(0, fs_b, mds_c)
- set_standby_for(0, fs_b, mds_d)
-
- self.mds_cluster.mds_restart(mds_a)
- fs_a.wait_for_daemons()
- self.assertEqual(fs_a.get_active_names(), [mds_a])
-
- self.mds_cluster.mds_restart(mds_c)
- fs_b.wait_for_daemons()
- self.assertEqual(fs_b.get_active_names(), [mds_c])
-
- self.mds_cluster.mds_restart(mds_b)
- self.mds_cluster.mds_restart(mds_d)
- self.wait_for_daemon_start([mds_b, mds_d])
-
- self.mds_cluster.mds_stop(mds_a)
- self.mds_cluster.mds_fail(mds_a)
- self.mds_cluster.mds_stop(mds_c)
- self.mds_cluster.mds_fail(mds_c)
-
- fs_a.wait_for_daemons()
- self.assertEqual(fs_a.get_active_names(), [mds_b])
- fs_b.wait_for_daemons()
- self.assertEqual(fs_b.get_active_names(), [mds_d])
-
- def test_standby_for_fscid(self):
- """
- That I can set a standby FSCID with no rank, and the result is
- that daemons join any rank for that filesystem.
- """
- use_daemons = sorted(self.mds_cluster.mds_ids[0:4])
- mds_a, mds_b, mds_c, mds_d = use_daemons
-
- log.info("Using MDS daemons: {0}".format(use_daemons))
-
- def set_standby_for(leader_fs, follower_id):
- fscid = leader_fs.get_namespace_id()
- self.set_conf("mds.{0}".format(follower_id),
- "mds_standby_for_fscid", fscid)
-
- # Create two filesystems which should have two ranks each
- fs_a = self.mds_cluster.newfs("alpha")
-
- fs_b = self.mds_cluster.newfs("bravo")
-
- fs_a.set_max_mds(2)
- fs_b.set_max_mds(2)
-
- # Set all the daemons to have a FSCID assignment but no other
- # standby preferences.
- set_standby_for(fs_a, mds_a)
- set_standby_for(fs_a, mds_b)
- set_standby_for(fs_b, mds_c)
- set_standby_for(fs_b, mds_d)
-
- # Now when we start all daemons at once, they should fall into
- # ranks in the right filesystem
- self.mds_cluster.mds_restart(mds_a)
- self.mds_cluster.mds_restart(mds_b)
- self.mds_cluster.mds_restart(mds_c)
- self.mds_cluster.mds_restart(mds_d)
- self.wait_for_daemon_start([mds_a, mds_b, mds_c, mds_d])
- fs_a.wait_for_daemons()
- fs_b.wait_for_daemons()
- self.assertEqual(set(fs_a.get_active_names()), {mds_a, mds_b})
- self.assertEqual(set(fs_b.get_active_names()), {mds_c, mds_d})
-
- def test_standby_for_invalid_fscid(self):
- """
- That an invalid standby_fscid does not cause a mon crash
- """
- use_daemons = sorted(self.mds_cluster.mds_ids[0:3])
- mds_a, mds_b, mds_c = use_daemons
- log.info("Using MDS daemons: {0}".format(use_daemons))
-
- def set_standby_for_rank(leader_rank, follower_id):
- self.set_conf("mds.{0}".format(follower_id),
- "mds_standby_for_rank", leader_rank)
-
- # Create one fs
- fs_a = self.mds_cluster.newfs("cephfs")
-
- # Get configured mons in the cluster, so we can see if any
- # crashed later.
- configured_mons = fs_a.mon_manager.get_mon_quorum()
-
- # Set the first two daemons to have a rank assignment but no other
- # standby preferences.
- set_standby_for_rank(0, mds_a)
- set_standby_for_rank(0, mds_b)
-
- # Set third daemon to have invalid fscid assignment and no other
- # standby preferences
- invalid_fscid = 123
- self.set_conf("mds.{0}".format(mds_c), "mds_standby_for_fscid", invalid_fscid)
-
- # Restart all the daemons so the standby preferences take effect
- self.mds_cluster.mds_restart(mds_a)
- self.mds_cluster.mds_restart(mds_b)
- self.mds_cluster.mds_restart(mds_c)
- self.wait_for_daemon_start([mds_a, mds_b, mds_c])
-
- # Stop the currently active MDS daemon of the filesystem
- if fs_a.get_active_names() == [mds_a]:
- self.mds_cluster.mds_stop(mds_a)
- self.mds_cluster.mds_fail(mds_a)
- fs_a.wait_for_daemons()
- else:
- self.mds_cluster.mds_stop(mds_b)
- self.mds_cluster.mds_fail(mds_b)
- fs_a.wait_for_daemons()
-
- # Get the mons currently in quorum
- active_mons = fs_a.mon_manager.get_mon_quorum()
-
- # Check that the quorum still matches the originally configured mons
- self.assertEqual(active_mons, configured_mons,
- "Not all mons are in quorum; the invalid standby fscid test failed!")
diff --git a/src/ceph/qa/tasks/cephfs/test_flush.py b/src/ceph/qa/tasks/cephfs/test_flush.py
deleted file mode 100644
index 1f84e42..0000000
--- a/src/ceph/qa/tasks/cephfs/test_flush.py
+++ /dev/null
@@ -1,113 +0,0 @@
-
-from textwrap import dedent
-from tasks.cephfs.cephfs_test_case import CephFSTestCase
-from tasks.cephfs.filesystem import ObjectNotFound, ROOT_INO
-
-
-class TestFlush(CephFSTestCase):
- def test_flush(self):
- self.mount_a.run_shell(["mkdir", "mydir"])
- self.mount_a.run_shell(["touch", "mydir/alpha"])
- dir_ino = self.mount_a.path_to_ino("mydir")
- file_ino = self.mount_a.path_to_ino("mydir/alpha")
-
- # Unmount the client so that it isn't still holding caps
- self.mount_a.umount_wait()
-
- # Before flush, the dirfrag object does not exist
- with self.assertRaises(ObjectNotFound):
- self.fs.list_dirfrag(dir_ino)
-
- # Before flush, the file's backtrace has not been written
- with self.assertRaises(ObjectNotFound):
- self.fs.read_backtrace(file_ino)
-
- # Before flush, there are no dentries in the root
- self.assertEqual(self.fs.list_dirfrag(ROOT_INO), [])
-
- # Execute flush
- flush_data = self.fs.mds_asok(["flush", "journal"])
- self.assertEqual(flush_data['return_code'], 0)
-
- # After flush, the dirfrag object has been created
- dir_list = self.fs.list_dirfrag(dir_ino)
- self.assertEqual(dir_list, ["alpha_head"])
-
- # And the 'mydir' dentry is in the root
- self.assertEqual(self.fs.list_dirfrag(ROOT_INO), ['mydir_head'])
-
- # ...and the data object has its backtrace
- backtrace = self.fs.read_backtrace(file_ino)
- self.assertEqual(['alpha', 'mydir'], [a['dname'] for a in backtrace['ancestors']])
- self.assertEqual([dir_ino, 1], [a['dirino'] for a in backtrace['ancestors']])
- self.assertEqual(file_ino, backtrace['ino'])
-
- # ...and the journal is truncated to just a single subtreemap from the
- # newly created segment
- summary_output = self.fs.journal_tool(["event", "get", "summary"])
- try:
- self.assertEqual(summary_output,
- dedent(
- """
- Events by type:
- SUBTREEMAP: 1
- Errors: 0
- """
- ).strip())
- except AssertionError:
- # In some states, flushing the journal will leave you
- # an extra event from locks a client held. This is
- # correct behaviour: the MDS is flushing the journal,
- # it's just that new events are getting added too.
- # In this case, we should nevertheless see a fully
- # empty journal after a second flush.
- self.assertEqual(summary_output,
- dedent(
- """
- Events by type:
- SUBTREEMAP: 1
- UPDATE: 1
- Errors: 0
- """
- ).strip())
- flush_data = self.fs.mds_asok(["flush", "journal"])
- self.assertEqual(flush_data['return_code'], 0)
- self.assertEqual(self.fs.journal_tool(["event", "get", "summary"]),
- dedent(
- """
- Events by type:
- SUBTREEMAP: 1
- Errors: 0
- """
- ).strip())
-
- # Now for deletion!
- # We will count the RADOS deletions and MDS file purges, to verify that
- # the expected behaviour is happening as a result of the purge
- initial_dels = self.fs.mds_asok(['perf', 'dump', 'objecter'])['objecter']['osdop_delete']
- initial_purges = self.fs.mds_asok(['perf', 'dump', 'mds_cache'])['mds_cache']['strays_enqueued']
-
- # Use a client to delete a file
- self.mount_a.mount()
- self.mount_a.wait_until_mounted()
- self.mount_a.run_shell(["rm", "-rf", "mydir"])
-
- # Flush the journal so that the directory inode can be purged
- flush_data = self.fs.mds_asok(["flush", "journal"])
- self.assertEqual(flush_data['return_code'], 0)
-
- # We expect to see at least two strays enqueued for purge (the file and its directory)
- self.wait_until_true(
- lambda: self.fs.mds_asok(['perf', 'dump', 'mds_cache'])['mds_cache']['strays_enqueued'] - initial_purges >= 2,
- 60)
-
- # We expect two deletions, one of the dirfrag and one of the backtrace
- self.wait_until_true(
- lambda: self.fs.mds_asok(['perf', 'dump', 'objecter'])['objecter']['osdop_delete'] - initial_dels >= 2,
- 60) # timeout is fairly long to allow for tick+rados latencies
-
- with self.assertRaises(ObjectNotFound):
- self.fs.list_dirfrag(dir_ino)
- with self.assertRaises(ObjectNotFound):
- self.fs.read_backtrace(file_ino)
- self.assertEqual(self.fs.list_dirfrag(ROOT_INO), [])
diff --git a/src/ceph/qa/tasks/cephfs/test_forward_scrub.py b/src/ceph/qa/tasks/cephfs/test_forward_scrub.py
deleted file mode 100644
index ac912dd..0000000
--- a/src/ceph/qa/tasks/cephfs/test_forward_scrub.py
+++ /dev/null
@@ -1,291 +0,0 @@
-
-"""
-Test that the forward scrub functionality can traverse metadata and apply
-requested tags, on well formed metadata.
-
-This is *not* the real testing for forward scrub, which will need to test
-how the functionality responds to damaged metadata.
-
-"""
-import json
-
-import logging
-from collections import namedtuple
-from textwrap import dedent
-
-from teuthology.orchestra.run import CommandFailedError
-from tasks.cephfs.cephfs_test_case import CephFSTestCase
-
-import struct
-
-log = logging.getLogger(__name__)
-
-
-ValidationError = namedtuple("ValidationError", ["exception", "backtrace"])
-
-
-class TestForwardScrub(CephFSTestCase):
- MDSS_REQUIRED = 1
-
- def _read_str_xattr(self, pool, obj, attr):
- """
- Read a ceph-encoded string from a rados xattr
- """
- output = self.fs.rados(["getxattr", obj, attr], pool=pool)
- strlen = struct.unpack('i', output[0:4])[0]
- return output[4:(4 + strlen)]
-
- def _get_paths_to_ino(self):
- inos = {}
- p = self.mount_a.run_shell(["find", "./"])
- paths = p.stdout.getvalue().strip().split()
- for path in paths:
- inos[path] = self.mount_a.path_to_ino(path)
-
- return inos
-
- def test_apply_tag(self):
- self.mount_a.run_shell(["mkdir", "parentdir"])
- self.mount_a.run_shell(["mkdir", "parentdir/childdir"])
- self.mount_a.run_shell(["touch", "rfile"])
- self.mount_a.run_shell(["touch", "parentdir/pfile"])
- self.mount_a.run_shell(["touch", "parentdir/childdir/cfile"])
-
- # Build a structure mapping path to inode, as we will later want
- # to check object by object and objects are named after ino number
- inos = self._get_paths_to_ino()
-
- # Flush metadata: this is a friendly test of forward scrub so we're skipping
- # the part where it's meant to cope with dirty metadata
- self.mount_a.umount_wait()
- self.fs.mds_asok(["flush", "journal"])
-
- tag = "mytag"
-
- # Execute tagging forward scrub
- self.fs.mds_asok(["tag", "path", "/parentdir", tag])
- # Wait for completion
- import time
- time.sleep(10)
- # FIXME watching clog isn't a nice mechanism for this, once we have a ScrubMap we'll
- # watch that instead
-
- # Check that dirs were tagged
- for dirpath in ["./parentdir", "./parentdir/childdir"]:
- self.assertTagged(inos[dirpath], tag, self.fs.get_metadata_pool_name())
-
- # Check that files were tagged
- for filepath in ["./parentdir/pfile", "./parentdir/childdir/cfile"]:
- self.assertTagged(inos[filepath], tag, self.fs.get_data_pool_name())
-
- # This guy wasn't in the tag path, shouldn't have been tagged
- self.assertUntagged(inos["./rfile"])
-
- def assertUntagged(self, ino):
- file_obj_name = "{0:x}.00000000".format(ino)
- with self.assertRaises(CommandFailedError):
- self._read_str_xattr(
- self.fs.get_data_pool_name(),
- file_obj_name,
- "scrub_tag"
- )
-
- def assertTagged(self, ino, tag, pool):
- file_obj_name = "{0:x}.00000000".format(ino)
- wrote = self._read_str_xattr(
- pool,
- file_obj_name,
- "scrub_tag"
- )
- self.assertEqual(wrote, tag)
-
- def _validate_linkage(self, expected):
- inos = self._get_paths_to_ino()
- try:
- self.assertDictEqual(inos, expected)
- except AssertionError:
- log.error("Expected: {0}".format(json.dumps(expected, indent=2)))
- log.error("Actual: {0}".format(json.dumps(inos, indent=2)))
- raise
-
- def test_orphan_scan(self):
- # Create some files whose metadata we will flush
- self.mount_a.run_python(dedent("""
- import os
- mount_point = "{mount_point}"
- parent = os.path.join(mount_point, "parent")
- os.mkdir(parent)
- flushed = os.path.join(parent, "flushed")
- os.mkdir(flushed)
- for f in ["alpha", "bravo", "charlie"]:
- open(os.path.join(flushed, f), 'w').write(f)
- """.format(mount_point=self.mount_a.mountpoint)))
-
- inos = self._get_paths_to_ino()
-
- # Flush journal
- # Umount before flush to avoid cap releases putting
- # things we don't want in the journal later.
- self.mount_a.umount_wait()
- self.fs.mds_asok(["flush", "journal"])
-
- # Create a new inode that's just in the log, i.e. would
- # look orphaned to backward scan if backward scan wisnae
- # respectin' tha scrub_tag xattr.
- self.mount_a.mount()
- self.mount_a.run_shell(["mkdir", "parent/unflushed"])
- self.mount_a.run_shell(["dd", "if=/dev/urandom",
- "of=./parent/unflushed/jfile",
- "bs=1M", "count=8"])
- inos["./parent/unflushed"] = self.mount_a.path_to_ino("./parent/unflushed")
- inos["./parent/unflushed/jfile"] = self.mount_a.path_to_ino("./parent/unflushed/jfile")
- self.mount_a.umount_wait()
-
- # Orphan an inode by deleting its dentry
- # Our victim will be.... bravo.
- self.mount_a.umount_wait()
- self.fs.mds_stop()
- self.fs.mds_fail()
- self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
- self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)
- frag_obj_id = "{0:x}.00000000".format(inos["./parent/flushed"])
- self.fs.rados(["rmomapkey", frag_obj_id, "bravo_head"])
-
- self.fs.mds_restart()
- self.fs.wait_for_daemons()
-
- # See that the orphaned file is indeed missing from a client's POV
- self.mount_a.mount()
- damaged_state = self._get_paths_to_ino()
- self.assertNotIn("./parent/flushed/bravo", damaged_state)
- self.mount_a.umount_wait()
-
- # Run a tagging forward scrub
- tag = "mytag123"
- self.fs.mds_asok(["tag", "path", "/parent", tag])
-
- # See that the orphan wisnae tagged
- self.assertUntagged(inos['./parent/flushed/bravo'])
-
- # See that the flushed-metadata-and-still-present files are tagged
- self.assertTagged(inos['./parent/flushed/alpha'], tag, self.fs.get_data_pool_name())
- self.assertTagged(inos['./parent/flushed/charlie'], tag, self.fs.get_data_pool_name())
-
- # See that journalled-but-not-flushed file *was* tagged
- self.assertTagged(inos['./parent/unflushed/jfile'], tag, self.fs.get_data_pool_name())
-
- # Run cephfs-data-scan targeting only orphans
- self.fs.mds_stop()
- self.fs.mds_fail()
- self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()])
- self.fs.data_scan([
- "scan_inodes",
- "--filter-tag", tag,
- self.fs.get_data_pool_name()
- ])
-
- # After in-place injection stats should be kosher again
- self.fs.set_ceph_conf('mds', 'mds verify scatter', True)
- self.fs.set_ceph_conf('mds', 'mds debug scatterstat', True)
-
- # And we should have all the same linkage we started with,
- # and no lost+found, and no extra inodes!
- self.fs.mds_restart()
- self.fs.wait_for_daemons()
- self.mount_a.mount()
- self._validate_linkage(inos)
-
- def _stash_inotable(self):
- # Get all active ranks
- ranks = self.fs.get_all_mds_rank()
-
- inotable_dict = {}
- for rank in ranks:
- inotable_oid = "mds{rank:d}_".format(rank=rank) + "inotable"
- print "Trying to fetch inotable object: " + inotable_oid
-
- #self.fs.get_metadata_object("InoTable", "mds0_inotable")
- inotable_raw = self.fs.get_metadata_object_raw(inotable_oid)
- inotable_dict[inotable_oid] = inotable_raw
- return inotable_dict
-
- def test_inotable_sync(self):
- self.mount_a.write_n_mb("file1_sixmegs", 6)
-
- # Flush journal
- self.mount_a.umount_wait()
- self.fs.mds_asok(["flush", "journal"])
-
- inotable_copy = self._stash_inotable()
-
- self.mount_a.mount()
-
- self.mount_a.write_n_mb("file2_sixmegs", 6)
- self.mount_a.write_n_mb("file3_sixmegs", 6)
-
- inos = self._get_paths_to_ino()
-
- # Flush journal
- self.mount_a.umount_wait()
- self.fs.mds_asok(["flush", "journal"])
-
- self.mount_a.umount_wait()
-
- with self.assert_cluster_log("inode table repaired", invert_match=True):
- self.fs.mds_asok(["scrub_path", "/", "repair", "recursive"])
-
- self.mds_cluster.mds_stop()
- self.mds_cluster.mds_fail()
-
- # Truncate the journal (to ensure the inotable on disk
- # is all that will be in the InoTable in memory)
-
- self.fs.journal_tool(["event", "splice",
- "--inode={0}".format(inos["./file2_sixmegs"]), "summary"])
-
- self.fs.journal_tool(["event", "splice",
- "--inode={0}".format(inos["./file3_sixmegs"]), "summary"])
-
- # Revert to old inotable.
- for key, value in inotable_copy.iteritems():
- self.fs.put_metadata_object_raw(key, value)
-
- self.mds_cluster.mds_restart()
- self.fs.wait_for_daemons()
-
- with self.assert_cluster_log("inode table repaired"):
- self.fs.mds_asok(["scrub_path", "/", "repair", "recursive"])
-
- self.mds_cluster.mds_stop()
- table_text = self.fs.table_tool(["0", "show", "inode"])
- table = json.loads(table_text)
- self.assertGreater(
- table['0']['data']['inotable']['free'][0]['start'],
- inos['./file3_sixmegs'])
-
- def test_backtrace_repair(self):
- """
- That the MDS can repair an inode's backtrace in the data pool
- if it is found to be damaged.
- """
- # Create a file for subsequent checks
- self.mount_a.run_shell(["mkdir", "parent_a"])
- self.mount_a.run_shell(["touch", "parent_a/alpha"])
- file_ino = self.mount_a.path_to_ino("parent_a/alpha")
-
- # That backtrace and layout are written after initial flush
- self.fs.mds_asok(["flush", "journal"])
- backtrace = self.fs.read_backtrace(file_ino)
- self.assertEqual(['alpha', 'parent_a'],
- [a['dname'] for a in backtrace['ancestors']])
-
- # Go corrupt the backtrace
- self.fs._write_data_xattr(file_ino, "parent",
- "oh i'm sorry did i overwrite your xattr?")
-
- with self.assert_cluster_log("bad backtrace on inode"):
- self.fs.mds_asok(["scrub_path", "/", "repair", "recursive"])
- self.fs.mds_asok(["flush", "journal"])
- backtrace = self.fs.read_backtrace(file_ino)
- self.assertEqual(['alpha', 'parent_a'],
- [a['dname'] for a in backtrace['ancestors']])
diff --git a/src/ceph/qa/tasks/cephfs/test_fragment.py b/src/ceph/qa/tasks/cephfs/test_fragment.py
deleted file mode 100644
index a62ef74..0000000
--- a/src/ceph/qa/tasks/cephfs/test_fragment.py
+++ /dev/null
@@ -1,232 +0,0 @@
-
-
-from tasks.cephfs.cephfs_test_case import CephFSTestCase
-from teuthology.orchestra import run
-
-import logging
-log = logging.getLogger(__name__)
-
-
-class TestFragmentation(CephFSTestCase):
- CLIENTS_REQUIRED = 1
- MDSS_REQUIRED = 1
-
- def get_splits(self):
- return self.fs.mds_asok(['perf', 'dump', 'mds'])['mds']['dir_split']
-
- def get_merges(self):
- return self.fs.mds_asok(['perf', 'dump', 'mds'])['mds']['dir_merge']
-
- def get_dir_ino(self, path):
- dir_cache = self.fs.read_cache(path, 0)
- dir_ino = None
- dir_inono = self.mount_a.path_to_ino(path.strip("/"))
- for ino in dir_cache:
- if ino['ino'] == dir_inono:
- dir_ino = ino
- break
- self.assertIsNotNone(dir_ino)
- return dir_ino
-
- def _configure(self, **kwargs):
- """
- Apply kwargs as MDS configuration settings, enable dirfrags
- and restart the MDSs.
- """
- kwargs['mds_bal_frag'] = "true"
-
- for k, v in kwargs.items():
- self.ceph_cluster.set_ceph_conf("mds", k, v.__str__())
-
- self.fs.set_allow_dirfrags(True)
-
- self.mds_cluster.mds_fail_restart()
- self.fs.wait_for_daemons()
-
- def test_oversize(self):
- """
- That a directory is split when it becomes too large.
- """
-
- split_size = 20
- merge_size = 5
-
- self._configure(
- mds_bal_split_size=split_size,
- mds_bal_merge_size=merge_size,
- mds_bal_split_bits=1
- )
-
- self.assertEqual(self.get_splits(), 0)
-
- self.mount_a.create_n_files("splitdir/file", split_size + 1)
-
- self.wait_until_true(
- lambda: self.get_splits() == 1,
- timeout=30
- )
-
- frags = self.get_dir_ino("/splitdir")['dirfrags']
- self.assertEqual(len(frags), 2)
- self.assertEqual(frags[0]['dirfrag'], "0x10000000000.0*")
- self.assertEqual(frags[1]['dirfrag'], "0x10000000000.1*")
- self.assertEqual(
- sum([len(f['dentries']) for f in frags]),
- split_size + 1
- )
-
- self.assertEqual(self.get_merges(), 0)
-
- self.mount_a.run_shell(["rm", "-f", run.Raw("splitdir/file*")])
-
- self.wait_until_true(
- lambda: self.get_merges() == 1,
- timeout=30
- )
-
- self.assertEqual(len(self.get_dir_ino("/splitdir")["dirfrags"]), 1)
-
- def test_rapid_creation(self):
- """
- That the fast-splitting limit of 1.5x the normal split size is
- applied when creating dentries quickly.
- """
-
- split_size = 100
- merge_size = 1
-
- self._configure(
- mds_bal_split_size=split_size,
- mds_bal_merge_size=merge_size,
- mds_bal_split_bits=3,
- mds_bal_fragment_size_max=int(split_size * 1.5 + 2)
- )
-
- # We test this only at a single split level. If a client was sending
- # IO so fast that it hit a second split before the first split
- # was complete, it could violate mds_bal_fragment_size_max -- there
- # is a window where the child dirfrags of a split are unfrozen
- # (so they can grow), but still have STATE_FRAGMENTING (so they
- # can't be split).
-
- # By writing 4x the split size when the split bits are set
- # to 3 (i.e. 4-ways), I am reasonably sure to see precisely
- # one split. The test is to check whether that split
- # happens soon enough that the client doesn't exceed
- # 2x the split_size (the "immediate" split mode should
- # kick in at 1.5x the split size).
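- # Concretely, with split_size=100 here: the immediate split should fire at
- # around 150 dentries and mds_bal_fragment_size_max is 152, while we create
- # 400 files in total.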
-
- self.assertEqual(self.get_splits(), 0)
- self.mount_a.create_n_files("splitdir/file", split_size * 4)
- self.wait_until_equal(
- self.get_splits,
- 1,
- reject_fn=lambda s: s > 1,
- timeout=30
- )
-
- def test_deep_split(self):
- """
- That when the directory grows many times larger than split size,
- the fragments get split again.
- """
-
- split_size = 100
- merge_size = 1 # i.e. don't merge frag unless it's empty
- split_bits = 1
-
- branch_factor = 2**split_bits
-
- # Arbitrary: how many levels shall we try fragmenting before
- # ending the test?
- max_depth = 5
-
- self._configure(
- mds_bal_split_size=split_size,
- mds_bal_merge_size=merge_size,
- mds_bal_split_bits=split_bits
- )
-
- # Each iteration we will create another level of fragments. The
- # placement of dentries into fragments is by hashes (i.e. pseudo
- # random), so we rely on statistics to get the behaviour that
- # by writing about 1.5x as many dentries as the split_size times
- # the number of frags, we will get them all to exceed their
- # split size and trigger a split.
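- # (e.g. with split_size=100 and split_bits=1: depth 0 targets 150 files in
- # one frag, depth 1 targets 300 files across two frags, and so on)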
- depth = 0
- files_written = 0
- splits_expected = 0
- while depth < max_depth:
- log.info("Writing files for depth {0}".format(depth))
- target_files = branch_factor**depth * int(split_size * 1.5)
- create_files = target_files - files_written
-
- self.ceph_cluster.mon_manager.raw_cluster_cmd("log",
- "{0} Writing {1} files (depth={2})".format(
- self.__class__.__name__, create_files, depth
- ))
- self.mount_a.create_n_files("splitdir/file_{0}".format(depth),
- create_files)
- self.ceph_cluster.mon_manager.raw_cluster_cmd("log",
- "{0} Done".format(self.__class__.__name__))
-
- files_written += create_files
- log.info("Now have {0} files".format(files_written))
-
- splits_expected += branch_factor**depth
- log.info("Waiting to see {0} splits".format(splits_expected))
- try:
- self.wait_until_equal(
- self.get_splits,
- splits_expected,
- timeout=30,
- reject_fn=lambda x: x > splits_expected
- )
-
- frags = self.get_dir_ino("/splitdir")['dirfrags']
- self.assertEqual(len(frags), branch_factor**(depth+1))
- self.assertEqual(
- sum([len(f['dentries']) for f in frags]),
- target_files
- )
- except:
- # On failures, log what fragmentation we actually ended
- # up with. This block is just for logging, at the end
- # we raise the exception again.
- frags = self.get_dir_ino("/splitdir")['dirfrags']
- log.info("depth={0} splits_expected={1} files_written={2}".format(
- depth, splits_expected, files_written
- ))
- log.info("Dirfrags:")
- for f in frags:
- log.info("{0}: {1}".format(
- f['dirfrag'], len(f['dentries'])
- ))
- raise
-
- depth += 1
-
- # Remember the inode number because we will be checking for
- # objects later.
- dir_inode_no = self.mount_a.path_to_ino("splitdir")
-
- self.mount_a.run_shell(["rm", "-rf", "splitdir/"])
- self.mount_a.umount_wait()
-
- self.fs.mds_asok(['flush', 'journal'])
-
- # Wait for all strays to purge
- self.wait_until_equal(
- lambda: self.fs.mds_asok(['perf', 'dump', 'mds_cache']
- )['mds_cache']['num_strays'],
- 0,
- timeout=1200
- )
- # Check that the metadata pool objects for all the myriad
- # child fragments are gone
- metadata_objs = self.fs.rados(["ls"])
- frag_objs = []
- for o in metadata_objs:
- if o.startswith("{0:x}.".format(dir_inode_no)):
- frag_objs.append(o)
- self.assertListEqual(frag_objs, [])
diff --git a/src/ceph/qa/tasks/cephfs/test_full.py b/src/ceph/qa/tasks/cephfs/test_full.py
deleted file mode 100644
index e69ccb3..0000000
--- a/src/ceph/qa/tasks/cephfs/test_full.py
+++ /dev/null
@@ -1,414 +0,0 @@
-
-
-import json
-import logging
-import os
-from textwrap import dedent
-import time
-from teuthology.orchestra.run import CommandFailedError
-from tasks.cephfs.fuse_mount import FuseMount
-from tasks.cephfs.cephfs_test_case import CephFSTestCase
-
-
-log = logging.getLogger(__name__)
-
-
-class FullnessTestCase(CephFSTestCase):
- CLIENTS_REQUIRED = 2
-
- # Subclasses define whether they're filling whole cluster or just data pool
- data_only = False
-
- # Subclasses define how many bytes should be written to achieve fullness
- pool_capacity = None
- fill_mb = None
-
- # Subclasses define what fullness means to them
- def is_full(self):
- raise NotImplementedError()
-
- def setUp(self):
- CephFSTestCase.setUp(self)
-
- # These tests just use a single active MDS throughout, so remember its ID
- # for use in mds_asok calls
- self.active_mds_id = self.fs.get_active_names()[0]
-
- # Capture the initial OSD map epoch for later use
- self.initial_osd_epoch = json.loads(
- self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json").strip()
- )['epoch']
-
- # Check the initial barrier epoch on the MDS: this should be
- # set to the latest map at MDS startup. We do this check in
- # setUp to get in there before subclasses might touch things
- # in their own setUp functions.
- self.assertGreaterEqual(self.fs.mds_asok(["status"], mds_id=self.active_mds_id)['osdmap_epoch_barrier'],
- self.initial_osd_epoch)
-
- def test_barrier(self):
- """
- That when an OSD epoch barrier is set on an MDS, subsequently
- issued capabilities cause clients to update their OSD map to that
- epoch.
- """
-
- # Sync up clients with initial MDS OSD map barrier
- self.mount_a.open_no_data("foo")
- self.mount_b.open_no_data("bar")
-
- # Grab mounts' initial OSD epochs: later we will check that
- # they haven't advanced beyond this point.
- mount_a_initial_epoch = self.mount_a.get_osd_epoch()[0]
- mount_b_initial_epoch = self.mount_b.get_osd_epoch()[0]
-
- # Freshly mounted at start of test, should be up to date with OSD map
- self.assertGreaterEqual(mount_a_initial_epoch, self.initial_osd_epoch)
- self.assertGreaterEqual(mount_b_initial_epoch, self.initial_osd_epoch)
-
- # Set and unset a flag to cause OSD epoch to increment
- self.fs.mon_manager.raw_cluster_cmd("osd", "set", "pause")
- self.fs.mon_manager.raw_cluster_cmd("osd", "unset", "pause")
-
- out = self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json").strip()
- new_epoch = json.loads(out)['epoch']
- self.assertNotEqual(self.initial_osd_epoch, new_epoch)
-
- # Do a metadata operation on clients, witness that they end up with
- # the old OSD map from startup time (nothing has prompted client
- # to update its map)
- self.mount_a.open_no_data("alpha")
- self.mount_b.open_no_data("bravo1")
-
- # Sleep long enough that if the OSD map was propagating it would
- # have done so (this is arbitrary because we are 'waiting' for something
- # to *not* happen).
- time.sleep(30)
-
- mount_a_epoch, mount_a_barrier = self.mount_a.get_osd_epoch()
- self.assertEqual(mount_a_epoch, mount_a_initial_epoch)
- mount_b_epoch, mount_b_barrier = self.mount_b.get_osd_epoch()
- self.assertEqual(mount_b_epoch, mount_b_initial_epoch)
-
- # Set a barrier on the MDS
- self.fs.mds_asok(["osdmap", "barrier", new_epoch.__str__()], mds_id=self.active_mds_id)
-
- # Do an operation on client B, witness that it ends up with
- # the latest OSD map from the barrier. This shouldn't generate any
- # cap revokes to A because B was already the last one to touch
- # a file in root.
- self.mount_b.run_shell(["touch", "bravo2"])
- self.mount_b.open_no_data("bravo2")
-
- # Some time passes here because the metadata part of the operation
- # completes immediately, while the resulting OSD map update happens
- # asynchronously (it's an Objecter::_maybe_request_map) as a result
- # of seeing the new epoch barrier.
- self.wait_until_equal(
- lambda: self.mount_b.get_osd_epoch(),
- (new_epoch, new_epoch),
- 30,
- lambda x: x[0] > new_epoch or x[1] > new_epoch)
-
- # ...and none of this should have affected the oblivious mount a,
- # because it wasn't doing any data or metadata IO
- mount_a_epoch, mount_a_barrier = self.mount_a.get_osd_epoch()
- self.assertEqual(mount_a_epoch, mount_a_initial_epoch)
-
- def _data_pool_name(self):
- data_pool_names = self.fs.get_data_pool_names()
- if len(data_pool_names) > 1:
- raise RuntimeError("This test can't handle multiple data pools")
- else:
- return data_pool_names[0]
-
- def _test_full(self, easy_case):
- """
- - That a client trying to write data to a file is prevented
- from doing so with an -ENOSPC result
- - That they are also prevented from creating new files by the MDS.
- - That they may delete another file to get the system healthy again
-
- :param easy_case: if true, delete a successfully written file to
- free up space. else, delete the file that experienced
- the failed write.
- """
-
- osd_mon_report_interval_max = int(self.fs.get_config("osd_mon_report_interval_max", service_type='osd'))
-
- log.info("Writing {0}MB should fill this cluster".format(self.fill_mb))
-
- # Fill up the cluster. This dd may or may not fail, as it depends on
- # how soon the cluster recognises its own fullness
- self.mount_a.write_n_mb("large_file_a", self.fill_mb / 2)
- try:
- self.mount_a.write_n_mb("large_file_b", self.fill_mb / 2)
- except CommandFailedError:
- log.info("Writing file B failed (full status happened already)")
- assert self.is_full()
- else:
- log.info("Writing file B succeeded (full status will happen soon)")
- self.wait_until_true(lambda: self.is_full(),
- timeout=osd_mon_report_interval_max * 5)
-
- # Attempting to write more data should give me ENOSPC
- with self.assertRaises(CommandFailedError) as ar:
- self.mount_a.write_n_mb("large_file_b", 50, seek=self.fill_mb / 2)
- self.assertEqual(ar.exception.exitstatus, 1) # dd returns 1 on "No space"
-
- # Wait for the MDS to see the latest OSD map so that it will reliably
- # be applying the policy of rejecting non-deletion metadata operations
- # while in the full state.
- osd_epoch = json.loads(self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['epoch']
- self.wait_until_true(
- lambda: self.fs.mds_asok(['status'], mds_id=self.active_mds_id)['osdmap_epoch'] >= osd_epoch,
- timeout=10)
-
- if not self.data_only:
- with self.assertRaises(CommandFailedError):
- self.mount_a.write_n_mb("small_file_1", 0)
-
- # Clear out some space
- if easy_case:
- self.mount_a.run_shell(['rm', '-f', 'large_file_a'])
- self.mount_a.run_shell(['rm', '-f', 'large_file_b'])
- else:
- # In the hard case it is the file that filled the system.
- # Before the new #7317 (ENOSPC, epoch barrier) changes, this
- # would fail because the last objects written would be
- # stuck in the client cache as objecter operations.
- self.mount_a.run_shell(['rm', '-f', 'large_file_b'])
- self.mount_a.run_shell(['rm', '-f', 'large_file_a'])
-
- # Here we are waiting for two things to happen:
- # * The MDS to purge the stray folder and execute object deletions
- # * The OSDs to inform the mon that they are no longer full
- self.wait_until_true(lambda: not self.is_full(),
- timeout=osd_mon_report_interval_max * 5)
-
- # Wait for the MDS to see the latest OSD map so that it will reliably
- # be applying the free space policy
- osd_epoch = json.loads(self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['epoch']
- self.wait_until_true(
- lambda: self.fs.mds_asok(['status'], mds_id=self.active_mds_id)['osdmap_epoch'] >= osd_epoch,
- timeout=10)
-
- # Now I should be able to write again
- self.mount_a.write_n_mb("large_file", 50, seek=0)
-
- # Ensure that the MDS keeps its OSD epoch barrier across a restart
-
- def test_full_different_file(self):
- self._test_full(True)
-
- def test_full_same_file(self):
- self._test_full(False)
-
- def _remote_write_test(self, template):
- """
- Run some remote python in a way that's useful for
- testing free space behaviour (see test_* methods using this)
- """
- file_path = os.path.join(self.mount_a.mountpoint, "full_test_file")
-
- # Enough to trip the full flag
- osd_mon_report_interval_max = int(self.fs.get_config("osd_mon_report_interval_max", service_type='osd'))
- mon_tick_interval = int(self.fs.get_config("mon_tick_interval", service_type="mon"))
-
- # Sufficient data to cause RADOS cluster to go 'full'
- log.info("pool capacity {0}, {1}MB should be enough to fill it".format(self.pool_capacity, self.fill_mb))
-
- # Long enough for RADOS cluster to notice it is full and set flag on mons
- # (report_interval for mon to learn PG stats, tick interval for it to update OSD map,
- # factor of 1.5 for I/O + network latency in committing OSD map and distributing it
- # to the OSDs)
- full_wait = (osd_mon_report_interval_max + mon_tick_interval) * 1.5
-
- # Configs for this test should bring this setting down in order to
- # run reasonably quickly
- if osd_mon_report_interval_max > 10:
- log.warn("This test may run rather slowly unless you decrease"
- "osd_mon_report_interval_max (5 is a good setting)!")
-
- self.mount_a.run_python(template.format(
- fill_mb=self.fill_mb,
- file_path=file_path,
- full_wait=full_wait,
- is_fuse=isinstance(self.mount_a, FuseMount)
- ))
-
- def test_full_fclose(self):
- # A remote script which opens a file handle, fills up the filesystem, and then
- # checks that ENOSPC errors on buffered writes surface as errors when the file is closed
- remote_script = dedent("""
- import time
- import datetime
- import subprocess
- import os
-
- # Write some buffered data through before going full, all should be well
- print "writing some data through which we expect to succeed"
- bytes = 0
- f = os.open("{file_path}", os.O_WRONLY | os.O_CREAT)
- bytes += os.write(f, 'a' * 4096)
- os.fsync(f)
- print "fsync'ed data successfully, will now attempt to fill fs"
-
- # Okay, now we're going to fill up the filesystem, and then keep
- # writing until we see an error from fsync. As long as we're doing
- # buffered IO, the error should always only appear from fsync and not
- # from write
- full = False
-
- for n in range(0, {fill_mb}):
- bytes += os.write(f, 'x' * 1024 * 1024)
- print "wrote bytes via buffered write, may repeat"
- print "done writing bytes"
-
- # OK, now we should sneak in under the full condition
- # due to the time it takes the OSDs to report to the
- # mons, and get a successful fsync on our full-making data
- os.fsync(f)
- print "successfully fsync'ed prior to getting full state reported"
-
- # Now wait for the full flag to get set so that our
- # next flush IO will fail
- time.sleep(30)
-
- # A buffered IO, should succeed
- print "starting buffered write we expect to succeed"
- os.write(f, 'x' * 4096)
- print "wrote, now waiting 30s and then doing a close we expect to fail"
-
- # Wait long enough for a background flush that should fail
- time.sleep(30)
-
- if {is_fuse}:
- # ...and check that the failed background flush is reflected in fclose
- try:
- os.close(f)
- except OSError:
- print "close() returned an error as expected"
- else:
- raise RuntimeError("close() failed to raise error")
- else:
- # The kernel cephfs client does not raise errors on fclose
- os.close(f)
-
- os.unlink("{file_path}")
- """)
- self._remote_write_test(remote_script)
-
- def test_full_fsync(self):
- """
- That when the full flag is encountered during asynchronous
- flushes, an fwrite() may succeed while the subsequent
- fsync()/fclose() should return the ENOSPC error.
- """
-
- # A remote script which opens a file handle, fills up the filesystem, and then
- # checks that ENOSPC errors on buffered writes appear correctly as errors in fsync
- remote_script = dedent("""
- import time
- import datetime
- import subprocess
- import os
-
- # Write some buffered data through before going full, all should be well
- print "writing some data through which we expect to succeed"
- bytes = 0
- f = os.open("{file_path}", os.O_WRONLY | os.O_CREAT)
- bytes += os.write(f, 'a' * 4096)
- os.fsync(f)
- print "fsync'ed data successfully, will now attempt to fill fs"
-
- # Okay, now we're going to fill up the filesystem, and then keep
- # writing until we see an error from fsync. As long as we're doing
- # buffered IO, the error should always only appear from fsync and not
- # from write
- full = False
-
- for n in range(0, {fill_mb} + 1):
- try:
- bytes += os.write(f, 'x' * 1024 * 1024)
- print "wrote bytes via buffered write, moving on to fsync"
- except OSError as e:
- print "Unexpected error %s from write() instead of fsync()" % e
- raise
-
- try:
- os.fsync(f)
- print "fsync'ed successfully"
- except OSError as e:
- print "Reached fullness after %.2f MB" % (bytes / (1024.0 * 1024.0))
- full = True
- break
- else:
- print "Not full yet after %.2f MB" % (bytes / (1024.0 * 1024.0))
-
- if n > {fill_mb} * 0.8:
- # Be cautious in the last region where we expect to hit
- # the full condition, so that we don't overshoot too dramatically
- print "sleeping a bit as we've exceeded 80% of our expected full ratio"
- time.sleep({full_wait})
-
- if not full:
- raise RuntimeError("Failed to reach fullness after writing %d bytes" % bytes)
-
- # close() should not raise an error because we already caught it in
- # fsync. There shouldn't have been any more writeback errors
- # since then because all IOs got cancelled on the full flag.
- print "calling close"
- os.close(f)
- print "close() did not raise error"
-
- os.unlink("{file_path}")
- """)
-
- self._remote_write_test(remote_script)
-
-
-class TestQuotaFull(FullnessTestCase):
- """
- Test per-pool fullness, which indicates quota limits exceeded
- """
- pool_capacity = 1024 * 1024 * 32 # arbitrary low-ish limit
- fill_mb = pool_capacity / (1024 * 1024)
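- # i.e. 32MB of writes should be enough to exceed the quota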
-
- # We are only testing quota handling on the data pool, not the metadata
- # pool.
- data_only = True
-
- def setUp(self):
- super(TestQuotaFull, self).setUp()
-
- pool_name = self.fs.get_data_pool_name()
- self.fs.mon_manager.raw_cluster_cmd("osd", "pool", "set-quota", pool_name,
- "max_bytes", "{0}".format(self.pool_capacity))
-
- def is_full(self):
- return self.fs.is_pool_full(self.fs.get_data_pool_name())
-
-
-class TestClusterFull(FullnessTestCase):
- """
- Test cluster-wide fullness, which indicates that an OSD has become too full
- """
- pool_capacity = None
- REQUIRE_MEMSTORE = True
-
- def setUp(self):
- super(TestClusterFull, self).setUp()
-
- if self.pool_capacity is None:
- # This is a hack to overcome weird fluctuations in the reported
- # `max_avail` attribute of pools that sometimes occurs in between
- # tests (reason as yet unclear, but this dodges the issue)
- TestClusterFull.pool_capacity = self.fs.get_pool_df(self._data_pool_name())['max_avail']
- TestClusterFull.fill_mb = int(1.05 * (self.pool_capacity / (1024.0 * 1024.0)))
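- # i.e. aim to write ~5% more than the pool's reported max_avail to be sure of hitting fullness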
-
- def is_full(self):
- return self.fs.is_full()
-
-# Hide the parent class so that unittest.loader doesn't try to run it.
-del globals()['FullnessTestCase']
diff --git a/src/ceph/qa/tasks/cephfs/test_journal_migration.py b/src/ceph/qa/tasks/cephfs/test_journal_migration.py
deleted file mode 100644
index 64fe939..0000000
--- a/src/ceph/qa/tasks/cephfs/test_journal_migration.py
+++ /dev/null
@@ -1,118 +0,0 @@
-
-from StringIO import StringIO
-from tasks.cephfs.cephfs_test_case import CephFSTestCase
-from tasks.workunit import task as workunit
-
-JOURNAL_FORMAT_LEGACY = 0
-JOURNAL_FORMAT_RESILIENT = 1
-
-
-class TestJournalMigration(CephFSTestCase):
- CLIENTS_REQUIRED = 1
- MDSS_REQUIRED = 2
-
- def test_journal_migration(self):
- old_journal_version = JOURNAL_FORMAT_LEGACY
- new_journal_version = JOURNAL_FORMAT_RESILIENT
-
- # Pick out two daemons to use
- mds_a, mds_b = sorted(self.mds_cluster.mds_ids[0:2])
-
- self.mount_a.umount_wait()
- self.fs.mds_stop()
-
- # Enable standby replay, to cover the bug case #8811 where
- # a standby replay might mistakenly end up trying to rewrite
- # the journal at the same time as an active daemon.
- self.fs.set_ceph_conf('mds', 'mds standby replay', "true")
- self.fs.set_ceph_conf('mds', 'mds standby for rank', "0")
-
- # Create a filesystem using the older journal format.
- self.fs.set_ceph_conf('mds', 'mds journal format', old_journal_version)
- self.fs.recreate()
- self.fs.mds_restart(mds_id=mds_a)
- self.fs.wait_for_daemons()
- self.assertEqual(self.fs.get_active_names(), [mds_a])
-
- def replay_names():
- return [s['name']
- for s in self.fs.status().get_replays(fscid = self.fs.id)]
-
- # Start the standby and wait for it to come up
- self.fs.mds_restart(mds_id=mds_b)
- self.wait_until_equal(
- replay_names,
- [mds_b],
- timeout = 30)
-
- # Do some client work so that the log is populated with something.
- with self.mount_a.mounted():
- self.mount_a.create_files()
- self.mount_a.check_files() # sanity, this should always pass
-
- # Run a more substantial workunit so that the length of the log to be
- # converted will span at least a few segments
- workunit(self.ctx, {
- 'clients': {
- "client.{0}".format(self.mount_a.client_id): ["suites/fsstress.sh"],
- },
- "timeout": "3h"
- })
-
- # Modify the ceph.conf to ask the MDS to use the new journal format.
- self.fs.set_ceph_conf('mds', 'mds journal format', new_journal_version)
-
- # Restart the MDS.
- self.fs.mds_fail_restart(mds_id=mds_a)
- self.fs.mds_fail_restart(mds_id=mds_b)
-
- # This ensures that all daemons come up into a valid state
- self.fs.wait_for_daemons()
-
- # Check that files created in the initial client workload are still visible
- # in a client mount.
- with self.mount_a.mounted():
- self.mount_a.check_files()
-
- # Verify that the journal really has been rewritten.
- journal_version = self.fs.get_journal_version()
- if journal_version != new_journal_version:
- raise RuntimeError("Journal was not upgraded, version should be {0} but is {1}".format(
- new_journal_version, journal_version
- ))
-
- # Verify that cephfs-journal-tool can now read the rewritten journal
- inspect_out = self.fs.journal_tool(["journal", "inspect"])
- if not inspect_out.endswith(": OK"):
- raise RuntimeError("Unexpected journal-tool result: '{0}'".format(
- inspect_out
- ))
-
- self.fs.journal_tool(["event", "get", "json", "--path", "/tmp/journal.json"])
- p = self.fs.tool_remote.run(
- args=[
- "python",
- "-c",
- "import json; print len(json.load(open('/tmp/journal.json')))"
- ],
- stdout=StringIO())
- event_count = int(p.stdout.getvalue().strip())
- if event_count < 1000:
- # Approximate value of "lots", expected from having run fsstress
- raise RuntimeError("Unexpectedly few journal events: {0}".format(event_count))
-
- # Do some client work to check that writing the log is still working
- with self.mount_a.mounted():
- workunit(self.ctx, {
- 'clients': {
- "client.{0}".format(self.mount_a.client_id): ["fs/misc/trivial_sync.sh"],
- },
- "timeout": "3h"
- })
-
- # Check that both an active and a standby replay are still up
- self.assertEqual(len(replay_names()), 1)
- self.assertEqual(len(self.fs.get_active_names()), 1)
- self.assertTrue(self.mds_cluster.mds_daemons[mds_a].running())
- self.assertTrue(self.mds_cluster.mds_daemons[mds_b].running())
-
diff --git a/src/ceph/qa/tasks/cephfs/test_journal_repair.py b/src/ceph/qa/tasks/cephfs/test_journal_repair.py
deleted file mode 100644
index 62cbbb0..0000000
--- a/src/ceph/qa/tasks/cephfs/test_journal_repair.py
+++ /dev/null
@@ -1,443 +0,0 @@
-
-"""
-Test our tools for recovering the content of damaged journals
-"""
-
-import json
-import logging
-from textwrap import dedent
-import time
-
-from teuthology.exceptions import CommandFailedError, ConnectionLostError
-from tasks.cephfs.filesystem import ObjectNotFound, ROOT_INO
-from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology
-from tasks.workunit import task as workunit
-
-log = logging.getLogger(__name__)
-
-
-class TestJournalRepair(CephFSTestCase):
- MDSS_REQUIRED = 2
-
- def test_inject_to_empty(self):
- """
- That when some dentries are in the journal but nothing is in
- the backing store, we correctly populate the backing store
- from the journalled dentries.
- """
-
- # Inject metadata operations
- self.mount_a.run_shell(["touch", "rootfile"])
- self.mount_a.run_shell(["mkdir", "subdir"])
- self.mount_a.run_shell(["touch", "subdir/subdirfile"])
- # There are several different paths for handling hardlinks, depending
- # on whether an existing dentry (being overwritten) is also a hardlink
- self.mount_a.run_shell(["mkdir", "linkdir"])
-
- # Test inode -> remote transition for a dentry
- self.mount_a.run_shell(["touch", "linkdir/link0"])
- self.mount_a.run_shell(["rm", "-f", "linkdir/link0"])
- self.mount_a.run_shell(["ln", "subdir/subdirfile", "linkdir/link0"])
-
- # Test nothing -> remote transition
- self.mount_a.run_shell(["ln", "subdir/subdirfile", "linkdir/link1"])
-
- # Test remote -> inode transition
- self.mount_a.run_shell(["ln", "subdir/subdirfile", "linkdir/link2"])
- self.mount_a.run_shell(["rm", "-f", "linkdir/link2"])
- self.mount_a.run_shell(["touch", "linkdir/link2"])
-
- # Test remote -> diff remote transition
- self.mount_a.run_shell(["ln", "subdir/subdirfile", "linkdir/link3"])
- self.mount_a.run_shell(["rm", "-f", "linkdir/link3"])
- self.mount_a.run_shell(["ln", "rootfile", "linkdir/link3"])
-
- # Test an empty directory
- self.mount_a.run_shell(["mkdir", "subdir/subsubdir"])
- self.mount_a.run_shell(["sync"])
-
- # Before we unmount, make a note of the inode numbers, later we will
- # check that they match what we recover from the journal
- rootfile_ino = self.mount_a.path_to_ino("rootfile")
- subdir_ino = self.mount_a.path_to_ino("subdir")
- linkdir_ino = self.mount_a.path_to_ino("linkdir")
- subdirfile_ino = self.mount_a.path_to_ino("subdir/subdirfile")
- subsubdir_ino = self.mount_a.path_to_ino("subdir/subsubdir")
-
- self.mount_a.umount_wait()
-
- # Stop the MDS
- self.fs.mds_stop()
- self.fs.mds_fail()
-
- # Now, the journal should contain the operations, but the backing
- # store shouldn't
- with self.assertRaises(ObjectNotFound):
- self.fs.list_dirfrag(subdir_ino)
- self.assertEqual(self.fs.list_dirfrag(ROOT_INO), [])
-
- # Execute the dentry recovery, this should populate the backing store
- self.fs.journal_tool(['event', 'recover_dentries', 'list'])
-
- # Dentries in ROOT_INO are present
- self.assertEqual(sorted(self.fs.list_dirfrag(ROOT_INO)), sorted(['rootfile_head', 'subdir_head', 'linkdir_head']))
- self.assertEqual(self.fs.list_dirfrag(subdir_ino), ['subdirfile_head', 'subsubdir_head'])
- self.assertEqual(sorted(self.fs.list_dirfrag(linkdir_ino)),
- sorted(['link0_head', 'link1_head', 'link2_head', 'link3_head']))
-
- # Now check the MDS can read what we wrote: truncate the journal
- # and start the mds.
- self.fs.journal_tool(['journal', 'reset'])
- self.fs.mds_fail_restart()
- self.fs.wait_for_daemons()
-
- # List files
- self.mount_a.mount()
- self.mount_a.wait_until_mounted()
-
- # First ls -R to populate MDCache, such that hardlinks will
- # resolve properly (recover_dentries does not create backtraces,
- # so ordinarily hardlinks to inodes that happen not to have backtraces
- # will be invisible in readdir).
- # FIXME: hook in forward scrub here to regenerate backtraces
- proc = self.mount_a.run_shell(['ls', '-R'])
- self.mount_a.umount_wait() # remount to clear client cache before our second ls
- self.mount_a.mount()
- self.mount_a.wait_until_mounted()
-
- proc = self.mount_a.run_shell(['ls', '-R'])
- self.assertEqual(proc.stdout.getvalue().strip(),
- dedent("""
- .:
- linkdir
- rootfile
- subdir
-
- ./linkdir:
- link0
- link1
- link2
- link3
-
- ./subdir:
- subdirfile
- subsubdir
-
- ./subdir/subsubdir:
- """).strip())
-
- # Check the correct inos were preserved by path
- self.assertEqual(rootfile_ino, self.mount_a.path_to_ino("rootfile"))
- self.assertEqual(subdir_ino, self.mount_a.path_to_ino("subdir"))
- self.assertEqual(subdirfile_ino, self.mount_a.path_to_ino("subdir/subdirfile"))
- self.assertEqual(subsubdir_ino, self.mount_a.path_to_ino("subdir/subsubdir"))
-
- # Check that the hard link handling came out correctly
- self.assertEqual(self.mount_a.path_to_ino("linkdir/link0"), subdirfile_ino)
- self.assertEqual(self.mount_a.path_to_ino("linkdir/link1"), subdirfile_ino)
- self.assertNotEqual(self.mount_a.path_to_ino("linkdir/link2"), subdirfile_ino)
- self.assertEqual(self.mount_a.path_to_ino("linkdir/link3"), rootfile_ino)
-
- # Create a new file, ensure it is not issued the same ino as one of the
- # recovered ones
- self.mount_a.run_shell(["touch", "afterwards"])
- new_ino = self.mount_a.path_to_ino("afterwards")
- self.assertNotIn(new_ino, [rootfile_ino, subdir_ino, subdirfile_ino])
-
- # Check that we can do metadata ops in the recovered directory
- self.mount_a.run_shell(["touch", "subdir/subsubdir/subsubdirfile"])
-
- @for_teuthology # 308s
- def test_reset(self):
- """
- That after forcibly modifying the backing store, we can get back into
- a good state by resetting the MDSMap.
-
- The scenario is that we have two active MDSs, and we lose the journals. Once
- we have completely lost confidence in the integrity of the metadata, we want
- to return the system to a single-MDS state and then scrub to recover what we
- can.
- """
-
- # Set max_mds to 2
- self.fs.set_max_mds(2)
-
- # See that we have two active MDSs
- self.wait_until_equal(lambda: len(self.fs.get_active_names()), 2, 30,
- reject_fn=lambda v: v > 2 or v < 1)
- active_mds_names = self.fs.get_active_names()
-
- # Switch off any unneeded MDS daemons
- for unneeded_mds in set(self.mds_cluster.mds_ids) - set(active_mds_names):
- self.mds_cluster.mds_stop(unneeded_mds)
- self.mds_cluster.mds_fail(unneeded_mds)
-
- # Create a dir on each rank
- self.mount_a.run_shell(["mkdir", "alpha"])
- self.mount_a.run_shell(["mkdir", "bravo"])
- self.mount_a.setfattr("alpha/", "ceph.dir.pin", "0")
- self.mount_a.setfattr("bravo/", "ceph.dir.pin", "1")
-
- def subtrees_assigned():
- got_subtrees = self.fs.mds_asok(["get", "subtrees"], mds_id=active_mds_names[0])
-
- for s in got_subtrees:
- if s['dir']['path'] == '/bravo':
- if s['auth_first'] == 1:
- return True
- else:
- # Should not happen
- raise RuntimeError("/bravo is subtree but not rank 1!")
-
- return False
-
- # Ensure the pinning has taken effect and the /bravo dir is now
- # migrated to rank 1.
- self.wait_until_true(subtrees_assigned, 30)
-
- # Do some IO (this should be split across ranks according to
- # the rank-pinned dirs)
- self.mount_a.create_n_files("alpha/file", 1000)
- self.mount_a.create_n_files("bravo/file", 1000)
-
- # Flush the journals so that we have some backing store data
- # belonging to one MDS, and some to the other MDS.
- for mds_name in active_mds_names:
- self.fs.mds_asok(["flush", "journal"], mds_name)
-
- # Stop (hard) the second MDS daemon
- self.fs.mds_stop(active_mds_names[1])
-
- # Wipe out the tables for MDS rank 1 so that it is broken and can't start
- # (this is the simulated failure that we will demonstrate that the disaster
- # recovery tools can get us back from)
- self.fs.erase_metadata_objects(prefix="mds1_")
-
- # Try to access files from the client
- blocked_ls = self.mount_a.run_shell(["ls", "-R"], wait=False)
-
- # Check that this "ls -R" blocked rather than completing: indicates
- # it got stuck trying to access subtrees which were on the now-dead MDS.
- log.info("Sleeping to check ls is blocked...")
- time.sleep(60)
- self.assertFalse(blocked_ls.finished)
-
- # This mount is now useless because it will depend on MDS rank 1, and MDS rank 1
- # is not coming back. Kill it.
- log.info("Killing mount, it's blocked on the MDS we killed")
- self.mount_a.kill()
- self.mount_a.kill_cleanup()
- try:
- # Now that the mount is dead, the ls -R should error out.
- blocked_ls.wait()
- except (CommandFailedError, ConnectionLostError):
- # The ConnectionLostError case is for kernel client, where
- # killing the mount also means killing the node.
- pass
-
- # See that the second MDS will crash when it starts and tries to
- # acquire rank 1
- damaged_id = active_mds_names[1]
- self.fs.mds_restart(damaged_id)
-
- # The daemon taking the damaged rank should begin starting, then
- # respawn back into standby after asking the mon to mark the rank
- # damaged.
- def is_marked_damaged():
- mds_map = self.fs.get_mds_map()
- return 1 in mds_map['damaged']
-
- self.wait_until_true(is_marked_damaged, 60)
-
- def get_state():
- info = self.mds_cluster.get_mds_info(damaged_id)
- return info['state'] if info is not None else None
-
- self.wait_until_equal(
- get_state,
- "up:standby",
- timeout=60)
-
- self.fs.mds_stop(damaged_id)
- self.fs.mds_fail(damaged_id)
-
- # Now give up and go through a disaster recovery procedure
- self.fs.mds_stop(active_mds_names[0])
- self.fs.mds_fail(active_mds_names[0])
- # Invoke recover_dentries quietly, because otherwise log spews millions of lines
- self.fs.journal_tool(["event", "recover_dentries", "summary"], rank=0, quiet=True)
- self.fs.journal_tool(["event", "recover_dentries", "summary"], rank=1, quiet=True)
- self.fs.table_tool(["0", "reset", "session"])
- self.fs.journal_tool(["journal", "reset"], rank=0)
- self.fs.erase_mds_objects(1)
- self.fs.mon_manager.raw_cluster_cmd('fs', 'reset', self.fs.name,
- '--yes-i-really-mean-it')
-
- # Bring an MDS back online, mount a client, and see that we can walk the full
- # filesystem tree again
- self.fs.mds_fail_restart(active_mds_names[0])
- self.wait_until_equal(lambda: self.fs.get_active_names(), [active_mds_names[0]], 30,
- reject_fn=lambda v: len(v) > 1)
- self.mount_a.mount()
- self.mount_a.run_shell(["ls", "-R"], wait=True)
-
- def test_table_tool(self):
- active_mdss = self.fs.get_active_names()
- self.assertEqual(len(active_mdss), 1)
- mds_name = active_mdss[0]
-
- self.mount_a.run_shell(["touch", "foo"])
- self.fs.mds_asok(["flush", "journal"], mds_name)
-
- log.info(self.fs.table_tool(["all", "show", "inode"]))
- log.info(self.fs.table_tool(["all", "show", "snap"]))
- log.info(self.fs.table_tool(["all", "show", "session"]))
-
- # Inode table should always be the same because initial state
- # and choice of inode are deterministic.
- # Should see one inode consumed
- self.assertEqual(
- json.loads(self.fs.table_tool(["all", "show", "inode"])),
- {"0": {
- "data": {
- "version": 2,
- "inotable": {
- "projected_free": [
- {"start": 1099511628777,
- "len": 1099511626775}],
- "free": [
- {"start": 1099511628777,
- "len": 1099511626775}]}},
- "result": 0}}
-
- )
-
- # Should see one session
- session_data = json.loads(self.fs.table_tool(
- ["all", "show", "session"]))
- self.assertEqual(len(session_data["0"]["data"]["Sessions"]), 1)
- self.assertEqual(session_data["0"]["result"], 0)
-
- # Should see no snaps
- self.assertEqual(
- json.loads(self.fs.table_tool(["all", "show", "snap"])),
- {"version": 0,
- "snapserver": {"last_snap": 1,
- "pending_noop": [],
- "snaps": [],
- "need_to_purge": {},
- "pending_update": [],
- "pending_destroy": []},
- "result": 0}
- )
-
- # Reset everything
- for table in ["session", "inode", "snap"]:
- self.fs.table_tool(["all", "reset", table])
-
- log.info(self.fs.table_tool(["all", "show", "inode"]))
- log.info(self.fs.table_tool(["all", "show", "snap"]))
- log.info(self.fs.table_tool(["all", "show", "session"]))
-
- # Should see 0 sessions
- session_data = json.loads(self.fs.table_tool(
- ["all", "show", "session"]))
- self.assertEqual(len(session_data["0"]["data"]["Sessions"]), 0)
- self.assertEqual(session_data["0"]["result"], 0)
-
- # Should see entire inode range now marked free
- self.assertEqual(
- json.loads(self.fs.table_tool(["all", "show", "inode"])),
- {"0": {"data": {"version": 1,
- "inotable": {"projected_free": [
- {"start": 1099511627776,
- "len": 1099511627776}],
- "free": [
- {"start": 1099511627776,
- "len": 1099511627776}]}},
- "result": 0}}
- )
-
- # Should see no snaps
- self.assertEqual(
- json.loads(self.fs.table_tool(["all", "show", "snap"])),
- {"version": 1,
- "snapserver": {"last_snap": 1,
- "pending_noop": [],
- "snaps": [],
- "need_to_purge": {},
- "pending_update": [],
- "pending_destroy": []},
- "result": 0}
- )
-
- def test_table_tool_take_inos(self):
- initial_range_start = 1099511627776
- initial_range_len = 1099511627776
- # Initially a completely clear range
- self.assertEqual(
- json.loads(self.fs.table_tool(["all", "show", "inode"])),
- {"0": {"data": {"version": 0,
- "inotable": {"projected_free": [
- {"start": initial_range_start,
- "len": initial_range_len}],
- "free": [
- {"start": initial_range_start,
- "len": initial_range_len}]}},
- "result": 0}}
- )
-
- # Remove some
- self.assertEqual(
- json.loads(self.fs.table_tool(["all", "take_inos", "{0}".format(initial_range_start + 100)])),
- {"0": {"data": {"version": 1,
- "inotable": {"projected_free": [
- {"start": initial_range_start + 101,
- "len": initial_range_len - 101}],
- "free": [
- {"start": initial_range_start + 101,
- "len": initial_range_len - 101}]}},
- "result": 0}}
- )
-
- @for_teuthology # Hack: "for_teuthology" because .sh doesn't work outside teuth
- def test_journal_smoke(self):
- workunit(self.ctx, {
- 'clients': {
- "client.{0}".format(self.mount_a.client_id): [
- "fs/misc/trivial_sync.sh"],
- },
- "timeout": "1h"
- })
-
- for mount in self.mounts:
- mount.umount_wait()
-
- self.fs.mds_stop()
- self.fs.mds_fail()
-
- # journal tool smoke
- workunit(self.ctx, {
- 'clients': {
- "client.{0}".format(self.mount_a.client_id): [
- "suites/cephfs_journal_tool_smoke.sh"],
- },
- "timeout": "1h"
- })
-
-
-
- self.fs.mds_restart()
- self.fs.wait_for_daemons()
-
- self.mount_a.mount()
-
- # trivial sync on mount a
- workunit(self.ctx, {
- 'clients': {
- "client.{0}".format(self.mount_a.client_id): [
- "fs/misc/trivial_sync.sh"],
- },
- "timeout": "1h"
- })
-
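For context on the sequence that test_reset walks through above: once every MDS daemon is stopped, the same recovery can be driven directly with the cephfs-journal-tool / cephfs-table-tool CLIs. The sketch below is a minimal restatement of that sequence, not part of the test suite; the filesystem name and the pair of ranks are assumptions, and it omits the rados-level wipe of rank 1's objects that the test performs through erase_mds_objects().

#!/usr/bin/env python
# Hedged sketch of the offline recovery sequence exercised by test_reset,
# expressed as direct CLI calls. Assumptions: the filesystem is named
# "cephfs", ranks 0 and 1 were active, and all MDS daemons are already
# stopped and marked failed.
import subprocess


def run(*cmd):
    print("+ " + " ".join(cmd))
    subprocess.check_call(cmd)


def recover(fs_name="cephfs", ranks=(0, 1)):
    # Replay the surviving dentries from each rank's journal back into
    # the metadata pool ("summary" keeps the output quiet, as in the test).
    for rank in ranks:
        run("cephfs-journal-tool", "--rank={0}:{1}".format(fs_name, rank),
            "event", "recover_dentries", "summary")
    # Wipe the session table and reset rank 0's journal.
    run("cephfs-table-tool", "0", "reset", "session")
    run("cephfs-journal-tool", "--rank={0}:0".format(fs_name),
        "journal", "reset")
    # Collapse the MDS map back to a single rank.
    run("ceph", "fs", "reset", fs_name, "--yes-i-really-mean-it")


if __name__ == "__main__":
    recover()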
diff --git a/src/ceph/qa/tasks/cephfs/test_mantle.py b/src/ceph/qa/tasks/cephfs/test_mantle.py
deleted file mode 100644
index 6cd86ad..0000000
--- a/src/ceph/qa/tasks/cephfs/test_mantle.py
+++ /dev/null
@@ -1,109 +0,0 @@
-from tasks.cephfs.cephfs_test_case import CephFSTestCase
-import json
-import logging
-
-log = logging.getLogger(__name__)
-failure = "using old balancer; mantle failed for balancer="
-success = "mantle balancer version changed: "
-
-class TestMantle(CephFSTestCase):
- def start_mantle(self):
- self.wait_for_health_clear(timeout=30)
- self.fs.set_max_mds(2)
- self.wait_until_equal(lambda: len(self.fs.get_active_names()), 2, 30,
- reject_fn=lambda v: v > 2 or v < 1)
-
- for m in self.fs.get_active_names():
- self.fs.mds_asok(['config', 'set', 'debug_objecter', '20'], mds_id=m)
- self.fs.mds_asok(['config', 'set', 'debug_ms', '0'], mds_id=m)
- self.fs.mds_asok(['config', 'set', 'debug_mds', '0'], mds_id=m)
- self.fs.mds_asok(['config', 'set', 'debug_mds_balancer', '5'], mds_id=m)
-
- def push_balancer(self, obj, lua_code, expect):
- self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', obj)
- self.fs.rados(["put", obj, "-"], stdin_data=lua_code)
- with self.assert_cluster_log(failure + obj + " " + expect):
- log.info("run a " + obj + " balancer that expects=" + expect)
-
- def test_version_empty(self):
- self.start_mantle()
- expect = " : (2) No such file or directory"
-
- ret = self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer')
- assert(ret == 22) # EINVAL
-
- self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', " ")
- with self.assert_cluster_log(failure + " " + expect): pass
-
- def test_version_not_in_rados(self):
- self.start_mantle()
- expect = failure + "ghost.lua : (2) No such file or directory"
- self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', "ghost.lua")
- with self.assert_cluster_log(expect): pass
-
- def test_balancer_invalid(self):
- self.start_mantle()
- expect = ": (22) Invalid argument"
-
- lua_code = "this is invalid lua code!"
- self.push_balancer("invalid.lua", lua_code, expect)
-
- lua_code = "BAL_LOG()"
- self.push_balancer("invalid_log.lua", lua_code, expect)
-
- lua_code = "BAL_LOG(0)"
- self.push_balancer("invalid_log_again.lua", lua_code, expect)
-
- def test_balancer_valid(self):
- self.start_mantle()
- lua_code = "BAL_LOG(0, \"test\")\nreturn {3, 4}"
- self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', "valid.lua")
- self.fs.rados(["put", "valid.lua", "-"], stdin_data=lua_code)
- with self.assert_cluster_log(success + "valid.lua"):
- log.info("run a valid.lua balancer")
-
- def test_return_invalid(self):
- self.start_mantle()
- expect = ": (22) Invalid argument"
-
- lua_code = "return \"hello\""
- self.push_balancer("string.lua", lua_code, expect)
-
- lua_code = "return 3"
- self.push_balancer("number.lua", lua_code, expect)
-
- lua_code = "return {}"
- self.push_balancer("dict_empty.lua", lua_code, expect)
-
- lua_code = "return {\"this\", \"is\", \"a\", \"test\"}"
- self.push_balancer("dict_of_strings.lua", lua_code, expect)
-
- lua_code = "return {3, \"test\"}"
- self.push_balancer("dict_of_mixed.lua", lua_code, expect)
-
- lua_code = "return {3}"
- self.push_balancer("not_enough_numbers.lua", lua_code, expect)
-
- lua_code = "return {3, 4, 5, 6, 7, 8, 9}"
- self.push_balancer("too_many_numbers.lua", lua_code, expect)
-
- def test_dead_osd(self):
- self.start_mantle()
- expect = " : (110) Connection timed out"
-
- # kill the OSDs so that the balancer pull from RADOS times out
- osd_map = json.loads(self.fs.mon_manager.raw_cluster_cmd('osd', 'dump', '--format=json-pretty'))
- for i in range(0, len(osd_map['osds'])):
- self.fs.mon_manager.raw_cluster_cmd_result('osd', 'down', str(i))
- self.fs.mon_manager.raw_cluster_cmd_result('osd', 'out', str(i))
-
- # trigger a pull from RADOS
- self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', "valid.lua")
-
- # make the timeout a little longer since dead OSDs spam ceph -w
- with self.assert_cluster_log(failure + "valid.lua" + expect, timeout=30):
- log.info("run a balancer that should timeout")
-
- # cleanup
- for i in range(0, len(osd_map['osds'])):
- self.fs.mon_manager.raw_cluster_cmd_result('osd', 'in', str(i))
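The pattern the Mantle tests rely on is small enough to restate outside the harness: write the Lua balancer as a RADOS object, then point the filesystem at it with `fs set <name> balancer`. A minimal sketch follows; the filesystem and metadata pool names are assumptions, and the Lua body reuses the valid.lua shape from test_balancer_valid (log something, then return per-rank load targets).

# Hedged sketch of installing a Mantle balancer, mirroring push_balancer()
# above. Pool and filesystem names are assumptions.
import subprocess

LUA_BALANCER = 'BAL_LOG(0, "test")\nreturn {3, 4}\n'


def push_balancer(obj_name="valid.lua", fs_name="cephfs",
                  metadata_pool="cephfs_metadata"):
    # Store the Lua script as a RADOS object in the metadata pool via stdin.
    p = subprocess.Popen(["rados", "-p", metadata_pool, "put", obj_name, "-"],
                         stdin=subprocess.PIPE)
    p.communicate(LUA_BALANCER)
    if p.returncode != 0:
        raise RuntimeError("rados put {0} failed".format(obj_name))
    # Point the MDS balancer at the object; success or failure shows up in
    # the cluster log, which is what the tests above assert on.
    subprocess.check_call(["ceph", "fs", "set", fs_name, "balancer", obj_name])


push_balancer()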
diff --git a/src/ceph/qa/tasks/cephfs/test_misc.py b/src/ceph/qa/tasks/cephfs/test_misc.py
deleted file mode 100644
index d857cfd..0000000
--- a/src/ceph/qa/tasks/cephfs/test_misc.py
+++ /dev/null
@@ -1,149 +0,0 @@
-
-from unittest import SkipTest
-from tasks.cephfs.fuse_mount import FuseMount
-from tasks.cephfs.cephfs_test_case import CephFSTestCase
-from teuthology.orchestra.run import CommandFailedError
-import errno
-import time
-import json
-
-
-class TestMisc(CephFSTestCase):
- CLIENTS_REQUIRED = 2
-
- LOAD_SETTINGS = ["mds_session_autoclose"]
- mds_session_autoclose = None
-
- def test_getattr_caps(self):
- """
- Check that the MDS recognizes the 'mask' parameter of an open request.
- The parameter allows the client to request caps when opening a file.
- """
-
- if not isinstance(self.mount_a, FuseMount):
- raise SkipTest("Require FUSE client")
-
- # Enable debug. Client will request CEPH_CAP_XATTR_SHARED
- # on lookup/open
- self.mount_b.umount_wait()
- self.set_conf('client', 'client debug getattr caps', 'true')
- self.mount_b.mount()
- self.mount_b.wait_until_mounted()
-
- # create a file and hold it open. MDS will issue CEPH_CAP_EXCL_*
- # to mount_a
- p = self.mount_a.open_background("testfile")
- self.mount_b.wait_for_visible("testfile")
-
- # this triggers a lookup request and an open request. The debug
- # code will check if lookup/open reply contains xattrs
- self.mount_b.run_shell(["cat", "testfile"])
-
- self.mount_a.kill_background(p)
-
- def test_fs_new(self):
- data_pool_name = self.fs.get_data_pool_name()
-
- self.fs.mds_stop()
- self.fs.mds_fail()
-
- self.fs.mon_manager.raw_cluster_cmd('fs', 'rm', self.fs.name,
- '--yes-i-really-mean-it')
-
- self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'delete',
- self.fs.metadata_pool_name,
- self.fs.metadata_pool_name,
- '--yes-i-really-really-mean-it')
- self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create',
- self.fs.metadata_pool_name,
- self.fs.get_pgs_per_fs_pool().__str__())
-
- dummyfile = '/etc/fstab'
-
- self.fs.put_metadata_object_raw("key", dummyfile)
-
- def get_pool_df(fs, name):
- try:
- return fs.get_pool_df(name)['objects'] > 0
- except RuntimeError as e:
- return False
-
- self.wait_until_true(lambda: get_pool_df(self.fs, self.fs.metadata_pool_name), timeout=30)
-
- try:
- self.fs.mon_manager.raw_cluster_cmd('fs', 'new', self.fs.name,
- self.fs.metadata_pool_name,
- data_pool_name)
- except CommandFailedError as e:
- self.assertEqual(e.exitstatus, errno.EINVAL)
- else:
- raise AssertionError("Expected EINVAL")
-
- self.fs.mon_manager.raw_cluster_cmd('fs', 'new', self.fs.name,
- self.fs.metadata_pool_name,
- data_pool_name, "--force")
-
- self.fs.mon_manager.raw_cluster_cmd('fs', 'rm', self.fs.name,
- '--yes-i-really-mean-it')
-
-
- self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'delete',
- self.fs.metadata_pool_name,
- self.fs.metadata_pool_name,
- '--yes-i-really-really-mean-it')
- self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create',
- self.fs.metadata_pool_name,
- self.fs.get_pgs_per_fs_pool().__str__())
- self.fs.mon_manager.raw_cluster_cmd('fs', 'new', self.fs.name,
- self.fs.metadata_pool_name,
- data_pool_name)
-
- def test_evict_client(self):
- """
- Check that a slow client session won't get evicted if it's the
- only session
- """
-
- self.mount_b.umount_wait()
- ls_data = self.fs.mds_asok(['session', 'ls'])
- self.assert_session_count(1, ls_data)
-
- self.mount_a.kill()
- self.mount_a.kill_cleanup()
-
- time.sleep(self.mds_session_autoclose * 1.5)
- ls_data = self.fs.mds_asok(['session', 'ls'])
- self.assert_session_count(1, ls_data)
-
- self.mount_a.mount()
- self.mount_a.wait_until_mounted()
- self.mount_b.mount()
- self.mount_b.wait_until_mounted()
-
- ls_data = self._session_list()
- self.assert_session_count(2, ls_data)
-
- self.mount_a.kill()
- self.mount_a.kill_cleanup()
-
- time.sleep(self.mds_session_autoclose * 1.5)
- ls_data = self.fs.mds_asok(['session', 'ls'])
- self.assert_session_count(1, ls_data)
-
- def test_filtered_df(self):
- pool_name = self.fs.get_data_pool_name()
- raw_df = self.fs.get_pool_df(pool_name)
- raw_avail = float(raw_df["max_avail"])
- out = self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'get',
- pool_name, 'size',
- '-f', 'json-pretty')
- j = json.loads(out)
- pool_size = int(j['size'])
-
- proc = self.mount_a.run_shell(['df', '.'])
- output = proc.stdout.getvalue()
- fs_avail = output.split('\n')[1].split()[3]
- fs_avail = float(fs_avail) * 1024
-
- ratio = raw_avail / fs_avail
- assert 0.9 < ratio < 1.1
diff --git a/src/ceph/qa/tasks/cephfs/test_pool_perm.py b/src/ceph/qa/tasks/cephfs/test_pool_perm.py
deleted file mode 100644
index 22775e7..0000000
--- a/src/ceph/qa/tasks/cephfs/test_pool_perm.py
+++ /dev/null
@@ -1,113 +0,0 @@
-from textwrap import dedent
-from teuthology.exceptions import CommandFailedError
-from tasks.cephfs.cephfs_test_case import CephFSTestCase
-import os
-
-
-class TestPoolPerm(CephFSTestCase):
- def test_pool_perm(self):
- self.mount_a.run_shell(["touch", "test_file"])
-
- file_path = os.path.join(self.mount_a.mountpoint, "test_file")
-
- remote_script = dedent("""
- import os
- import errno
-
- fd = os.open("{path}", os.O_RDWR)
- try:
- if {check_read}:
- ret = os.read(fd, 1024)
- else:
- os.write(fd, 'content')
- except OSError, e:
- if e.errno != errno.EPERM:
- raise
- else:
- raise RuntimeError("client does not check permission of data pool")
- """)
-
- client_name = "client.{0}".format(self.mount_a.client_id)
-
- # set data pool read only
- self.fs.mon_manager.raw_cluster_cmd_result(
- 'auth', 'caps', client_name, 'mds', 'allow', 'mon', 'allow r', 'osd',
- 'allow r pool={0}'.format(self.fs.get_data_pool_name()))
-
- self.mount_a.umount_wait()
- self.mount_a.mount()
- self.mount_a.wait_until_mounted()
-
- # write should fail
- self.mount_a.run_python(remote_script.format(path=file_path, check_read=str(False)))
-
- # set data pool write only
- self.fs.mon_manager.raw_cluster_cmd_result(
- 'auth', 'caps', client_name, 'mds', 'allow', 'mon', 'allow r', 'osd',
- 'allow w pool={0}'.format(self.fs.get_data_pool_name()))
-
- self.mount_a.umount_wait()
- self.mount_a.mount()
- self.mount_a.wait_until_mounted()
-
- # read should fail
- self.mount_a.run_python(remote_script.format(path=file_path, check_read=str(True)))
-
- def test_forbidden_modification(self):
- """
- That a client who does not have the capability for setting
- layout pools is prevented from doing so.
- """
-
- # Set up
- client_name = "client.{0}".format(self.mount_a.client_id)
- new_pool_name = "data_new"
- self.fs.add_data_pool(new_pool_name)
-
- self.mount_a.run_shell(["touch", "layoutfile"])
- self.mount_a.run_shell(["mkdir", "layoutdir"])
-
- # Set MDS 'rw' perms: missing 'p' means no setting pool layouts
- self.fs.mon_manager.raw_cluster_cmd_result(
- 'auth', 'caps', client_name, 'mds', 'allow rw', 'mon', 'allow r',
- 'osd',
- 'allow rw pool={0},allow rw pool={1}'.format(
- self.fs.get_data_pool_names()[0],
- self.fs.get_data_pool_names()[1],
- ))
-
- self.mount_a.umount_wait()
- self.mount_a.mount()
- self.mount_a.wait_until_mounted()
-
- with self.assertRaises(CommandFailedError):
- self.mount_a.setfattr("layoutfile", "ceph.file.layout.pool",
- new_pool_name)
- with self.assertRaises(CommandFailedError):
- self.mount_a.setfattr("layoutdir", "ceph.dir.layout.pool",
- new_pool_name)
- self.mount_a.umount_wait()
-
- # Set MDS 'rwp' perms: should now be able to set layouts
- self.fs.mon_manager.raw_cluster_cmd_result(
- 'auth', 'caps', client_name, 'mds', 'allow rwp', 'mon', 'allow r',
- 'osd',
- 'allow rw pool={0},allow rw pool={1}'.format(
- self.fs.get_data_pool_names()[0],
- self.fs.get_data_pool_names()[1],
- ))
- self.mount_a.mount()
- self.mount_a.wait_until_mounted()
- self.mount_a.setfattr("layoutfile", "ceph.file.layout.pool",
- new_pool_name)
- self.mount_a.setfattr("layoutdir", "ceph.dir.layout.pool",
- new_pool_name)
- self.mount_a.umount_wait()
-
- def tearDown(self):
- self.fs.mon_manager.raw_cluster_cmd_result(
- 'auth', 'caps', "client.{0}".format(self.mount_a.client_id),
- 'mds', 'allow', 'mon', 'allow r', 'osd',
- 'allow rw pool={0}'.format(self.fs.get_data_pool_names()[0]))
- super(TestPoolPerm, self).tearDown()
-
diff --git a/src/ceph/qa/tasks/cephfs/test_quota.py b/src/ceph/qa/tasks/cephfs/test_quota.py
deleted file mode 100644
index ee11c58..0000000
--- a/src/ceph/qa/tasks/cephfs/test_quota.py
+++ /dev/null
@@ -1,106 +0,0 @@
-
-from cephfs_test_case import CephFSTestCase
-
-from teuthology.exceptions import CommandFailedError
-
-class TestQuota(CephFSTestCase):
- CLIENTS_REQUIRED = 2
- MDSS_REQUIRED = 1
-
- def test_remote_update_getfattr(self):
- """
- That quota changes made from one client are visible to another
- client looking at ceph.quota xattrs
- """
- self.mount_a.run_shell(["mkdir", "subdir"])
-
- self.assertEqual(
- self.mount_a.getfattr("./subdir", "ceph.quota.max_files"),
- None)
- self.assertEqual(
- self.mount_b.getfattr("./subdir", "ceph.quota.max_files"),
- None)
-
- self.mount_a.setfattr("./subdir", "ceph.quota.max_files", "10")
- self.assertEqual(
- self.mount_a.getfattr("./subdir", "ceph.quota.max_files"),
- "10")
-
- # Should be visible as soon as setxattr operation completes on
- # mds (we get here sooner because setfattr gets an early reply)
- self.wait_until_equal(
- lambda: self.mount_b.getfattr("./subdir", "ceph.quota.max_files"),
- "10", timeout=10)
-
- def test_remote_update_df(self):
- """
- That when a client modifies the quota on a directory used
- as another client's root, the other client sees the change
- reflected in their statfs output.
- """
-
- self.mount_b.umount_wait()
-
- self.mount_a.run_shell(["mkdir", "subdir"])
-
- size_before = 1024 * 1024 * 128
- self.mount_a.setfattr("./subdir", "ceph.quota.max_bytes",
- "%s" % size_before)
-
- self.mount_b.mount(mount_path="/subdir")
-
- self.assertDictEqual(
- self.mount_b.df(),
- {
- "total": size_before,
- "used": 0,
- "available": size_before
- })
-
- size_after = 1024 * 1024 * 256
- self.mount_a.setfattr("./subdir", "ceph.quota.max_bytes",
- "%s" % size_after)
-
- # Should be visible as soon as setxattr operation completes on
- # mds (we get here sooner because setfattr gets an early reply)
- self.wait_until_equal(
- lambda: self.mount_b.df(),
- {
- "total": size_after,
- "used": 0,
- "available": size_after
- },
- timeout=10
- )
-
- def test_remote_update_write(self):
- """
- That when a client modifies the quota on a directory used
- as another client's root, the other client sees the effect
- of the change when writing data.
- """
-
- self.mount_a.run_shell(["mkdir", "subdir_files"])
- self.mount_a.run_shell(["mkdir", "subdir_data"])
-
- # Set some nice high quotas that mount_b's initial operations
- # will be well within
- self.mount_a.setfattr("./subdir_files", "ceph.quota.max_files", "100")
- self.mount_a.setfattr("./subdir_data", "ceph.quota.max_bytes", "104857600")
-
- # Do some writes within my quota
- self.mount_b.create_n_files("subdir_files/file", 20)
- self.mount_b.write_n_mb("subdir_data/file", 20)
-
- # Set quotas lower than what mount_b already wrote, it should
- # refuse to write more once it's seen them
- self.mount_a.setfattr("./subdir_files", "ceph.quota.max_files", "10")
- self.mount_a.setfattr("./subdir_data", "ceph.quota.max_bytes", "1048576")
-
- # Do some writes that would have been okay within the old quota,
- # but are forbidden under the new quota
- with self.assertRaises(CommandFailedError):
- self.mount_b.create_n_files("subdir_files/file", 40)
- with self.assertRaises(CommandFailedError):
- self.mount_b.write_n_mb("subdir_data/file", 40)
-
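Everything TestQuota exercises is driven by two virtual xattrs on a directory, ceph.quota.max_files and ceph.quota.max_bytes. A minimal sketch of setting and reading them directly on a mounted client, outside the test harness; the mount path is an assumption.

# Hedged sketch: set and read CephFS quota xattrs on a mounted directory,
# the same mechanism the tests above drive through setfattr/getfattr.
import subprocess


def set_quota(path, max_files=None, max_bytes=None):
    if max_files is not None:
        subprocess.check_call(["setfattr", "-n", "ceph.quota.max_files",
                               "-v", str(max_files), path])
    if max_bytes is not None:
        subprocess.check_call(["setfattr", "-n", "ceph.quota.max_bytes",
                               "-v", str(max_bytes), path])


def get_quota(path, name):
    # --only-values prints just the value; an unset quota makes getfattr
    # exit non-zero, which surfaces here as CalledProcessError.
    return subprocess.check_output(
        ["getfattr", "-n", name, "--only-values", path]).strip()


set_quota("/mnt/cephfs/subdir", max_files=10, max_bytes=128 * 1024 * 1024)
print(get_quota("/mnt/cephfs/subdir", "ceph.quota.max_files"))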
diff --git a/src/ceph/qa/tasks/cephfs/test_readahead.py b/src/ceph/qa/tasks/cephfs/test_readahead.py
deleted file mode 100644
index 31e7bf1..0000000
--- a/src/ceph/qa/tasks/cephfs/test_readahead.py
+++ /dev/null
@@ -1,31 +0,0 @@
-import logging
-from tasks.cephfs.fuse_mount import FuseMount
-from tasks.cephfs.cephfs_test_case import CephFSTestCase
-
-log = logging.getLogger(__name__)
-
-
-class TestReadahead(CephFSTestCase):
- def test_flush(self):
- if not isinstance(self.mount_a, FuseMount):
- self.skipTest("FUSE needed for measuring op counts")
-
- # Create 32MB file
- self.mount_a.run_shell(["dd", "if=/dev/urandom", "of=foo", "bs=1M", "count=32"])
-
- # Unmount and remount the client to flush cache
- self.mount_a.umount_wait()
- self.mount_a.mount()
- self.mount_a.wait_until_mounted()
-
- initial_op_r = self.mount_a.admin_socket(['perf', 'dump', 'objecter'])['objecter']['op_r']
- self.mount_a.run_shell(["dd", "if=foo", "of=/dev/null", "bs=128k", "count=32"])
- op_r = self.mount_a.admin_socket(['perf', 'dump', 'objecter'])['objecter']['op_r']
- assert op_r >= initial_op_r
- op_r -= initial_op_r
- log.info("read operations: {0}".format(op_r))
-
- # with exponentially increasing readahead, we should see fewer than 10 operations
- # but this test simply checks if the client is doing a remote read for each local read
- if op_r >= 32:
- raise RuntimeError("readahead not working")
diff --git a/src/ceph/qa/tasks/cephfs/test_recovery_pool.py b/src/ceph/qa/tasks/cephfs/test_recovery_pool.py
deleted file mode 100644
index 097342a..0000000
--- a/src/ceph/qa/tasks/cephfs/test_recovery_pool.py
+++ /dev/null
@@ -1,220 +0,0 @@
-
-"""
-Test our tools for recovering metadata from the data pool into an alternate pool
-"""
-import json
-
-import logging
-import os
-from textwrap import dedent
-import traceback
-from collections import namedtuple, defaultdict
-
-from teuthology.orchestra.run import CommandFailedError
-from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology
-
-log = logging.getLogger(__name__)
-
-
-ValidationError = namedtuple("ValidationError", ["exception", "backtrace"])
-
-
-class OverlayWorkload(object):
- def __init__(self, orig_fs, recovery_fs, orig_mount, recovery_mount):
- self._orig_fs = orig_fs
- self._recovery_fs = recovery_fs
- self._orig_mount = orig_mount
- self._recovery_mount = recovery_mount
- self._initial_state = None
-
- # Accumulate backtraces for every failed validation, and return them. Backtraces
- # are rather verbose, but we only see them when something breaks, and they
- # let us see which check failed without having to decorate each check with
- # a string
- self._errors = []
-
- def assert_equal(self, a, b):
- try:
- if a != b:
- raise AssertionError("{0} != {1}".format(a, b))
- except AssertionError as e:
- self._errors.append(
- ValidationError(e, traceback.format_exc(3))
- )
-
- def write(self):
- """
- Write the workload files to the mount
- """
- raise NotImplementedError()
-
- def validate(self):
- """
- Read from the mount and validate that the workload files are present (i.e. have
- survived or been reconstructed from the test scenario)
- """
- raise NotImplementedError()
-
- def damage(self):
- """
- Damage the filesystem pools in ways that will be interesting to recover from. By
- default just wipe everything in the metadata pool
- """
- # Delete every object in the metadata pool
- objects = self._orig_fs.rados(["ls"]).split("\n")
- for o in objects:
- self._orig_fs.rados(["rm", o])
-
- def flush(self):
- """
- Called after client unmount, after write: flush whatever you want
- """
- self._orig_fs.mds_asok(["flush", "journal"])
- self._recovery_fs.mds_asok(["flush", "journal"])
-
-
-class SimpleOverlayWorkload(OverlayWorkload):
- """
- Single file, single directory, check that it gets recovered and so does its size
- """
- def write(self):
- self._orig_mount.run_shell(["mkdir", "subdir"])
- self._orig_mount.write_n_mb("subdir/sixmegs", 6)
- self._initial_state = self._orig_mount.stat("subdir/sixmegs")
-
- def validate(self):
- self._recovery_mount.run_shell(["ls", "subdir"])
- st = self._recovery_mount.stat("subdir/sixmegs")
- self.assert_equal(st['st_size'], self._initial_state['st_size'])
- return self._errors
-
-class TestRecoveryPool(CephFSTestCase):
- MDSS_REQUIRED = 2
- CLIENTS_REQUIRED = 2
- REQUIRE_RECOVERY_FILESYSTEM = True
-
- def is_marked_damaged(self, rank):
- mds_map = self.fs.get_mds_map()
- return rank in mds_map['damaged']
-
- def _rebuild_metadata(self, workload, other_pool=None, workers=1):
- """
- That when all objects in the metadata pool are removed, we can rebuild a
- metadata pool based on the contents of the data pool, and a client can see
- and read our files.
- """
-
- # First, inject some files
-
- workload.write()
-
- # Unmount the client and flush the journal: the tool should also cope with
- # situations where there is dirty metadata, but we'll test that separately
- self.mount_a.umount_wait()
- self.mount_b.umount_wait()
- workload.flush()
-
- # Create the alternate pool if requested
- recovery_fs = self.recovery_fs.name
- recovery_pool = self.recovery_fs.get_metadata_pool_name()
- self.recovery_fs.data_scan(['init', '--force-init',
- '--filesystem', recovery_fs,
- '--alternate-pool', recovery_pool])
- self.recovery_fs.mon_manager.raw_cluster_cmd('-s')
- self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "session"])
- self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "snap"])
- self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "inode"])
-
- # Stop the MDS
- self.fs.mds_stop()
- self.fs.mds_fail()
-
- # After recovery, we need the MDS to not be strict about stats (in production these options
- # are off by default, but in QA we need to explicitly disable them)
- self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
- self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)
-
- # Apply any data damage the workload wants
- workload.damage()
-
- # Reset the MDS map in case multiple ranks were in play: recovery procedure
- # only understands how to rebuild metadata under rank 0
- self.fs.mon_manager.raw_cluster_cmd('fs', 'reset', self.fs.name,
- '--yes-i-really-mean-it')
-
- def get_state(mds_id):
- info = self.mds_cluster.get_mds_info(mds_id)
- return info['state'] if info is not None else None
-
- self.fs.table_tool([self.fs.name + ":0", "reset", "session"])
- self.fs.table_tool([self.fs.name + ":0", "reset", "snap"])
- self.fs.table_tool([self.fs.name + ":0", "reset", "inode"])
-
- # Run the recovery procedure
- if False:
- with self.assertRaises(CommandFailedError):
- # Normal reset should fail when no objects are present, we'll use --force instead
- self.fs.journal_tool(["journal", "reset"])
-
- self.fs.mds_stop()
- self.fs.data_scan(['scan_extents', '--alternate-pool',
- recovery_pool, '--filesystem', self.fs.name,
- self.fs.get_data_pool_name()])
- self.fs.data_scan(['scan_inodes', '--alternate-pool',
- recovery_pool, '--filesystem', self.fs.name,
- '--force-corrupt', '--force-init',
- self.fs.get_data_pool_name()])
- self.fs.journal_tool(['--rank=' + self.fs.name + ":0", 'event',
- 'recover_dentries', 'list',
- '--alternate-pool', recovery_pool])
-
- self.fs.data_scan(['init', '--force-init', '--filesystem',
- self.fs.name])
- self.fs.data_scan(['scan_inodes', '--filesystem', self.fs.name,
- '--force-corrupt', '--force-init',
- self.fs.get_data_pool_name()])
- self.fs.journal_tool(['--rank=' + self.fs.name + ":0", 'event',
- 'recover_dentries', 'list'])
-
- self.fs.journal_tool(['--rank=' + recovery_fs + ":0", 'journal',
- 'reset', '--force'])
- self.fs.journal_tool(['--rank=' + self.fs.name + ":0", 'journal',
- 'reset', '--force'])
- self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired',
- recovery_fs + ":0")
-
- # Mark the MDS repaired
- self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0')
-
- # Start the MDS
- self.fs.mds_restart()
- self.recovery_fs.mds_restart()
- self.fs.wait_for_daemons()
- self.recovery_fs.wait_for_daemons()
- for mds_id in self.recovery_fs.mds_ids:
- self.fs.mon_manager.raw_cluster_cmd('tell', "mds." + mds_id,
- 'injectargs', '--debug-mds=20')
- self.fs.mon_manager.raw_cluster_cmd('daemon', "mds." + mds_id,
- 'scrub_path', '/',
- 'recursive', 'repair')
- log.info(str(self.mds_cluster.status()))
-
- # Mount a client
- self.mount_a.mount()
- self.mount_b.mount(mount_fs_name=recovery_fs)
- self.mount_a.wait_until_mounted()
- self.mount_b.wait_until_mounted()
-
- # See that the files are present and correct
- errors = workload.validate()
- if errors:
- log.error("Validation errors found: {0}".format(len(errors)))
- for e in errors:
- log.error(e.exception)
- log.error(e.backtrace)
- raise AssertionError("Validation failed, first error: {0}\n{1}".format(
- errors[0].exception, errors[0].backtrace
- ))
-
- def test_rebuild_simple(self):
- self._rebuild_metadata(SimpleOverlayWorkload(self.fs, self.recovery_fs,
- self.mount_a, self.mount_b))
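The alternate-pool rebuild that _rebuild_metadata() drives through the harness can be condensed into a plain CLI sequence. The sketch below mirrors the cephfs-data-scan / cephfs-table-tool / cephfs-journal-tool calls made above; all of the names (filesystem, data pool, recovery filesystem and its metadata pool) are assumptions, and every MDS must already be stopped before running it.

# Hedged sketch of the alternate-pool rebuild from _rebuild_metadata(),
# expressed as direct CLI calls. Names are assumptions; MDSs must be stopped.
import subprocess


def run(*cmd):
    subprocess.check_call(cmd)


def rebuild(fs="cephfs", data_pool="cephfs_data",
            recovery_fs="recovery", recovery_pool="recovery_meta"):
    # Initialise the recovery filesystem's metadata and reset its tables.
    run("cephfs-data-scan", "init", "--force-init",
        "--filesystem", recovery_fs, "--alternate-pool", recovery_pool)
    for table in ("session", "snap", "inode"):
        run("cephfs-table-tool", recovery_fs + ":0", "reset", table)
    # Rebuild metadata into the alternate pool from the original data pool.
    run("cephfs-data-scan", "scan_extents", "--alternate-pool", recovery_pool,
        "--filesystem", fs, data_pool)
    run("cephfs-data-scan", "scan_inodes", "--alternate-pool", recovery_pool,
        "--filesystem", fs, "--force-corrupt", "--force-init", data_pool)
    run("cephfs-journal-tool", "--rank=" + fs + ":0", "event",
        "recover_dentries", "list", "--alternate-pool", recovery_pool)
    # Give the recovery filesystem a fresh journal and mark rank 0 repaired.
    run("cephfs-journal-tool", "--rank=" + recovery_fs + ":0",
        "journal", "reset", "--force")
    run("ceph", "mds", "repaired", recovery_fs + ":0")


rebuild()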
diff --git a/src/ceph/qa/tasks/cephfs/test_scrub_checks.py b/src/ceph/qa/tasks/cephfs/test_scrub_checks.py
deleted file mode 100644
index a2de527..0000000
--- a/src/ceph/qa/tasks/cephfs/test_scrub_checks.py
+++ /dev/null
@@ -1,245 +0,0 @@
-"""
-MDS admin socket scrubbing-related tests.
-"""
-import json
-import logging
-import errno
-import time
-from teuthology.exceptions import CommandFailedError
-import os
-from tasks.cephfs.cephfs_test_case import CephFSTestCase
-
-log = logging.getLogger(__name__)
-
-
-class TestScrubChecks(CephFSTestCase):
- """
- Run flush and scrub commands on the specified files in the filesystem. This
- task will run through a sequence of operations, but it is not comprehensive
- on its own -- it doesn't manipulate the mds cache state to test on both
- in- and out-of-memory parts of the hierarchy. So it's designed to be run
- multiple times within a single test run, so that the test can manipulate
- memory state.
-
- Usage:
- mds_scrub_checks:
- mds_rank: 0
- path: path/to/test/dir
- client: 0
- run_seq: [0-9]+
-
- Increment the run_seq on subsequent invocations within a single test run;
- it uses that value to generate unique folder and file names.
- """
-
- MDSS_REQUIRED = 1
- CLIENTS_REQUIRED = 1
-
- def test_scrub_checks(self):
- self._checks(0)
- self._checks(1)
-
- def _checks(self, run_seq):
- mds_rank = 0
- test_dir = "scrub_test_path"
-
- abs_test_path = "/{0}".format(test_dir)
-
- log.info("mountpoint: {0}".format(self.mount_a.mountpoint))
- client_path = os.path.join(self.mount_a.mountpoint, test_dir)
- log.info("client_path: {0}".format(client_path))
-
- log.info("Cloning repo into place")
- repo_path = self.clone_repo(self.mount_a, client_path)
-
- log.info("Initiating mds_scrub_checks on mds.{id_}, "
- "test_path {path}, run_seq {seq}".format(
- id_=mds_rank, path=abs_test_path, seq=run_seq)
- )
-
-
- success_validator = lambda j, r: self.json_validator(j, r, "return_code", 0)
-
- nep = "{test_path}/i/dont/exist".format(test_path=abs_test_path)
- self.asok_command(mds_rank, "flush_path {nep}".format(nep=nep),
- lambda j, r: self.json_validator(j, r, "return_code", -errno.ENOENT))
- self.asok_command(mds_rank, "scrub_path {nep}".format(nep=nep),
- lambda j, r: self.json_validator(j, r, "return_code", -errno.ENOENT))
-
- test_repo_path = "{test_path}/ceph-qa-suite".format(test_path=abs_test_path)
- dirpath = "{repo_path}/suites".format(repo_path=test_repo_path)
-
- if run_seq == 0:
- log.info("First run: flushing {dirpath}".format(dirpath=dirpath))
- command = "flush_path {dirpath}".format(dirpath=dirpath)
- self.asok_command(mds_rank, command, success_validator)
- command = "scrub_path {dirpath}".format(dirpath=dirpath)
- self.asok_command(mds_rank, command, success_validator)
-
- filepath = "{repo_path}/suites/fs/verify/validater/valgrind.yaml".format(
- repo_path=test_repo_path)
- if run_seq == 0:
- log.info("First run: flushing {filepath}".format(filepath=filepath))
- command = "flush_path {filepath}".format(filepath=filepath)
- self.asok_command(mds_rank, command, success_validator)
- command = "scrub_path {filepath}".format(filepath=filepath)
- self.asok_command(mds_rank, command, success_validator)
-
- filepath = "{repo_path}/suites/fs/basic/clusters/fixed-3-cephfs.yaml". \
- format(repo_path=test_repo_path)
- command = "scrub_path {filepath}".format(filepath=filepath)
- self.asok_command(mds_rank, command,
- lambda j, r: self.json_validator(j, r, "performed_validation",
- False))
-
- if run_seq == 0:
- log.info("First run: flushing base dir /")
- command = "flush_path /"
- self.asok_command(mds_rank, command, success_validator)
- command = "scrub_path /"
- self.asok_command(mds_rank, command, success_validator)
-
- new_dir = "{repo_path}/new_dir_{i}".format(repo_path=repo_path, i=run_seq)
- test_new_dir = "{repo_path}/new_dir_{i}".format(repo_path=test_repo_path,
- i=run_seq)
- self.mount_a.run_shell(["mkdir", new_dir])
- command = "flush_path {dir}".format(dir=test_new_dir)
- self.asok_command(mds_rank, command, success_validator)
-
- new_file = "{repo_path}/new_file_{i}".format(repo_path=repo_path,
- i=run_seq)
- test_new_file = "{repo_path}/new_file_{i}".format(repo_path=test_repo_path,
- i=run_seq)
- self.mount_a.write_n_mb(new_file, 1)
-
- command = "flush_path {file}".format(file=test_new_file)
- self.asok_command(mds_rank, command, success_validator)
-
- # check that scrub fails on errors
- ino = self.mount_a.path_to_ino(new_file)
- rados_obj_name = "{ino:x}.00000000".format(ino=ino)
- command = "scrub_path {file}".format(file=test_new_file)
-
- # Missing parent xattr -> ENODATA
- self.fs.rados(["rmxattr", rados_obj_name, "parent"], pool=self.fs.get_data_pool_name())
- self.asok_command(mds_rank, command,
- lambda j, r: self.json_validator(j, r, "return_code", -errno.ENODATA))
-
- # Missing object -> ENOENT
- self.fs.rados(["rm", rados_obj_name], pool=self.fs.get_data_pool_name())
- self.asok_command(mds_rank, command,
- lambda j, r: self.json_validator(j, r, "return_code", -errno.ENOENT))
-
- command = "flush_path /"
- self.asok_command(mds_rank, command, success_validator)
-
- def test_scrub_repair(self):
- mds_rank = 0
- test_dir = "scrub_repair_path"
-
- self.mount_a.run_shell(["sudo", "mkdir", test_dir])
- self.mount_a.run_shell(["sudo", "touch", "{0}/file".format(test_dir)])
- dir_objname = "{:x}.00000000".format(self.mount_a.path_to_ino(test_dir))
-
- self.mount_a.umount_wait()
-
- # flush journal entries to dirfrag objects, and expire journal
- self.fs.mds_asok(['flush', 'journal'])
- self.fs.mds_stop()
-
- # remove the dentry from dirfrag, cause incorrect fragstat/rstat
- self.fs.rados(["rmomapkey", dir_objname, "file_head"],
- pool=self.fs.get_metadata_pool_name())
-
- self.fs.mds_fail_restart()
- self.fs.wait_for_daemons()
-
- self.mount_a.mount()
- self.mount_a.wait_until_mounted()
-
- # fragstat indicates the directory is not empty, rmdir should fail
- with self.assertRaises(CommandFailedError) as ar:
- self.mount_a.run_shell(["sudo", "rmdir", test_dir])
- self.assertEqual(ar.exception.exitstatus, 1)
-
- self.asok_command(mds_rank, "scrub_path /{0} repair".format(test_dir),
- lambda j, r: self.json_validator(j, r, "return_code", 0))
-
- # wait a few seconds for the background repair
- time.sleep(10)
-
- # fragstat should be fixed
- self.mount_a.run_shell(["sudo", "rmdir", test_dir])
-
- @staticmethod
- def json_validator(json_out, rc, element, expected_value):
- if rc != 0:
- return False, "asok command returned error {rc}".format(rc=rc)
- element_value = json_out.get(element)
- if element_value != expected_value:
- return False, "unexpectedly got {jv} instead of {ev}!".format(
- jv=element_value, ev=expected_value)
- return True, "Succeeded"
-
- def asok_command(self, mds_rank, command, validator):
- log.info("Running command '{command}'".format(command=command))
-
- command_list = command.split()
-
- # we just assume there's an active mds for every rank
- mds_id = self.fs.get_active_names()[mds_rank]
- proc = self.fs.mon_manager.admin_socket('mds', mds_id,
- command_list, check_status=False)
- rout = proc.exitstatus
- sout = proc.stdout.getvalue()
-
- if sout.strip():
- jout = json.loads(sout)
- else:
- jout = None
-
- log.info("command '{command}' got response code "
- "'{rout}' and stdout '{sout}'".format(
- command=command, rout=rout, sout=sout))
-
- success, errstring = validator(jout, rout)
-
- if not success:
- raise AsokCommandFailedError(command, rout, jout, errstring)
-
- return jout
-
- def clone_repo(self, client_mount, path):
- repo = "ceph-qa-suite"
- repo_path = os.path.join(path, repo)
- client_mount.run_shell(["mkdir", "-p", path])
-
- try:
- client_mount.stat(repo_path)
- except CommandFailedError:
- client_mount.run_shell([
- "git", "clone", '--branch', 'giant',
- "http://github.com/ceph/{repo}".format(repo=repo),
- "{path}/{repo}".format(path=path, repo=repo)
- ])
-
- return repo_path
-
-
-class AsokCommandFailedError(Exception):
- """
- Exception thrown when we get an unexpected response
- on an admin socket command
- """
-
- def __init__(self, command, rc, json_out, errstring):
- self.command = command
- self.rc = rc
- self.json = json_out
- self.errstring = errstring
-
- def __str__(self):
- return "Admin socket: {command} failed with rc={rc}, " \
- "json output={json}, because '{es}'".format(
- command=self.command, rc=self.rc,
- json=self.json, es=self.errstring)
diff --git a/src/ceph/qa/tasks/cephfs/test_sessionmap.py b/src/ceph/qa/tasks/cephfs/test_sessionmap.py
deleted file mode 100644
index 9d12ab6..0000000
--- a/src/ceph/qa/tasks/cephfs/test_sessionmap.py
+++ /dev/null
@@ -1,235 +0,0 @@
-from StringIO import StringIO
-import json
-import logging
-from unittest import SkipTest
-
-from tasks.cephfs.fuse_mount import FuseMount
-from teuthology.exceptions import CommandFailedError
-from tasks.cephfs.cephfs_test_case import CephFSTestCase
-
-log = logging.getLogger(__name__)
-
-
-class TestSessionMap(CephFSTestCase):
- CLIENTS_REQUIRED = 2
- MDSS_REQUIRED = 2
-
- def test_tell_session_drop(self):
- """
- That when a `tell` command is sent using the python CLI,
- its MDS session is gone after it terminates
- """
- self.mount_a.umount_wait()
- self.mount_b.umount_wait()
-
- mds_id = self.fs.get_lone_mds_id()
- self.fs.mon_manager.raw_cluster_cmd("tell", "mds.{0}".format(mds_id), "session", "ls")
-
- ls_data = self.fs.mds_asok(['session', 'ls'])
- self.assertEqual(len(ls_data), 0)
-
- def _get_thread_count(self, mds_id):
- remote = self.fs.mds_daemons[mds_id].remote
-
- ps_txt = remote.run(
- args=["ps", "-ww", "axo", "nlwp,cmd"],
- stdout=StringIO()
- ).stdout.getvalue().strip()
- lines = ps_txt.split("\n")[1:]
-
- for line in lines:
- if "ceph-mds" in line and not "daemon-helper" in line:
- if line.find("-i {0}".format(mds_id)) != -1:
- log.info("Found ps line for daemon: {0}".format(line))
- return int(line.split()[0])
-
- raise RuntimeError("No process found in ps output for MDS {0}: {1}".format(
- mds_id, ps_txt
- ))
-
- def test_tell_conn_close(self):
- """
- That when a `tell` command is sent using the python CLI,
- the thread count goes back to where it started (i.e. we aren't
- leaving connections open)
- """
- self.mount_a.umount_wait()
- self.mount_b.umount_wait()
-
- mds_id = self.fs.get_lone_mds_id()
-
- initial_thread_count = self._get_thread_count(mds_id)
- self.fs.mon_manager.raw_cluster_cmd("tell", "mds.{0}".format(mds_id), "session", "ls")
- final_thread_count = self._get_thread_count(mds_id)
-
- self.assertEqual(initial_thread_count, final_thread_count)
-
- def test_mount_conn_close(self):
- """
- That when a client unmounts, the thread count on the MDS goes back
- to what it was before the client mounted
- """
- self.mount_a.umount_wait()
- self.mount_b.umount_wait()
-
- mds_id = self.fs.get_lone_mds_id()
-
- initial_thread_count = self._get_thread_count(mds_id)
- self.mount_a.mount()
- self.mount_a.wait_until_mounted()
- self.assertGreater(self._get_thread_count(mds_id), initial_thread_count)
- self.mount_a.umount_wait()
- final_thread_count = self._get_thread_count(mds_id)
-
- self.assertEqual(initial_thread_count, final_thread_count)
-
- def test_version_splitting(self):
- """
- That when many sessions are updated, they are correctly
- split into multiple versions to obey mds_sessionmap_keys_per_op
- """
-
- # Start umounted
- self.mount_a.umount_wait()
- self.mount_b.umount_wait()
-
- # Configure MDS to write one OMAP key at once
- self.set_conf('mds', 'mds_sessionmap_keys_per_op', 1)
- self.fs.mds_fail_restart()
- self.fs.wait_for_daemons()
-
- # I would like two MDSs, so that I can do an export dir later
- self.fs.set_max_mds(2)
- self.fs.wait_for_daemons()
-
- active_mds_names = self.fs.get_active_names()
- rank_0_id = active_mds_names[0]
- rank_1_id = active_mds_names[1]
- log.info("Ranks 0 and 1 are {0} and {1}".format(
- rank_0_id, rank_1_id))
-
- # Bring the clients back
- self.mount_a.mount()
- self.mount_b.mount()
- self.mount_a.create_files() # Kick the client into opening sessions
- self.mount_b.create_files()
-
- # See that they've got sessions
- self.assert_session_count(2, mds_id=rank_0_id)
-
- # See that we persist their sessions
- self.fs.mds_asok(["flush", "journal"], rank_0_id)
- table_json = json.loads(self.fs.table_tool(["0", "show", "session"]))
- log.info("SessionMap: {0}".format(json.dumps(table_json, indent=2)))
- self.assertEqual(table_json['0']['result'], 0)
- self.assertEqual(len(table_json['0']['data']['Sessions']), 2)
-
- # Now, induce a "force_open_sessions" event by exporting a dir
- self.mount_a.run_shell(["mkdir", "bravo"])
- self.mount_a.run_shell(["touch", "bravo/file"])
- self.mount_b.run_shell(["ls", "-l", "bravo/file"])
-
- def get_omap_wrs():
- return self.fs.mds_asok(['perf', 'dump', 'objecter'], rank_1_id)['objecter']['omap_wr']
-
- # Flush so that there are no dirty sessions on rank 1
- self.fs.mds_asok(["flush", "journal"], rank_1_id)
-
- # Export so that we get a force_open to rank 1 for the two sessions from rank 0
- initial_omap_wrs = get_omap_wrs()
- self.fs.mds_asok(['export', 'dir', '/bravo', '1'], rank_0_id)
-
- # This is the critical (if rather subtle) check: that in the process of doing an export dir,
- # we hit force_open_sessions, and as a result we end up writing out the sessionmap. There
- # will be two sessions dirtied here, and because we have set keys_per_op to 1, we should see
- # a single session get written out (the first of the two, triggered by the second getting marked
- # dirty)
- # The number of writes is two per session, because the header (sessionmap version) update and
- # KV write both count.
- self.wait_until_true(
- lambda: get_omap_wrs() - initial_omap_wrs == 2,
- timeout=10 # Long enough for an export to get acked
- )
-
- # Now end our sessions and check the backing sessionmap is updated correctly
- self.mount_a.umount_wait()
- self.mount_b.umount_wait()
-
- # In-memory sessionmap check
- self.assert_session_count(0, mds_id=rank_0_id)
-
- # On-disk sessionmap check
- self.fs.mds_asok(["flush", "journal"], rank_0_id)
- table_json = json.loads(self.fs.table_tool(["0", "show", "session"]))
- log.info("SessionMap: {0}".format(json.dumps(table_json, indent=2)))
- self.assertEqual(table_json['0']['result'], 0)
- self.assertEqual(len(table_json['0']['data']['Sessions']), 0)
-
- def _sudo_write_file(self, remote, path, data):
- """
- Write data to a remote file as super user
-
- :param remote: Remote site.
- :param path: Path on the remote being written to.
- :param data: Data to be written.
- """
- remote.run(
- args=[
- 'sudo',
- 'python',
- '-c',
- 'import shutil, sys; shutil.copyfileobj(sys.stdin, file(sys.argv[1], "wb"))',
- path,
- ],
- stdin=data,
- )
-
- def _configure_auth(self, mount, id_name, mds_caps, osd_caps=None, mon_caps=None):
- """
- Set up auth credentials for a client mount, and write out the keyring
- for the client to use.
- """
-
- if osd_caps is None:
- osd_caps = "allow rw"
-
- if mon_caps is None:
- mon_caps = "allow r"
-
- out = self.fs.mon_manager.raw_cluster_cmd(
- "auth", "get-or-create", "client.{name}".format(name=id_name),
- "mds", mds_caps,
- "osd", osd_caps,
- "mon", mon_caps
- )
- mount.client_id = id_name
- self._sudo_write_file(mount.client_remote, mount.get_keyring_path(), out)
- self.set_conf("client.{name}".format(name=id_name), "keyring", mount.get_keyring_path())
-
- def test_session_reject(self):
- if not isinstance(self.mount_a, FuseMount):
- raise SkipTest("Requires FUSE client to inject client metadata")
-
- self.mount_a.run_shell(["mkdir", "foo"])
- self.mount_a.run_shell(["mkdir", "foo/bar"])
- self.mount_a.umount_wait()
-
- # Mount B will be my rejected client
- self.mount_b.umount_wait()
-
- # Configure a client that is limited to /foo/bar
- self._configure_auth(self.mount_b, "badguy", "allow rw path=/foo/bar")
- # Check it can mount that dir and do IO
- self.mount_b.mount(mount_path="/foo/bar")
- self.mount_b.wait_until_mounted()
- self.mount_b.create_destroy()
- self.mount_b.umount_wait()
-
- # Configure the client to claim that its mount point metadata is /baz
- self.set_conf("client.badguy", "client_metadata", "root=/baz")
- # Try to mount the client, see that it fails
- with self.assert_cluster_log("client session with invalid root '/baz' denied"):
- with self.assertRaises(CommandFailedError):
- self.mount_b.mount(mount_path="/foo/bar")
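The "badguy" client that test_session_reject builds via _configure_auth() is just a cephx identity whose MDS capability carries a path= restriction. A minimal sketch of creating such an identity and mounting it at its permitted subtree with ceph-fuse; the client name, keyring location, and mountpoint are assumptions.

# Hedged sketch: create a cephx client confined to /foo/bar and mount that
# subtree with ceph-fuse, mirroring _configure_auth()/test_session_reject.
import subprocess


def make_restricted_client(name="badguy", path="/foo/bar",
                           keyring="/etc/ceph/ceph.client.badguy.keyring"):
    key = subprocess.check_output([
        "ceph", "auth", "get-or-create", "client." + name,
        "mds", "allow rw path=" + path,
        "osd", "allow rw",
        "mon", "allow r",
    ])
    with open(keyring, "w") as f:
        f.write(key)
    return keyring


keyring_path = make_restricted_client()
# Mount only the permitted subtree; trying to mount "/" with this identity
# would be rejected by the MDS, as the test above demonstrates.
subprocess.check_call(["ceph-fuse", "-n", "client.badguy",
                       "--keyring", keyring_path,
                       "-r", "/foo/bar", "/mnt/badguy"])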
diff --git a/src/ceph/qa/tasks/cephfs/test_strays.py b/src/ceph/qa/tasks/cephfs/test_strays.py
deleted file mode 100644
index b64f3e9..0000000
--- a/src/ceph/qa/tasks/cephfs/test_strays.py
+++ /dev/null
@@ -1,1049 +0,0 @@
-import json
-import time
-import logging
-from textwrap import dedent
-import datetime
-import gevent
-
-from teuthology.orchestra.run import CommandFailedError, Raw
-from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology
-
-log = logging.getLogger(__name__)
-
-
-class TestStrays(CephFSTestCase):
- MDSS_REQUIRED = 2
-
- OPS_THROTTLE = 1
- FILES_THROTTLE = 2
-
- # Range of different file sizes used in throttle test's workload
- throttle_workload_size_range = 16
-
- @for_teuthology
- def test_ops_throttle(self):
- self._test_throttling(self.OPS_THROTTLE)
-
- @for_teuthology
- def test_files_throttle(self):
- self._test_throttling(self.FILES_THROTTLE)
-
- def test_dir_deletion(self):
- """
- That when deleting a bunch of dentries and the containing
- directory, everything gets purged.
- Catches cases where the client might e.g. fail to trim
- the unlinked dir from its cache.
- """
- file_count = 1000
- create_script = dedent("""
- import os
-
- mount_path = "{mount_path}"
- subdir = "delete_me"
- size = {size}
- file_count = {file_count}
- os.mkdir(os.path.join(mount_path, subdir))
- for i in xrange(0, file_count):
- filename = "{{0}}_{{1}}.bin".format(i, size)
- f = open(os.path.join(mount_path, subdir, filename), 'w')
- f.write(size * 'x')
- f.close()
- """.format(
- mount_path=self.mount_a.mountpoint,
- size=1024,
- file_count=file_count
- ))
-
- self.mount_a.run_python(create_script)
-
- # That the dirfrag object is created
- self.fs.mds_asok(["flush", "journal"])
- dir_ino = self.mount_a.path_to_ino("delete_me")
- self.assertTrue(self.fs.dirfrag_exists(dir_ino, 0))
-
- # Remove everything
- self.mount_a.run_shell(["rm", "-rf", "delete_me"])
- self.fs.mds_asok(["flush", "journal"])
-
- # That all the removed files get created as strays
- strays = self.get_mdc_stat("strays_created")
- self.assertEqual(strays, file_count + 1)
-
- # That the strays all get enqueued for purge
- self.wait_until_equal(
- lambda: self.get_mdc_stat("strays_enqueued"),
- strays,
- timeout=600
-
- )
-
- # That all the purge operations execute
- self.wait_until_equal(
- lambda: self.get_stat("purge_queue", "pq_executed"),
- strays,
- timeout=600
- )
-
- # That finally, the directory metadata object is gone
- self.assertFalse(self.fs.dirfrag_exists(dir_ino, 0))
-
- # That finally, the data objects are all gone
- self.await_data_pool_empty()
-
- def _test_throttling(self, throttle_type):
- self.data_log = []
- try:
- return self._do_test_throttling(throttle_type)
- except:
- for l in self.data_log:
- log.info(",".join([l_.__str__() for l_ in l]))
- raise
-
- def _do_test_throttling(self, throttle_type):
- """
- That the mds_max_purge_ops setting is respected
- """
-
- def set_throttles(files, ops):
- """
- Helper for updating ops/files limits, and calculating effective
- ops_per_pg setting to give the same ops limit.
- """
- self.set_conf('mds', 'mds_max_purge_files', "%d" % files)
- self.set_conf('mds', 'mds_max_purge_ops', "%d" % ops)
-
- pgs = self.fs.mon_manager.get_pool_property(
- self.fs.get_data_pool_name(),
- "pg_num"
- )
- ops_per_pg = float(ops) / pgs
- self.set_conf('mds', 'mds_max_purge_ops_per_pg', "%s" % ops_per_pg)
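-            # Illustrative arithmetic (the pool size here is hypothetical, not
-            # read from the cluster): with ops=16 and a data pool of pg_num=64,
-            # ops_per_pg is set to 16/64 = 0.25, so the per-PG setting still
-            # corresponds to an aggregate limit of roughly 16 purge ops.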
-
- # Test conditions depend on what we're going to be exercising.
- # * Lift the threshold on whatever throttle we are *not* testing, so
- # that the throttle of interest is the one that will be the bottleneck
- # * Create either many small files (test file count throttling) or fewer
- # large files (test op throttling)
- if throttle_type == self.OPS_THROTTLE:
- set_throttles(files=100000000, ops=16)
- size_unit = 1024 * 1024 # big files, generate lots of ops
- file_multiplier = 100
- elif throttle_type == self.FILES_THROTTLE:
- # The default value of file limit is pretty permissive, so to avoid
- # the test running too fast, create lots of files and set the limit
- # pretty low.
- set_throttles(ops=100000000, files=6)
- size_unit = 1024 # small, numerous files
- file_multiplier = 200
- else:
-            raise NotImplementedError(throttle_type)
-
- # Pick up config changes
- self.fs.mds_fail_restart()
- self.fs.wait_for_daemons()
-
- create_script = dedent("""
- import os
-
- mount_path = "{mount_path}"
- subdir = "delete_me"
- size_unit = {size_unit}
- file_multiplier = {file_multiplier}
- os.mkdir(os.path.join(mount_path, subdir))
- for i in xrange(0, file_multiplier):
- for size in xrange(0, {size_range}*size_unit, size_unit):
- filename = "{{0}}_{{1}}.bin".format(i, size / size_unit)
- f = open(os.path.join(mount_path, subdir, filename), 'w')
- f.write(size * 'x')
- f.close()
- """.format(
- mount_path=self.mount_a.mountpoint,
- size_unit=size_unit,
- file_multiplier=file_multiplier,
- size_range=self.throttle_workload_size_range
- ))
-
- self.mount_a.run_python(create_script)
-
- # We will run the deletion in the background, to reduce the risk of it completing before
- # we have started monitoring the stray statistics.
- def background():
- self.mount_a.run_shell(["rm", "-rf", "delete_me"])
- self.fs.mds_asok(["flush", "journal"])
-
- background_thread = gevent.spawn(background)
-
- total_inodes = file_multiplier * self.throttle_workload_size_range + 1
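-        # For example, in the OPS_THROTTLE case above this works out to
-        # 100 * 16 + 1 = 1601 inodes; the +1 accounts for the "delete_me"
-        # directory itself, which also becomes a stray when removed.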
- mds_max_purge_ops = int(self.fs.get_config("mds_max_purge_ops", 'mds'))
- mds_max_purge_files = int(self.fs.get_config("mds_max_purge_files", 'mds'))
-
- # During this phase we look for the concurrent ops to exceed half
- # the limit (a heuristic) and not exceed the limit (a correctness
- # condition).
- purge_timeout = 600
- elapsed = 0
- files_high_water = 0
- ops_high_water = 0
-
- while True:
- stats = self.fs.mds_asok(['perf', 'dump'])
- mdc_stats = stats['mds_cache']
- pq_stats = stats['purge_queue']
- if elapsed >= purge_timeout:
- raise RuntimeError("Timeout waiting for {0} inodes to purge, stats:{1}".format(total_inodes, mdc_stats))
-
- num_strays = mdc_stats['num_strays']
- num_strays_purging = pq_stats['pq_executing']
- num_purge_ops = pq_stats['pq_executing_ops']
-
- self.data_log.append([datetime.datetime.now(), num_strays, num_strays_purging, num_purge_ops])
-
- files_high_water = max(files_high_water, num_strays_purging)
- ops_high_water = max(ops_high_water, num_purge_ops)
-
- total_strays_created = mdc_stats['strays_created']
- total_strays_purged = pq_stats['pq_executed']
-
- if total_strays_purged == total_inodes:
- log.info("Complete purge in {0} seconds".format(elapsed))
- break
- elif total_strays_purged > total_inodes:
- raise RuntimeError("Saw more strays than expected, mdc stats: {0}".format(mdc_stats))
- else:
- if throttle_type == self.OPS_THROTTLE:
- # 11 is filer_max_purge_ops plus one for the backtrace:
- # limit is allowed to be overshot by this much.
- if num_purge_ops > mds_max_purge_ops + 11:
- raise RuntimeError("num_purge_ops violates threshold {0}/{1}".format(
- num_purge_ops, mds_max_purge_ops
- ))
- elif throttle_type == self.FILES_THROTTLE:
- if num_strays_purging > mds_max_purge_files:
- raise RuntimeError("num_strays_purging violates threshold {0}/{1}".format(
- num_strays_purging, mds_max_purge_files
- ))
- else:
-                    raise NotImplementedError(throttle_type)
-
- log.info("Waiting for purge to complete {0}/{1}, {2}/{3}".format(
- num_strays_purging, num_strays,
- total_strays_purged, total_strays_created
- ))
- time.sleep(1)
- elapsed += 1
-
- background_thread.join()
-
- # Check that we got up to a respectable rate during the purge. This is totally
- # racy, but should be safeish unless the cluster is pathologically slow, or
- # insanely fast such that the deletions all pass before we have polled the
- # statistics.
- if throttle_type == self.OPS_THROTTLE:
- if ops_high_water < mds_max_purge_ops / 2:
- raise RuntimeError("Ops in flight high water is unexpectedly low ({0} / {1})".format(
- ops_high_water, mds_max_purge_ops
- ))
- elif throttle_type == self.FILES_THROTTLE:
- if files_high_water < mds_max_purge_files / 2:
- raise RuntimeError("Files in flight high water is unexpectedly low ({0} / {1})".format(
-                    files_high_water, mds_max_purge_files
- ))
-
- # Sanity check all MDC stray stats
- stats = self.fs.mds_asok(['perf', 'dump'])
- mdc_stats = stats['mds_cache']
- pq_stats = stats['purge_queue']
- self.assertEqual(mdc_stats['num_strays'], 0)
- self.assertEqual(mdc_stats['num_strays_delayed'], 0)
- self.assertEqual(pq_stats['pq_executing'], 0)
- self.assertEqual(pq_stats['pq_executing_ops'], 0)
- self.assertEqual(mdc_stats['strays_created'], total_inodes)
- self.assertEqual(mdc_stats['strays_enqueued'], total_inodes)
- self.assertEqual(pq_stats['pq_executed'], total_inodes)
-
- def get_mdc_stat(self, name, mds_id=None):
- return self.get_stat("mds_cache", name, mds_id)
-
- def get_stat(self, subsys, name, mds_id=None):
- return self.fs.mds_asok(['perf', 'dump', subsys, name],
- mds_id=mds_id)[subsys][name]
-
- def _wait_for_counter(self, subsys, counter, expect_val, timeout=60,
- mds_id=None):
- self.wait_until_equal(
- lambda: self.get_stat(subsys, counter, mds_id),
- expect_val=expect_val, timeout=timeout,
- reject_fn=lambda x: x > expect_val
- )
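-        # A minimal usage sketch (the concrete numbers are illustrative, not
-        # taken from any particular test):
-        #   self._wait_for_counter("purge_queue", "pq_executed", 10, timeout=120)
-        # polls the pq_executed perf counter via "perf dump" until it reaches 10,
-        # failing early if the counter ever overshoots the expected value.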
-
- def test_open_inode(self):
- """
- That the case of a dentry unlinked while a client holds an
- inode open is handled correctly.
-
- The inode should be moved into a stray dentry, while the original
- dentry and directory should be purged.
-
- The inode's data should be purged when the client eventually closes
- it.
- """
- mount_a_client_id = self.mount_a.get_global_id()
-
- # Write some bytes to a file
- size_mb = 8
-
- # Hold the file open
- p = self.mount_a.open_background("open_file")
- self.mount_a.write_n_mb("open_file", size_mb)
- open_file_ino = self.mount_a.path_to_ino("open_file")
-
- self.assertEqual(self.get_session(mount_a_client_id)['num_caps'], 2)
-
- # Unlink the dentry
- self.mount_a.run_shell(["rm", "-f", "open_file"])
-
- # Wait to see the stray count increment
- self.wait_until_equal(
- lambda: self.get_mdc_stat("num_strays"),
- expect_val=1, timeout=60, reject_fn=lambda x: x > 1)
-
- # See that while the stray count has incremented, none have passed
- # on to the purge queue
- self.assertEqual(self.get_mdc_stat("strays_created"), 1)
- self.assertEqual(self.get_mdc_stat("strays_enqueued"), 0)
-
- # See that the client still holds 2 caps
- self.assertEqual(self.get_session(mount_a_client_id)['num_caps'], 2)
-
- # See that the data objects remain in the data pool
- self.assertTrue(self.fs.data_objects_present(open_file_ino, size_mb * 1024 * 1024))
-
- # Now close the file
- self.mount_a.kill_background(p)
-
- # Wait to see the client cap count decrement
- self.wait_until_equal(
- lambda: self.get_session(mount_a_client_id)['num_caps'],
- expect_val=1, timeout=60, reject_fn=lambda x: x > 2 or x < 1
- )
- # Wait to see the purge counter increment, stray count go to zero
- self._wait_for_counter("mds_cache", "strays_enqueued", 1)
- self.wait_until_equal(
- lambda: self.get_mdc_stat("num_strays"),
- expect_val=0, timeout=6, reject_fn=lambda x: x > 1
- )
- self._wait_for_counter("purge_queue", "pq_executed", 1)
-
- # See that the data objects no longer exist
- self.assertTrue(self.fs.data_objects_absent(open_file_ino, size_mb * 1024 * 1024))
-
- self.await_data_pool_empty()
-
- def test_hardlink_reintegration(self):
- """
-        That removal of the primary dentry of a hardlinked inode results
-        in reintegration of the inode into the previously-remote dentry,
-        rather than the inode lingering as a stray indefinitely.
- """
- # Write some bytes to file_a
- size_mb = 8
- self.mount_a.run_shell(["mkdir", "dir_1"])
- self.mount_a.write_n_mb("dir_1/file_a", size_mb)
- ino = self.mount_a.path_to_ino("dir_1/file_a")
-
- # Create a hardlink named file_b
- self.mount_a.run_shell(["mkdir", "dir_2"])
- self.mount_a.run_shell(["ln", "dir_1/file_a", "dir_2/file_b"])
- self.assertEqual(self.mount_a.path_to_ino("dir_2/file_b"), ino)
-
- # Flush journal
- self.fs.mds_asok(['flush', 'journal'])
-
- # See that backtrace for the file points to the file_a path
- pre_unlink_bt = self.fs.read_backtrace(ino)
- self.assertEqual(pre_unlink_bt['ancestors'][0]['dname'], "file_a")
-
-        # Empty the MDS cache; otherwise the MDS reintegrates the stray as soon as the unlink finishes
- self.mount_a.umount_wait()
- self.fs.mds_asok(['flush', 'journal'])
- self.fs.mds_fail_restart()
- self.fs.wait_for_daemons()
- self.mount_a.mount()
-
- # Unlink file_a
- self.mount_a.run_shell(["rm", "-f", "dir_1/file_a"])
-
- # See that a stray was created
- self.assertEqual(self.get_mdc_stat("num_strays"), 1)
- self.assertEqual(self.get_mdc_stat("strays_created"), 1)
-
- # Wait, see that data objects are still present (i.e. that the
- # stray did not advance to purging given time)
- time.sleep(30)
- self.assertTrue(self.fs.data_objects_present(ino, size_mb * 1024 * 1024))
- self.assertEqual(self.get_mdc_stat("strays_enqueued"), 0)
-
- # See that before reintegration, the inode's backtrace points to a stray dir
- self.fs.mds_asok(['flush', 'journal'])
- self.assertTrue(self.get_backtrace_path(ino).startswith("stray"))
-
- last_reintegrated = self.get_mdc_stat("strays_reintegrated")
-
- # Do a metadata operation on the remaining link (mv is heavy handed, but
- # others like touch may be satisfied from caps without poking MDS)
- self.mount_a.run_shell(["mv", "dir_2/file_b", "dir_2/file_c"])
-
- # Stray reintegration should happen as a result of the eval_remote call
- # on responding to a client request.
- self.wait_until_equal(
- lambda: self.get_mdc_stat("num_strays"),
- expect_val=0,
- timeout=60
- )
-
- # See the reintegration counter increment
- curr_reintegrated = self.get_mdc_stat("strays_reintegrated")
- self.assertGreater(curr_reintegrated, last_reintegrated)
- last_reintegrated = curr_reintegrated
-
- # Flush the journal
- self.fs.mds_asok(['flush', 'journal'])
-
- # See that the backtrace for the file points to the remaining link's path
- post_reint_bt = self.fs.read_backtrace(ino)
- self.assertEqual(post_reint_bt['ancestors'][0]['dname'], "file_c")
-
-        # The MDS should reintegrate the stray when the unlink finishes
- self.mount_a.run_shell(["ln", "dir_2/file_c", "dir_2/file_d"])
- self.mount_a.run_shell(["rm", "-f", "dir_2/file_c"])
-
- # Stray reintegration should happen as a result of the notify_stray call
- # on completion of unlink
- self.wait_until_equal(
- lambda: self.get_mdc_stat("num_strays"),
- expect_val=0,
- timeout=60
- )
-
- # See the reintegration counter increment
- curr_reintegrated = self.get_mdc_stat("strays_reintegrated")
- self.assertGreater(curr_reintegrated, last_reintegrated)
- last_reintegrated = curr_reintegrated
-
- # Flush the journal
- self.fs.mds_asok(['flush', 'journal'])
-
- # See that the backtrace for the file points to the newest link's path
- post_reint_bt = self.fs.read_backtrace(ino)
- self.assertEqual(post_reint_bt['ancestors'][0]['dname'], "file_d")
-
- # Now really delete it
- self.mount_a.run_shell(["rm", "-f", "dir_2/file_d"])
- self._wait_for_counter("mds_cache", "strays_enqueued", 1)
- self._wait_for_counter("purge_queue", "pq_executed", 1)
-
- self.assert_purge_idle()
- self.assertTrue(self.fs.data_objects_absent(ino, size_mb * 1024 * 1024))
-
- # We caused the inode to go stray 3 times
- self.assertEqual(self.get_mdc_stat("strays_created"), 3)
- # We purged it at the last
- self.assertEqual(self.get_mdc_stat("strays_enqueued"), 1)
-
- def test_mv_hardlink_cleanup(self):
- """
- That when doing a rename from A to B, and B has hardlinks,
- then we make a stray for B which is then reintegrated
-        into one of its hardlinks.
- """
- # Create file_a, file_b, and a hardlink to file_b
- size_mb = 8
- self.mount_a.write_n_mb("file_a", size_mb)
- file_a_ino = self.mount_a.path_to_ino("file_a")
-
- self.mount_a.write_n_mb("file_b", size_mb)
- file_b_ino = self.mount_a.path_to_ino("file_b")
-
- self.mount_a.run_shell(["ln", "file_b", "linkto_b"])
- self.assertEqual(self.mount_a.path_to_ino("linkto_b"), file_b_ino)
-
- # mv file_a file_b
- self.mount_a.run_shell(["mv", "file_a", "file_b"])
-
- # Stray reintegration should happen as a result of the notify_stray call on
- # completion of rename
- self.wait_until_equal(
- lambda: self.get_mdc_stat("num_strays"),
- expect_val=0,
- timeout=60
- )
-
- self.assertEqual(self.get_mdc_stat("strays_created"), 1)
- self.assertGreaterEqual(self.get_mdc_stat("strays_reintegrated"), 1)
-
- # No data objects should have been deleted, as both files still have linkage.
- self.assertTrue(self.fs.data_objects_present(file_a_ino, size_mb * 1024 * 1024))
- self.assertTrue(self.fs.data_objects_present(file_b_ino, size_mb * 1024 * 1024))
-
- self.fs.mds_asok(['flush', 'journal'])
-
- post_reint_bt = self.fs.read_backtrace(file_b_ino)
- self.assertEqual(post_reint_bt['ancestors'][0]['dname'], "linkto_b")
-
- def _setup_two_ranks(self):
- # Set up two MDSs
- self.fs.set_max_mds(2)
-
- # See that we have two active MDSs
- self.wait_until_equal(lambda: len(self.fs.get_active_names()), 2, 30,
- reject_fn=lambda v: v > 2 or v < 1)
-
- active_mds_names = self.fs.get_active_names()
- rank_0_id = active_mds_names[0]
- rank_1_id = active_mds_names[1]
- log.info("Ranks 0 and 1 are {0} and {1}".format(
- rank_0_id, rank_1_id))
-
- # Get rid of other MDS daemons so that it's easier to know which
- # daemons to expect in which ranks after restarts
- for unneeded_mds in set(self.mds_cluster.mds_ids) - {rank_0_id, rank_1_id}:
- self.mds_cluster.mds_stop(unneeded_mds)
- self.mds_cluster.mds_fail(unneeded_mds)
-
- return rank_0_id, rank_1_id
-
- def _force_migrate(self, to_id, path, watch_ino):
- """
- :param to_id: MDS id to move it to
- :param path: Filesystem path (string) to move
- :param watch_ino: Inode number to look for at destination to confirm move
- :return: None
- """
- self.mount_a.run_shell(["setfattr", "-n", "ceph.dir.pin", "-v", "1", path])
-
- # Poll the MDS cache dump to watch for the export completing
- migrated = False
- migrate_timeout = 60
- migrate_elapsed = 0
- while not migrated:
- data = self.fs.mds_asok(["dump", "cache"], to_id)
- for inode_data in data:
- if inode_data['ino'] == watch_ino:
- log.debug("Found ino in cache: {0}".format(json.dumps(inode_data, indent=2)))
- if inode_data['is_auth'] is True:
- migrated = True
- break
-
- if not migrated:
- if migrate_elapsed > migrate_timeout:
- raise RuntimeError("Migration hasn't happened after {0}s!".format(migrate_elapsed))
- else:
- migrate_elapsed += 1
- time.sleep(1)
-
- def _is_stopped(self, rank):
- mds_map = self.fs.get_mds_map()
- return rank not in [i['rank'] for i in mds_map['info'].values()]
-
- def test_purge_on_shutdown(self):
- """
- That when an MDS rank is shut down, its purge queue is
- drained in the process.
- """
- rank_0_id, rank_1_id = self._setup_two_ranks()
-
- self.set_conf("mds.{0}".format(rank_1_id), 'mds_max_purge_files', "0")
- self.mds_cluster.mds_fail_restart(rank_1_id)
- self.fs.wait_for_daemons()
-
- file_count = 5
-
- self.mount_a.create_n_files("delete_me/file", file_count)
-
- self._force_migrate(rank_1_id, "delete_me",
- self.mount_a.path_to_ino("delete_me/file_0"))
-
- self.mount_a.run_shell(["rm", "-rf", Raw("delete_me/*")])
- self.mount_a.umount_wait()
-
- # See all the strays go into purge queue
- self._wait_for_counter("mds_cache", "strays_created", file_count, mds_id=rank_1_id)
- self._wait_for_counter("mds_cache", "strays_enqueued", file_count, mds_id=rank_1_id)
- self.assertEqual(self.get_stat("mds_cache", "num_strays", mds_id=rank_1_id), 0)
-
- # See nothing get purged from the purge queue (yet)
- time.sleep(10)
- self.assertEqual(self.get_stat("purge_queue", "pq_executed", mds_id=rank_1_id), 0)
-
- # Shut down rank 1
- self.fs.set_max_mds(1)
- self.fs.deactivate(1)
-
-        # It shouldn't proceed past stopping because it's still not allowed
- # to purge
- time.sleep(10)
- self.assertEqual(self.get_stat("purge_queue", "pq_executed", mds_id=rank_1_id), 0)
- self.assertFalse(self._is_stopped(1))
-
- # Permit the daemon to start purging again
- self.fs.mon_manager.raw_cluster_cmd('tell', 'mds.{0}'.format(rank_1_id),
- 'injectargs',
- "--mds_max_purge_files 100")
-
- # It should now proceed through shutdown
- self.wait_until_true(
- lambda: self._is_stopped(1),
- timeout=60
- )
-
- # ...and in the process purge all that data
- self.await_data_pool_empty()
-
- def test_migration_on_shutdown(self):
- """
- That when an MDS rank is shut down, any non-purgeable strays
- get migrated to another rank.
- """
-
- rank_0_id, rank_1_id = self._setup_two_ranks()
-
- # Create a non-purgeable stray in a ~mds1 stray directory
- # by doing a hard link and deleting the original file
- self.mount_a.run_shell(["mkdir", "dir_1", "dir_2"])
- self.mount_a.run_shell(["touch", "dir_1/original"])
- self.mount_a.run_shell(["ln", "dir_1/original", "dir_2/linkto"])
-
- self._force_migrate(rank_1_id, "dir_1",
- self.mount_a.path_to_ino("dir_1/original"))
-
-        # Empty the MDS cache; otherwise the MDS reintegrates the stray as soon as the unlink finishes
- self.mount_a.umount_wait()
- self.fs.mds_asok(['flush', 'journal'], rank_0_id)
- self.fs.mds_asok(['flush', 'journal'], rank_1_id)
- self.fs.mds_fail_restart()
- self.fs.wait_for_daemons()
-
- active_mds_names = self.fs.get_active_names()
- rank_0_id = active_mds_names[0]
- rank_1_id = active_mds_names[1]
-
- self.mount_a.mount()
-
- self.mount_a.run_shell(["rm", "-f", "dir_1/original"])
- self.mount_a.umount_wait()
-
- self._wait_for_counter("mds_cache", "strays_created", 1,
- mds_id=rank_1_id)
-
- # Shut down rank 1
- self.fs.mon_manager.raw_cluster_cmd_result('mds', 'set', "max_mds", "1")
- self.fs.mon_manager.raw_cluster_cmd_result('mds', 'deactivate', "1")
-
-        # Wait until we get to a single active MDS mdsmap state
- self.wait_until_true(lambda: self._is_stopped(1), timeout=120)
-
- # See that the stray counter on rank 0 has incremented
- self.assertEqual(self.get_mdc_stat("strays_created", rank_0_id), 1)
-
- def assert_backtrace(self, ino, expected_path):
- """
- Assert that the backtrace in the data pool for an inode matches
- an expected /foo/bar path.
- """
- expected_elements = expected_path.strip("/").split("/")
- bt = self.fs.read_backtrace(ino)
- actual_elements = list(reversed([dn['dname'] for dn in bt['ancestors']]))
- self.assertListEqual(expected_elements, actual_elements)
-
- def get_backtrace_path(self, ino):
- bt = self.fs.read_backtrace(ino)
- elements = reversed([dn['dname'] for dn in bt['ancestors']])
- return "/".join(elements)
-
- def assert_purge_idle(self):
- """
- Assert that the MDS perf counters indicate no strays exist and
- no ongoing purge activity. Sanity check for when PurgeQueue should
- be idle.
- """
- mdc_stats = self.fs.mds_asok(['perf', 'dump', "mds_cache"])['mds_cache']
- pq_stats = self.fs.mds_asok(['perf', 'dump', "purge_queue"])['purge_queue']
- self.assertEqual(mdc_stats["num_strays"], 0)
- self.assertEqual(mdc_stats["num_strays_delayed"], 0)
- self.assertEqual(pq_stats["pq_executing"], 0)
- self.assertEqual(pq_stats["pq_executing_ops"], 0)
-
- def test_mv_cleanup(self):
- """
- That when doing a rename from A to B, and B has no hardlinks,
-        then we make a stray for B and purge it.
- """
- # Create file_a and file_b, write some to both
- size_mb = 8
- self.mount_a.write_n_mb("file_a", size_mb)
- file_a_ino = self.mount_a.path_to_ino("file_a")
- self.mount_a.write_n_mb("file_b", size_mb)
- file_b_ino = self.mount_a.path_to_ino("file_b")
-
- self.fs.mds_asok(['flush', 'journal'])
- self.assert_backtrace(file_a_ino, "file_a")
- self.assert_backtrace(file_b_ino, "file_b")
-
- # mv file_a file_b
- self.mount_a.run_shell(['mv', 'file_a', 'file_b'])
-
- # See that stray counter increments
- self.assertEqual(self.get_mdc_stat("strays_created"), 1)
- # Wait for purge counter to increment
- self._wait_for_counter("mds_cache", "strays_enqueued", 1)
- self._wait_for_counter("purge_queue", "pq_executed", 1)
-
- self.assert_purge_idle()
-
- # file_b should have been purged
- self.assertTrue(self.fs.data_objects_absent(file_b_ino, size_mb * 1024 * 1024))
-
- # Backtrace should have updated from file_a to file_b
- self.fs.mds_asok(['flush', 'journal'])
- self.assert_backtrace(file_a_ino, "file_b")
-
- # file_a's data should still exist
- self.assertTrue(self.fs.data_objects_present(file_a_ino, size_mb * 1024 * 1024))
-
- def _pool_df(self, pool_name):
- """
- Return a dict like
- {
- "kb_used": 0,
- "bytes_used": 0,
- "max_avail": 19630292406,
- "objects": 0
- }
-
- :param pool_name: Which pool (must exist)
- """
- out = self.fs.mon_manager.raw_cluster_cmd("df", "--format=json-pretty")
- for p in json.loads(out)['pools']:
- if p['name'] == pool_name:
- return p['stats']
-
- raise RuntimeError("Pool '{0}' not found".format(pool_name))
-
- def await_data_pool_empty(self):
- self.wait_until_true(
- lambda: self._pool_df(
- self.fs.get_data_pool_name()
- )['objects'] == 0,
- timeout=60)
-
- def test_snapshot_remove(self):
- """
- That removal of a snapshot that references a now-unlinked file results
-        in purging of the stray for the file.
- """
- # Enable snapshots
- self.fs.mon_manager.raw_cluster_cmd("mds", "set", "allow_new_snaps", "true",
- "--yes-i-really-mean-it")
-
- # Create a dir with a file in it
- size_mb = 8
- self.mount_a.run_shell(["mkdir", "snapdir"])
- self.mount_a.run_shell(["mkdir", "snapdir/subdir"])
- self.mount_a.write_test_pattern("snapdir/subdir/file_a", size_mb * 1024 * 1024)
- file_a_ino = self.mount_a.path_to_ino("snapdir/subdir/file_a")
-
- # Snapshot the dir
- self.mount_a.run_shell(["mkdir", "snapdir/.snap/snap1"])
-
- # Cause the head revision to deviate from the snapshot
- self.mount_a.write_n_mb("snapdir/subdir/file_a", size_mb)
-
- # Flush the journal so that backtraces, dirfrag objects will actually be written
- self.fs.mds_asok(["flush", "journal"])
-
- # Unlink the file
- self.mount_a.run_shell(["rm", "-f", "snapdir/subdir/file_a"])
- self.mount_a.run_shell(["rmdir", "snapdir/subdir"])
-
- # Unmount the client because when I come back to check the data is still
- # in the file I don't want to just see what's in the page cache.
- self.mount_a.umount_wait()
-
- self.assertEqual(self.get_mdc_stat("strays_created"), 2)
-
- # FIXME: at this stage we see a purge and the stray count drops to
- # zero, but there's actually still a stray, so at the very
- # least the StrayManager stats code is slightly off
-
- self.mount_a.mount()
-
- # See that the data from the snapshotted revision of the file is still present
- # and correct
- self.mount_a.validate_test_pattern("snapdir/.snap/snap1/subdir/file_a", size_mb * 1024 * 1024)
-
- # Remove the snapshot
- self.mount_a.run_shell(["rmdir", "snapdir/.snap/snap1"])
-
- # Purging file_a doesn't happen until after we've flushed the journal, because
- # it is referenced by the snapshotted subdir, and the snapshot isn't really
- # gone until the journal references to it are gone
- self.fs.mds_asok(["flush", "journal"])
-
- # Wait for purging to complete, which requires the OSDMap to propagate to the OSDs.
- # See also: http://tracker.ceph.com/issues/20072
- self.wait_until_true(
- lambda: self.fs.data_objects_absent(file_a_ino, size_mb * 1024 * 1024),
- timeout=60
- )
-
- # See that a purge happens now
- self._wait_for_counter("mds_cache", "strays_enqueued", 2)
- self._wait_for_counter("purge_queue", "pq_executed", 2)
-
- self.await_data_pool_empty()
-
- def test_fancy_layout(self):
- """
- purge stray file with fancy layout
- """
-
- file_name = "fancy_layout_file"
- self.mount_a.run_shell(["touch", file_name])
-
- file_layout = "stripe_unit=1048576 stripe_count=4 object_size=8388608"
- self.mount_a.setfattr(file_name, "ceph.file.layout", file_layout)
-
- # 35MB requires 7 objects
- size_mb = 35
- self.mount_a.write_n_mb(file_name, size_mb)
-
- self.mount_a.run_shell(["rm", "-f", file_name])
- self.fs.mds_asok(["flush", "journal"])
-
-        # Can't use self.fs.data_objects_absent here; it does not support fancy layouts
- self.await_data_pool_empty()
-
- def test_dirfrag_limit(self):
- """
- That the directory fragment size cannot exceed mds_bal_fragment_size_max (using a limit of 50 in all configurations).
-
- That fragmentation (forced) will allow more entries to be created.
-
- That unlinking fails when the stray directory fragment becomes too large and that unlinking may continue once those strays are purged.
- """
-
- self.fs.set_allow_dirfrags(True)
-
- LOW_LIMIT = 50
- for mds in self.fs.get_daemon_names():
- self.fs.mds_asok(["config", "set", "mds_bal_fragment_size_max", str(LOW_LIMIT)], mds)
-
- try:
- self.mount_a.run_python(dedent("""
- import os
- path = os.path.join("{path}", "subdir")
- os.mkdir(path)
- for n in range(0, {file_count}):
- open(os.path.join(path, "%s" % n), 'w').write("%s" % n)
- """.format(
- path=self.mount_a.mountpoint,
- file_count=LOW_LIMIT+1
- )))
- except CommandFailedError:
-            pass # ENOSPC
- else:
- raise RuntimeError("fragment size exceeded")
-
- # Now test that we can go beyond the limit if we fragment the directory
-
- self.mount_a.run_python(dedent("""
- import os
- path = os.path.join("{path}", "subdir2")
- os.mkdir(path)
- for n in range(0, {file_count}):
- open(os.path.join(path, "%s" % n), 'w').write("%s" % n)
- dfd = os.open(path, os.O_DIRECTORY)
- os.fsync(dfd)
- """.format(
- path=self.mount_a.mountpoint,
- file_count=LOW_LIMIT
- )))
-
- # Ensure that subdir2 is fragmented
- mds_id = self.fs.get_active_names()[0]
- self.fs.mds_asok(["dirfrag", "split", "/subdir2", "0/0", "1"], mds_id)
-
- # remount+flush (release client caps)
- self.mount_a.umount_wait()
- self.fs.mds_asok(["flush", "journal"], mds_id)
- self.mount_a.mount()
- self.mount_a.wait_until_mounted()
-
- # Create 50% more files than the current fragment limit
- self.mount_a.run_python(dedent("""
- import os
- path = os.path.join("{path}", "subdir2")
- for n in range({file_count}, ({file_count}*3)//2):
- open(os.path.join(path, "%s" % n), 'w').write("%s" % n)
- """.format(
- path=self.mount_a.mountpoint,
- file_count=LOW_LIMIT
- )))
-
- # Now test the stray directory size is limited and recovers
- strays_before = self.get_mdc_stat("strays_created")
- try:
- self.mount_a.run_python(dedent("""
- import os
- path = os.path.join("{path}", "subdir3")
- os.mkdir(path)
- for n in range({file_count}):
- fpath = os.path.join(path, "%s" % n)
- f = open(fpath, 'w')
- f.write("%s" % n)
- f.close()
- os.unlink(fpath)
- """.format(
- path=self.mount_a.mountpoint,
- file_count=LOW_LIMIT*10 # 10 stray directories, should collide before this count
- )))
- except CommandFailedError:
-            pass # ENOSPC
- else:
- raise RuntimeError("fragment size exceeded")
-
- strays_after = self.get_mdc_stat("strays_created")
- self.assertGreaterEqual(strays_after-strays_before, LOW_LIMIT)
-
- self._wait_for_counter("mds_cache", "strays_enqueued", strays_after)
- self._wait_for_counter("purge_queue", "pq_executed", strays_after)
-
- self.mount_a.run_python(dedent("""
- import os
- path = os.path.join("{path}", "subdir4")
- os.mkdir(path)
- for n in range({file_count}):
- fpath = os.path.join(path, "%s" % n)
- f = open(fpath, 'w')
- f.write("%s" % n)
- f.close()
- os.unlink(fpath)
- """.format(
- path=self.mount_a.mountpoint,
- file_count=LOW_LIMIT
- )))
-
- def test_purge_queue_upgrade(self):
- """
- That when starting on a system with no purge queue in the metadata
- pool, we silently create one.
- :return:
- """
-
- self.mds_cluster.mds_stop()
- self.mds_cluster.mds_fail()
- self.fs.rados(["rm", "500.00000000"])
- self.mds_cluster.mds_restart()
- self.fs.wait_for_daemons()
-
- def test_purge_queue_op_rate(self):
- """
- A busy purge queue is meant to aggregate operations sufficiently
- that our RADOS ops to the metadata pool are not O(files). Check
-        that this is the case.
- :return:
- """
-
- # For low rates of deletion, the rate of metadata ops actually
- # will be o(files), so to see the desired behaviour we have to give
- # the system a significant quantity, i.e. an order of magnitude
- # more than the number of files it will purge at one time.
-
- max_purge_files = 2
-
- self.set_conf('mds', 'mds_bal_frag', 'false')
- self.set_conf('mds', 'mds_max_purge_files', "%d" % max_purge_files)
- self.fs.mds_fail_restart()
- self.fs.wait_for_daemons()
-
- phase_1_files = 256
- phase_2_files = 512
-
- self.mount_a.run_shell(["mkdir", "phase1"])
- self.mount_a.create_n_files("phase1/file", phase_1_files)
-
- self.mount_a.run_shell(["mkdir", "phase2"])
- self.mount_a.create_n_files("phase2/file", phase_2_files)
-
- def unlink_and_count_ops(path, expected_deletions):
- initial_ops = self.get_stat("objecter", "op")
- initial_pq_executed = self.get_stat("purge_queue", "pq_executed")
-
- self.mount_a.run_shell(["rm", "-rf", path])
-
- self._wait_for_counter(
- "purge_queue", "pq_executed", initial_pq_executed + expected_deletions
- )
-
- final_ops = self.get_stat("objecter", "op")
-
-            # Calculate the *overhead* operations, i.e. excluding the
-            # operations that actually delete the files.
- return final_ops - initial_ops - expected_deletions
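-            # Worked example with made-up numbers: if the objecter op count goes
-            # from 1000 to 1400 while purging 257 inodes, the overhead is
-            # 1400 - 1000 - 257 = 143 ops attributable to queue management
-            # rather than to the deletions themselves.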
-
- self.fs.mds_asok(['flush', 'journal'])
- phase1_ops = unlink_and_count_ops("phase1/", phase_1_files + 1)
-
- self.fs.mds_asok(['flush', 'journal'])
- phase2_ops = unlink_and_count_ops("phase2/", phase_2_files + 1)
-
- log.info("Phase 1: {0}".format(phase1_ops))
- log.info("Phase 2: {0}".format(phase2_ops))
-
- # The success criterion is that deleting double the number
- # of files doesn't generate double the number of overhead ops
- # -- this comparison is a rough approximation of that rule.
- self.assertTrue(phase2_ops < phase1_ops * 1.25)
-
- # Finally, check that our activity did include properly quiescing
- # the queue (i.e. call to Journaler::write_head in the right place),
- # by restarting the MDS and checking that it doesn't try re-executing
- # any of the work we did.
- self.fs.mds_asok(['flush', 'journal']) # flush to ensure no strays
- # hanging around
- self.fs.mds_fail_restart()
- self.fs.wait_for_daemons()
- time.sleep(10)
- self.assertEqual(self.get_stat("purge_queue", "pq_executed"), 0)
-
- def test_replicated_delete_speed(self):
- """
- That deletions of replicated metadata are not pathologically slow
- """
- rank_0_id, rank_1_id = self._setup_two_ranks()
-
- self.set_conf("mds.{0}".format(rank_1_id), 'mds_max_purge_files', "0")
- self.mds_cluster.mds_fail_restart(rank_1_id)
- self.fs.wait_for_daemons()
-
- file_count = 10
-
- self.mount_a.create_n_files("delete_me/file", file_count)
-
- self._force_migrate(rank_1_id, "delete_me",
- self.mount_a.path_to_ino("delete_me/file_0"))
-
- begin = datetime.datetime.now()
- self.mount_a.run_shell(["rm", "-rf", Raw("delete_me/*")])
- end = datetime.datetime.now()
-
- # What we're really checking here is that we are completing client
- # operations immediately rather than delaying until the next tick.
- tick_period = float(self.fs.get_config("mds_tick_interval",
- service_type="mds"))
-
- duration = (end - begin).total_seconds()
- self.assertLess(duration, (file_count * tick_period) * 0.25)
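-        # As a rough illustration (the tick value here is hypothetical): with
-        # file_count=10 and a 5s mds_tick_interval the bound is
-        # 10 * 5 * 0.25 = 12.5s, i.e. the deletions must finish well inside a
-        # couple of tick periods rather than waiting one tick per file.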
-
diff --git a/src/ceph/qa/tasks/cephfs/test_volume_client.py b/src/ceph/qa/tasks/cephfs/test_volume_client.py
deleted file mode 100644
index 0876af9..0000000
--- a/src/ceph/qa/tasks/cephfs/test_volume_client.py
+++ /dev/null
@@ -1,1016 +0,0 @@
-import json
-import logging
-import time
-import os
-from textwrap import dedent
-from tasks.cephfs.cephfs_test_case import CephFSTestCase
-from tasks.cephfs.fuse_mount import FuseMount
-from teuthology.exceptions import CommandFailedError
-
-log = logging.getLogger(__name__)
-
-
-class TestVolumeClient(CephFSTestCase):
- # One for looking at the global filesystem, one for being
- # the VolumeClient, two for mounting the created shares
- CLIENTS_REQUIRED = 4
-
- def _volume_client_python(self, client, script, vol_prefix=None, ns_prefix=None):
- # Can't dedent this *and* the script we pass in, because they might have different
- # levels of indentation to begin with, so leave this string zero-indented
- if vol_prefix:
- vol_prefix = "\"" + vol_prefix + "\""
- if ns_prefix:
- ns_prefix = "\"" + ns_prefix + "\""
- return client.run_python("""
-from ceph_volume_client import CephFSVolumeClient, VolumePath
-import logging
-log = logging.getLogger("ceph_volume_client")
-log.addHandler(logging.StreamHandler())
-log.setLevel(logging.DEBUG)
-vc = CephFSVolumeClient("manila", "{conf_path}", "ceph", {vol_prefix}, {ns_prefix})
-vc.connect()
-{payload}
-vc.disconnect()
- """.format(payload=script, conf_path=client.config_path, vol_prefix=vol_prefix, ns_prefix=ns_prefix))
-
- def _sudo_write_file(self, remote, path, data):
- """
- Write data to a remote file as super user
-
- :param remote: Remote site.
- :param path: Path on the remote being written to.
- :param data: Data to be written.
-
-        The data is piped through stdin and written as the super user via sudo.
- """
- remote.run(
- args=[
- 'sudo',
- 'python',
- '-c',
- 'import shutil, sys; shutil.copyfileobj(sys.stdin, file(sys.argv[1], "wb"))',
- path,
- ],
- stdin=data,
- )
-
- def _configure_vc_auth(self, mount, id_name):
- """
- Set up auth credentials for the VolumeClient user
- """
- out = self.fs.mon_manager.raw_cluster_cmd(
- "auth", "get-or-create", "client.{name}".format(name=id_name),
- "mds", "allow *",
- "osd", "allow rw",
- "mon", "allow *"
- )
- mount.client_id = id_name
- self._sudo_write_file(mount.client_remote, mount.get_keyring_path(), out)
- self.set_conf("client.{name}".format(name=id_name), "keyring", mount.get_keyring_path())
-
- def _configure_guest_auth(self, volumeclient_mount, guest_mount,
- guest_entity, mount_path,
- namespace_prefix=None, readonly=False,
- tenant_id=None):
- """
- Set up auth credentials for the guest client to mount a volume.
-
- :param volumeclient_mount: mount used as the handle for driving
- volumeclient.
- :param guest_mount: mount used by the guest client.
- :param guest_entity: auth ID used by the guest client.
- :param mount_path: path of the volume.
- :param namespace_prefix: name prefix of the RADOS namespace, which
- is used for the volume's layout.
- :param readonly: defaults to False. If set to 'True' only read-only
- mount access is granted to the guest.
- :param tenant_id: (OpenStack) tenant ID of the guest client.
- """
-
- head, volume_id = os.path.split(mount_path)
- head, group_id = os.path.split(head)
- head, volume_prefix = os.path.split(head)
- volume_prefix = "/" + volume_prefix
-
- # Authorize the guest client's auth ID to mount the volume.
- key = self._volume_client_python(volumeclient_mount, dedent("""
- vp = VolumePath("{group_id}", "{volume_id}")
- auth_result = vc.authorize(vp, "{guest_entity}", readonly={readonly},
- tenant_id="{tenant_id}")
- print auth_result['auth_key']
- """.format(
- group_id=group_id,
- volume_id=volume_id,
- guest_entity=guest_entity,
- readonly=readonly,
- tenant_id=tenant_id)), volume_prefix, namespace_prefix
- )
-
- # CephFSVolumeClient's authorize() does not return the secret
- # key to a caller who isn't multi-tenant aware. Explicitly
- # query the key for such a client.
- if not tenant_id:
- key = self.fs.mon_manager.raw_cluster_cmd(
- "auth", "get-key", "client.{name}".format(name=guest_entity),
- )
-
- # The guest auth ID should exist.
- existing_ids = [a['entity'] for a in self.auth_list()]
- self.assertIn("client.{0}".format(guest_entity), existing_ids)
-
- # Create keyring file for the guest client.
- keyring_txt = dedent("""
- [client.{guest_entity}]
- key = {key}
-
- """.format(
- guest_entity=guest_entity,
- key=key
- ))
- guest_mount.client_id = guest_entity
- self._sudo_write_file(guest_mount.client_remote,
- guest_mount.get_keyring_path(),
- keyring_txt)
-
- # Add a guest client section to the ceph config file.
- self.set_conf("client.{0}".format(guest_entity), "client quota", "True")
- self.set_conf("client.{0}".format(guest_entity), "debug client", "20")
- self.set_conf("client.{0}".format(guest_entity), "debug objecter", "20")
- self.set_conf("client.{0}".format(guest_entity),
- "keyring", guest_mount.get_keyring_path())
-
- def test_default_prefix(self):
- group_id = "grpid"
- volume_id = "volid"
- DEFAULT_VOL_PREFIX = "volumes"
- DEFAULT_NS_PREFIX = "fsvolumens_"
-
- self.mount_b.umount_wait()
- self._configure_vc_auth(self.mount_b, "manila")
-
-        # Create a volume with the default prefix
- self._volume_client_python(self.mount_b, dedent("""
- vp = VolumePath("{group_id}", "{volume_id}")
- vc.create_volume(vp, 10, data_isolated=True)
- """.format(
- group_id=group_id,
- volume_id=volume_id,
- )))
-
- # The dir should be created
- self.mount_a.stat(os.path.join(DEFAULT_VOL_PREFIX, group_id, volume_id))
-
-        # The namespace should be set
- ns_in_attr = self.mount_a.getfattr(os.path.join(DEFAULT_VOL_PREFIX, group_id, volume_id), "ceph.dir.layout.pool_namespace")
- namespace = "{0}{1}".format(DEFAULT_NS_PREFIX, volume_id)
- self.assertEqual(namespace, ns_in_attr)
-
-
- def test_lifecycle(self):
- """
- General smoke test for create, extend, destroy
- """
-
- # I'm going to use mount_c later as a guest for mounting the created
- # shares
- self.mounts[2].umount_wait()
-
- # I'm going to leave mount_b unmounted and just use it as a handle for
- # driving volumeclient. It's a little hacky but we don't have a more
- # general concept for librados/libcephfs clients as opposed to full
- # blown mounting clients.
- self.mount_b.umount_wait()
- self._configure_vc_auth(self.mount_b, "manila")
-
- guest_entity = "guest"
- group_id = "grpid"
- volume_id = "volid"
-
- volume_prefix = "/myprefix"
- namespace_prefix = "mynsprefix_"
-
- # Create a 100MB volume
- volume_size = 100
- mount_path = self._volume_client_python(self.mount_b, dedent("""
- vp = VolumePath("{group_id}", "{volume_id}")
- create_result = vc.create_volume(vp, 1024*1024*{volume_size})
- print create_result['mount_path']
- """.format(
- group_id=group_id,
- volume_id=volume_id,
- volume_size=volume_size
- )), volume_prefix, namespace_prefix)
-
- # The dir should be created
- self.mount_a.stat(os.path.join("myprefix", group_id, volume_id))
-
-        # Authorize and configure credentials for the guest to mount
-        # the volume.
- self._configure_guest_auth(self.mount_b, self.mounts[2], guest_entity,
- mount_path, namespace_prefix)
- self.mounts[2].mount(mount_path=mount_path)
-
- # The kernel client doesn't have the quota-based df behaviour,
- # or quotas at all, so only exercise the client behaviour when
- # running fuse.
- if isinstance(self.mounts[2], FuseMount):
- # df should see volume size, same as the quota set on volume's dir
- self.assertEqual(self.mounts[2].df()['total'],
- volume_size * 1024 * 1024)
- self.assertEqual(
- self.mount_a.getfattr(
- os.path.join(volume_prefix.strip("/"), group_id, volume_id),
- "ceph.quota.max_bytes"),
- "%s" % (volume_size * 1024 * 1024))
-
-            # df granularity is a 4MB block, so we have to write at least that much
- data_bin_mb = 4
- self.mounts[2].write_n_mb("data.bin", data_bin_mb)
-
- # Write something outside volume to check this space usage is
- # not reported in the volume's DF.
- other_bin_mb = 8
- self.mount_a.write_n_mb("other.bin", other_bin_mb)
-
- # global: df should see all the writes (data + other). This is a >
-        # rather than a == because the global space used includes all pools
- def check_df():
- used = self.mount_a.df()['used']
- return used >= (other_bin_mb * 1024 * 1024)
-
- self.wait_until_true(check_df, timeout=30)
-
- # Hack: do a metadata IO to kick rstats
- self.mounts[2].run_shell(["touch", "foo"])
-
- # volume: df should see the data_bin_mb consumed from quota, same
- # as the rbytes for the volume's dir
- self.wait_until_equal(
- lambda: self.mounts[2].df()['used'],
- data_bin_mb * 1024 * 1024, timeout=60)
- self.wait_until_equal(
- lambda: self.mount_a.getfattr(
- os.path.join(volume_prefix.strip("/"), group_id, volume_id),
- "ceph.dir.rbytes"),
- "%s" % (data_bin_mb * 1024 * 1024), timeout=60)
-
-        # Sync so that file data is persisted to RADOS
- self.mounts[2].run_shell(["sync"])
-
-        # Our data should stay in its particular RADOS namespace
- pool_name = self.mount_a.getfattr(os.path.join("myprefix", group_id, volume_id), "ceph.dir.layout.pool")
- namespace = "{0}{1}".format(namespace_prefix, volume_id)
- ns_in_attr = self.mount_a.getfattr(os.path.join("myprefix", group_id, volume_id), "ceph.dir.layout.pool_namespace")
- self.assertEqual(namespace, ns_in_attr)
-
- objects_in_ns = set(self.fs.rados(["ls"], pool=pool_name, namespace=namespace).split("\n"))
- self.assertNotEqual(objects_in_ns, set())
-
- # De-authorize the guest
- self._volume_client_python(self.mount_b, dedent("""
- vp = VolumePath("{group_id}", "{volume_id}")
- vc.deauthorize(vp, "{guest_entity}")
- vc.evict("{guest_entity}")
- """.format(
- group_id=group_id,
- volume_id=volume_id,
- guest_entity=guest_entity
- )), volume_prefix, namespace_prefix)
-
-        # Once deauthorized, the client should be unable to do any more metadata ops.
-        # The way that the client currently behaves here is to block (it acts as if
-        # it has lost the network, because there is nothing to tell it that its
-        # messages are being dropped because its identity is gone).
- background = self.mounts[2].write_n_mb("rogue.bin", 1, wait=False)
- time.sleep(10) # Approximate check for 'stuck' as 'still running after 10s'
- self.assertFalse(background.finished)
-
- # After deauthorisation, the client ID should be gone (this was the only
- # volume it was authorised for)
- self.assertNotIn("client.{0}".format(guest_entity), [e['entity'] for e in self.auth_list()])
-
- # Clean up the dead mount (ceph-fuse's behaviour here is a bit undefined)
- self.mounts[2].kill()
- self.mounts[2].kill_cleanup()
- try:
- background.wait()
- except CommandFailedError:
- # We killed the mount out from under you
- pass
-
- self._volume_client_python(self.mount_b, dedent("""
- vp = VolumePath("{group_id}", "{volume_id}")
- vc.delete_volume(vp)
- vc.purge_volume(vp)
- """.format(
- group_id=group_id,
- volume_id=volume_id,
- )), volume_prefix, namespace_prefix)
-
- def test_idempotency(self):
- """
- That the volumeclient interface works when calling everything twice
- """
- self.mount_b.umount_wait()
- self._configure_vc_auth(self.mount_b, "manila")
-
- guest_entity = "guest"
- group_id = "grpid"
- volume_id = "volid"
- self._volume_client_python(self.mount_b, dedent("""
- vp = VolumePath("{group_id}", "{volume_id}")
- vc.create_volume(vp, 10)
- vc.create_volume(vp, 10)
- vc.authorize(vp, "{guest_entity}")
- vc.authorize(vp, "{guest_entity}")
- vc.deauthorize(vp, "{guest_entity}")
- vc.deauthorize(vp, "{guest_entity}")
- vc.delete_volume(vp)
- vc.delete_volume(vp)
- vc.purge_volume(vp)
- vc.purge_volume(vp)
-
- vc.create_volume(vp, 10, data_isolated=True)
- vc.create_volume(vp, 10, data_isolated=True)
- vc.authorize(vp, "{guest_entity}")
- vc.authorize(vp, "{guest_entity}")
- vc.deauthorize(vp, "{guest_entity}")
- vc.deauthorize(vp, "{guest_entity}")
- vc.evict("{guest_entity}")
- vc.evict("{guest_entity}")
- vc.delete_volume(vp, data_isolated=True)
- vc.delete_volume(vp, data_isolated=True)
- vc.purge_volume(vp, data_isolated=True)
- vc.purge_volume(vp, data_isolated=True)
- """.format(
- group_id=group_id,
- volume_id=volume_id,
- guest_entity=guest_entity
- )))
-
- def test_data_isolated(self):
- """
- That data isolated shares get their own pool
- :return:
- """
-
- # Because the teuthology config template sets mon_max_pg_per_osd to
- # 10000 (i.e. it just tries to ignore health warnings), reset it to something
- # sane before using volume_client, to avoid creating pools with absurdly large
- # numbers of PGs.
- self.set_conf("global", "mon max pg per osd", "300")
- for mon_daemon_state in self.ctx.daemons.iter_daemons_of_role('mon'):
- mon_daemon_state.restart()
-
- self.mount_b.umount_wait()
- self._configure_vc_auth(self.mount_b, "manila")
-
- # Calculate how many PGs we'll expect the new volume pool to have
- osd_map = json.loads(self.fs.mon_manager.raw_cluster_cmd('osd', 'dump', '--format=json-pretty'))
- max_per_osd = int(self.fs.get_config('mon_max_pg_per_osd'))
- osd_count = len(osd_map['osds'])
- max_overall = osd_count * max_per_osd
-
- existing_pg_count = 0
- for p in osd_map['pools']:
- existing_pg_count += p['pg_num']
-
- expected_pg_num = (max_overall - existing_pg_count) / 10
- log.info("max_per_osd {0}".format(max_per_osd))
- log.info("osd_count {0}".format(osd_count))
- log.info("max_overall {0}".format(max_overall))
- log.info("existing_pg_count {0}".format(existing_pg_count))
- log.info("expected_pg_num {0}".format(expected_pg_num))
-
- pools_a = json.loads(self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['pools']
-
- group_id = "grpid"
- volume_id = "volid"
- self._volume_client_python(self.mount_b, dedent("""
- vp = VolumePath("{group_id}", "{volume_id}")
- vc.create_volume(vp, 10, data_isolated=True)
- """.format(
- group_id=group_id,
- volume_id=volume_id,
- )))
-
- pools_b = json.loads(self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['pools']
-
- # Should have created one new pool
- new_pools = set(p['pool_name'] for p in pools_b) - set([p['pool_name'] for p in pools_a])
- self.assertEqual(len(new_pools), 1)
-
- # It should have followed the heuristic for PG count
- # (this is an overly strict test condition, so we may want to remove
- # it at some point as/when the logic gets fancier)
- created_pg_num = self.fs.mon_manager.get_pool_property(list(new_pools)[0], "pg_num")
- self.assertEqual(expected_pg_num, created_pg_num)
-
- def test_15303(self):
- """
- Reproducer for #15303 "Client holds incorrect complete flag on dir
- after losing caps" (http://tracker.ceph.com/issues/15303)
- """
- for m in self.mounts:
- m.umount_wait()
-
- # Create a dir on mount A
- self.mount_a.mount()
- self.mount_a.run_shell(["mkdir", "parent1"])
- self.mount_a.run_shell(["mkdir", "parent2"])
- self.mount_a.run_shell(["mkdir", "parent1/mydir"])
-
- # Put some files in it from mount B
- self.mount_b.mount()
- self.mount_b.run_shell(["touch", "parent1/mydir/afile"])
- self.mount_b.umount_wait()
-
- # List the dir's contents on mount A
- self.assertListEqual(self.mount_a.ls("parent1/mydir"),
- ["afile"])
-
- def test_evict_client(self):
- """
- That a volume client can be evicted based on its auth ID and the volume
- path it has mounted.
- """
-
- if not isinstance(self.mount_a, FuseMount):
- self.skipTest("Requires FUSE client to inject client metadata")
-
-        # mounts[1] is used as the handle for driving VolumeClient; mounts[2]
-        # and mounts[3] are used as guests to mount the volumes/shares.
-
- for i in range(1, 4):
- self.mounts[i].umount_wait()
-
- volumeclient_mount = self.mounts[1]
- self._configure_vc_auth(volumeclient_mount, "manila")
- guest_mounts = (self.mounts[2], self.mounts[3])
-
- guest_entity = "guest"
- group_id = "grpid"
- mount_paths = []
- volume_ids = []
-
- # Create two volumes. Authorize 'guest' auth ID to mount the two
- # volumes. Mount the two volumes. Write data to the volumes.
- for i in range(2):
- # Create volume.
- volume_ids.append("volid_{0}".format(str(i)))
- mount_paths.append(
- self._volume_client_python(volumeclient_mount, dedent("""
- vp = VolumePath("{group_id}", "{volume_id}")
- create_result = vc.create_volume(vp, 10 * 1024 * 1024)
- print create_result['mount_path']
- """.format(
- group_id=group_id,
- volume_id=volume_ids[i]
- ))))
-
- # Authorize 'guest' auth ID to mount the volume.
- self._configure_guest_auth(volumeclient_mount, guest_mounts[i],
- guest_entity, mount_paths[i])
-
- # Mount the volume.
- guest_mounts[i].mountpoint_dir_name = 'mnt.{id}.{suffix}'.format(
- id=guest_entity, suffix=str(i))
- guest_mounts[i].mount(mount_path=mount_paths[i])
- guest_mounts[i].write_n_mb("data.bin", 1)
-
-
-        # Evict the client guest_mounts[0], which uses auth ID 'guest' and has
-        # mounted one volume.
- self._volume_client_python(self.mount_b, dedent("""
- vp = VolumePath("{group_id}", "{volume_id}")
- vc.deauthorize(vp, "{guest_entity}")
- vc.evict("{guest_entity}", volume_path=vp)
- """.format(
- group_id=group_id,
- volume_id=volume_ids[0],
- guest_entity=guest_entity
- )))
-
-        # The evicted guest client, guest_mounts[0], should not be able to do
-        # any more metadata ops. It should start failing all operations
- # when it sees that its own address is in the blacklist.
- try:
- guest_mounts[0].write_n_mb("rogue.bin", 1)
- except CommandFailedError:
- pass
- else:
- raise RuntimeError("post-eviction write should have failed!")
-
- # The blacklisted guest client should now be unmountable
- guest_mounts[0].umount_wait()
-
-        # Guest client guest_mounts[1], which uses the same auth ID 'guest' but
-        # has mounted the other volume, should be able to use its volume
- # unaffected.
- guest_mounts[1].write_n_mb("data.bin.1", 1)
-
- # Cleanup.
- for i in range(2):
- self._volume_client_python(volumeclient_mount, dedent("""
- vp = VolumePath("{group_id}", "{volume_id}")
- vc.deauthorize(vp, "{guest_entity}")
- vc.delete_volume(vp)
- vc.purge_volume(vp)
- """.format(
- group_id=group_id,
- volume_id=volume_ids[i],
- guest_entity=guest_entity
- )))
-
-
- def test_purge(self):
- """
- Reproducer for #15266, exception trying to purge volumes that
- contain non-ascii filenames.
-
- Additionally test any other purge corner cases here.
- """
- # I'm going to leave mount_b unmounted and just use it as a handle for
- # driving volumeclient. It's a little hacky but we don't have a more
- # general concept for librados/libcephfs clients as opposed to full
- # blown mounting clients.
- self.mount_b.umount_wait()
- self._configure_vc_auth(self.mount_b, "manila")
-
- group_id = "grpid"
- # Use a unicode volume ID (like Manila), to reproduce #15266
- volume_id = u"volid"
-
- # Create
- mount_path = self._volume_client_python(self.mount_b, dedent("""
- vp = VolumePath("{group_id}", u"{volume_id}")
- create_result = vc.create_volume(vp, 10)
- print create_result['mount_path']
- """.format(
- group_id=group_id,
- volume_id=volume_id
- )))
-
- # Strip leading "/"
- mount_path = mount_path[1:]
-
- # A file with non-ascii characters
- self.mount_a.run_shell(["touch", os.path.join(mount_path, u"b\u00F6b")])
-
- # A file with no permissions to do anything
- self.mount_a.run_shell(["touch", os.path.join(mount_path, "noperms")])
- self.mount_a.run_shell(["chmod", "0000", os.path.join(mount_path, "noperms")])
-
- self._volume_client_python(self.mount_b, dedent("""
- vp = VolumePath("{group_id}", u"{volume_id}")
- vc.delete_volume(vp)
- vc.purge_volume(vp)
- """.format(
- group_id=group_id,
- volume_id=volume_id
- )))
-
- # Check it's really gone
- self.assertEqual(self.mount_a.ls("volumes/_deleting"), [])
- self.assertEqual(self.mount_a.ls("volumes/"), ["_deleting", group_id])
-
- def test_readonly_authorization(self):
- """
- That guest clients can be restricted to read-only mounts of volumes.
- """
-
- volumeclient_mount = self.mounts[1]
- guest_mount = self.mounts[2]
- volumeclient_mount.umount_wait()
- guest_mount.umount_wait()
-
- # Configure volumeclient_mount as the handle for driving volumeclient.
- self._configure_vc_auth(volumeclient_mount, "manila")
-
- guest_entity = "guest"
- group_id = "grpid"
- volume_id = "volid"
-
- # Create a volume.
- mount_path = self._volume_client_python(volumeclient_mount, dedent("""
- vp = VolumePath("{group_id}", "{volume_id}")
- create_result = vc.create_volume(vp, 1024*1024*10)
- print create_result['mount_path']
- """.format(
- group_id=group_id,
- volume_id=volume_id,
- )))
-
-        # Authorize and configure credentials for the guest to mount
-        # the volume with read-write access.
- self._configure_guest_auth(volumeclient_mount, guest_mount, guest_entity,
- mount_path, readonly=False)
-
- # Mount the volume, and write to it.
- guest_mount.mount(mount_path=mount_path)
- guest_mount.write_n_mb("data.bin", 1)
-
- # Change the guest auth ID's authorization to read-only mount access.
- self._volume_client_python(volumeclient_mount, dedent("""
- vp = VolumePath("{group_id}", "{volume_id}")
- vc.deauthorize(vp, "{guest_entity}")
- """.format(
- group_id=group_id,
- volume_id=volume_id,
- guest_entity=guest_entity
- )))
- self._configure_guest_auth(volumeclient_mount, guest_mount, guest_entity,
- mount_path, readonly=True)
-
- # The effect of the change in access level to read-only is not
- # immediate. The guest sees the change only after a remount of
- # the volume.
- guest_mount.umount_wait()
- guest_mount.mount(mount_path=mount_path)
-
- # Read existing content of the volume.
- self.assertListEqual(guest_mount.ls(guest_mount.mountpoint), ["data.bin"])
- # Cannot write into read-only volume.
- with self.assertRaises(CommandFailedError):
- guest_mount.write_n_mb("rogue.bin", 1)
-
- def test_get_authorized_ids(self):
- """
- That for a volume, the authorized IDs and their access levels
- can be obtained using CephFSVolumeClient's get_authorized_ids().
- """
- volumeclient_mount = self.mounts[1]
- volumeclient_mount.umount_wait()
-
- # Configure volumeclient_mount as the handle for driving volumeclient.
- self._configure_vc_auth(volumeclient_mount, "manila")
-
- group_id = "grpid"
- volume_id = "volid"
- guest_entity_1 = "guest1"
- guest_entity_2 = "guest2"
-
- log.info("print group ID: {0}".format(group_id))
-
- # Create a volume.
- auths = self._volume_client_python(volumeclient_mount, dedent("""
- vp = VolumePath("{group_id}", "{volume_id}")
- vc.create_volume(vp, 1024*1024*10)
- auths = vc.get_authorized_ids(vp)
- print auths
- """.format(
- group_id=group_id,
- volume_id=volume_id,
- )))
- # Check the list of authorized IDs for the volume.
- expected_result = None
- self.assertEqual(str(expected_result), auths)
-
- # Allow two auth IDs access to the volume.
- auths = self._volume_client_python(volumeclient_mount, dedent("""
- vp = VolumePath("{group_id}", "{volume_id}")
- vc.authorize(vp, "{guest_entity_1}", readonly=False)
- vc.authorize(vp, "{guest_entity_2}", readonly=True)
- auths = vc.get_authorized_ids(vp)
- print auths
- """.format(
- group_id=group_id,
- volume_id=volume_id,
- guest_entity_1=guest_entity_1,
- guest_entity_2=guest_entity_2,
- )))
- # Check the list of authorized IDs and their access levels.
- expected_result = [(u'guest1', u'rw'), (u'guest2', u'r')]
- self.assertItemsEqual(str(expected_result), auths)
-
- # Disallow both the auth IDs' access to the volume.
- auths = self._volume_client_python(volumeclient_mount, dedent("""
- vp = VolumePath("{group_id}", "{volume_id}")
- vc.deauthorize(vp, "{guest_entity_1}")
- vc.deauthorize(vp, "{guest_entity_2}")
- auths = vc.get_authorized_ids(vp)
- print auths
- """.format(
- group_id=group_id,
- volume_id=volume_id,
- guest_entity_1=guest_entity_1,
- guest_entity_2=guest_entity_2,
- )))
- # Check the list of authorized IDs for the volume.
- expected_result = None
- self.assertItemsEqual(str(expected_result), auths)
-
- def test_multitenant_volumes(self):
- """
- That volume access can be restricted to a tenant.
-
- That metadata used to enforce tenant isolation of
- volumes is stored as a two-way mapping between auth
- IDs and volumes that they're authorized to access.
- """
- volumeclient_mount = self.mounts[1]
- volumeclient_mount.umount_wait()
-
- # Configure volumeclient_mount as the handle for driving volumeclient.
- self._configure_vc_auth(volumeclient_mount, "manila")
-
- group_id = "groupid"
- volume_id = "volumeid"
-
- # Guest clients belonging to different tenants, but using the same
- # auth ID.
- auth_id = "guest"
- guestclient_1 = {
- "auth_id": auth_id,
- "tenant_id": "tenant1",
- }
- guestclient_2 = {
- "auth_id": auth_id,
- "tenant_id": "tenant2",
- }
-
- # Create a volume.
- self._volume_client_python(volumeclient_mount, dedent("""
- vp = VolumePath("{group_id}", "{volume_id}")
- vc.create_volume(vp, 1024*1024*10)
- """.format(
- group_id=group_id,
- volume_id=volume_id,
- )))
-
- # Check that volume metadata file is created on volume creation.
- vol_metadata_filename = "_{0}:{1}.meta".format(group_id, volume_id)
- self.assertIn(vol_metadata_filename, self.mounts[0].ls("volumes"))
-
- # Authorize 'guestclient_1', using auth ID 'guest' and belonging to
- # 'tenant1', with 'rw' access to the volume.
- self._volume_client_python(volumeclient_mount, dedent("""
- vp = VolumePath("{group_id}", "{volume_id}")
- vc.authorize(vp, "{auth_id}", tenant_id="{tenant_id}")
- """.format(
- group_id=group_id,
- volume_id=volume_id,
- auth_id=guestclient_1["auth_id"],
- tenant_id=guestclient_1["tenant_id"]
- )))
-
- # Check that the auth metadata file for auth ID 'guest' is
- # created on authorizing 'guest' access to the volume.
- auth_metadata_filename = "${0}.meta".format(guestclient_1["auth_id"])
- self.assertIn(auth_metadata_filename, self.mounts[0].ls("volumes"))
-
- # Verify that the auth metadata file stores the tenant ID that the
- # auth ID belongs to, the auth ID's authorized access levels
- # for different volumes, versioning details, etc.
- expected_auth_metadata = {
- u"version": 2,
- u"compat_version": 1,
- u"dirty": False,
- u"tenant_id": u"tenant1",
- u"volumes": {
- u"groupid/volumeid": {
- u"dirty": False,
- u"access_level": u"rw",
- }
- }
- }
-
- auth_metadata = self._volume_client_python(volumeclient_mount, dedent("""
- vp = VolumePath("{group_id}", "{volume_id}")
- auth_metadata = vc._auth_metadata_get("{auth_id}")
- print auth_metadata
- """.format(
- group_id=group_id,
- volume_id=volume_id,
- auth_id=guestclient_1["auth_id"],
- )))
-
- self.assertItemsEqual(str(expected_auth_metadata), auth_metadata)
-
- # Verify that the volume metadata file stores info about auth IDs
- # and their access levels to the volume, versioning details, etc.
- expected_vol_metadata = {
- u"version": 2,
- u"compat_version": 1,
- u"auths": {
- u"guest": {
- u"dirty": False,
- u"access_level": u"rw"
- }
- }
- }
-
- vol_metadata = self._volume_client_python(volumeclient_mount, dedent("""
- vp = VolumePath("{group_id}", "{volume_id}")
- volume_metadata = vc._volume_metadata_get(vp)
- print volume_metadata
- """.format(
- group_id=group_id,
- volume_id=volume_id,
- )))
- self.assertItemsEqual(str(expected_vol_metadata), vol_metadata)
-
- # 'guestclient_2' cannot be authorized to access the volume.
- # It uses auth ID 'guest', which has already been used by
- # 'guestclient_1', belonging to another tenant, to access
- # the volume.
- with self.assertRaises(CommandFailedError):
- self._volume_client_python(volumeclient_mount, dedent("""
- vp = VolumePath("{group_id}", "{volume_id}")
- vc.authorize(vp, "{auth_id}", tenant_id="{tenant_id}")
- """.format(
- group_id=group_id,
- volume_id=volume_id,
- auth_id=guestclient_2["auth_id"],
- tenant_id=guestclient_2["tenant_id"]
- )))
-
- # Check that the auth metadata file is cleaned up when the
- # auth ID's only access to a volume is removed.
- self._volume_client_python(volumeclient_mount, dedent("""
- vp = VolumePath("{group_id}", "{volume_id}")
- vc.deauthorize(vp, "{guest_entity}")
- """.format(
- group_id=group_id,
- volume_id=volume_id,
- guest_entity=guestclient_1["auth_id"]
- )))
-
- self.assertNotIn(auth_metadata_filename, self.mounts[0].ls("volumes"))
-
- # Check that the volume metadata file is cleaned up on volume deletion.
- self._volume_client_python(volumeclient_mount, dedent("""
- vp = VolumePath("{group_id}", "{volume_id}")
- vc.delete_volume(vp)
- """.format(
- group_id=group_id,
- volume_id=volume_id,
- )))
- self.assertNotIn(vol_metadata_filename, self.mounts[0].ls("volumes"))
-
- def test_recover_metadata(self):
- """
- That the volume client can recover from partial auth updates using
- metadata files, which store auth info and its update status.
- """
- volumeclient_mount = self.mounts[1]
- volumeclient_mount.umount_wait()
-
- # Configure volumeclient_mount as the handle for driving volumeclient.
- self._configure_vc_auth(volumeclient_mount, "manila")
-
- group_id = "groupid"
- volume_id = "volumeid"
-
- guestclient = {
- "auth_id": "guest",
- "tenant_id": "tenant",
- }
-
- # Create a volume.
- self._volume_client_python(volumeclient_mount, dedent("""
- vp = VolumePath("{group_id}", "{volume_id}")
- vc.create_volume(vp, 1024*1024*10)
- """.format(
- group_id=group_id,
- volume_id=volume_id,
- )))
-
- # Authorize 'guestclient' access to the volume.
- self._volume_client_python(volumeclient_mount, dedent("""
- vp = VolumePath("{group_id}", "{volume_id}")
- vc.authorize(vp, "{auth_id}", tenant_id="{tenant_id}")
- """.format(
- group_id=group_id,
- volume_id=volume_id,
- auth_id=guestclient["auth_id"],
- tenant_id=guestclient["tenant_id"]
- )))
-
- # Check that the auth metadata file for auth ID 'guest' is created.
- auth_metadata_filename = "${0}.meta".format(guestclient["auth_id"])
- self.assertIn(auth_metadata_filename, self.mounts[0].ls("volumes"))
-
- # Induce a partial auth update state by marking the auth metadata
- # dirty, and then run the recovery procedure.
- self._volume_client_python(volumeclient_mount, dedent("""
- vp = VolumePath("{group_id}", "{volume_id}")
- auth_metadata = vc._auth_metadata_get("{auth_id}")
- auth_metadata['dirty'] = True
- vc._auth_metadata_set("{auth_id}", auth_metadata)
- vc.recover()
- """.format(
- group_id=group_id,
- volume_id=volume_id,
- auth_id=guestclient["auth_id"],
- )))
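- # A natural follow-up check (sketch, not part of the original test):
- # after recover() the auth metadata would be expected to be clean again,
- # e.g.
- #   md = vc._auth_metadata_get("guest")
- #   assert md['dirty'] is False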
-
- def test_put_object(self):
- vc_mount = self.mounts[1]
- vc_mount.umount_wait()
- self._configure_vc_auth(vc_mount, "manila")
-
- obj_data = 'test data'
- obj_name = 'test_vc_obj_1'
- pool_name = self.fs.get_data_pool_names()[0]
-
- self._volume_client_python(vc_mount, dedent("""
- vc.put_object("{pool_name}", "{obj_name}", b"{obj_data}")
- """.format(
- pool_name = pool_name,
- obj_name = obj_name,
- obj_data = obj_data
- )))
-
- read_data = self.fs.rados(['get', obj_name, '-'], pool=pool_name)
- self.assertEqual(obj_data, read_data)
-
- def test_get_object(self):
- vc_mount = self.mounts[1]
- vc_mount.umount_wait()
- self._configure_vc_auth(vc_mount, "manila")
-
- obj_data = 'test_data'
- obj_name = 'test_vc_obj_2'
- pool_name = self.fs.get_data_pool_names()[0]
-
- self.fs.rados(['put', obj_name, '-'], pool=pool_name, stdin_data=obj_data)
-
- self._volume_client_python(vc_mount, dedent("""
- data_read = vc.get_object("{pool_name}", "{obj_name}")
- assert data_read == b"{obj_data}"
- """.format(
- pool_name = pool_name,
- obj_name = obj_name,
- obj_data = obj_data
- )))
-
- def test_delete_object(self):
- vc_mount = self.mounts[1]
- vc_mount.umount_wait()
- self._configure_vc_auth(vc_mount, "manila")
-
- obj_data = 'test data'
- obj_name = 'test_vc_obj_3'
- pool_name = self.fs.get_data_pool_names()[0]
-
- self.fs.rados(['put', obj_name, '-'], pool=pool_name, stdin_data=obj_data)
-
- self._volume_client_python(vc_mount, dedent("""
- data_read = vc.delete_object("{pool_name}", "{obj_name}")
- """.format(
- pool_name = pool_name,
- obj_name = obj_name,
- )))
-
- with self.assertRaises(CommandFailedError):
- self.fs.rados(['stat', obj_name], pool=pool_name)
-
- # Check idempotency -- no error is raised when trying to delete a
- # non-existent object.
- self._volume_client_python(vc_mount, dedent("""
- data_read = vc.delete_object("{pool_name}", "{obj_name}")
- """.format(
- pool_name = pool_name,
- obj_name = obj_name,
- )))
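- # Object-API round-trip sketch combining the three tests above; the pool
- # and object names are hypothetical and `vc` is a connected
- # CephFSVolumeClient as in the snippets above:
- #   pool = "cephfs_data"
- #   vc.put_object(pool, "example_obj", b"payload")
- #   assert vc.get_object(pool, "example_obj") == b"payload"
- #   vc.delete_object(pool, "example_obj")
- #   vc.delete_object(pool, "example_obj")  # idempotent: no error the second time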
-
- def test_21501(self):
- """
- Reproducer for #21501 "ceph_volume_client: sets invalid caps for
- existing IDs with no caps" (http://tracker.ceph.com/issues/21501)
- """
-
- vc_mount = self.mounts[1]
- vc_mount.umount_wait()
-
- # Configure vc_mount as the handle for driving volumeclient
- self._configure_vc_auth(vc_mount, "manila")
-
- # Create a volume
- group_id = "grpid"
- volume_id = "volid"
- mount_path = self._volume_client_python(vc_mount, dedent("""
- vp = VolumePath("{group_id}", "{volume_id}")
- create_result = vc.create_volume(vp, 1024*1024*10)
- print create_result['mount_path']
- """.format(
- group_id=group_id,
- volume_id=volume_id
- )))
-
- # Create an auth ID with no caps
- guest_id = '21501'
- self.fs.mon_manager.raw_cluster_cmd_result(
- 'auth', 'get-or-create', 'client.{0}'.format(guest_id))
-
- guest_mount = self.mounts[2]
- guest_mount.umount_wait()
-
- # Set auth caps for the auth ID using the volumeclient
- self._configure_guest_auth(vc_mount, guest_mount, guest_id, mount_path)
-
- # Mount the volume in the guest using the auth ID to assert that the
- # auth caps are valid
- guest_mount.mount(mount_path=mount_path)