Diffstat (limited to 'src/ceph/qa/tasks/cephfs')
33 files changed, 0 insertions, 10708 deletions
diff --git a/src/ceph/qa/tasks/cephfs/__init__.py b/src/ceph/qa/tasks/cephfs/__init__.py deleted file mode 100644 index e69de29..0000000 --- a/src/ceph/qa/tasks/cephfs/__init__.py +++ /dev/null diff --git a/src/ceph/qa/tasks/cephfs/cephfs_test_case.py b/src/ceph/qa/tasks/cephfs/cephfs_test_case.py deleted file mode 100644 index 801d0d3..0000000 --- a/src/ceph/qa/tasks/cephfs/cephfs_test_case.py +++ /dev/null @@ -1,315 +0,0 @@ -import json -import logging -from unittest import case -from tasks.ceph_test_case import CephTestCase -import os -import re -from StringIO import StringIO - -from tasks.cephfs.fuse_mount import FuseMount - -from teuthology.orchestra import run -from teuthology.orchestra.run import CommandFailedError - - -log = logging.getLogger(__name__) - - -def for_teuthology(f): - """ - Decorator that adds an "is_for_teuthology" attribute to the wrapped function - """ - f.is_for_teuthology = True - return f - - -def needs_trimming(f): - """ - Mark fn as requiring a client capable of trimming its cache (i.e. for ceph-fuse - this means it needs to be able to run as root, currently) - """ - f.needs_trimming = True - return f - - -class CephFSTestCase(CephTestCase): - """ - Test case for Ceph FS, requires caller to populate Filesystem and Mounts, - into the fs, mount_a, mount_b class attributes (setting mount_b is optional) - - Handles resetting the cluster under test between tests. - """ - - # FIXME weird explicit naming - mount_a = None - mount_b = None - recovery_mount = None - - # Declarative test requirements: subclasses should override these to indicate - # their special needs. If not met, tests will be skipped. - CLIENTS_REQUIRED = 1 - MDSS_REQUIRED = 1 - REQUIRE_KCLIENT_REMOTE = False - REQUIRE_ONE_CLIENT_REMOTE = False - REQUIRE_MEMSTORE = False - - # Whether to create the default filesystem during setUp - REQUIRE_FILESYSTEM = True - - # requires REQUIRE_FILESYSTEM = True - REQUIRE_RECOVERY_FILESYSTEM = False - - LOAD_SETTINGS = [] - - def setUp(self): - super(CephFSTestCase, self).setUp() - - if len(self.mds_cluster.mds_ids) < self.MDSS_REQUIRED: - raise case.SkipTest("Only have {0} MDSs, require {1}".format( - len(self.mds_cluster.mds_ids), self.MDSS_REQUIRED - )) - - if len(self.mounts) < self.CLIENTS_REQUIRED: - raise case.SkipTest("Only have {0} clients, require {1}".format( - len(self.mounts), self.CLIENTS_REQUIRED - )) - - if self.REQUIRE_KCLIENT_REMOTE: - if not isinstance(self.mounts[0], FuseMount) or not isinstance(self.mounts[1], FuseMount): - # kclient kill() power cycles nodes, so requires clients to each be on - # their own node - if self.mounts[0].client_remote.hostname == self.mounts[1].client_remote.hostname: - raise case.SkipTest("kclient clients must be on separate nodes") - - if self.REQUIRE_ONE_CLIENT_REMOTE: - if self.mounts[0].client_remote.hostname in self.mds_cluster.get_mds_hostnames(): - raise case.SkipTest("Require first client to be on separate server from MDSs") - - if self.REQUIRE_MEMSTORE: - objectstore = self.mds_cluster.get_config("osd_objectstore", "osd") - if objectstore != "memstore": - # You certainly *could* run this on a real OSD, but you don't want to sit - # here for hours waiting for the test to fill up a 1TB drive! 
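The two decorators above do not wrap their targets; they only tag the function with an attribute that a test runner can inspect later. A minimal sketch of the pattern — should_run and the sample test are illustrative stand-ins, not part of the original runner:

def for_teuthology(f):
    # Tag, don't wrap: the attribute is all the runner looks at.
    f.is_for_teuthology = True
    return f

@for_teuthology
def test_expensive_workload():
    pass

def should_run(test_fn, in_teuthology):
    # Outside teuthology, skip tests tagged as teuthology-only.
    return in_teuthology or not getattr(test_fn, "is_for_teuthology", False)

assert should_run(test_expensive_workload, in_teuthology=True)
assert not should_run(test_expensive_workload, in_teuthology=False)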
- raise case.SkipTest("Require `memstore` OSD backend to simulate full drives") - - # Create friendly mount_a, mount_b attrs - for i in range(0, self.CLIENTS_REQUIRED): - setattr(self, "mount_{0}".format(chr(ord('a') + i)), self.mounts[i]) - - self.mds_cluster.clear_firewall() - - # Unmount all clients, we are about to blow away the filesystem - for mount in self.mounts: - if mount.is_mounted(): - mount.umount_wait(force=True) - - # To avoid any issues with e.g. unlink bugs, we destroy and recreate - # the filesystem rather than just doing a rm -rf of files - self.mds_cluster.mds_stop() - self.mds_cluster.mds_fail() - self.mds_cluster.delete_all_filesystems() - self.fs = None # is now invalid! - self.recovery_fs = None - - # In case the previous filesystem had filled up the RADOS cluster, wait for that - # flag to pass. - osd_mon_report_interval_max = int(self.mds_cluster.get_config("osd_mon_report_interval_max", service_type='osd')) - self.wait_until_true(lambda: not self.mds_cluster.is_full(), - timeout=osd_mon_report_interval_max * 5) - - # In case anything is in the OSD blacklist list, clear it out. This is to avoid - # the OSD map changing in the background (due to blacklist expiry) while tests run. - try: - self.mds_cluster.mon_manager.raw_cluster_cmd("osd", "blacklist", "clear") - except CommandFailedError: - # Fallback for older Ceph cluster - blacklist = json.loads(self.mds_cluster.mon_manager.raw_cluster_cmd("osd", - "dump", "--format=json-pretty"))['blacklist'] - log.info("Removing {0} blacklist entries".format(len(blacklist))) - for addr, blacklisted_at in blacklist.items(): - self.mds_cluster.mon_manager.raw_cluster_cmd("osd", "blacklist", "rm", addr) - - client_mount_ids = [m.client_id for m in self.mounts] - # In case the test changes the IDs of clients, stash them so that we can - # reset in tearDown - self._original_client_ids = client_mount_ids - log.info(client_mount_ids) - - # In case there were any extra auth identities around from a previous - # test, delete them - for entry in self.auth_list(): - ent_type, ent_id = entry['entity'].split(".") - if ent_type == "client" and ent_id not in client_mount_ids and ent_id != "admin": - self.mds_cluster.mon_manager.raw_cluster_cmd("auth", "del", entry['entity']) - - if self.REQUIRE_FILESYSTEM: - self.fs = self.mds_cluster.newfs(create=True) - self.fs.mds_restart() - - # In case some test messed with auth caps, reset them - for client_id in client_mount_ids: - self.mds_cluster.mon_manager.raw_cluster_cmd_result( - 'auth', 'caps', "client.{0}".format(client_id), - 'mds', 'allow', - 'mon', 'allow r', - 'osd', 'allow rw pool={0}'.format(self.fs.get_data_pool_name())) - - # wait for mds restart to complete... 
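The blacklist cleanup above tries the one-shot `osd blacklist clear` and, on older clusters, falls back to enumerating entries from the OSD map. A sketch of that fallback shape, where raw_cluster_cmd is a stand-in for mon_manager.raw_cluster_cmd and is assumed to raise on nonzero exit:

import json

def clear_osd_blacklist(raw_cluster_cmd):
    try:
        raw_cluster_cmd("osd", "blacklist", "clear")
    except Exception:
        # Older clusters lack `osd blacklist clear`: read the map and
        # remove each blacklisted address individually.
        dump = json.loads(raw_cluster_cmd("osd", "dump", "--format=json-pretty"))
        for addr in dump["blacklist"]:
            raw_cluster_cmd("osd", "blacklist", "rm", addr)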
- self.fs.wait_for_daemons() - - # Mount the requested number of clients - for i in range(0, self.CLIENTS_REQUIRED): - self.mounts[i].mount() - self.mounts[i].wait_until_mounted() - - if self.REQUIRE_RECOVERY_FILESYSTEM: - if not self.REQUIRE_FILESYSTEM: - raise case.SkipTest("Recovery filesystem requires a primary filesystem as well") - self.fs.mon_manager.raw_cluster_cmd('fs', 'flag', 'set', - 'enable_multiple', 'true', - '--yes-i-really-mean-it') - self.recovery_fs = self.mds_cluster.newfs(name="recovery_fs", create=False) - self.recovery_fs.set_metadata_overlay(True) - self.recovery_fs.set_data_pool_name(self.fs.get_data_pool_name()) - self.recovery_fs.create() - self.recovery_fs.getinfo(refresh=True) - self.recovery_fs.mds_restart() - self.recovery_fs.wait_for_daemons() - - # Load an config settings of interest - for setting in self.LOAD_SETTINGS: - setattr(self, setting, float(self.fs.mds_asok( - ['config', 'get', setting], self.mds_cluster.mds_ids[0] - )[setting])) - - self.configs_set = set() - - def tearDown(self): - super(CephFSTestCase, self).tearDown() - - self.mds_cluster.clear_firewall() - for m in self.mounts: - m.teardown() - - for i, m in enumerate(self.mounts): - m.client_id = self._original_client_ids[i] - - for subsys, key in self.configs_set: - self.mds_cluster.clear_ceph_conf(subsys, key) - - def set_conf(self, subsys, key, value): - self.configs_set.add((subsys, key)) - self.mds_cluster.set_ceph_conf(subsys, key, value) - - def auth_list(self): - """ - Convenience wrapper on "ceph auth ls" - """ - return json.loads(self.mds_cluster.mon_manager.raw_cluster_cmd( - "auth", "ls", "--format=json-pretty" - ))['auth_dump'] - - def assert_session_count(self, expected, ls_data=None, mds_id=None): - if ls_data is None: - ls_data = self.fs.mds_asok(['session', 'ls'], mds_id=mds_id) - - alive_count = len([s for s in ls_data if s['state'] != 'killing']) - - self.assertEqual(expected, alive_count, "Expected {0} sessions, found {1}".format( - expected, alive_count - )) - - def assert_session_state(self, client_id, expected_state): - self.assertEqual( - self._session_by_id( - self.fs.mds_asok(['session', 'ls'])).get(client_id, {'state': None})['state'], - expected_state) - - def get_session_data(self, client_id): - return self._session_by_id(client_id) - - def _session_list(self): - ls_data = self.fs.mds_asok(['session', 'ls']) - ls_data = [s for s in ls_data if s['state'] not in ['stale', 'closed']] - return ls_data - - def get_session(self, client_id, session_ls=None): - if session_ls is None: - session_ls = self.fs.mds_asok(['session', 'ls']) - - return self._session_by_id(session_ls)[client_id] - - def _session_by_id(self, session_ls): - return dict([(s['id'], s) for s in session_ls]) - - def wait_for_daemon_start(self, daemon_ids=None): - """ - Wait until all the daemons appear in the FSMap, either assigned - MDS ranks or in the list of standbys - """ - def get_daemon_names(): - return [info['name'] for info in self.mds_cluster.status().get_all()] - - if daemon_ids is None: - daemon_ids = self.mds_cluster.mds_ids - - try: - self.wait_until_true( - lambda: set(daemon_ids) & set(get_daemon_names()) == set(daemon_ids), - timeout=30 - ) - except RuntimeError: - log.warn("Timeout waiting for daemons {0}, while we have {1}".format( - daemon_ids, get_daemon_names() - )) - raise - - def assert_mds_crash(self, daemon_id): - """ - Assert that the a particular MDS daemon crashes (block until - it does) - """ - try: - self.mds_cluster.mds_daemons[daemon_id].proc.wait() - except 
CommandFailedError as e: - log.info("MDS '{0}' crashed with status {1} as expected".format(daemon_id, e.exitstatus)) - self.mds_cluster.mds_daemons[daemon_id].proc = None - - # Go remove the coredump from the crash, otherwise teuthology.internal.coredump will - # catch it later and treat it as a failure. - p = self.mds_cluster.mds_daemons[daemon_id].remote.run(args=[ - "sudo", "sysctl", "-n", "kernel.core_pattern"], stdout=StringIO()) - core_pattern = p.stdout.getvalue().strip() - if os.path.dirname(core_pattern): # Non-default core_pattern with a directory in it - # We have seen a core_pattern that looks like it's from teuthology's coredump - # task, so proceed to clear out the core file - log.info("Clearing core from pattern: {0}".format(core_pattern)) - - # Determine the PID of the crashed MDS by inspecting the MDSMap, it had - # to talk to the mons to get assigned a rank to reach the point of crashing - addr = self.mds_cluster.mon_manager.get_mds_status(daemon_id)['addr'] - pid_str = addr.split("/")[1] - log.info("Determined crasher PID was {0}".format(pid_str)) - - # Substitute PID into core_pattern to get a glob - core_glob = core_pattern.replace("%p", pid_str) - core_glob = re.sub("%[a-z]", "*", core_glob) # Match all for all other % tokens - - # Verify that we see the expected single coredump matching the expected pattern - ls_proc = self.mds_cluster.mds_daemons[daemon_id].remote.run(args=[ - "sudo", "ls", run.Raw(core_glob) - ], stdout=StringIO()) - cores = [f for f in ls_proc.stdout.getvalue().strip().split("\n") if f] - log.info("Enumerated cores: {0}".format(cores)) - self.assertEqual(len(cores), 1) - - log.info("Found core file {0}, deleting it".format(cores[0])) - - self.mds_cluster.mds_daemons[daemon_id].remote.run(args=[ - "sudo", "rm", "-f", cores[0] - ]) - else: - log.info("No core_pattern directory set, nothing to clear (internal.coredump not enabled?)") - - else: - raise AssertionError("MDS daemon '{0}' did not crash as expected".format(daemon_id)) diff --git a/src/ceph/qa/tasks/cephfs/filesystem.py b/src/ceph/qa/tasks/cephfs/filesystem.py deleted file mode 100644 index 9638fd5..0000000 --- a/src/ceph/qa/tasks/cephfs/filesystem.py +++ /dev/null @@ -1,1213 +0,0 @@ - -from StringIO import StringIO -import json -import logging -from gevent import Greenlet -import os -import time -import datetime -import re -import errno -import random - -from teuthology.exceptions import CommandFailedError -from teuthology import misc -from teuthology.nuke import clear_firewall -from teuthology.parallel import parallel -from tasks.ceph_manager import write_conf -from tasks import ceph_manager - - -log = logging.getLogger(__name__) - - -DAEMON_WAIT_TIMEOUT = 120 -ROOT_INO = 1 - - -class ObjectNotFound(Exception): - def __init__(self, object_name): - self._object_name = object_name - - def __str__(self): - return "Object not found: '{0}'".format(self._object_name) - -class FSStatus(object): - """ - Operations on a snapshot of the FSMap. - """ - def __init__(self, mon_manager): - self.mon = mon_manager - self.map = json.loads(self.mon.raw_cluster_cmd("fs", "dump", "--format=json")) - - def __str__(self): - return json.dumps(self.map, indent = 2, sort_keys = True) - - # Expose the fsmap for manual inspection. - def __getitem__(self, key): - """ - Get a field from the fsmap. - """ - return self.map[key] - - def get_filesystems(self): - """ - Iterator for all filesystems. 
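The core-file cleanup in assert_mds_crash above turns the kernel's core_pattern into a shell glob for one specific PID. A sketch of that substitution, with an illustrative core_pattern value:

import re

def core_glob_for_pid(core_pattern, pid):
    # Substitute the crasher's PID for %p, then wildcard every other
    # %-token so the glob matches whatever else the kernel filled in.
    glob_str = core_pattern.replace("%p", str(pid))
    return re.sub(r"%[a-z]", "*", glob_str)

assert core_glob_for_pid("/var/crash/%e.%p.core", 10825) == "/var/crash/*.10825.core"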
- """ - for fs in self.map['filesystems']: - yield fs - - def get_all(self): - """ - Iterator for all the mds_info components in the FSMap. - """ - for info in self.get_standbys(): - yield info - for fs in self.map['filesystems']: - for info in fs['mdsmap']['info'].values(): - yield info - - def get_standbys(self): - """ - Iterator for all standbys. - """ - for info in self.map['standbys']: - yield info - - def get_fsmap(self, fscid): - """ - Get the fsmap for the given FSCID. - """ - for fs in self.map['filesystems']: - if fscid is None or fs['id'] == fscid: - return fs - raise RuntimeError("FSCID {0} not in map".format(fscid)) - - def get_fsmap_byname(self, name): - """ - Get the fsmap for the given file system name. - """ - for fs in self.map['filesystems']: - if name is None or fs['mdsmap']['fs_name'] == name: - return fs - raise RuntimeError("FS {0} not in map".format(name)) - - def get_replays(self, fscid): - """ - Get the standby:replay MDS for the given FSCID. - """ - fs = self.get_fsmap(fscid) - for info in fs['mdsmap']['info'].values(): - if info['state'] == 'up:standby-replay': - yield info - - def get_ranks(self, fscid): - """ - Get the ranks for the given FSCID. - """ - fs = self.get_fsmap(fscid) - for info in fs['mdsmap']['info'].values(): - if info['rank'] >= 0: - yield info - - def get_rank(self, fscid, rank): - """ - Get the rank for the given FSCID. - """ - for info in self.get_ranks(fscid): - if info['rank'] == rank: - return info - raise RuntimeError("FSCID {0} has no rank {1}".format(fscid, rank)) - - def get_mds(self, name): - """ - Get the info for the given MDS name. - """ - for info in self.get_all(): - if info['name'] == name: - return info - return None - - def get_mds_addr(self, name): - """ - Return the instance addr as a string, like "10.214.133.138:6807\/10825" - """ - info = self.get_mds(name) - if info: - return info['addr'] - else: - log.warn(json.dumps(list(self.get_all()), indent=2)) # dump for debugging - raise RuntimeError("MDS id '{0}' not found in map".format(name)) - -class CephCluster(object): - @property - def admin_remote(self): - first_mon = misc.get_first_mon(self._ctx, None) - (result,) = self._ctx.cluster.only(first_mon).remotes.iterkeys() - return result - - def __init__(self, ctx): - self._ctx = ctx - self.mon_manager = ceph_manager.CephManager(self.admin_remote, ctx=ctx, logger=log.getChild('ceph_manager')) - - def get_config(self, key, service_type=None): - """ - Get config from mon by default, or a specific service if caller asks for it - """ - if service_type is None: - service_type = 'mon' - - service_id = sorted(misc.all_roles_of_type(self._ctx.cluster, service_type))[0] - return self.json_asok(['config', 'get', key], service_type, service_id)[key] - - def set_ceph_conf(self, subsys, key, value): - if subsys not in self._ctx.ceph['ceph'].conf: - self._ctx.ceph['ceph'].conf[subsys] = {} - self._ctx.ceph['ceph'].conf[subsys][key] = value - write_conf(self._ctx) # XXX because we don't have the ceph task's config object, if they - # used a different config path this won't work. 
- - def clear_ceph_conf(self, subsys, key): - del self._ctx.ceph['ceph'].conf[subsys][key] - write_conf(self._ctx) - - def json_asok(self, command, service_type, service_id): - proc = self.mon_manager.admin_socket(service_type, service_id, command) - response_data = proc.stdout.getvalue() - log.info("_json_asok output: {0}".format(response_data)) - if response_data.strip(): - return json.loads(response_data) - else: - return None - - -class MDSCluster(CephCluster): - """ - Collective operations on all the MDS daemons in the Ceph cluster. These - daemons may be in use by various Filesystems. - - For the benefit of pre-multi-filesystem tests, this class is also - a parent of Filesystem. The correct way to use MDSCluster going forward is - as a separate instance outside of your (multiple) Filesystem instances. - """ - def __init__(self, ctx): - super(MDSCluster, self).__init__(ctx) - - self.mds_ids = list(misc.all_roles_of_type(ctx.cluster, 'mds')) - - if len(self.mds_ids) == 0: - raise RuntimeError("This task requires at least one MDS") - - if hasattr(self._ctx, "daemons"): - # Presence of 'daemons' attribute implies ceph task rather than ceph_deploy task - self.mds_daemons = dict([(mds_id, self._ctx.daemons.get_daemon('mds', mds_id)) for mds_id in self.mds_ids]) - - def _one_or_all(self, mds_id, cb, in_parallel=True): - """ - Call a callback for a single named MDS, or for all. - - Note that the parallelism here isn't for performance, it's to avoid being overly kind - to the cluster by waiting a graceful ssh-latency of time between doing things, and to - avoid being overly kind by executing them in a particular order. However, some actions - don't cope with being done in parallel, so it's optional (`in_parallel`) - - :param mds_id: MDS daemon name, or None - :param cb: Callback taking single argument of MDS daemon name - :param in_parallel: whether to invoke callbacks concurrently (else one after the other) - """ - if mds_id is None: - if in_parallel: - with parallel() as p: - for mds_id in self.mds_ids: - p.spawn(cb, mds_id) - else: - for mds_id in self.mds_ids: - cb(mds_id) - else: - cb(mds_id) - - def get_config(self, key, service_type=None): - """ - get_config specialization of service_type="mds" - """ - if service_type != "mds": - return super(MDSCluster, self).get_config(key, service_type) - - # Some tests stop MDS daemons, don't send commands to a dead one: - service_id = random.sample(filter(lambda i: self.mds_daemons[i].running(), self.mds_daemons), 1)[0] - return self.json_asok(['config', 'get', key], service_type, service_id)[key] - - def mds_stop(self, mds_id=None): - """ - Stop the MDS daemon process(se). If it held a rank, that rank - will eventually go laggy. - """ - self._one_or_all(mds_id, lambda id_: self.mds_daemons[id_].stop()) - - def mds_fail(self, mds_id=None): - """ - Inform MDSMonitor of the death of the daemon process(es). If it held - a rank, that rank will be relinquished. - """ - self._one_or_all(mds_id, lambda id_: self.mon_manager.raw_cluster_cmd("mds", "fail", id_)) - - def mds_restart(self, mds_id=None): - self._one_or_all(mds_id, lambda id_: self.mds_daemons[id_].restart()) - - def mds_fail_restart(self, mds_id=None): - """ - Variation on restart that includes marking MDSs as failed, so that doing this - operation followed by waiting for healthy daemon states guarantees that they - have gone down and come up, rather than potentially seeing the healthy states - that existed before the restart. 
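A sketch of the one-or-all dispatch in _one_or_all above, using the stdlib in place of teuthology's parallel() context manager (the function and variable names here are illustrative):

from concurrent.futures import ThreadPoolExecutor

def one_or_all(mds_id, all_ids, cb, in_parallel=True):
    # Single named daemon, or every daemon; parallel by default so the
    # callbacks run in no particular order.
    if mds_id is not None:
        cb(mds_id)
    elif in_parallel:
        with ThreadPoolExecutor() as pool:
            list(pool.map(cb, all_ids))  # list() surfaces any exception
    else:
        for id_ in all_ids:
            cb(id_)

stopped = []
one_or_all(None, ["a", "b"], stopped.append, in_parallel=False)
assert sorted(stopped) == ["a", "b"]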
- """ - def _fail_restart(id_): - self.mds_daemons[id_].stop() - self.mon_manager.raw_cluster_cmd("mds", "fail", id_) - self.mds_daemons[id_].restart() - - self._one_or_all(mds_id, _fail_restart) - - def newfs(self, name='cephfs', create=True): - return Filesystem(self._ctx, name=name, create=create) - - def status(self): - return FSStatus(self.mon_manager) - - def delete_all_filesystems(self): - """ - Remove all filesystems that exist, and any pools in use by them. - """ - pools = json.loads(self.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['pools'] - pool_id_name = {} - for pool in pools: - pool_id_name[pool['pool']] = pool['pool_name'] - - # mark cluster down for each fs to prevent churn during deletion - status = self.status() - for fs in status.get_filesystems(): - self.mon_manager.raw_cluster_cmd("fs", "set", fs['mdsmap']['fs_name'], "cluster_down", "true") - - # get a new copy as actives may have since changed - status = self.status() - for fs in status.get_filesystems(): - mdsmap = fs['mdsmap'] - metadata_pool = pool_id_name[mdsmap['metadata_pool']] - - for gid in mdsmap['up'].values(): - self.mon_manager.raw_cluster_cmd('mds', 'fail', gid.__str__()) - - self.mon_manager.raw_cluster_cmd('fs', 'rm', mdsmap['fs_name'], '--yes-i-really-mean-it') - self.mon_manager.raw_cluster_cmd('osd', 'pool', 'delete', - metadata_pool, metadata_pool, - '--yes-i-really-really-mean-it') - for data_pool in mdsmap['data_pools']: - data_pool = pool_id_name[data_pool] - try: - self.mon_manager.raw_cluster_cmd('osd', 'pool', 'delete', - data_pool, data_pool, - '--yes-i-really-really-mean-it') - except CommandFailedError as e: - if e.exitstatus == 16: # EBUSY, this data pool is used - pass # by two metadata pools, let the 2nd - else: # pass delete it - raise - - def get_standby_daemons(self): - return set([s['name'] for s in self.status().get_standbys()]) - - def get_mds_hostnames(self): - result = set() - for mds_id in self.mds_ids: - mds_remote = self.mon_manager.find_remote('mds', mds_id) - result.add(mds_remote.hostname) - - return list(result) - - def set_clients_block(self, blocked, mds_id=None): - """ - Block (using iptables) client communications to this MDS. Be careful: if - other services are running on this MDS, or other MDSs try to talk to this - MDS, their communications may also be blocked as collatoral damage. 
- - :param mds_id: Optional ID of MDS to block, default to all - :return: - """ - da_flag = "-A" if blocked else "-D" - - def set_block(_mds_id): - remote = self.mon_manager.find_remote('mds', _mds_id) - status = self.status() - - addr = status.get_mds_addr(_mds_id) - ip_str, port_str, inst_str = re.match("(.+):(.+)/(.+)", addr).groups() - - remote.run( - args=["sudo", "iptables", da_flag, "OUTPUT", "-p", "tcp", "--sport", port_str, "-j", "REJECT", "-m", - "comment", "--comment", "teuthology"]) - remote.run( - args=["sudo", "iptables", da_flag, "INPUT", "-p", "tcp", "--dport", port_str, "-j", "REJECT", "-m", - "comment", "--comment", "teuthology"]) - - self._one_or_all(mds_id, set_block, in_parallel=False) - - def clear_firewall(self): - clear_firewall(self._ctx) - - def get_mds_info(self, mds_id): - return FSStatus(self.mon_manager).get_mds(mds_id) - - def is_full(self): - flags = json.loads(self.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['flags'] - return 'full' in flags - - def is_pool_full(self, pool_name): - pools = json.loads(self.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['pools'] - for pool in pools: - if pool['pool_name'] == pool_name: - return 'full' in pool['flags_names'].split(",") - - raise RuntimeError("Pool not found '{0}'".format(pool_name)) - -class Filesystem(MDSCluster): - """ - This object is for driving a CephFS filesystem. The MDS daemons driven by - MDSCluster may be shared with other Filesystems. - """ - def __init__(self, ctx, fscid=None, name=None, create=False, - ec_profile=None): - super(Filesystem, self).__init__(ctx) - - self.name = name - self.ec_profile = ec_profile - self.id = None - self.metadata_pool_name = None - self.metadata_overlay = False - self.data_pool_name = None - self.data_pools = None - - client_list = list(misc.all_roles_of_type(self._ctx.cluster, 'client')) - self.client_id = client_list[0] - self.client_remote = list(misc.get_clients(ctx=ctx, roles=["client.{0}".format(self.client_id)]))[0][1] - - if name is not None: - if fscid is not None: - raise RuntimeError("cannot specify fscid when creating fs") - if create and not self.legacy_configured(): - self.create() - else: - if fscid is not None: - self.id = fscid - self.getinfo(refresh = True) - - # Stash a reference to the first created filesystem on ctx, so - # that if someone drops to the interactive shell they can easily - # poke our methods. 
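set_clients_block parses the MDS map address string ("ip:port/nonce") and toggles REJECT rules on that port. A sketch of the parsing and the iptables argv it builds; -A adds the rules and -D removes them:

import re

def client_block_rules(addr, blocked):
    ip, port, nonce = re.match(r"(.+):(.+)/(.+)", addr).groups()
    flag = "-A" if blocked else "-D"
    comment = ["-m", "comment", "--comment", "teuthology"]
    return [
        ["sudo", "iptables", flag, "OUTPUT", "-p", "tcp",
         "--sport", port, "-j", "REJECT"] + comment,
        ["sudo", "iptables", flag, "INPUT", "-p", "tcp",
         "--dport", port, "-j", "REJECT"] + comment,
    ]

rules = client_block_rules("10.214.133.138:6807/10825", blocked=True)
assert rules[0][2] == "-A" and "6807" in rules[0]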
- if not hasattr(self._ctx, "filesystem"): - self._ctx.filesystem = self - - def getinfo(self, refresh = False): - status = self.status() - if self.id is not None: - fsmap = status.get_fsmap(self.id) - elif self.name is not None: - fsmap = status.get_fsmap_byname(self.name) - else: - fss = [fs for fs in status.get_filesystems()] - if len(fss) == 1: - fsmap = fss[0] - elif len(fss) == 0: - raise RuntimeError("no file system available") - else: - raise RuntimeError("more than one file system available") - self.id = fsmap['id'] - self.name = fsmap['mdsmap']['fs_name'] - self.get_pool_names(status = status, refresh = refresh) - return status - - def set_metadata_overlay(self, overlay): - if self.id is not None: - raise RuntimeError("cannot specify fscid when configuring overlay") - self.metadata_overlay = overlay - - def deactivate(self, rank): - if rank < 0: - raise RuntimeError("invalid rank") - elif rank == 0: - raise RuntimeError("cannot deactivate rank 0") - self.mon_manager.raw_cluster_cmd("mds", "deactivate", "%d:%d" % (self.id, rank)) - - def set_max_mds(self, max_mds): - self.mon_manager.raw_cluster_cmd("fs", "set", self.name, "max_mds", "%d" % max_mds) - - def set_allow_dirfrags(self, yes): - self.mon_manager.raw_cluster_cmd("fs", "set", self.name, "allow_dirfrags", str(yes).lower(), '--yes-i-really-mean-it') - - def get_pgs_per_fs_pool(self): - """ - Calculate how many PGs to use when creating a pool, in order to avoid raising any - health warnings about mon_pg_warn_min_per_osd - - :return: an integer number of PGs - """ - pg_warn_min_per_osd = int(self.get_config('mon_pg_warn_min_per_osd')) - osd_count = len(list(misc.all_roles_of_type(self._ctx.cluster, 'osd'))) - return pg_warn_min_per_osd * osd_count - - def create(self): - if self.name is None: - self.name = "cephfs" - if self.metadata_pool_name is None: - self.metadata_pool_name = "{0}_metadata".format(self.name) - if self.data_pool_name is None: - data_pool_name = "{0}_data".format(self.name) - else: - data_pool_name = self.data_pool_name - - log.info("Creating filesystem '{0}'".format(self.name)) - - pgs_per_fs_pool = self.get_pgs_per_fs_pool() - - self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', - self.metadata_pool_name, pgs_per_fs_pool.__str__()) - if self.metadata_overlay: - self.mon_manager.raw_cluster_cmd('fs', 'new', - self.name, self.metadata_pool_name, data_pool_name, - '--allow-dangerous-metadata-overlay') - else: - if self.ec_profile: - log.info("EC profile is %s", self.ec_profile) - cmd = ['osd', 'erasure-code-profile', 'set', data_pool_name] - cmd.extend(self.ec_profile) - self.mon_manager.raw_cluster_cmd(*cmd) - self.mon_manager.raw_cluster_cmd( - 'osd', 'pool', 'create', - data_pool_name, pgs_per_fs_pool.__str__(), 'erasure', - data_pool_name) - self.mon_manager.raw_cluster_cmd( - 'osd', 'pool', 'set', - data_pool_name, 'allow_ec_overwrites', 'true') - else: - self.mon_manager.raw_cluster_cmd( - 'osd', 'pool', 'create', - data_pool_name, pgs_per_fs_pool.__str__()) - self.mon_manager.raw_cluster_cmd('fs', 'new', - self.name, self.metadata_pool_name, data_pool_name) - self.check_pool_application(self.metadata_pool_name) - self.check_pool_application(data_pool_name) - # Turn off spurious standby count warnings from modifying max_mds in tests. 
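get_pgs_per_fs_pool above is a single multiplication: enough PGs that each OSD stays above the mon_pg_warn_min_per_osd floor. A worked example:

def pgs_per_fs_pool(pg_warn_min_per_osd, osd_count):
    # One warning-floor's worth of PGs per OSD keeps new pools clear of
    # the mon_pg_warn_min_per_osd health warning.
    return pg_warn_min_per_osd * osd_count

# e.g. a floor of 30 PGs/OSD on a 4-OSD test cluster -> 120 PGs per pool
assert pgs_per_fs_pool(30, 4) == 120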
- try: - self.mon_manager.raw_cluster_cmd('fs', 'set', self.name, 'standby_count_wanted', '0') - except CommandFailedError as e: - if e.exitstatus == 22: - # standby_count_wanted not available prior to luminous (upgrade tests would fail otherwise) - pass - else: - raise - - self.getinfo(refresh = True) - - - def check_pool_application(self, pool_name): - osd_map = self.mon_manager.get_osd_dump_json() - for pool in osd_map['pools']: - if pool['pool_name'] == pool_name: - if "application_metadata" in pool: - if not "cephfs" in pool['application_metadata']: - raise RuntimeError("Pool %p does not name cephfs as application!".\ - format(pool_name)) - - - def __del__(self): - if getattr(self._ctx, "filesystem", None) == self: - delattr(self._ctx, "filesystem") - - def exists(self): - """ - Whether a filesystem exists in the mon's filesystem list - """ - fs_list = json.loads(self.mon_manager.raw_cluster_cmd('fs', 'ls', '--format=json-pretty')) - return self.name in [fs['name'] for fs in fs_list] - - def legacy_configured(self): - """ - Check if a legacy (i.e. pre "fs new") filesystem configuration is present. If this is - the case, the caller should avoid using Filesystem.create - """ - try: - out_text = self.mon_manager.raw_cluster_cmd('--format=json-pretty', 'osd', 'lspools') - pools = json.loads(out_text) - metadata_pool_exists = 'metadata' in [p['poolname'] for p in pools] - if metadata_pool_exists: - self.metadata_pool_name = 'metadata' - except CommandFailedError as e: - # For use in upgrade tests, Ceph cuttlefish and earlier don't support - # structured output (--format) from the CLI. - if e.exitstatus == 22: - metadata_pool_exists = True - else: - raise - - return metadata_pool_exists - - def _df(self): - return json.loads(self.mon_manager.raw_cluster_cmd("df", "--format=json-pretty")) - - def get_mds_map(self): - return self.status().get_fsmap(self.id)['mdsmap'] - - def add_data_pool(self, name): - self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', name, self.get_pgs_per_fs_pool().__str__()) - self.mon_manager.raw_cluster_cmd('fs', 'add_data_pool', self.name, name) - self.get_pool_names(refresh = True) - for poolid, fs_name in self.data_pools.items(): - if name == fs_name: - return poolid - raise RuntimeError("could not get just created pool '{0}'".format(name)) - - def get_pool_names(self, refresh = False, status = None): - if refresh or self.metadata_pool_name is None or self.data_pools is None: - if status is None: - status = self.status() - fsmap = status.get_fsmap(self.id) - - osd_map = self.mon_manager.get_osd_dump_json() - id_to_name = {} - for p in osd_map['pools']: - id_to_name[p['pool']] = p['pool_name'] - - self.metadata_pool_name = id_to_name[fsmap['mdsmap']['metadata_pool']] - self.data_pools = {} - for data_pool in fsmap['mdsmap']['data_pools']: - self.data_pools[data_pool] = id_to_name[data_pool] - - def get_data_pool_name(self, refresh = False): - if refresh or self.data_pools is None: - self.get_pool_names(refresh = True) - assert(len(self.data_pools) == 1) - return self.data_pools.values()[0] - - def get_data_pool_id(self, refresh = False): - """ - Don't call this if you have multiple data pools - :return: integer - """ - if refresh or self.data_pools is None: - self.get_pool_names(refresh = True) - assert(len(self.data_pools) == 1) - return self.data_pools.keys()[0] - - def get_data_pool_names(self, refresh = False): - if refresh or self.data_pools is None: - self.get_pool_names(refresh = True) - return self.data_pools.values() - - def 
get_metadata_pool_name(self): - return self.metadata_pool_name - - def set_data_pool_name(self, name): - if self.id is not None: - raise RuntimeError("can't set filesystem name if its fscid is set") - self.data_pool_name = name - - def get_namespace_id(self): - return self.id - - def get_pool_df(self, pool_name): - """ - Return a dict like: - {u'bytes_used': 0, u'max_avail': 83848701, u'objects': 0, u'kb_used': 0} - """ - for pool_df in self._df()['pools']: - if pool_df['name'] == pool_name: - return pool_df['stats'] - - raise RuntimeError("Pool name '{0}' not found".format(pool_name)) - - def get_usage(self): - return self._df()['stats']['total_used_bytes'] - - def are_daemons_healthy(self): - """ - Return true if all daemons are in one of active, standby, standby-replay, and - at least max_mds daemons are in 'active'. - - Unlike most of Filesystem, this function is tolerant of new-style `fs` - commands being missing, because we are part of the ceph installation - process during upgrade suites, so must fall back to old style commands - when we get an EINVAL on a new style command. - - :return: - """ - - active_count = 0 - try: - mds_map = self.get_mds_map() - except CommandFailedError as cfe: - # Old version, fall back to non-multi-fs commands - if cfe.exitstatus == errno.EINVAL: - mds_map = json.loads( - self.mon_manager.raw_cluster_cmd('mds', 'dump', '--format=json')) - else: - raise - - log.info("are_daemons_healthy: mds map: {0}".format(mds_map)) - - for mds_id, mds_status in mds_map['info'].items(): - if mds_status['state'] not in ["up:active", "up:standby", "up:standby-replay"]: - log.warning("Unhealthy mds state {0}:{1}".format(mds_id, mds_status['state'])) - return False - elif mds_status['state'] == 'up:active': - active_count += 1 - - log.info("are_daemons_healthy: {0}/{1}".format( - active_count, mds_map['max_mds'] - )) - - if active_count >= mds_map['max_mds']: - # The MDSMap says these guys are active, but let's check they really are - for mds_id, mds_status in mds_map['info'].items(): - if mds_status['state'] == 'up:active': - try: - daemon_status = self.mds_asok(["status"], mds_id=mds_status['name']) - except CommandFailedError as cfe: - if cfe.exitstatus == errno.EINVAL: - # Old version, can't do this check - continue - else: - # MDS not even running - return False - - if daemon_status['state'] != 'up:active': - # MDS hasn't taken the latest map yet - return False - - return True - else: - return False - - def get_daemon_names(self, state=None): - """ - Return MDS daemon names of those daemons in the given state - :param state: - :return: - """ - status = self.get_mds_map() - result = [] - for mds_status in sorted(status['info'].values(), lambda a, b: cmp(a['rank'], b['rank'])): - if mds_status['state'] == state or state is None: - result.append(mds_status['name']) - - return result - - def get_active_names(self): - """ - Return MDS daemon names of those daemons holding ranks - in state up:active - - :return: list of strings like ['a', 'b'], sorted by rank - """ - return self.get_daemon_names("up:active") - - def get_all_mds_rank(self): - status = self.get_mds_map() - result = [] - for mds_status in sorted(status['info'].values(), lambda a, b: cmp(a['rank'], b['rank'])): - if mds_status['rank'] != -1 and mds_status['state'] != 'up:standby-replay': - result.append(mds_status['rank']) - - return result - - def get_rank_names(self): - """ - Return MDS daemon names of those daemons holding a rank, - sorted by rank. This includes e.g. 
up:replay/reconnect - as well as active, but does not include standby or - standby-replay. - """ - status = self.get_mds_map() - result = [] - for mds_status in sorted(status['info'].values(), lambda a, b: cmp(a['rank'], b['rank'])): - if mds_status['rank'] != -1 and mds_status['state'] != 'up:standby-replay': - result.append(mds_status['name']) - - return result - - def wait_for_daemons(self, timeout=None): - """ - Wait until all daemons are healthy - :return: - """ - - if timeout is None: - timeout = DAEMON_WAIT_TIMEOUT - - elapsed = 0 - while True: - if self.are_daemons_healthy(): - return - else: - time.sleep(1) - elapsed += 1 - - if elapsed > timeout: - raise RuntimeError("Timed out waiting for MDS daemons to become healthy") - - def get_lone_mds_id(self): - """ - Get a single MDS ID: the only one if there is only one - configured, else the only one currently holding a rank, - else raise an error. - """ - if len(self.mds_ids) != 1: - alive = self.get_rank_names() - if len(alive) == 1: - return alive[0] - else: - raise ValueError("Explicit MDS argument required when multiple MDSs in use") - else: - return self.mds_ids[0] - - def recreate(self): - log.info("Creating new filesystem") - self.delete_all_filesystems() - self.id = None - self.create() - - def put_metadata_object_raw(self, object_id, infile): - """ - Save an object to the metadata pool - """ - temp_bin_path = infile - self.client_remote.run(args=[ - 'sudo', os.path.join(self._prefix, 'rados'), '-p', self.metadata_pool_name, 'put', object_id, temp_bin_path - ]) - - def get_metadata_object_raw(self, object_id): - """ - Retrieve an object from the metadata pool and store it in a file. - """ - temp_bin_path = '/tmp/' + object_id + '.bin' - - self.client_remote.run(args=[ - 'sudo', os.path.join(self._prefix, 'rados'), '-p', self.metadata_pool_name, 'get', object_id, temp_bin_path - ]) - - return temp_bin_path - - def get_metadata_object(self, object_type, object_id): - """ - Retrieve an object from the metadata pool, pass it through - ceph-dencoder to dump it to JSON, and return the decoded object. - """ - temp_bin_path = '/tmp/out.bin' - - self.client_remote.run(args=[ - 'sudo', os.path.join(self._prefix, 'rados'), '-p', self.metadata_pool_name, 'get', object_id, temp_bin_path - ]) - - stdout = StringIO() - self.client_remote.run(args=[ - 'sudo', os.path.join(self._prefix, 'ceph-dencoder'), 'type', object_type, 'import', temp_bin_path, 'decode', 'dump_json' - ], stdout=stdout) - dump_json = stdout.getvalue().strip() - try: - dump = json.loads(dump_json) - except (TypeError, ValueError): - log.error("Failed to decode JSON: '{0}'".format(dump_json)) - raise - - return dump - - def get_journal_version(self): - """ - Read the JournalPointer and Journal::Header objects to learn the version of - encoding in use. 
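Metadata objects are named "<inode in hex>.<segment>", which is how get_journal_version locates the journal pointer at 400.00000000 and then derives the header object from the pointer's 'front' inode. Sketch of the naming:

def object_name(ino, segment=0):
    # "<inode in hex>.<segment, zero-padded to 8 hex digits>"
    return "{0:x}.{1:08x}".format(ino, segment)

assert object_name(0x400) == "400.00000000"          # rank 0's journal pointer
assert object_name(0x10000000002, 1) == "10000000002.00000001"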
- """ - journal_pointer_object = '400.00000000' - journal_pointer_dump = self.get_metadata_object("JournalPointer", journal_pointer_object) - journal_ino = journal_pointer_dump['journal_pointer']['front'] - - journal_header_object = "{0:x}.00000000".format(journal_ino) - journal_header_dump = self.get_metadata_object('Journaler::Header', journal_header_object) - - version = journal_header_dump['journal_header']['stream_format'] - log.info("Read journal version {0}".format(version)) - - return version - - def mds_asok(self, command, mds_id=None): - if mds_id is None: - mds_id = self.get_lone_mds_id() - - return self.json_asok(command, 'mds', mds_id) - - def read_cache(self, path, depth=None): - cmd = ["dump", "tree", path] - if depth is not None: - cmd.append(depth.__str__()) - result = self.mds_asok(cmd) - if len(result) == 0: - raise RuntimeError("Path not found in cache: {0}".format(path)) - - return result - - def wait_for_state(self, goal_state, reject=None, timeout=None, mds_id=None, rank=None): - """ - Block until the MDS reaches a particular state, or a failure condition - is met. - - When there are multiple MDSs, succeed when exaclty one MDS is in the - goal state, or fail when any MDS is in the reject state. - - :param goal_state: Return once the MDS is in this state - :param reject: Fail if the MDS enters this state before the goal state - :param timeout: Fail if this many seconds pass before reaching goal - :return: number of seconds waited, rounded down to integer - """ - - started_at = time.time() - while True: - status = self.status() - if rank is not None: - mds_info = status.get_rank(self.id, rank) - current_state = mds_info['state'] if mds_info else None - log.info("Looked up MDS state for mds.{0}: {1}".format(rank, current_state)) - elif mds_id is not None: - # mds_info is None if no daemon with this ID exists in the map - mds_info = status.get_mds(mds_id) - current_state = mds_info['state'] if mds_info else None - log.info("Looked up MDS state for {0}: {1}".format(mds_id, current_state)) - else: - # In general, look for a single MDS - states = [m['state'] for m in status.get_ranks(self.id)] - if [s for s in states if s == goal_state] == [goal_state]: - current_state = goal_state - elif reject in states: - current_state = reject - else: - current_state = None - log.info("mapped states {0} to {1}".format(states, current_state)) - - elapsed = time.time() - started_at - if current_state == goal_state: - log.info("reached state '{0}' in {1}s".format(current_state, elapsed)) - return elapsed - elif reject is not None and current_state == reject: - raise RuntimeError("MDS in reject state {0}".format(current_state)) - elif timeout is not None and elapsed > timeout: - log.error("MDS status at timeout: {0}".format(status.get_fsmap(self.id))) - raise RuntimeError( - "Reached timeout after {0} seconds waiting for state {1}, while in state {2}".format( - elapsed, goal_state, current_state - )) - else: - time.sleep(1) - - def _read_data_xattr(self, ino_no, xattr_name, type, pool): - mds_id = self.mds_ids[0] - remote = self.mds_daemons[mds_id].remote - if pool is None: - pool = self.get_data_pool_name() - - obj_name = "{0:x}.00000000".format(ino_no) - - args = [ - os.path.join(self._prefix, "rados"), "-p", pool, "getxattr", obj_name, xattr_name - ] - try: - proc = remote.run( - args=args, - stdout=StringIO()) - except CommandFailedError as e: - log.error(e.__str__()) - raise ObjectNotFound(obj_name) - - data = proc.stdout.getvalue() - - p = remote.run( - 
args=[os.path.join(self._prefix, "ceph-dencoder"), "type", type, "import", "-", "decode", "dump_json"], - stdout=StringIO(), - stdin=data - ) - - return json.loads(p.stdout.getvalue().strip()) - - def _write_data_xattr(self, ino_no, xattr_name, data, pool=None): - """ - Write to an xattr of the 0th data object of an inode. Will - succeed whether the object and/or xattr already exist or not. - - :param ino_no: integer inode number - :param xattr_name: string name of the xattr - :param data: byte array data to write to the xattr - :param pool: name of data pool or None to use primary data pool - :return: None - """ - remote = self.mds_daemons[self.mds_ids[0]].remote - if pool is None: - pool = self.get_data_pool_name() - - obj_name = "{0:x}.00000000".format(ino_no) - args = [ - os.path.join(self._prefix, "rados"), "-p", pool, "setxattr", - obj_name, xattr_name, data - ] - remote.run( - args=args, - stdout=StringIO()) - - def read_backtrace(self, ino_no, pool=None): - """ - Read the backtrace from the data pool, return a dict in the format - given by inode_backtrace_t::dump, which is something like: - - :: - - rados -p cephfs_data getxattr 10000000002.00000000 parent > out.bin - ceph-dencoder type inode_backtrace_t import out.bin decode dump_json - - { "ino": 1099511627778, - "ancestors": [ - { "dirino": 1, - "dname": "blah", - "version": 11}], - "pool": 1, - "old_pools": []} - - :param pool: name of pool to read backtrace from. If omitted, FS must have only - one data pool and that will be used. - """ - return self._read_data_xattr(ino_no, "parent", "inode_backtrace_t", pool) - - def read_layout(self, ino_no, pool=None): - """ - Read 'layout' xattr of an inode and parse the result, returning a dict like: - :: - { - "stripe_unit": 4194304, - "stripe_count": 1, - "object_size": 4194304, - "pool_id": 1, - "pool_ns": "", - } - - :param pool: name of pool to read backtrace from. If omitted, FS must have only - one data pool and that will be used. - """ - return self._read_data_xattr(ino_no, "layout", "file_layout_t", pool) - - def _enumerate_data_objects(self, ino, size): - """ - Get the list of expected data objects for a range, and the list of objects - that really exist. 
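_enumerate_data_objects, whose body follows just below, derives the object names a file of a given size should have under the default 4 MiB striping. An equivalent Python 3 sketch — note the integer division where the Python 2 original uses plain /:

STRIPE_SIZE = 4 * 1024 * 1024  # default 4 MiB object size

def want_objects(ino, size):
    size = max(STRIPE_SIZE, size)
    return ["{0:x}.{1:08x}".format(ino, n)
            for n in range((size - 1) // STRIPE_SIZE + 1)]

assert want_objects(0x10000000002, 1) == ["10000000002.00000000"]
assert len(want_objects(0x10000000002, STRIPE_SIZE + 1)) == 2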
- - :return a tuple of two lists of strings (expected, actual) - """ - stripe_size = 1024 * 1024 * 4 - - size = max(stripe_size, size) - - want_objects = [ - "{0:x}.{1:08x}".format(ino, n) - for n in range(0, ((size - 1) / stripe_size) + 1) - ] - - exist_objects = self.rados(["ls"], pool=self.get_data_pool_name()).split("\n") - - return want_objects, exist_objects - - def data_objects_present(self, ino, size): - """ - Check that *all* the expected data objects for an inode are present in the data pool - """ - - want_objects, exist_objects = self._enumerate_data_objects(ino, size) - missing = set(want_objects) - set(exist_objects) - - if missing: - log.info("Objects missing (ino {0}, size {1}): {2}".format( - ino, size, missing - )) - return False - else: - log.info("All objects for ino {0} size {1} found".format(ino, size)) - return True - - def data_objects_absent(self, ino, size): - want_objects, exist_objects = self._enumerate_data_objects(ino, size) - present = set(want_objects) & set(exist_objects) - - if present: - log.info("Objects not absent (ino {0}, size {1}): {2}".format( - ino, size, present - )) - return False - else: - log.info("All objects for ino {0} size {1} are absent".format(ino, size)) - return True - - def dirfrag_exists(self, ino, frag): - try: - self.rados(["stat", "{0:x}.{1:08x}".format(ino, frag)]) - except CommandFailedError as e: - return False - else: - return True - - def rados(self, args, pool=None, namespace=None, stdin_data=None): - """ - Call into the `rados` CLI from an MDS - """ - - if pool is None: - pool = self.get_metadata_pool_name() - - # Doesn't matter which MDS we use to run rados commands, they all - # have access to the pools - mds_id = self.mds_ids[0] - remote = self.mds_daemons[mds_id].remote - - # NB we could alternatively use librados pybindings for this, but it's a one-liner - # using the `rados` CLI - args = ([os.path.join(self._prefix, "rados"), "-p", pool] + - (["--namespace", namespace] if namespace else []) + - args) - p = remote.run( - args=args, - stdin=stdin_data, - stdout=StringIO()) - return p.stdout.getvalue().strip() - - def list_dirfrag(self, dir_ino): - """ - Read the named object and return the list of omap keys - - :return a list of 0 or more strings - """ - - dirfrag_obj_name = "{0:x}.00000000".format(dir_ino) - - try: - key_list_str = self.rados(["listomapkeys", dirfrag_obj_name]) - except CommandFailedError as e: - log.error(e.__str__()) - raise ObjectNotFound(dirfrag_obj_name) - - return key_list_str.split("\n") if key_list_str else [] - - def erase_metadata_objects(self, prefix): - """ - For all objects in the metadata pool matching the prefix, - erase them. - - This O(N) with the number of objects in the pool, so only suitable - for use on toy test filesystems. - """ - all_objects = self.rados(["ls"]).split("\n") - matching_objects = [o for o in all_objects if o.startswith(prefix)] - for o in matching_objects: - self.rados(["rm", o]) - - def erase_mds_objects(self, rank): - """ - Erase all the per-MDS objects for a particular rank. This includes - inotable, sessiontable, journal - """ - - def obj_prefix(multiplier): - """ - MDS object naming conventions like rank 1's - journal is at 201.*** - """ - return "%x." 
% (multiplier * 0x100 + rank) - - # MDS_INO_LOG_OFFSET - self.erase_metadata_objects(obj_prefix(2)) - # MDS_INO_LOG_BACKUP_OFFSET - self.erase_metadata_objects(obj_prefix(3)) - # MDS_INO_LOG_POINTER_OFFSET - self.erase_metadata_objects(obj_prefix(4)) - # MDSTables & SessionMap - self.erase_metadata_objects("mds{rank:d}_".format(rank=rank)) - - @property - def _prefix(self): - """ - Override this to set a different - """ - return "" - - def _run_tool(self, tool, args, rank=None, quiet=False): - # Tests frequently have [client] configuration that jacks up - # the objecter log level (unlikely to be interesting here) - # and does not set the mds log level (very interesting here) - if quiet: - base_args = [os.path.join(self._prefix, tool), '--debug-mds=1', '--debug-objecter=1'] - else: - base_args = [os.path.join(self._prefix, tool), '--debug-mds=4', '--debug-objecter=1'] - - if rank is not None: - base_args.extend(["--rank", "%d" % rank]) - - t1 = datetime.datetime.now() - r = self.tool_remote.run( - args=base_args + args, - stdout=StringIO()).stdout.getvalue().strip() - duration = datetime.datetime.now() - t1 - log.info("Ran {0} in time {1}, result:\n{2}".format( - base_args + args, duration, r - )) - return r - - @property - def tool_remote(self): - """ - An arbitrary remote to use when invoking recovery tools. Use an MDS host because - it'll definitely have keys with perms to access cephfs metadata pool. This is public - so that tests can use this remote to go get locally written output files from the tools. - """ - mds_id = self.mds_ids[0] - return self.mds_daemons[mds_id].remote - - def journal_tool(self, args, rank=None, quiet=False): - """ - Invoke cephfs-journal-tool with the passed arguments, and return its stdout - """ - return self._run_tool("cephfs-journal-tool", args, rank, quiet) - - def table_tool(self, args, quiet=False): - """ - Invoke cephfs-table-tool with the passed arguments, and return its stdout - """ - return self._run_tool("cephfs-table-tool", args, None, quiet) - - def data_scan(self, args, quiet=False, worker_count=1): - """ - Invoke cephfs-data-scan with the passed arguments, and return its stdout - - :param worker_count: if greater than 1, multiple workers will be run - in parallel and the return value will be None - """ - - workers = [] - - for n in range(0, worker_count): - if worker_count > 1: - # data-scan args first token is a command, followed by args to it. - # insert worker arguments after the command. 
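obj_prefix above maps a table multiplier and rank to a hex object-name prefix, which is why rank 1's journal objects live at "201.*". Sketch:

def obj_prefix(multiplier, rank):
    # table base (multiplier * 0x100) plus rank, rendered in hex
    return "%x." % (multiplier * 0x100 + rank)

assert obj_prefix(2, 1) == "201."  # MDS_INO_LOG_OFFSET, rank 1
assert obj_prefix(4, 0) == "400."  # MDS_INO_LOG_POINTER_OFFSET, rank 0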
- cmd = args[0] - worker_args = [cmd] + ["--worker_n", n.__str__(), "--worker_m", worker_count.__str__()] + args[1:] - else: - worker_args = args - - workers.append(Greenlet.spawn(lambda wargs=worker_args: - self._run_tool("cephfs-data-scan", wargs, None, quiet))) - - for w in workers: - w.get() - - if worker_count == 1: - return workers[0].value - else: - return None diff --git a/src/ceph/qa/tasks/cephfs/fuse_mount.py b/src/ceph/qa/tasks/cephfs/fuse_mount.py deleted file mode 100644 index 8d8410c..0000000 --- a/src/ceph/qa/tasks/cephfs/fuse_mount.py +++ /dev/null @@ -1,428 +0,0 @@ - -from StringIO import StringIO -import json -import time -import logging -from textwrap import dedent - -from teuthology import misc -from teuthology.contextutil import MaxWhileTries -from teuthology.orchestra import run -from teuthology.orchestra.run import CommandFailedError -from .mount import CephFSMount - -log = logging.getLogger(__name__) - - -class FuseMount(CephFSMount): - def __init__(self, client_config, test_dir, client_id, client_remote): - super(FuseMount, self).__init__(test_dir, client_id, client_remote) - - self.client_config = client_config if client_config else {} - self.fuse_daemon = None - self._fuse_conn = None - - def mount(self, mount_path=None, mount_fs_name=None): - try: - return self._mount(mount_path, mount_fs_name) - except RuntimeError: - # Catch exceptions by the mount() logic (i.e. not remote command - # failures) and ensure the mount is not left half-up. - # Otherwise we might leave a zombie mount point that causes - # anyone traversing cephtest/ to get hung up on. - log.warn("Trying to clean up after failed mount") - self.umount_wait(force=True) - raise - - def _mount(self, mount_path, mount_fs_name): - log.info("Client client.%s config is %s" % (self.client_id, self.client_config)) - - daemon_signal = 'kill' - if self.client_config.get('coverage') or self.client_config.get('valgrind') is not None: - daemon_signal = 'term' - - log.info('Mounting ceph-fuse client.{id} at {remote} {mnt}...'.format( - id=self.client_id, remote=self.client_remote, mnt=self.mountpoint)) - - self.client_remote.run( - args=[ - 'mkdir', - '--', - self.mountpoint, - ], - ) - - run_cmd = [ - 'sudo', - 'adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage'.format(tdir=self.test_dir), - 'daemon-helper', - daemon_signal, - ] - - fuse_cmd = ['ceph-fuse', "-f"] - - if mount_path is not None: - fuse_cmd += ["--client_mountpoint={0}".format(mount_path)] - - if mount_fs_name is not None: - fuse_cmd += ["--client_mds_namespace={0}".format(mount_fs_name)] - - fuse_cmd += [ - '--name', 'client.{id}'.format(id=self.client_id), - # TODO ceph-fuse doesn't understand dash dash '--', - self.mountpoint, - ] - - if self.client_config.get('valgrind') is not None: - run_cmd = misc.get_valgrind_args( - self.test_dir, - 'client.{id}'.format(id=self.client_id), - run_cmd, - self.client_config.get('valgrind'), - ) - - run_cmd.extend(fuse_cmd) - - def list_connections(): - self.client_remote.run( - args=["sudo", "mount", "-t", "fusectl", "/sys/fs/fuse/connections", "/sys/fs/fuse/connections"], - check_status=False - ) - p = self.client_remote.run( - args=["ls", "/sys/fs/fuse/connections"], - stdout=StringIO(), - check_status=False - ) - if p.exitstatus != 0: - return [] - - ls_str = p.stdout.getvalue().strip() - if ls_str: - return [int(n) for n in ls_str.split("\n")] - else: - return [] - - # Before starting ceph-fuse process, note the contents of - # /sys/fs/fuse/connections - pre_mount_conns = list_connections() - 
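list_connections above snapshots /sys/fs/fuse/connections so the new mount's connection id can be found by set difference afterwards. A local sketch of the directory read (the code above mounts the fusectl filesystem first):

import os

def list_fuse_connections(root="/sys/fs/fuse/connections"):
    # Each live FUSE connection appears as a numeric directory name.
    try:
        return sorted(int(name) for name in os.listdir(root) if name.isdigit())
    except OSError:
        return []  # fusectl not mounted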
log.info("Pre-mount connections: {0}".format(pre_mount_conns)) - - proc = self.client_remote.run( - args=run_cmd, - logger=log.getChild('ceph-fuse.{id}'.format(id=self.client_id)), - stdin=run.PIPE, - wait=False, - ) - self.fuse_daemon = proc - - # Wait for the connection reference to appear in /sys - mount_wait = self.client_config.get('mount_wait', 0) - if mount_wait > 0: - log.info("Fuse mount waits {0} seconds before checking /sys/".format(mount_wait)) - time.sleep(mount_wait) - timeout = int(self.client_config.get('mount_timeout', 30)) - waited = 0 - - post_mount_conns = list_connections() - while len(post_mount_conns) <= len(pre_mount_conns): - if self.fuse_daemon.finished: - # Did mount fail? Raise the CommandFailedError instead of - # hitting the "failed to populate /sys/" timeout - self.fuse_daemon.wait() - time.sleep(1) - waited += 1 - if waited > timeout: - raise RuntimeError("Fuse mount failed to populate /sys/ after {0} seconds".format( - waited - )) - else: - post_mount_conns = list_connections() - - log.info("Post-mount connections: {0}".format(post_mount_conns)) - - # Record our fuse connection number so that we can use it when - # forcing an unmount - new_conns = list(set(post_mount_conns) - set(pre_mount_conns)) - if len(new_conns) == 0: - raise RuntimeError("New fuse connection directory not found ({0})".format(new_conns)) - elif len(new_conns) > 1: - raise RuntimeError("Unexpectedly numerous fuse connections {0}".format(new_conns)) - else: - self._fuse_conn = new_conns[0] - - def is_mounted(self): - proc = self.client_remote.run( - args=[ - 'stat', - '--file-system', - '--printf=%T\n', - '--', - self.mountpoint, - ], - stdout=StringIO(), - stderr=StringIO(), - wait=False - ) - try: - proc.wait() - except CommandFailedError: - if ("endpoint is not connected" in proc.stderr.getvalue() - or "Software caused connection abort" in proc.stderr.getvalue()): - # This happens is fuse is killed without unmount - log.warn("Found stale moutn point at {0}".format(self.mountpoint)) - return True - else: - # This happens if the mount directory doesn't exist - log.info('mount point does not exist: %s', self.mountpoint) - return False - - fstype = proc.stdout.getvalue().rstrip('\n') - if fstype == 'fuseblk': - log.info('ceph-fuse is mounted on %s', self.mountpoint) - return True - else: - log.debug('ceph-fuse not mounted, got fs type {fstype!r}'.format( - fstype=fstype)) - return False - - def wait_until_mounted(self): - """ - Check to make sure that fuse is mounted on mountpoint. If not, - sleep for 5 seconds and check again. - """ - - while not self.is_mounted(): - # Even if it's not mounted, it should at least - # be running: catch simple failures where it has terminated. - assert not self.fuse_daemon.poll() - - time.sleep(5) - - # Now that we're mounted, set permissions so that the rest of the test will have - # unrestricted access to the filesystem mount. 
- self.client_remote.run( - args=['sudo', 'chmod', '1777', self.mountpoint]) - - def _mountpoint_exists(self): - return self.client_remote.run(args=["ls", "-d", self.mountpoint], check_status=False).exitstatus == 0 - - def umount(self): - try: - log.info('Running fusermount -u on {name}...'.format(name=self.client_remote.name)) - self.client_remote.run( - args=[ - 'sudo', - 'fusermount', - '-u', - self.mountpoint, - ], - ) - except run.CommandFailedError: - log.info('Failed to unmount ceph-fuse on {name}, aborting...'.format(name=self.client_remote.name)) - - self.client_remote.run(args=[ - 'sudo', - run.Raw('PATH=/usr/sbin:$PATH'), - 'lsof', - run.Raw(';'), - 'ps', - 'auxf', - ]) - - # abort the fuse mount, killing all hung processes - if self._fuse_conn: - self.run_python(dedent(""" - import os - path = "/sys/fs/fuse/connections/{0}/abort" - if os.path.exists(path): - open(path, "w").write("1") - """).format(self._fuse_conn)) - self._fuse_conn = None - - stderr = StringIO() - try: - # make sure its unmounted - self.client_remote.run( - args=[ - 'sudo', - 'umount', - '-l', - '-f', - self.mountpoint, - ], - stderr=stderr - ) - except CommandFailedError: - if self.is_mounted(): - raise - - assert not self.is_mounted() - self._fuse_conn = None - - def umount_wait(self, force=False, require_clean=False): - """ - :param force: Complete cleanly even if the MDS is offline - """ - if force: - assert not require_clean # mutually exclusive - - # When we expect to be forcing, kill the ceph-fuse process directly. - # This should avoid hitting the more aggressive fallback killing - # in umount() which can affect other mounts too. - self.fuse_daemon.stdin.close() - - # However, we will still hit the aggressive wait if there is an ongoing - # mount -o remount (especially if the remount is stuck because MDSs - # are unavailable) - - self.umount() - - try: - if self.fuse_daemon: - # Permit a timeout, so that we do not block forever - run.wait([self.fuse_daemon], 900) - except MaxWhileTries: - log.error("process failed to terminate after unmount. This probably" - "indicates a bug within ceph-fuse.") - raise - except CommandFailedError: - if require_clean: - raise - - self.cleanup() - - def cleanup(self): - """ - Remove the mount point. - - Prerequisite: the client is not mounted. - """ - stderr = StringIO() - try: - self.client_remote.run( - args=[ - 'rmdir', - '--', - self.mountpoint, - ], - stderr=stderr - ) - except CommandFailedError: - if "No such file or directory" in stderr.getvalue(): - pass - else: - raise - - def kill(self): - """ - Terminate the client without removing the mount point. - """ - self.fuse_daemon.stdin.close() - try: - self.fuse_daemon.wait() - except CommandFailedError: - pass - - def kill_cleanup(self): - """ - Follow up ``kill`` to get to a clean unmounted state. - """ - self.umount() - self.cleanup() - - def teardown(self): - """ - Whatever the state of the mount, get it gone. 
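The forced-unmount path above aborts the FUSE connection through sysfs before attempting the lazy umount. Sketch of that write:

import os

def abort_fuse_connection(conn_id):
    # Writing "1" to the abort file makes the kernel fail all outstanding
    # requests, unsticking processes hung on a dead mount so that the
    # `umount -l -f` which follows can complete.
    path = "/sys/fs/fuse/connections/{0}/abort".format(conn_id)
    if os.path.exists(path):
        with open(path, "w") as f:
            f.write("1")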
- """ - super(FuseMount, self).teardown() - - self.umount() - - if self.fuse_daemon and not self.fuse_daemon.finished: - self.fuse_daemon.stdin.close() - try: - self.fuse_daemon.wait() - except CommandFailedError: - pass - - # Indiscriminate, unlike the touchier cleanup() - self.client_remote.run( - args=[ - 'rm', - '-rf', - self.mountpoint, - ], - ) - - def _asok_path(self): - return "/var/run/ceph/ceph-client.{0}.*.asok".format(self.client_id) - - @property - def _prefix(self): - return "" - - def admin_socket(self, args): - pyscript = """ -import glob -import re -import os -import subprocess - -def find_socket(client_name): - asok_path = "{asok_path}" - files = glob.glob(asok_path) - - # Given a non-glob path, it better be there - if "*" not in asok_path: - assert(len(files) == 1) - return files[0] - - for f in files: - pid = re.match(".*\.(\d+)\.asok$", f).group(1) - if os.path.exists("/proc/{{0}}".format(pid)): - return f - raise RuntimeError("Client socket {{0}} not found".format(client_name)) - -print find_socket("{client_name}") -""".format( - asok_path=self._asok_path(), - client_name="client.{0}".format(self.client_id)) - - # Find the admin socket - p = self.client_remote.run(args=[ - 'python', '-c', pyscript - ], stdout=StringIO()) - asok_path = p.stdout.getvalue().strip() - log.info("Found client admin socket at {0}".format(asok_path)) - - # Query client ID from admin socket - p = self.client_remote.run( - args=['sudo', self._prefix + 'ceph', '--admin-daemon', asok_path] + args, - stdout=StringIO()) - return json.loads(p.stdout.getvalue()) - - def get_global_id(self): - """ - Look up the CephFS client ID for this mount - """ - - return self.admin_socket(['mds_sessions'])['id'] - - def get_osd_epoch(self): - """ - Return 2-tuple of osd_epoch, osd_epoch_barrier - """ - status = self.admin_socket(['status']) - return status['osd_epoch'], status['osd_epoch_barrier'] - - def get_dentry_count(self): - """ - Return 2-tuple of dentry_count, dentry_pinned_count - """ - status = self.admin_socket(['status']) - return status['dentry_count'], status['dentry_pinned_count'] - - def set_cache_size(self, size): - return self.admin_socket(['config', 'set', 'client_cache_size', str(size)]) diff --git a/src/ceph/qa/tasks/cephfs/kernel_mount.py b/src/ceph/qa/tasks/cephfs/kernel_mount.py deleted file mode 100644 index bfa1ac6..0000000 --- a/src/ceph/qa/tasks/cephfs/kernel_mount.py +++ /dev/null @@ -1,267 +0,0 @@ -from StringIO import StringIO -import json -import logging -from textwrap import dedent -from teuthology.orchestra.run import CommandFailedError -from teuthology import misc - -from teuthology.orchestra import remote as orchestra_remote -from teuthology.orchestra import run -from teuthology.contextutil import MaxWhileTries -from .mount import CephFSMount - -log = logging.getLogger(__name__) - - -UMOUNT_TIMEOUT = 300 - - -class KernelMount(CephFSMount): - def __init__(self, mons, test_dir, client_id, client_remote, - ipmi_user, ipmi_password, ipmi_domain): - super(KernelMount, self).__init__(test_dir, client_id, client_remote) - self.mons = mons - - self.mounted = False - self.ipmi_user = ipmi_user - self.ipmi_password = ipmi_password - self.ipmi_domain = ipmi_domain - - def write_secret_file(self, remote, role, keyring, filename): - """ - Stash the keyring in the filename specified. 
- """ - remote.run( - args=[ - 'adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage'.format(tdir=self.test_dir), - 'ceph-authtool', - '--name={role}'.format(role=role), - '--print-key', - keyring, - run.Raw('>'), - filename, - ], - ) - - def mount(self, mount_path=None, mount_fs_name=None): - log.info('Mounting kclient client.{id} at {remote} {mnt}...'.format( - id=self.client_id, remote=self.client_remote, mnt=self.mountpoint)) - - keyring = self.get_keyring_path() - secret = '{tdir}/ceph.data/client.{id}.secret'.format(tdir=self.test_dir, id=self.client_id) - self.write_secret_file(self.client_remote, 'client.{id}'.format(id=self.client_id), - keyring, secret) - - self.client_remote.run( - args=[ - 'mkdir', - '--', - self.mountpoint, - ], - ) - - if mount_path is None: - mount_path = "/" - - opts = 'name={id},secretfile={secret},norequire_active_mds'.format(id=self.client_id, - secret=secret) - - if mount_fs_name is not None: - opts += ",mds_namespace={0}".format(mount_fs_name) - - self.client_remote.run( - args=[ - 'sudo', - 'adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage'.format(tdir=self.test_dir), - '/sbin/mount.ceph', - '{mons}:{mount_path}'.format(mons=','.join(self.mons), mount_path=mount_path), - self.mountpoint, - '-v', - '-o', - opts - ], - ) - - self.client_remote.run( - args=['sudo', 'chmod', '1777', self.mountpoint]) - - self.mounted = True - - def umount(self, force=False): - log.debug('Unmounting client client.{id}...'.format(id=self.client_id)) - - cmd=['sudo', 'umount', self.mountpoint] - if force: - cmd.append('-f') - - try: - self.client_remote.run(args=cmd) - except Exception as e: - self.client_remote.run(args=[ - 'sudo', - run.Raw('PATH=/usr/sbin:$PATH'), - 'lsof', - run.Raw(';'), - 'ps', 'auxf', - ]) - raise e - - rproc = self.client_remote.run( - args=[ - 'rmdir', - '--', - self.mountpoint, - ], - wait=False - ) - run.wait([rproc], UMOUNT_TIMEOUT) - self.mounted = False - - def cleanup(self): - pass - - def umount_wait(self, force=False, require_clean=False): - """ - Unlike the fuse client, the kernel client's umount is immediate - """ - if not self.is_mounted(): - return - - try: - self.umount(force) - except (CommandFailedError, MaxWhileTries): - if not force: - raise - - self.kill() - self.kill_cleanup() - - self.mounted = False - - def is_mounted(self): - return self.mounted - - def wait_until_mounted(self): - """ - Unlike the fuse client, the kernel client is up and running as soon - as the initial mount() function returns. - """ - assert self.mounted - - def teardown(self): - super(KernelMount, self).teardown() - if self.mounted: - self.umount() - - def kill(self): - """ - The Ceph kernel client doesn't have a mechanism to kill itself (doing - that in side the kernel would be weird anyway), so we reboot the whole node - to get the same effect. - - We use IPMI to reboot, because we don't want the client to send any - releases of capabilities. 
- """ - - con = orchestra_remote.getRemoteConsole(self.client_remote.hostname, - self.ipmi_user, - self.ipmi_password, - self.ipmi_domain) - con.power_off() - - self.mounted = False - - def kill_cleanup(self): - assert not self.mounted - - con = orchestra_remote.getRemoteConsole(self.client_remote.hostname, - self.ipmi_user, - self.ipmi_password, - self.ipmi_domain) - con.power_on() - - # Wait for node to come back up after reboot - misc.reconnect(None, 300, [self.client_remote]) - - # Remove mount directory - self.client_remote.run( - args=[ - 'rmdir', - '--', - self.mountpoint, - ], - ) - - def _find_debug_dir(self): - """ - Find the debugfs folder for this mount - """ - pyscript = dedent(""" - import glob - import os - import json - - def get_id_to_dir(): - result = {} - for dir in glob.glob("/sys/kernel/debug/ceph/*"): - mds_sessions_lines = open(os.path.join(dir, "mds_sessions")).readlines() - client_id = mds_sessions_lines[1].split()[1].strip('"') - - result[client_id] = dir - return result - - print json.dumps(get_id_to_dir()) - """) - - p = self.client_remote.run(args=[ - 'sudo', 'python', '-c', pyscript - ], stdout=StringIO()) - client_id_to_dir = json.loads(p.stdout.getvalue()) - - try: - return client_id_to_dir[self.client_id] - except KeyError: - log.error("Client id '{0}' debug dir not found (clients seen were: {1})".format( - self.client_id, ",".join(client_id_to_dir.keys()) - )) - raise - - def _read_debug_file(self, filename): - debug_dir = self._find_debug_dir() - - pyscript = dedent(""" - import os - - print open(os.path.join("{debug_dir}", "{filename}")).read() - """).format(debug_dir=debug_dir, filename=filename) - - p = self.client_remote.run(args=[ - 'sudo', 'python', '-c', pyscript - ], stdout=StringIO()) - return p.stdout.getvalue() - - def get_global_id(self): - """ - Look up the CephFS client ID for this mount, using debugfs. 
- """ - - assert self.mounted - - mds_sessions = self._read_debug_file("mds_sessions") - lines = mds_sessions.split("\n") - return int(lines[0].split()[1]) - - def get_osd_epoch(self): - """ - Return 2-tuple of osd_epoch, osd_epoch_barrier - """ - osd_map = self._read_debug_file("osdmap") - lines = osd_map.split("\n") - first_line_tokens = lines[0].split() - epoch, barrier = int(first_line_tokens[1]), int(first_line_tokens[3]) - - return epoch, barrier diff --git a/src/ceph/qa/tasks/cephfs/mount.py b/src/ceph/qa/tasks/cephfs/mount.py deleted file mode 100644 index 4f96e6c..0000000 --- a/src/ceph/qa/tasks/cephfs/mount.py +++ /dev/null @@ -1,627 +0,0 @@ -from contextlib import contextmanager -import json -import logging -import datetime -import time -from textwrap import dedent -import os -from StringIO import StringIO -from teuthology.orchestra import run -from teuthology.orchestra.run import CommandFailedError, ConnectionLostError - -log = logging.getLogger(__name__) - - -class CephFSMount(object): - def __init__(self, test_dir, client_id, client_remote): - """ - :param test_dir: Global teuthology test dir - :param client_id: Client ID, the 'foo' in client.foo - :param client_remote: Remote instance for the host where client will run - """ - - self.test_dir = test_dir - self.client_id = client_id - self.client_remote = client_remote - self.mountpoint_dir_name = 'mnt.{id}'.format(id=self.client_id) - - self.test_files = ['a', 'b', 'c'] - - self.background_procs = [] - - @property - def mountpoint(self): - return os.path.join( - self.test_dir, '{dir_name}'.format(dir_name=self.mountpoint_dir_name)) - - def is_mounted(self): - raise NotImplementedError() - - def mount(self, mount_path=None, mount_fs_name=None): - raise NotImplementedError() - - def umount(self): - raise NotImplementedError() - - def umount_wait(self, force=False, require_clean=False): - """ - - :param force: Expect that the mount will not shutdown cleanly: kill - it hard. - :param require_clean: Wait for the Ceph client associated with the - mount (e.g. ceph-fuse) to terminate, and - raise if it doesn't do so cleanly. - :return: - """ - raise NotImplementedError() - - def kill_cleanup(self): - raise NotImplementedError() - - def kill(self): - raise NotImplementedError() - - def cleanup(self): - raise NotImplementedError() - - def wait_until_mounted(self): - raise NotImplementedError() - - def get_keyring_path(self): - return '/etc/ceph/ceph.client.{id}.keyring'.format(id=self.client_id) - - @property - def config_path(self): - """ - Path to ceph.conf: override this if you're not a normal systemwide ceph install - :return: stringv - """ - return "/etc/ceph/ceph.conf" - - @contextmanager - def mounted(self): - """ - A context manager, from an initially unmounted state, to mount - this, yield, and then unmount and clean up. 
- """ - self.mount() - self.wait_until_mounted() - try: - yield - finally: - self.umount_wait() - - def create_files(self): - assert(self.is_mounted()) - - for suffix in self.test_files: - log.info("Creating file {0}".format(suffix)) - self.client_remote.run(args=[ - 'sudo', 'touch', os.path.join(self.mountpoint, suffix) - ]) - - def check_files(self): - assert(self.is_mounted()) - - for suffix in self.test_files: - log.info("Checking file {0}".format(suffix)) - r = self.client_remote.run(args=[ - 'sudo', 'ls', os.path.join(self.mountpoint, suffix) - ], check_status=False) - if r.exitstatus != 0: - raise RuntimeError("Expected file {0} not found".format(suffix)) - - def create_destroy(self): - assert(self.is_mounted()) - - filename = "{0} {1}".format(datetime.datetime.now(), self.client_id) - log.debug("Creating test file {0}".format(filename)) - self.client_remote.run(args=[ - 'sudo', 'touch', os.path.join(self.mountpoint, filename) - ]) - log.debug("Deleting test file {0}".format(filename)) - self.client_remote.run(args=[ - 'sudo', 'rm', '-f', os.path.join(self.mountpoint, filename) - ]) - - def _run_python(self, pyscript): - return self.client_remote.run(args=[ - 'sudo', 'adjust-ulimits', 'daemon-helper', 'kill', 'python', '-c', pyscript - ], wait=False, stdin=run.PIPE, stdout=StringIO()) - - def run_python(self, pyscript): - p = self._run_python(pyscript) - p.wait() - return p.stdout.getvalue().strip() - - def run_shell(self, args, wait=True): - args = ["cd", self.mountpoint, run.Raw('&&'), "sudo"] + args - return self.client_remote.run(args=args, stdout=StringIO(), - stderr=StringIO(), wait=wait) - - def open_no_data(self, basename): - """ - A pure metadata operation - """ - assert(self.is_mounted()) - - path = os.path.join(self.mountpoint, basename) - - p = self._run_python(dedent( - """ - f = open("{path}", 'w') - """.format(path=path) - )) - p.wait() - - def open_background(self, basename="background_file"): - """ - Open a file for writing, then block such that the client - will hold a capability. - - Don't return until the remote process has got as far as opening - the file, then return the RemoteProcess instance. - """ - assert(self.is_mounted()) - - path = os.path.join(self.mountpoint, basename) - - pyscript = dedent(""" - import time - - f = open("{path}", 'w') - f.write('content') - f.flush() - f.write('content2') - while True: - time.sleep(1) - """).format(path=path) - - rproc = self._run_python(pyscript) - self.background_procs.append(rproc) - - # This wait would not be sufficient if the file had already - # existed, but it's simple and in practice users of open_background - # are not using it on existing files. 
-        self.wait_for_visible(basename)
-
-        return rproc
-
-    def wait_for_visible(self, basename="background_file", timeout=30):
-        i = 0
-        while i < timeout:
-            r = self.client_remote.run(args=[
-                'sudo', 'ls', os.path.join(self.mountpoint, basename)
-            ], check_status=False)
-            if r.exitstatus == 0:
-                log.debug("File {0} became visible from {1} after {2}s".format(
-                    basename, self.client_id, i))
-                return
-            else:
-                time.sleep(1)
-                i += 1
-
-        raise RuntimeError("Timed out after {0}s waiting for {1} to become visible from {2}".format(
-            i, basename, self.client_id))
-
-    def lock_background(self, basename="background_file", do_flock=True):
-        """
-        Open and lock files for writing, holding the lock in a background process
-        """
-        assert(self.is_mounted())
-
-        path = os.path.join(self.mountpoint, basename)
-
-        script_builder = """
-            import time
-            import fcntl
-            import struct"""
-        if do_flock:
-            script_builder += """
-            f1 = open("{path}-1", 'w')
-            fcntl.flock(f1, fcntl.LOCK_EX | fcntl.LOCK_NB)"""
-        script_builder += """
-            f2 = open("{path}-2", 'w')
-            lockdata = struct.pack('hhllhh', fcntl.F_WRLCK, 0, 0, 0, 0, 0)
-            fcntl.fcntl(f2, fcntl.F_SETLK, lockdata)
-            while True:
-                time.sleep(1)
-            """
-
-        pyscript = dedent(script_builder).format(path=path)
-
-        log.info("lock_background file {0}".format(basename))
-        rproc = self._run_python(pyscript)
-        self.background_procs.append(rproc)
-        return rproc
-
-    def lock_and_release(self, basename="background_file"):
-        assert(self.is_mounted())
-
-        path = os.path.join(self.mountpoint, basename)
-
-        script = """
-            import time
-            import fcntl
-            import struct
-            f1 = open("{path}-1", 'w')
-            fcntl.flock(f1, fcntl.LOCK_EX)
-            f2 = open("{path}-2", 'w')
-            lockdata = struct.pack('hhllhh', fcntl.F_WRLCK, 0, 0, 0, 0, 0)
-            fcntl.fcntl(f2, fcntl.F_SETLK, lockdata)
-            """
-        pyscript = dedent(script).format(path=path)
-
-        log.info("lock_and_release file {0}".format(basename))
-        return self._run_python(pyscript)
-
-    def check_filelock(self, basename="background_file", do_flock=True):
-        assert(self.is_mounted())
-
-        path = os.path.join(self.mountpoint, basename)
-
-        script_builder = """
-            import fcntl
-            import errno
-            import struct"""
-        if do_flock:
-            script_builder += """
-            f1 = open("{path}-1", 'r')
-            try:
-                fcntl.flock(f1, fcntl.LOCK_EX | fcntl.LOCK_NB)
-            except IOError, e:
-                if e.errno == errno.EAGAIN:
-                    pass
-            else:
-                raise RuntimeError("flock on file {path}-1 not found")"""
-        script_builder += """
-            f2 = open("{path}-2", 'r')
-            try:
-                lockdata = struct.pack('hhllhh', fcntl.F_WRLCK, 0, 0, 0, 0, 0)
-                fcntl.fcntl(f2, fcntl.F_SETLK, lockdata)
-            except IOError, e:
-                if e.errno == errno.EAGAIN:
-                    pass
-            else:
-                raise RuntimeError("posix lock on file {path}-2 not found")
-            """
-        pyscript = dedent(script_builder).format(path=path)
-
-        log.info("check lock on file {0}".format(basename))
-        self.client_remote.run(args=[
-            'sudo', 'python', '-c', pyscript
-        ])
-
-    def write_background(self, basename="background_file", loop=False):
-        """
-        Open a file for writing, and complete as soon as you can
-        :param basename:
-        :return:
-        """
-        assert(self.is_mounted())
-
-        path = os.path.join(self.mountpoint, basename)
-
-        pyscript = dedent("""
-            import os
-            import time
-
-            fd = os.open("{path}", os.O_RDWR | os.O_CREAT, 0644)
-            try:
-                while True:
-                    os.write(fd, 'content')
-                    time.sleep(1)
-                    if not {loop}:
-                        break
-            except IOError, e:
-                pass
-            os.close(fd)
-            """).format(path=path, loop=str(loop))
-
-        rproc = self._run_python(pyscript)
-        self.background_procs.append(rproc)
-        return rproc
-
-    def write_n_mb(self,
filename, n_mb, seek=0, wait=True): - """ - Write the requested number of megabytes to a file - """ - assert(self.is_mounted()) - - return self.run_shell(["dd", "if=/dev/urandom", "of={0}".format(filename), - "bs=1M", "conv=fdatasync", - "count={0}".format(n_mb), - "seek={0}".format(seek) - ], wait=wait) - - def write_test_pattern(self, filename, size): - log.info("Writing {0} bytes to {1}".format(size, filename)) - return self.run_python(dedent(""" - import zlib - path = "{path}" - f = open(path, 'w') - for i in range(0, {size}): - val = zlib.crc32("%s" % i) & 7 - f.write(chr(val)) - f.close() - """.format( - path=os.path.join(self.mountpoint, filename), - size=size - ))) - - def validate_test_pattern(self, filename, size): - log.info("Validating {0} bytes from {1}".format(size, filename)) - return self.run_python(dedent(""" - import zlib - path = "{path}" - f = open(path, 'r') - bytes = f.read() - f.close() - if len(bytes) != {size}: - raise RuntimeError("Bad length {{0}} vs. expected {{1}}".format( - len(bytes), {size} - )) - for i, b in enumerate(bytes): - val = zlib.crc32("%s" % i) & 7 - if b != chr(val): - raise RuntimeError("Bad data at offset {{0}}".format(i)) - """.format( - path=os.path.join(self.mountpoint, filename), - size=size - ))) - - def open_n_background(self, fs_path, count): - """ - Open N files for writing, hold them open in a background process - - :param fs_path: Path relative to CephFS root, e.g. "foo/bar" - :return: a RemoteProcess - """ - assert(self.is_mounted()) - - abs_path = os.path.join(self.mountpoint, fs_path) - - pyscript = dedent(""" - import sys - import time - import os - - n = {count} - abs_path = "{abs_path}" - - if not os.path.exists(os.path.dirname(abs_path)): - os.makedirs(os.path.dirname(abs_path)) - - handles = [] - for i in range(0, n): - fname = "{{0}}_{{1}}".format(abs_path, i) - handles.append(open(fname, 'w')) - - while True: - time.sleep(1) - """).format(abs_path=abs_path, count=count) - - rproc = self._run_python(pyscript) - self.background_procs.append(rproc) - return rproc - - def create_n_files(self, fs_path, count, sync=False): - assert(self.is_mounted()) - - abs_path = os.path.join(self.mountpoint, fs_path) - - pyscript = dedent(""" - import sys - import time - import os - - n = {count} - abs_path = "{abs_path}" - - if not os.path.exists(os.path.dirname(abs_path)): - os.makedirs(os.path.dirname(abs_path)) - - for i in range(0, n): - fname = "{{0}}_{{1}}".format(abs_path, i) - h = open(fname, 'w') - h.write('content') - if {sync}: - h.flush() - os.fsync(h.fileno()) - h.close() - """).format(abs_path=abs_path, count=count, sync=str(sync)) - - self.run_python(pyscript) - - def teardown(self): - for p in self.background_procs: - log.info("Terminating background process") - self._kill_background(p) - - self.background_procs = [] - - def _kill_background(self, p): - if p.stdin: - p.stdin.close() - try: - p.wait() - except (CommandFailedError, ConnectionLostError): - pass - - def kill_background(self, p): - """ - For a process that was returned by one of the _background member functions, - kill it hard. 
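write_test_pattern and validate_test_pattern above derive one of eight byte values deterministically from each file offset; a compact Python 3 sketch of the same check:

    import zlib

    def pattern_byte(i):
        # CRC of the decimal offset string, folded down to 3 bits.
        return zlib.crc32(str(i).encode()) & 7

    data = bytes(pattern_byte(i) for i in range(1024))
    assert all(b == pattern_byte(i) for i, b in enumerate(data))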
- """ - self._kill_background(p) - self.background_procs.remove(p) - - def get_global_id(self): - raise NotImplementedError() - - def get_osd_epoch(self): - raise NotImplementedError() - - def stat(self, fs_path, wait=True): - """ - stat a file, and return the result as a dictionary like this: - { - "st_ctime": 1414161137.0, - "st_mtime": 1414161137.0, - "st_nlink": 33, - "st_gid": 0, - "st_dev": 16777218, - "st_size": 1190, - "st_ino": 2, - "st_uid": 0, - "st_mode": 16877, - "st_atime": 1431520593.0 - } - - Raises exception on absent file. - """ - abs_path = os.path.join(self.mountpoint, fs_path) - - pyscript = dedent(""" - import os - import stat - import json - import sys - - try: - s = os.stat("{path}") - except OSError as e: - sys.exit(e.errno) - - attrs = ["st_mode", "st_ino", "st_dev", "st_nlink", "st_uid", "st_gid", "st_size", "st_atime", "st_mtime", "st_ctime"] - print json.dumps( - dict([(a, getattr(s, a)) for a in attrs]), - indent=2) - """).format(path=abs_path) - proc = self._run_python(pyscript) - if wait: - proc.wait() - return json.loads(proc.stdout.getvalue().strip()) - else: - return proc - - def touch(self, fs_path): - """ - Create a dentry if it doesn't already exist. This python - implementation exists because the usual command line tool doesn't - pass through error codes like EIO. - - :param fs_path: - :return: - """ - abs_path = os.path.join(self.mountpoint, fs_path) - pyscript = dedent(""" - import sys - import errno - - try: - f = open("{path}", "w") - f.close() - except IOError as e: - sys.exit(errno.EIO) - """).format(path=abs_path) - proc = self._run_python(pyscript) - proc.wait() - - def path_to_ino(self, fs_path, follow_symlinks=True): - abs_path = os.path.join(self.mountpoint, fs_path) - - if follow_symlinks: - pyscript = dedent(""" - import os - import stat - - print os.stat("{path}").st_ino - """).format(path=abs_path) - else: - pyscript = dedent(""" - import os - import stat - - print os.lstat("{path}").st_ino - """).format(path=abs_path) - - proc = self._run_python(pyscript) - proc.wait() - return int(proc.stdout.getvalue().strip()) - - def path_to_nlink(self, fs_path): - abs_path = os.path.join(self.mountpoint, fs_path) - - pyscript = dedent(""" - import os - import stat - - print os.stat("{path}").st_nlink - """).format(path=abs_path) - - proc = self._run_python(pyscript) - proc.wait() - return int(proc.stdout.getvalue().strip()) - - def ls(self, path=None): - """ - Wrap ls: return a list of strings - """ - cmd = ["ls"] - if path: - cmd.append(path) - - ls_text = self.run_shell(cmd).stdout.getvalue().strip() - - if ls_text: - return ls_text.split("\n") - else: - # Special case because otherwise split on empty string - # gives you [''] instead of [] - return [] - - def setfattr(self, path, key, val): - """ - Wrap setfattr. - - :param path: relative to mount point - :param key: xattr name - :param val: xattr value - :return: None - """ - self.run_shell(["setfattr", "-n", key, "-v", val, path]) - - def getfattr(self, path, attr): - """ - Wrap getfattr: return the values of a named xattr on one file, or - None if the attribute is not found. 
-
-        :return: a string
-        """
-        p = self.run_shell(["getfattr", "--only-values", "-n", attr, path], wait=False)
-        try:
-            p.wait()
-        except CommandFailedError as e:
-            if e.exitstatus == 1 and "No such attribute" in p.stderr.getvalue():
-                return None
-            else:
-                raise
-
-        return p.stdout.getvalue()
-
-    def df(self):
-        """
-        Wrap df: return a dict of usage fields in bytes
-        """
-
-        p = self.run_shell(["df", "-B1", "."])
-        lines = p.stdout.getvalue().strip().split("\n")
-        fs, total, used, avail = lines[1].split()[:4]
-        log.warn(lines)
-
-        return {
-            "total": int(total),
-            "used": int(used),
-            "available": int(avail)
-        }
diff --git a/src/ceph/qa/tasks/cephfs/test_auto_repair.py b/src/ceph/qa/tasks/cephfs/test_auto_repair.py
deleted file mode 100644
index c0aa2e4..0000000
--- a/src/ceph/qa/tasks/cephfs/test_auto_repair.py
+++ /dev/null
@@ -1,90 +0,0 @@
-
-"""
-Exercise the MDS's auto repair functions
-"""
-
-import logging
-import time
-
-from teuthology.orchestra.run import CommandFailedError
-from tasks.cephfs.cephfs_test_case import CephFSTestCase
-
-
-log = logging.getLogger(__name__)
-
-
-# Arbitrary timeouts for operations involving restarting
-# an MDS or waiting for it to come up
-MDS_RESTART_GRACE = 60
-
-
-class TestMDSAutoRepair(CephFSTestCase):
-    def test_backtrace_repair(self):
-        """
-        MDS should verify/fix backtrace on fetch dirfrag
-        """
-
-        self.mount_a.run_shell(["mkdir", "testdir1"])
-        self.mount_a.run_shell(["touch", "testdir1/testfile"])
-        dir_objname = "{:x}.00000000".format(self.mount_a.path_to_ino("testdir1"))
-
-        # drop inodes caps
-        self.mount_a.umount_wait()
-
-        # flush journal entries to dirfrag objects, and expire journal
-        self.fs.mds_asok(['flush', 'journal'])
-
-        # Restart the MDS to drop the metadata cache (because we expired the journal,
-        # nothing gets replayed into cache on restart)
-        self.fs.mds_stop()
-        self.fs.mds_fail_restart()
-        self.fs.wait_for_daemons()
-
-        # remove testdir1's backtrace
-        self.fs.rados(["rmxattr", dir_objname, "parent"])
-
-        # readdir (fetch dirfrag) should fix testdir1's backtrace
-        self.mount_a.mount()
-        self.mount_a.wait_until_mounted()
-        self.mount_a.run_shell(["ls", "testdir1"])
-
-        # flush journal entries to dirfrag objects
-        self.fs.mds_asok(['flush', 'journal'])
-
-        # check if backtrace exists
-        self.fs.rados(["getxattr", dir_objname, "parent"])
-
-    def test_mds_readonly(self):
-        """
-        Test that the MDS behaves correctly when it is read-only
-        """
-        # operations should succeed when the MDS is not read-only
-        self.mount_a.run_shell(["touch", "test_file1"])
-        writer = self.mount_a.write_background(loop=True)
-
-        time.sleep(10)
-        self.assertFalse(writer.finished)
-
-        # force MDS to read-only mode
-        self.fs.mds_asok(['force_readonly'])
-        time.sleep(10)
-
-        # touching test file should fail
-        try:
-            self.mount_a.run_shell(["touch", "test_file1"])
-        except CommandFailedError:
-            pass
-        else:
-            self.assertTrue(False)
-
-        # background writer also should fail
-        self.assertTrue(writer.finished)
-
-        # The MDS should report its readonly health state to the mon
-        self.wait_for_health("MDS_READ_ONLY", timeout=30)
-
-        # restart mds to make it writable
-        self.fs.mds_fail_restart()
-        self.fs.wait_for_daemons()
-
-        self.wait_for_health_clear(timeout=30)
diff --git a/src/ceph/qa/tasks/cephfs/test_backtrace.py b/src/ceph/qa/tasks/cephfs/test_backtrace.py
deleted file mode 100644
index af246a1..0000000
--- a/src/ceph/qa/tasks/cephfs/test_backtrace.py
+++ /dev/null
@@ -1,78 +0,0 @@
-
-from tasks.cephfs.cephfs_test_case import CephFSTestCase
-
-
-class
TestBacktrace(CephFSTestCase): - def test_backtrace(self): - """ - That the 'parent' and 'layout' xattrs on the head objects of files - are updated correctly. - """ - - old_data_pool_name = self.fs.get_data_pool_name() - old_pool_id = self.fs.get_data_pool_id() - - # Create a file for subsequent checks - self.mount_a.run_shell(["mkdir", "parent_a"]) - self.mount_a.run_shell(["touch", "parent_a/alpha"]) - file_ino = self.mount_a.path_to_ino("parent_a/alpha") - - # That backtrace and layout are written after initial flush - self.fs.mds_asok(["flush", "journal"]) - backtrace = self.fs.read_backtrace(file_ino) - self.assertEqual(['alpha', 'parent_a'], [a['dname'] for a in backtrace['ancestors']]) - layout = self.fs.read_layout(file_ino) - self.assertDictEqual(layout, { - "stripe_unit": 4194304, - "stripe_count": 1, - "object_size": 4194304, - "pool_id": old_pool_id, - "pool_ns": "", - }) - self.assertEqual(backtrace['pool'], old_pool_id) - - # That backtrace is written after parentage changes - self.mount_a.run_shell(["mkdir", "parent_b"]) - self.mount_a.run_shell(["mv", "parent_a/alpha", "parent_b/alpha"]) - - self.fs.mds_asok(["flush", "journal"]) - backtrace = self.fs.read_backtrace(file_ino) - self.assertEqual(['alpha', 'parent_b'], [a['dname'] for a in backtrace['ancestors']]) - - # Create a new data pool - new_pool_name = "data_new" - new_pool_id = self.fs.add_data_pool(new_pool_name) - - # That an object which has switched pools gets its backtrace updated - self.mount_a.setfattr("./parent_b/alpha", - "ceph.file.layout.pool", new_pool_name) - self.fs.mds_asok(["flush", "journal"]) - backtrace_old_pool = self.fs.read_backtrace(file_ino, pool=old_data_pool_name) - self.assertEqual(backtrace_old_pool['pool'], new_pool_id) - backtrace_new_pool = self.fs.read_backtrace(file_ino, pool=new_pool_name) - self.assertEqual(backtrace_new_pool['pool'], new_pool_id) - new_pool_layout = self.fs.read_layout(file_ino, pool=new_pool_name) - self.assertEqual(new_pool_layout['pool_id'], new_pool_id) - self.assertEqual(new_pool_layout['pool_ns'], '') - - # That subsequent linkage changes are only written to new pool backtrace - self.mount_a.run_shell(["mkdir", "parent_c"]) - self.mount_a.run_shell(["mv", "parent_b/alpha", "parent_c/alpha"]) - self.fs.mds_asok(["flush", "journal"]) - backtrace_old_pool = self.fs.read_backtrace(file_ino, pool=old_data_pool_name) - self.assertEqual(['alpha', 'parent_b'], [a['dname'] for a in backtrace_old_pool['ancestors']]) - backtrace_new_pool = self.fs.read_backtrace(file_ino, pool=new_pool_name) - self.assertEqual(['alpha', 'parent_c'], [a['dname'] for a in backtrace_new_pool['ancestors']]) - - # That layout is written to new pool after change to other field in layout - self.mount_a.setfattr("./parent_c/alpha", - "ceph.file.layout.object_size", "8388608") - - self.fs.mds_asok(["flush", "journal"]) - new_pool_layout = self.fs.read_layout(file_ino, pool=new_pool_name) - self.assertEqual(new_pool_layout['object_size'], 8388608) - - # ...but not to the old pool: the old pool's backtrace points to the new pool, and that's enough, - # we don't update the layout in all the old pools whenever it changes - old_pool_layout = self.fs.read_layout(file_ino, pool=old_data_pool_name) - self.assertEqual(old_pool_layout['object_size'], 4194304) diff --git a/src/ceph/qa/tasks/cephfs/test_cap_flush.py b/src/ceph/qa/tasks/cephfs/test_cap_flush.py deleted file mode 100644 index 1cd102f..0000000 --- a/src/ceph/qa/tasks/cephfs/test_cap_flush.py +++ /dev/null @@ -1,64 +0,0 @@ - -import os -import 
time -from textwrap import dedent -from unittest import SkipTest -from tasks.cephfs.fuse_mount import FuseMount -from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology - -class TestCapFlush(CephFSTestCase): - @for_teuthology - def test_replay_create(self): - """ - MDS starts to handle client caps when it enters clientreplay stage. - When handling a client cap in clientreplay stage, it's possible that - corresponding inode does not exist because the client request which - creates inode hasn't been replayed. - """ - - if not isinstance(self.mount_a, FuseMount): - raise SkipTest("Require FUSE client to inject client release failure") - - dir_path = os.path.join(self.mount_a.mountpoint, "testdir") - py_script = dedent(""" - import os - os.mkdir("{0}") - fd = os.open("{0}", os.O_RDONLY) - os.fchmod(fd, 0777) - os.fsync(fd) - """).format(dir_path) - self.mount_a.run_python(py_script) - - self.fs.mds_asok(["flush", "journal"]) - - # client will only get unsafe replay - self.fs.mds_asok(["config", "set", "mds_log_pause", "1"]) - - file_name = "testfile" - file_path = dir_path + "/" + file_name - - # Create a file and modify its mode. ceph-fuse will mark Ax cap dirty - py_script = dedent(""" - import os - os.chdir("{0}") - os.setgid(65534) - os.setuid(65534) - fd = os.open("{1}", os.O_CREAT | os.O_RDWR, 0644) - os.fchmod(fd, 0640) - """).format(dir_path, file_name) - self.mount_a.run_python(py_script) - - # Modify file mode by different user. ceph-fuse will send a setattr request - self.mount_a.run_shell(["chmod", "600", file_path], wait=False) - - time.sleep(10) - - # Restart mds. Client will re-send the unsafe request and cap flush - self.fs.mds_stop() - self.fs.mds_fail_restart() - self.fs.wait_for_daemons() - - mode = self.mount_a.run_shell(['stat', '-c' '%a', file_path]).stdout.getvalue().strip() - # If the cap flush get dropped, mode should be 0644. - # (Ax cap stays in dirty state, which prevents setattr reply from updating file mode) - self.assertEqual(mode, "600") diff --git a/src/ceph/qa/tasks/cephfs/test_client_limits.py b/src/ceph/qa/tasks/cephfs/test_client_limits.py deleted file mode 100644 index cb5e3a4..0000000 --- a/src/ceph/qa/tasks/cephfs/test_client_limits.py +++ /dev/null @@ -1,239 +0,0 @@ - -""" -Exercise the MDS's behaviour when clients and the MDCache reach or -exceed the limits of how many caps/inodes they should hold. -""" - -import logging -from textwrap import dedent -from unittest import SkipTest -from teuthology.orchestra.run import CommandFailedError -from tasks.cephfs.cephfs_test_case import CephFSTestCase, needs_trimming -from tasks.cephfs.fuse_mount import FuseMount -import os - - -log = logging.getLogger(__name__) - - -# Arbitrary timeouts for operations involving restarting -# an MDS or waiting for it to come up -MDS_RESTART_GRACE = 60 - -# Hardcoded values from Server::recall_client_state -CAP_RECALL_RATIO = 0.8 -CAP_RECALL_MIN = 100 - - -class TestClientLimits(CephFSTestCase): - REQUIRE_KCLIENT_REMOTE = True - CLIENTS_REQUIRED = 2 - - def _test_client_pin(self, use_subdir, open_files): - """ - When a client pins an inode in its cache, for example because the file is held open, - it should reject requests from the MDS to trim these caps. The MDS should complain - to the user that it is unable to enforce its cache size limits because of this - objectionable client. 
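The cap-recall acceptance band computed later in this helper is easy to work through by hand. With illustrative values open_files=800 and mds_max_ratio_caps_per_client=0.8:

    open_files = 800
    ratio = 0.8                                        # mds_max_ratio_caps_per_client
    # the +2 covers the caps held on the root and the subdir
    expected = int((1.0 - ratio) * (open_files + 2))   # == 160
    low, high = expected * 0.95, expected * 1.05       # accept 152.0 .. 168.0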
- - :param use_subdir: whether to put test files in a subdir or use root - """ - - cache_size = open_files/2 - - self.set_conf('mds', 'mds cache size', cache_size) - self.fs.mds_fail_restart() - self.fs.wait_for_daemons() - - mds_min_caps_per_client = int(self.fs.get_config("mds_min_caps_per_client")) - self.assertTrue(open_files >= mds_min_caps_per_client) - mds_max_ratio_caps_per_client = float(self.fs.get_config("mds_max_ratio_caps_per_client")) - - mount_a_client_id = self.mount_a.get_global_id() - path = "subdir/mount_a" if use_subdir else "mount_a" - open_proc = self.mount_a.open_n_background(path, open_files) - - # Client should now hold: - # `open_files` caps for the open files - # 1 cap for root - # 1 cap for subdir - self.wait_until_equal(lambda: self.get_session(mount_a_client_id)['num_caps'], - open_files + (2 if use_subdir else 1), - timeout=600, - reject_fn=lambda x: x > open_files + 2) - - # MDS should not be happy about that, as the client is failing to comply - # with the SESSION_RECALL messages it is being sent - mds_recall_state_timeout = float(self.fs.get_config("mds_recall_state_timeout")) - self.wait_for_health("MDS_CLIENT_RECALL", mds_recall_state_timeout+10) - - # We can also test that the MDS health warning for oversized - # cache is functioning as intended. - self.wait_for_health("MDS_CACHE_OVERSIZED", - mds_recall_state_timeout + 10) - - # When the client closes the files, it should retain only as many caps as allowed - # under the SESSION_RECALL policy - log.info("Terminating process holding files open") - open_proc.stdin.close() - try: - open_proc.wait() - except CommandFailedError: - # We killed it, so it raises an error - pass - - # The remaining caps should comply with the numbers sent from MDS in SESSION_RECALL message, - # which depend on the caps outstanding, cache size and overall ratio - recall_expected_value = int((1.0-mds_max_ratio_caps_per_client)*(open_files+2)) - def expected_caps(): - num_caps = self.get_session(mount_a_client_id)['num_caps'] - if num_caps < mds_min_caps_per_client: - raise RuntimeError("client caps fell below min!") - elif num_caps == mds_min_caps_per_client: - return True - elif recall_expected_value*.95 <= num_caps <= recall_expected_value*1.05: - return True - else: - return False - - self.wait_until_true(expected_caps, timeout=60) - - @needs_trimming - def test_client_pin_root(self): - self._test_client_pin(False, 400) - - @needs_trimming - def test_client_pin(self): - self._test_client_pin(True, 800) - - @needs_trimming - def test_client_pin_mincaps(self): - self._test_client_pin(True, 200) - - def test_client_release_bug(self): - """ - When a client has a bug (which we will simulate) preventing it from releasing caps, - the MDS should notice that releases are not being sent promptly, and generate a health - metric to that effect. - """ - - # The debug hook to inject the failure only exists in the fuse client - if not isinstance(self.mount_a, FuseMount): - raise SkipTest("Require FUSE client to inject client release failure") - - self.set_conf('client.{0}'.format(self.mount_a.client_id), 'client inject release failure', 'true') - self.mount_a.teardown() - self.mount_a.mount() - self.mount_a.wait_until_mounted() - mount_a_client_id = self.mount_a.get_global_id() - - # Client A creates a file. 
He will hold the write caps on the file, and later (simulated bug) fail
-        # to comply with the MDS's request to release that cap
-        self.mount_a.run_shell(["touch", "file1"])
-
-        # Client B tries to write to the file that client A created
-        rproc = self.mount_b.write_background("file1")
-
-        # After mds_revoke_cap_timeout, we should see a health warning (extra lag from
-        # MDS beacon period)
-        mds_revoke_cap_timeout = float(self.fs.get_config("mds_revoke_cap_timeout"))
-        self.wait_for_health("MDS_CLIENT_LATE_RELEASE", mds_revoke_cap_timeout + 10)
-
-        # Client B should still be stuck
-        self.assertFalse(rproc.finished)
-
-        # Kill client A
-        self.mount_a.kill()
-        self.mount_a.kill_cleanup()
-
-        # Client B should complete
-        self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])
-        rproc.wait()
-
-    def test_client_oldest_tid(self):
-        """
-        When a client does not advance its oldest tid, the MDS should notice that
-        and generate health warnings.
-        """
-
-        # num of requests client issues
-        max_requests = 1000
-
-        # The debug hook to inject the failure only exists in the fuse client
-        if not isinstance(self.mount_a, FuseMount):
-            raise SkipTest("Require FUSE client to inject client release failure")
-
-        self.set_conf('client', 'client inject fixed oldest tid', 'true')
-        self.mount_a.teardown()
-        self.mount_a.mount()
-        self.mount_a.wait_until_mounted()
-
-        self.fs.mds_asok(['config', 'set', 'mds_max_completed_requests', '{0}'.format(max_requests)])
-
-        # Create lots of files
-        self.mount_a.create_n_files("testdir/file1", max_requests + 100)
-
-        # Create a few files synchronously. This makes sure previous requests are completed
-        self.mount_a.create_n_files("testdir/file2", 5, True)
-
-        # Wait for the health warnings. Assume mds can handle 10 requests per second at least
-        self.wait_for_health("MDS_CLIENT_OLDEST_TID", max_requests / 10)
-
-    def _test_client_cache_size(self, mount_subdir):
-        """
-        Check that the client invalidates the kernel dcache according to its cache size config
-        """
-
-        # The debug hook to inject the failure only exists in the fuse client
-        if not isinstance(self.mount_a, FuseMount):
-            raise SkipTest("Require FUSE client to inject client release failure")
-
-        if mount_subdir:
-            # fuse assigns a fixed inode number (1) to the root inode. But when
-            # mounting into a subdir, the actual inode number of the root is not 1.
-            # This mismatch confuses fuse_lowlevel_notify_inval_entry() when
-            # invalidating dentries in the root directory.
- self.mount_a.run_shell(["mkdir", "subdir"]) - self.mount_a.umount_wait() - self.set_conf('client', 'client mountpoint', '/subdir') - self.mount_a.mount() - self.mount_a.wait_until_mounted() - root_ino = self.mount_a.path_to_ino(".") - self.assertEqual(root_ino, 1); - - dir_path = os.path.join(self.mount_a.mountpoint, "testdir") - - mkdir_script = dedent(""" - import os - os.mkdir("{path}") - for n in range(0, {num_dirs}): - os.mkdir("{path}/dir{{0}}".format(n)) - """) - - num_dirs = 1000 - self.mount_a.run_python(mkdir_script.format(path=dir_path, num_dirs=num_dirs)) - self.mount_a.run_shell(["sync"]) - - dentry_count, dentry_pinned_count = self.mount_a.get_dentry_count() - self.assertGreaterEqual(dentry_count, num_dirs) - self.assertGreaterEqual(dentry_pinned_count, num_dirs) - - cache_size = num_dirs / 10 - self.mount_a.set_cache_size(cache_size) - - def trimmed(): - dentry_count, dentry_pinned_count = self.mount_a.get_dentry_count() - log.info("waiting, dentry_count, dentry_pinned_count: {0}, {1}".format( - dentry_count, dentry_pinned_count - )) - if dentry_count > cache_size or dentry_pinned_count > cache_size: - return False - - return True - - self.wait_until_true(trimmed, 30) - - @needs_trimming - def test_client_cache_size(self): - self._test_client_cache_size(False) - self._test_client_cache_size(True) diff --git a/src/ceph/qa/tasks/cephfs/test_client_recovery.py b/src/ceph/qa/tasks/cephfs/test_client_recovery.py deleted file mode 100644 index fd58c14..0000000 --- a/src/ceph/qa/tasks/cephfs/test_client_recovery.py +++ /dev/null @@ -1,474 +0,0 @@ - -""" -Teuthology task for exercising CephFS client recovery -""" - -import logging -from textwrap import dedent -import time -import distutils.version as version -import re -import os - -from teuthology.orchestra.run import CommandFailedError, ConnectionLostError -from tasks.cephfs.cephfs_test_case import CephFSTestCase -from teuthology.packaging import get_package_version - - -log = logging.getLogger(__name__) - - -# Arbitrary timeouts for operations involving restarting -# an MDS or waiting for it to come up -MDS_RESTART_GRACE = 60 - - -class TestClientNetworkRecovery(CephFSTestCase): - REQUIRE_KCLIENT_REMOTE = True - REQUIRE_ONE_CLIENT_REMOTE = True - CLIENTS_REQUIRED = 2 - - LOAD_SETTINGS = ["mds_session_timeout", "mds_reconnect_timeout", "ms_max_backoff"] - - # Environment references - mds_session_timeout = None - mds_reconnect_timeout = None - ms_max_backoff = None - - def test_network_death(self): - """ - Simulate software freeze or temporary network failure. - - Check that the client blocks I/O during failure, and completes - I/O after failure. 
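The assertions that follow trace this timeline (a sketch, with mds_session_timeout illustrative at 60 seconds):

    # t=0s    block client traffic; start a background write -> it blocks
    # t=90s   (1.5x the timeout) the MDS has marked the session "stale";
    #         the write is still blocked
    # then    unblock the network: the write completes promptly and the
    #         session state returns to "open"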
- """ - - # We only need one client - self.mount_b.umount_wait() - - # Initially our one client session should be visible - client_id = self.mount_a.get_global_id() - ls_data = self._session_list() - self.assert_session_count(1, ls_data) - self.assertEqual(ls_data[0]['id'], client_id) - self.assert_session_state(client_id, "open") - - # ...and capable of doing I/O without blocking - self.mount_a.create_files() - - # ...but if we turn off the network - self.fs.set_clients_block(True) - - # ...and try and start an I/O - write_blocked = self.mount_a.write_background() - - # ...then it should block - self.assertFalse(write_blocked.finished) - self.assert_session_state(client_id, "open") - time.sleep(self.mds_session_timeout * 1.5) # Long enough for MDS to consider session stale - self.assertFalse(write_blocked.finished) - self.assert_session_state(client_id, "stale") - - # ...until we re-enable I/O - self.fs.set_clients_block(False) - - # ...when it should complete promptly - a = time.time() - self.wait_until_true(lambda: write_blocked.finished, self.ms_max_backoff * 2) - write_blocked.wait() # Already know we're finished, wait() to raise exception on errors - recovery_time = time.time() - a - log.info("recovery time: {0}".format(recovery_time)) - self.assert_session_state(client_id, "open") - - -class TestClientRecovery(CephFSTestCase): - REQUIRE_KCLIENT_REMOTE = True - CLIENTS_REQUIRED = 2 - - LOAD_SETTINGS = ["mds_session_timeout", "mds_reconnect_timeout", "ms_max_backoff"] - - # Environment references - mds_session_timeout = None - mds_reconnect_timeout = None - ms_max_backoff = None - - def test_basic(self): - # Check that two clients come up healthy and see each others' files - # ===================================================== - self.mount_a.create_files() - self.mount_a.check_files() - self.mount_a.umount_wait() - - self.mount_b.check_files() - - self.mount_a.mount() - self.mount_a.wait_until_mounted() - - # Check that the admin socket interface is correctly reporting - # two sessions - # ===================================================== - ls_data = self._session_list() - self.assert_session_count(2, ls_data) - - self.assertSetEqual( - set([l['id'] for l in ls_data]), - {self.mount_a.get_global_id(), self.mount_b.get_global_id()} - ) - - def test_restart(self): - # Check that after an MDS restart both clients reconnect and continue - # to handle I/O - # ===================================================== - self.fs.mds_fail_restart() - self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE) - - self.mount_a.create_destroy() - self.mount_b.create_destroy() - - def _session_num_caps(self, client_id): - ls_data = self.fs.mds_asok(['session', 'ls']) - return int(self._session_by_id(ls_data).get(client_id, {'num_caps': None})['num_caps']) - - def test_reconnect_timeout(self): - # Reconnect timeout - # ================= - # Check that if I stop an MDS and a client goes away, the MDS waits - # for the reconnect period - self.fs.mds_stop() - self.fs.mds_fail() - - mount_a_client_id = self.mount_a.get_global_id() - self.mount_a.umount_wait(force=True) - - self.fs.mds_restart() - - self.fs.wait_for_state('up:reconnect', reject='up:active', timeout=MDS_RESTART_GRACE) - # Check that the MDS locally reports its state correctly - status = self.fs.mds_asok(['status']) - self.assertIn("reconnect_status", status) - - ls_data = self._session_list() - self.assert_session_count(2, ls_data) - - # The session for the dead client should have the 'reconnect' flag set - 
self.assertTrue(self.get_session(mount_a_client_id)['reconnecting']) - - # Wait for the reconnect state to clear, this should take the - # reconnect timeout period. - in_reconnect_for = self.fs.wait_for_state('up:active', timeout=self.mds_reconnect_timeout * 2) - # Check that the period we waited to enter active is within a factor - # of two of the reconnect timeout. - self.assertGreater(in_reconnect_for, self.mds_reconnect_timeout / 2, - "Should have been in reconnect phase for {0} but only took {1}".format( - self.mds_reconnect_timeout, in_reconnect_for - )) - - self.assert_session_count(1) - - # Check that the client that timed out during reconnect can - # mount again and do I/O - self.mount_a.mount() - self.mount_a.wait_until_mounted() - self.mount_a.create_destroy() - - self.assert_session_count(2) - - def test_reconnect_eviction(self): - # Eviction during reconnect - # ========================= - mount_a_client_id = self.mount_a.get_global_id() - - self.fs.mds_stop() - self.fs.mds_fail() - - # The mount goes away while the MDS is offline - self.mount_a.kill() - - self.fs.mds_restart() - - # Enter reconnect phase - self.fs.wait_for_state('up:reconnect', reject='up:active', timeout=MDS_RESTART_GRACE) - self.assert_session_count(2) - - # Evict the stuck client - self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id]) - self.assert_session_count(1) - - # Observe that we proceed to active phase without waiting full reconnect timeout - evict_til_active = self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE) - # Once we evict the troublemaker, the reconnect phase should complete - # in well under the reconnect timeout. - self.assertLess(evict_til_active, self.mds_reconnect_timeout * 0.5, - "reconnect did not complete soon enough after eviction, took {0}".format( - evict_til_active - )) - - # We killed earlier so must clean up before trying to use again - self.mount_a.kill_cleanup() - - # Bring the client back - self.mount_a.mount() - self.mount_a.wait_until_mounted() - self.mount_a.create_destroy() - - def test_stale_caps(self): - # Capability release from stale session - # ===================================== - cap_holder = self.mount_a.open_background() - - # Wait for the file to be visible from another client, indicating - # that mount_a has completed its network ops - self.mount_b.wait_for_visible() - - # Simulate client death - self.mount_a.kill() - - try: - # Now, after mds_session_timeout seconds, the waiter should - # complete their operation when the MDS marks the holder's - # session stale. 
- cap_waiter = self.mount_b.write_background() - a = time.time() - cap_waiter.wait() - b = time.time() - - # Should have succeeded - self.assertEqual(cap_waiter.exitstatus, 0) - - cap_waited = b - a - log.info("cap_waiter waited {0}s".format(cap_waited)) - self.assertTrue(self.mds_session_timeout / 2.0 <= cap_waited <= self.mds_session_timeout * 2.0, - "Capability handover took {0}, expected approx {1}".format( - cap_waited, self.mds_session_timeout - )) - - cap_holder.stdin.close() - try: - cap_holder.wait() - except (CommandFailedError, ConnectionLostError): - # We killed it (and possibly its node), so it raises an error - pass - finally: - # teardown() doesn't quite handle this case cleanly, so help it out - self.mount_a.kill_cleanup() - - self.mount_a.mount() - self.mount_a.wait_until_mounted() - - def test_evicted_caps(self): - # Eviction while holding a capability - # =================================== - - # Take out a write capability on a file on client A, - # and then immediately kill it. - cap_holder = self.mount_a.open_background() - mount_a_client_id = self.mount_a.get_global_id() - - # Wait for the file to be visible from another client, indicating - # that mount_a has completed its network ops - self.mount_b.wait_for_visible() - - # Simulate client death - self.mount_a.kill() - - try: - # The waiter should get stuck waiting for the capability - # held on the MDS by the now-dead client A - cap_waiter = self.mount_b.write_background() - time.sleep(5) - self.assertFalse(cap_waiter.finished) - - self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id]) - # Now, because I evicted the old holder of the capability, it should - # immediately get handed over to the waiter - a = time.time() - cap_waiter.wait() - b = time.time() - cap_waited = b - a - log.info("cap_waiter waited {0}s".format(cap_waited)) - # This is the check that it happened 'now' rather than waiting - # for the session timeout - self.assertLess(cap_waited, self.mds_session_timeout / 2.0, - "Capability handover took {0}, expected less than {1}".format( - cap_waited, self.mds_session_timeout / 2.0 - )) - - cap_holder.stdin.close() - try: - cap_holder.wait() - except (CommandFailedError, ConnectionLostError): - # We killed it (and possibly its node), so it raises an error - pass - finally: - self.mount_a.kill_cleanup() - - self.mount_a.mount() - self.mount_a.wait_until_mounted() - - def test_trim_caps(self): - # Trim capability when reconnecting MDS - # =================================== - - count = 500 - # Create lots of files - for i in range(count): - self.mount_a.run_shell(["touch", "f{0}".format(i)]) - - # Populate mount_b's cache - self.mount_b.run_shell(["ls", "-l"]) - - client_id = self.mount_b.get_global_id() - num_caps = self._session_num_caps(client_id) - self.assertGreaterEqual(num_caps, count) - - # Restart MDS. 
client should trim its cache when reconnecting to the MDS - self.fs.mds_fail_restart() - self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE) - - num_caps = self._session_num_caps(client_id) - self.assertLess(num_caps, count, - "should have less than {0} capabilities, have {1}".format( - count, num_caps - )) - - def _is_flockable(self): - a_version_str = get_package_version(self.mount_a.client_remote, "fuse") - b_version_str = get_package_version(self.mount_b.client_remote, "fuse") - flock_version_str = "2.9" - - version_regex = re.compile(r"[0-9\.]+") - a_result = version_regex.match(a_version_str) - self.assertTrue(a_result) - b_result = version_regex.match(b_version_str) - self.assertTrue(b_result) - a_version = version.StrictVersion(a_result.group()) - b_version = version.StrictVersion(b_result.group()) - flock_version=version.StrictVersion(flock_version_str) - - if (a_version >= flock_version and b_version >= flock_version): - log.info("flock locks are available") - return True - else: - log.info("not testing flock locks, machines have versions {av} and {bv}".format( - av=a_version_str,bv=b_version_str)) - return False - - def test_filelock(self): - """ - Check that file lock doesn't get lost after an MDS restart - """ - - flockable = self._is_flockable() - lock_holder = self.mount_a.lock_background(do_flock=flockable) - - self.mount_b.wait_for_visible("background_file-2") - self.mount_b.check_filelock(do_flock=flockable) - - self.fs.mds_fail_restart() - self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE) - - self.mount_b.check_filelock(do_flock=flockable) - - # Tear down the background process - lock_holder.stdin.close() - try: - lock_holder.wait() - except (CommandFailedError, ConnectionLostError): - # We killed it, so it raises an error - pass - - def test_filelock_eviction(self): - """ - Check that file lock held by evicted client is given to - waiting client. - """ - if not self._is_flockable(): - self.skipTest("flock is not available") - - lock_holder = self.mount_a.lock_background() - self.mount_b.wait_for_visible("background_file-2") - self.mount_b.check_filelock() - - lock_taker = self.mount_b.lock_and_release() - # Check the taker is waiting (doesn't get it immediately) - time.sleep(2) - self.assertFalse(lock_holder.finished) - self.assertFalse(lock_taker.finished) - - try: - mount_a_client_id = self.mount_a.get_global_id() - self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id]) - - # Evicting mount_a should let mount_b's attempt to take the lock - # succeed - self.wait_until_true(lambda: lock_taker.finished, timeout=10) - finally: - # teardown() doesn't quite handle this case cleanly, so help it out - self.mount_a.kill() - self.mount_a.kill_cleanup() - - # Bring the client back - self.mount_a.mount() - self.mount_a.wait_until_mounted() - - def test_dir_fsync(self): - self._test_fsync(True); - - def test_create_fsync(self): - self._test_fsync(False); - - def _test_fsync(self, dirfsync): - """ - That calls to fsync guarantee visibility of metadata to another - client immediately after the fsyncing client dies. - """ - - # Leave this guy out until he's needed - self.mount_b.umount_wait() - - # Create dir + child dentry on client A, and fsync the dir - path = os.path.join(self.mount_a.mountpoint, "subdir") - self.mount_a.run_python( - dedent(""" - import os - import time - - path = "{path}" - - print "Starting creation..." 
- start = time.time() - - os.mkdir(path) - dfd = os.open(path, os.O_DIRECTORY) - - fd = open(os.path.join(path, "childfile"), "w") - print "Finished creation in {{0}}s".format(time.time() - start) - - print "Starting fsync..." - start = time.time() - if {dirfsync}: - os.fsync(dfd) - else: - os.fsync(fd) - print "Finished fsync in {{0}}s".format(time.time() - start) - """.format(path=path,dirfsync=str(dirfsync))) - ) - - # Immediately kill the MDS and then client A - self.fs.mds_stop() - self.fs.mds_fail() - self.mount_a.kill() - self.mount_a.kill_cleanup() - - # Restart the MDS. Wait for it to come up, it'll have to time out in clientreplay - self.fs.mds_restart() - log.info("Waiting for reconnect...") - self.fs.wait_for_state("up:reconnect") - log.info("Waiting for active...") - self.fs.wait_for_state("up:active", timeout=MDS_RESTART_GRACE + self.mds_reconnect_timeout) - log.info("Reached active...") - - # Is the child dentry visible from mount B? - self.mount_b.mount() - self.mount_b.wait_until_mounted() - self.mount_b.run_shell(["ls", "subdir/childfile"]) diff --git a/src/ceph/qa/tasks/cephfs/test_config_commands.py b/src/ceph/qa/tasks/cephfs/test_config_commands.py deleted file mode 100644 index ce0619f..0000000 --- a/src/ceph/qa/tasks/cephfs/test_config_commands.py +++ /dev/null @@ -1,63 +0,0 @@ - -from unittest import case -from tasks.cephfs.cephfs_test_case import CephFSTestCase -from tasks.cephfs.fuse_mount import FuseMount - - -class TestConfigCommands(CephFSTestCase): - """ - Test that daemons and clients respond to the otherwise rarely-used - runtime config modification operations. - """ - - CLIENTS_REQUIRED = 1 - MDSS_REQUIRED = 1 - - def test_client_config(self): - """ - That I can successfully issue asok "config set" commands - - :return: - """ - - if not isinstance(self.mount_a, FuseMount): - raise case.SkipTest("Test only applies to FUSE clients") - - test_key = "client_cache_size" - test_val = "123" - self.mount_a.admin_socket(['config', 'set', test_key, test_val]) - out = self.mount_a.admin_socket(['config', 'get', test_key]) - self.assertEqual(out[test_key], test_val) - - self.mount_a.write_n_mb("file.bin", 1); - - # Implicitly asserting that things don't have lockdep error in shutdown - self.mount_a.umount_wait(require_clean=True) - self.fs.mds_stop() - - def test_mds_config_asok(self): - test_key = "mds_max_purge_ops" - test_val = "123" - self.fs.mds_asok(['config', 'set', test_key, test_val]) - out = self.fs.mds_asok(['config', 'get', test_key]) - self.assertEqual(out[test_key], test_val) - - # Implicitly asserting that things don't have lockdep error in shutdown - self.mount_a.umount_wait(require_clean=True) - self.fs.mds_stop() - - def test_mds_config_tell(self): - test_key = "mds_max_purge_ops" - test_val = "123" - - mds_id = self.fs.get_lone_mds_id() - self.fs.mon_manager.raw_cluster_cmd("tell", "mds.{0}".format(mds_id), "injectargs", - "--{0}={1}".format(test_key, test_val)) - - # Read it back with asok because there is no `tell` equivalent - out = self.fs.mds_asok(['config', 'get', test_key]) - self.assertEqual(out[test_key], test_val) - - # Implicitly asserting that things don't have lockdep error in shutdown - self.mount_a.umount_wait(require_clean=True) - self.fs.mds_stop() diff --git a/src/ceph/qa/tasks/cephfs/test_damage.py b/src/ceph/qa/tasks/cephfs/test_damage.py deleted file mode 100644 index 380b49c..0000000 --- a/src/ceph/qa/tasks/cephfs/test_damage.py +++ /dev/null @@ -1,548 +0,0 @@ -import json -import logging -import errno -import re -from 
teuthology.contextutil import MaxWhileTries -from teuthology.exceptions import CommandFailedError -from teuthology.orchestra.run import wait -from tasks.cephfs.fuse_mount import FuseMount -from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology - -DAMAGED_ON_START = "damaged_on_start" -DAMAGED_ON_LS = "damaged_on_ls" -CRASHED = "server crashed" -NO_DAMAGE = "no damage" -FAILED_CLIENT = "client failed" -FAILED_SERVER = "server failed" - -# An EIO in response to a stat from the client -EIO_ON_LS = "eio" - -# An EIO, but nothing in damage table (not ever what we expect) -EIO_NO_DAMAGE = "eio without damage entry" - - -log = logging.getLogger(__name__) - - -class TestDamage(CephFSTestCase): - def _simple_workload_write(self): - self.mount_a.run_shell(["mkdir", "subdir"]) - self.mount_a.write_n_mb("subdir/sixmegs", 6) - return self.mount_a.stat("subdir/sixmegs") - - def is_marked_damaged(self, rank): - mds_map = self.fs.get_mds_map() - return rank in mds_map['damaged'] - - @for_teuthology #459s - def test_object_deletion(self): - """ - That the MDS has a clean 'damaged' response to loss of any single metadata object - """ - - self._simple_workload_write() - - # Hmm, actually it would be nice to permute whether the metadata pool - # state contains sessions or not, but for the moment close this session - # to avoid waiting through reconnect on every MDS start. - self.mount_a.umount_wait() - for mds_name in self.fs.get_active_names(): - self.fs.mds_asok(["flush", "journal"], mds_name) - - self.fs.mds_stop() - self.fs.mds_fail() - - self.fs.rados(['export', '/tmp/metadata.bin']) - - def is_ignored(obj_id, dentry=None): - """ - A filter to avoid redundantly mutating many similar objects (e.g. - stray dirfrags) or similar dentries (e.g. stray dir dentries) - """ - if re.match("60.\.00000000", obj_id) and obj_id != "600.00000000": - return True - - if dentry and obj_id == "100.00000000": - if re.match("stray.+_head", dentry) and dentry != "stray0_head": - return True - - return False - - def get_path(obj_id, dentry=None): - """ - What filesystem path does this object or dentry correspond to? i.e. - what should I poke to see EIO after damaging it? 
- """ - - if obj_id == "1.00000000" and dentry == "subdir_head": - return "./subdir" - elif obj_id == "10000000000.00000000" and dentry == "sixmegs_head": - return "./subdir/sixmegs" - - # None means ls will do an "ls -R" in hope of seeing some errors - return None - - objects = self.fs.rados(["ls"]).split("\n") - objects = [o for o in objects if not is_ignored(o)] - - # Find all objects with an OMAP header - omap_header_objs = [] - for o in objects: - header = self.fs.rados(["getomapheader", o]) - # The rados CLI wraps the header output in a hex-printed style - header_bytes = int(re.match("header \((.+) bytes\)", header).group(1)) - if header_bytes > 0: - omap_header_objs.append(o) - - # Find all OMAP key/vals - omap_keys = [] - for o in objects: - keys_str = self.fs.rados(["listomapkeys", o]) - if keys_str: - for key in keys_str.split("\n"): - if not is_ignored(o, key): - omap_keys.append((o, key)) - - # Find objects that have data in their bodies - data_objects = [] - for obj_id in objects: - stat_out = self.fs.rados(["stat", obj_id]) - size = int(re.match(".+, size (.+)$", stat_out).group(1)) - if size > 0: - data_objects.append(obj_id) - - # Define the various forms of damage we will inflict - class MetadataMutation(object): - def __init__(self, obj_id_, desc_, mutate_fn_, expectation_, ls_path=None): - self.obj_id = obj_id_ - self.desc = desc_ - self.mutate_fn = mutate_fn_ - self.expectation = expectation_ - if ls_path is None: - self.ls_path = "." - else: - self.ls_path = ls_path - - def __eq__(self, other): - return self.desc == other.desc - - def __hash__(self): - return hash(self.desc) - - junk = "deadbeef" * 10 - mutations = [] - - # Removals - for obj_id in objects: - if obj_id in [ - # JournalPointers are auto-replaced if missing (same path as upgrade) - "400.00000000", - # Missing dirfrags for non-system dirs result in empty directory - "10000000000.00000000", - # PurgeQueue is auto-created if not found on startup - "500.00000000" - ]: - expectation = NO_DAMAGE - else: - expectation = DAMAGED_ON_START - - log.info("Expectation on rm '{0}' will be '{1}'".format( - obj_id, expectation - )) - - mutations.append(MetadataMutation( - obj_id, - "Delete {0}".format(obj_id), - lambda o=obj_id: self.fs.rados(["rm", o]), - expectation - )) - - # Blatant corruptions - mutations.extend([ - MetadataMutation( - o, - "Corrupt {0}".format(o), - lambda o=o: self.fs.rados(["put", o, "-"], stdin_data=junk), - DAMAGED_ON_START - ) for o in data_objects - ]) - - # Truncations - for obj_id in data_objects: - if obj_id == "500.00000000": - # The PurgeQueue is allowed to be empty: Journaler interprets - # an empty header object as an empty journal. 
- expectation = NO_DAMAGE - else: - expectation = DAMAGED_ON_START - - mutations.append( - MetadataMutation( - o, - "Truncate {0}".format(o), - lambda o=o: self.fs.rados(["truncate", o, "0"]), - DAMAGED_ON_START - )) - - # OMAP value corruptions - for o, k in omap_keys: - if o.startswith("100."): - # Anything in rank 0's 'mydir' - expectation = DAMAGED_ON_START - else: - expectation = EIO_ON_LS - - mutations.append( - MetadataMutation( - o, - "Corrupt omap key {0}:{1}".format(o, k), - lambda o=o,k=k: self.fs.rados(["setomapval", o, k, junk]), - expectation, - get_path(o, k) - ) - ) - - # OMAP header corruptions - for obj_id in omap_header_objs: - if re.match("60.\.00000000", obj_id) \ - or obj_id in ["1.00000000", "100.00000000", "mds0_sessionmap"]: - expectation = DAMAGED_ON_START - else: - expectation = NO_DAMAGE - - log.info("Expectation on corrupt header '{0}' will be '{1}'".format( - obj_id, expectation - )) - - mutations.append( - MetadataMutation( - obj_id, - "Corrupt omap header on {0}".format(obj_id), - lambda o=obj_id: self.fs.rados(["setomapheader", o, junk]), - expectation - ) - ) - - results = {} - - for mutation in mutations: - log.info("Applying mutation '{0}'".format(mutation.desc)) - - # Reset MDS state - self.mount_a.umount_wait(force=True) - self.fs.mds_stop() - self.fs.mds_fail() - self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0') - - # Reset RADOS pool state - self.fs.rados(['import', '/tmp/metadata.bin']) - - # Inject the mutation - mutation.mutate_fn() - - # Try starting the MDS - self.fs.mds_restart() - - # How long we'll wait between starting a daemon and expecting - # it to make it through startup, and potentially declare itself - # damaged to the mon cluster. - startup_timeout = 60 - - if mutation.expectation not in (EIO_ON_LS, DAMAGED_ON_LS, NO_DAMAGE): - if mutation.expectation == DAMAGED_ON_START: - # The MDS may pass through active before making it to damaged - try: - self.wait_until_true(lambda: self.is_marked_damaged(0), startup_timeout) - except RuntimeError: - pass - - # Wait for MDS to either come up or go into damaged state - try: - self.wait_until_true(lambda: self.is_marked_damaged(0) or self.fs.are_daemons_healthy(), startup_timeout) - except RuntimeError: - crashed = False - # Didn't make it to healthy or damaged, did it crash? - for daemon_id, daemon in self.fs.mds_daemons.items(): - if daemon.proc and daemon.proc.finished: - crashed = True - log.error("Daemon {0} crashed!".format(daemon_id)) - daemon.proc = None # So that subsequent stop() doesn't raise error - if not crashed: - # Didn't go health, didn't go damaged, didn't crash, so what? 
- raise - else: - log.info("Result: Mutation '{0}' led to crash".format(mutation.desc)) - results[mutation] = CRASHED - continue - if self.is_marked_damaged(0): - log.info("Result: Mutation '{0}' led to DAMAGED state".format(mutation.desc)) - results[mutation] = DAMAGED_ON_START - continue - else: - log.info("Mutation '{0}' did not prevent MDS startup, attempting ls...".format(mutation.desc)) - else: - try: - self.wait_until_true(self.fs.are_daemons_healthy, 60) - except RuntimeError: - log.info("Result: Mutation '{0}' should have left us healthy, actually not.".format(mutation.desc)) - if self.is_marked_damaged(0): - results[mutation] = DAMAGED_ON_START - else: - results[mutation] = FAILED_SERVER - continue - log.info("Daemons came up after mutation '{0}', proceeding to ls".format(mutation.desc)) - - # MDS is up, should go damaged on ls or client mount - self.mount_a.mount() - self.mount_a.wait_until_mounted() - if mutation.ls_path == ".": - proc = self.mount_a.run_shell(["ls", "-R", mutation.ls_path], wait=False) - else: - proc = self.mount_a.stat(mutation.ls_path, wait=False) - - if mutation.expectation == DAMAGED_ON_LS: - try: - self.wait_until_true(lambda: self.is_marked_damaged(0), 60) - log.info("Result: Mutation '{0}' led to DAMAGED state after ls".format(mutation.desc)) - results[mutation] = DAMAGED_ON_LS - except RuntimeError: - if self.fs.are_daemons_healthy(): - log.error("Result: Failed to go damaged on mutation '{0}', actually went active".format( - mutation.desc)) - results[mutation] = NO_DAMAGE - else: - log.error("Result: Failed to go damaged on mutation '{0}'".format(mutation.desc)) - results[mutation] = FAILED_SERVER - - else: - try: - wait([proc], 20) - log.info("Result: Mutation '{0}' did not caused DAMAGED state".format(mutation.desc)) - results[mutation] = NO_DAMAGE - except MaxWhileTries: - log.info("Result: Failed to complete client IO on mutation '{0}'".format(mutation.desc)) - results[mutation] = FAILED_CLIENT - except CommandFailedError as e: - if e.exitstatus == errno.EIO: - log.info("Result: EIO on client") - results[mutation] = EIO_ON_LS - else: - log.info("Result: unexpected error {0} on client".format(e)) - results[mutation] = FAILED_CLIENT - - if mutation.expectation == EIO_ON_LS: - # EIOs mean something handled by DamageTable: assert that it has - # been populated - damage = json.loads( - self.fs.mon_manager.raw_cluster_cmd( - 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), "damage", "ls", '--format=json-pretty')) - if len(damage) == 0: - results[mutation] = EIO_NO_DAMAGE - - failures = [(mutation, result) for (mutation, result) in results.items() if mutation.expectation != result] - if failures: - log.error("{0} mutations had unexpected outcomes:".format(len(failures))) - for mutation, result in failures: - log.error(" Expected '{0}' actually '{1}' from '{2}'".format( - mutation.expectation, result, mutation.desc - )) - raise RuntimeError("{0} mutations had unexpected outcomes".format(len(failures))) - else: - log.info("All {0} mutations had expected outcomes".format(len(mutations))) - - def test_damaged_dentry(self): - # Damage to dentrys is interesting because it leaves the - # directory's `complete` flag in a subtle state where - # we have marked the dir complete in order that folks - # can access it, but in actual fact there is a dentry - # missing - self.mount_a.run_shell(["mkdir", "subdir/"]) - - self.mount_a.run_shell(["touch", "subdir/file_undamaged"]) - self.mount_a.run_shell(["touch", "subdir/file_to_be_damaged"]) - - subdir_ino = 
self.mount_a.path_to_ino("subdir") - - self.mount_a.umount_wait() - for mds_name in self.fs.get_active_names(): - self.fs.mds_asok(["flush", "journal"], mds_name) - - self.fs.mds_stop() - self.fs.mds_fail() - - # Corrupt a dentry - junk = "deadbeef" * 10 - dirfrag_obj = "{0:x}.00000000".format(subdir_ino) - self.fs.rados(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk]) - - # Start up and try to list it - self.fs.mds_restart() - self.fs.wait_for_daemons() - - self.mount_a.mount() - self.mount_a.wait_until_mounted() - dentries = self.mount_a.ls("subdir/") - - # The damaged guy should have disappeared - self.assertEqual(dentries, ["file_undamaged"]) - - # I should get ENOENT if I try and read it normally, because - # the dir is considered complete - try: - self.mount_a.stat("subdir/file_to_be_damaged", wait=True) - except CommandFailedError as e: - self.assertEqual(e.exitstatus, errno.ENOENT) - else: - raise AssertionError("Expected ENOENT") - - # The fact that there is damaged should have bee recorded - damage = json.loads( - self.fs.mon_manager.raw_cluster_cmd( - 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), - "damage", "ls", '--format=json-pretty')) - self.assertEqual(len(damage), 1) - damage_id = damage[0]['id'] - - # If I try to create a dentry with the same name as the damaged guy - # then that should be forbidden - try: - self.mount_a.touch("subdir/file_to_be_damaged") - except CommandFailedError as e: - self.assertEqual(e.exitstatus, errno.EIO) - else: - raise AssertionError("Expected EIO") - - # Attempting that touch will clear the client's complete flag, now - # when I stat it I'll get EIO instead of ENOENT - try: - self.mount_a.stat("subdir/file_to_be_damaged", wait=True) - except CommandFailedError as e: - if isinstance(self.mount_a, FuseMount): - self.assertEqual(e.exitstatus, errno.EIO) - else: - # Kernel client handles this case differently - self.assertEqual(e.exitstatus, errno.ENOENT) - else: - raise AssertionError("Expected EIO") - - nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files") - self.assertEqual(nfiles, "2") - - self.mount_a.umount_wait() - - # Now repair the stats - scrub_json = self.fs.mds_asok(["scrub_path", "/subdir", "repair"]) - log.info(json.dumps(scrub_json, indent=2)) - - self.assertEqual(scrub_json["passed_validation"], False) - self.assertEqual(scrub_json["raw_stats"]["checked"], True) - self.assertEqual(scrub_json["raw_stats"]["passed"], False) - - # Check that the file count is now correct - self.mount_a.mount() - self.mount_a.wait_until_mounted() - nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files") - self.assertEqual(nfiles, "1") - - # Clean up the omap object - self.fs.rados(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk]) - - # Clean up the damagetable entry - self.fs.mon_manager.raw_cluster_cmd( - 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), - "damage", "rm", "{did}".format(did=damage_id)) - - # Now I should be able to create a file with the same name as the - # damaged guy if I want. 
- self.mount_a.touch("subdir/file_to_be_damaged") - - def test_open_ino_errors(self): - """ - That errors encountered during opening inos are properly propagated - """ - - self.mount_a.run_shell(["mkdir", "dir1"]) - self.mount_a.run_shell(["touch", "dir1/file1"]) - self.mount_a.run_shell(["mkdir", "dir2"]) - self.mount_a.run_shell(["touch", "dir2/file2"]) - self.mount_a.run_shell(["mkdir", "testdir"]) - self.mount_a.run_shell(["ln", "dir1/file1", "testdir/hardlink1"]) - self.mount_a.run_shell(["ln", "dir2/file2", "testdir/hardlink2"]) - - file1_ino = self.mount_a.path_to_ino("dir1/file1") - file2_ino = self.mount_a.path_to_ino("dir2/file2") - dir2_ino = self.mount_a.path_to_ino("dir2") - - # Ensure everything is written to backing store - self.mount_a.umount_wait() - self.fs.mds_asok(["flush", "journal"]) - - # Drop everything from the MDS cache - self.mds_cluster.mds_stop() - self.fs.journal_tool(['journal', 'reset']) - self.mds_cluster.mds_fail_restart() - self.fs.wait_for_daemons() - - self.mount_a.mount() - - # Case 1: un-decodeable backtrace - - # Validate that the backtrace is present and decodable - self.fs.read_backtrace(file1_ino) - # Go corrupt the backtrace of alpha/target (used for resolving - # bravo/hardlink). - self.fs._write_data_xattr(file1_ino, "parent", "rhubarb") - - # Check that touching the hardlink gives EIO - ran = self.mount_a.run_shell(["stat", "testdir/hardlink1"], wait=False) - try: - ran.wait() - except CommandFailedError: - self.assertTrue("Input/output error" in ran.stderr.getvalue()) - - # Check that an entry is created in the damage table - damage = json.loads( - self.fs.mon_manager.raw_cluster_cmd( - 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), - "damage", "ls", '--format=json-pretty')) - self.assertEqual(len(damage), 1) - self.assertEqual(damage[0]['damage_type'], "backtrace") - self.assertEqual(damage[0]['ino'], file1_ino) - - self.fs.mon_manager.raw_cluster_cmd( - 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), - "damage", "rm", str(damage[0]['id'])) - - - # Case 2: missing dirfrag for the target inode - - self.fs.rados(["rm", "{0:x}.00000000".format(dir2_ino)]) - - # Check that touching the hardlink gives EIO - ran = self.mount_a.run_shell(["stat", "testdir/hardlink2"], wait=False) - try: - ran.wait() - except CommandFailedError: - self.assertTrue("Input/output error" in ran.stderr.getvalue()) - - # Check that an entry is created in the damage table - damage = json.loads( - self.fs.mon_manager.raw_cluster_cmd( - 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), - "damage", "ls", '--format=json-pretty')) - self.assertEqual(len(damage), 2) - if damage[0]['damage_type'] == "backtrace" : - self.assertEqual(damage[0]['ino'], file2_ino) - self.assertEqual(damage[1]['damage_type'], "dir_frag") - self.assertEqual(damage[1]['ino'], dir2_ino) - else: - self.assertEqual(damage[0]['damage_type'], "dir_frag") - self.assertEqual(damage[0]['ino'], dir2_ino) - self.assertEqual(damage[1]['damage_type'], "backtrace") - self.assertEqual(damage[1]['ino'], file2_ino) - - for entry in damage: - self.fs.mon_manager.raw_cluster_cmd( - 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), - "damage", "rm", str(entry['id'])) diff --git a/src/ceph/qa/tasks/cephfs/test_data_scan.py b/src/ceph/qa/tasks/cephfs/test_data_scan.py deleted file mode 100644 index a2d3157..0000000 --- a/src/ceph/qa/tasks/cephfs/test_data_scan.py +++ /dev/null @@ -1,600 +0,0 @@ - -""" -Test our tools for recovering metadata from the data pool -""" -import json - -import 
logging -import os -from textwrap import dedent -import traceback -from collections import namedtuple, defaultdict - -from teuthology.orchestra.run import CommandFailedError -from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology - -log = logging.getLogger(__name__) - - -ValidationError = namedtuple("ValidationError", ["exception", "backtrace"]) - - -class Workload(object): - def __init__(self, filesystem, mount): - self._mount = mount - self._filesystem = filesystem - self._initial_state = None - - # Accumulate backtraces for every failed validation, and return them. Backtraces - # are rather verbose, but we only see them when something breaks, and they - # let us see which check failed without having to decorate each check with - # a string - self._errors = [] - - def assert_equal(self, a, b): - try: - if a != b: - raise AssertionError("{0} != {1}".format(a, b)) - except AssertionError as e: - self._errors.append( - ValidationError(e, traceback.format_exc(3)) - ) - - def write(self): - """ - Write the workload files to the mount - """ - raise NotImplementedError() - - def validate(self): - """ - Read from the mount and validate that the workload files are present (i.e. have - survived or been reconstructed from the test scenario) - """ - raise NotImplementedError() - - def damage(self): - """ - Damage the filesystem pools in ways that will be interesting to recover from. By - default just wipe everything in the metadata pool - """ - # Delete every object in the metadata pool - objects = self._filesystem.rados(["ls"]).split("\n") - for o in objects: - self._filesystem.rados(["rm", o]) - - def flush(self): - """ - Called after client unmount, after write: flush whatever you want - """ - self._filesystem.mds_asok(["flush", "journal"]) - - -class SimpleWorkload(Workload): - """ - Single file, single directory, check that it gets recovered and so does its size - """ - def write(self): - self._mount.run_shell(["mkdir", "subdir"]) - self._mount.write_n_mb("subdir/sixmegs", 6) - self._initial_state = self._mount.stat("subdir/sixmegs") - - def validate(self): - self._mount.run_shell(["ls", "subdir"]) - st = self._mount.stat("subdir/sixmegs") - self.assert_equal(st['st_size'], self._initial_state['st_size']) - return self._errors - - -class MovedFile(Workload): - def write(self): - # Create a file whose backtrace disagrees with his eventual position - # in the metadata. We will see that he gets reconstructed in his - # original position according to his backtrace. 
- self._mount.run_shell(["mkdir", "subdir_alpha"]) - self._mount.run_shell(["mkdir", "subdir_bravo"]) - self._mount.write_n_mb("subdir_alpha/sixmegs", 6) - self._filesystem.mds_asok(["flush", "journal"]) - self._mount.run_shell(["mv", "subdir_alpha/sixmegs", "subdir_bravo/sixmegs"]) - self._initial_state = self._mount.stat("subdir_bravo/sixmegs") - - def flush(self): - pass - - def validate(self): - self.assert_equal(self._mount.ls(), ["subdir_alpha"]) - st = self._mount.stat("subdir_alpha/sixmegs") - self.assert_equal(st['st_size'], self._initial_state['st_size']) - return self._errors - - -class BacktracelessFile(Workload): - def write(self): - self._mount.run_shell(["mkdir", "subdir"]) - self._mount.write_n_mb("subdir/sixmegs", 6) - self._initial_state = self._mount.stat("subdir/sixmegs") - - def flush(self): - # Never flush metadata, so backtrace won't be written - pass - - def validate(self): - ino_name = "%x" % self._initial_state["st_ino"] - - # The inode should be linked into lost+found because we had no path for it - self.assert_equal(self._mount.ls(), ["lost+found"]) - self.assert_equal(self._mount.ls("lost+found"), [ino_name]) - st = self._mount.stat("lost+found/{ino_name}".format(ino_name=ino_name)) - - # We might not have got the name or path, but we should still get the size - self.assert_equal(st['st_size'], self._initial_state['st_size']) - - return self._errors - - -class StripedStashedLayout(Workload): - def __init__(self, fs, m): - super(StripedStashedLayout, self).__init__(fs, m) - - # Nice small stripes so we can quickly do our writes+validates - self.sc = 4 - self.ss = 65536 - self.os = 262144 - - self.interesting_sizes = [ - # Exactly stripe_count objects will exist - self.os * self.sc, - # Fewer than stripe_count objects will exist - self.os * self.sc / 2, - self.os * (self.sc - 1) + self.os / 2, - self.os * (self.sc - 1) + self.os / 2 - 1, - self.os * (self.sc + 1) + self.os / 2, - self.os * (self.sc + 1) + self.os / 2 + 1, - # More than stripe_count objects will exist - self.os * self.sc + self.os * self.sc / 2 - ] - - def write(self): - # Create a dir with a striped layout set on it - self._mount.run_shell(["mkdir", "stripey"]) - - self._mount.setfattr("./stripey", "ceph.dir.layout", - "stripe_unit={ss} stripe_count={sc} object_size={os} pool={pool}".format( - ss=self.ss, os=self.os, sc=self.sc, - pool=self._filesystem.get_data_pool_name() - )) - - # Write files, then flush metadata so that its layout gets written into an xattr - for i, n_bytes in enumerate(self.interesting_sizes): - self._mount.write_test_pattern("stripey/flushed_file_{0}".format(i), n_bytes) - # This is really just validating the validator - self._mount.validate_test_pattern("stripey/flushed_file_{0}".format(i), n_bytes) - self._filesystem.mds_asok(["flush", "journal"]) - - # Write another file in the same way, but this time don't flush the metadata, - # so that it won't have the layout xattr - self._mount.write_test_pattern("stripey/unflushed_file", 1024 * 512) - self._mount.validate_test_pattern("stripey/unflushed_file", 1024 * 512) - - self._initial_state = { - "unflushed_ino": self._mount.path_to_ino("stripey/unflushed_file") - } - - def flush(self): - # Pass because we already selectively flushed during write - pass - - def validate(self): - # The first files should have been recovered into its original location - # with the correct layout: read back correct data - for i, n_bytes in enumerate(self.interesting_sizes): - try: - 
self._mount.validate_test_pattern("stripey/flushed_file_{0}".format(i), n_bytes) - except CommandFailedError as e: - self._errors.append( - ValidationError("File {0} (size {1}): {2}".format(i, n_bytes, e), traceback.format_exc(3)) - ) - - # The unflushed file should have been recovered into lost+found without - # the correct layout: read back junk - ino_name = "%x" % self._initial_state["unflushed_ino"] - self.assert_equal(self._mount.ls("lost+found"), [ino_name]) - try: - self._mount.validate_test_pattern(os.path.join("lost+found", ino_name), 1024 * 512) - except CommandFailedError: - pass - else: - self._errors.append( - ValidationError("Unexpectedly valid data in unflushed striped file", "") - ) - - return self._errors - - -class ManyFilesWorkload(Workload): - def __init__(self, filesystem, mount, file_count): - super(ManyFilesWorkload, self).__init__(filesystem, mount) - self.file_count = file_count - - def write(self): - self._mount.run_shell(["mkdir", "subdir"]) - for n in range(0, self.file_count): - self._mount.write_test_pattern("subdir/{0}".format(n), 6 * 1024 * 1024) - - def validate(self): - for n in range(0, self.file_count): - try: - self._mount.validate_test_pattern("subdir/{0}".format(n), 6 * 1024 * 1024) - except CommandFailedError as e: - self._errors.append( - ValidationError("File {0}: {1}".format(n, e), traceback.format_exc(3)) - ) - - return self._errors - - -class MovedDir(Workload): - def write(self): - # Create a nested dir that we will then move. Two files with two different - # backtraces referring to the moved dir, claiming two different locations for - # it. We will see that only one backtrace wins and the dir ends up with - # single linkage. - self._mount.run_shell(["mkdir", "-p", "grandmother/parent"]) - self._mount.write_n_mb("grandmother/parent/orig_pos_file", 1) - self._filesystem.mds_asok(["flush", "journal"]) - self._mount.run_shell(["mkdir", "grandfather"]) - self._mount.run_shell(["mv", "grandmother/parent", "grandfather"]) - self._mount.write_n_mb("grandfather/parent/new_pos_file", 2) - self._filesystem.mds_asok(["flush", "journal"]) - - self._initial_state = ( - self._mount.stat("grandfather/parent/orig_pos_file"), - self._mount.stat("grandfather/parent/new_pos_file") - ) - - def validate(self): - root_files = self._mount.ls() - self.assert_equal(len(root_files), 1) - self.assert_equal(root_files[0] in ["grandfather", "grandmother"], True) - winner = root_files[0] - st_opf = self._mount.stat("{0}/parent/orig_pos_file".format(winner)) - st_npf = self._mount.stat("{0}/parent/new_pos_file".format(winner)) - - self.assert_equal(st_opf['st_size'], self._initial_state[0]['st_size']) - self.assert_equal(st_npf['st_size'], self._initial_state[1]['st_size']) - - -class MissingZerothObject(Workload): - def write(self): - self._mount.run_shell(["mkdir", "subdir"]) - self._mount.write_n_mb("subdir/sixmegs", 6) - self._initial_state = self._mount.stat("subdir/sixmegs") - - def damage(self): - super(MissingZerothObject, self).damage() - zeroth_id = "{0:x}.00000000".format(self._initial_state['st_ino']) - self._filesystem.rados(["rm", zeroth_id], pool=self._filesystem.get_data_pool_name()) - - def validate(self): - st = self._mount.stat("lost+found/{0:x}".format(self._initial_state['st_ino'])) - self.assert_equal(st['st_size'], self._initial_state['st_size']) - - -class NonDefaultLayout(Workload): - """ - Check that the reconstruction copes with files that have a different - object size in their layout - """ - def write(self): - self._mount.run_shell(["touch", 
"datafile"]) - self._mount.setfattr("./datafile", "ceph.file.layout.object_size", "8388608") - self._mount.run_shell(["dd", "if=/dev/urandom", "of=./datafile", "bs=1M", "count=32"]) - self._initial_state = self._mount.stat("datafile") - - def validate(self): - # Check we got the layout reconstructed properly - object_size = int(self._mount.getfattr( - "./datafile", "ceph.file.layout.object_size")) - self.assert_equal(object_size, 8388608) - - # Check we got the file size reconstructed properly - st = self._mount.stat("datafile") - self.assert_equal(st['st_size'], self._initial_state['st_size']) - - -class TestDataScan(CephFSTestCase): - MDSS_REQUIRED = 2 - - def is_marked_damaged(self, rank): - mds_map = self.fs.get_mds_map() - return rank in mds_map['damaged'] - - def _rebuild_metadata(self, workload, workers=1): - """ - That when all objects in metadata pool are removed, we can rebuild a metadata pool - based on the contents of a data pool, and a client can see and read our files. - """ - - # First, inject some files - - workload.write() - - # Unmount the client and flush the journal: the tool should also cope with - # situations where there is dirty metadata, but we'll test that separately - self.mount_a.umount_wait() - workload.flush() - - # Stop the MDS - self.fs.mds_stop() - self.fs.mds_fail() - - # After recovery, we need the MDS to not be strict about stats (in production these options - # are off by default, but in QA we need to explicitly disable them) - self.fs.set_ceph_conf('mds', 'mds verify scatter', False) - self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False) - - # Apply any data damage the workload wants - workload.damage() - - # Reset the MDS map in case multiple ranks were in play: recovery procedure - # only understands how to rebuild metadata under rank 0 - self.fs.mon_manager.raw_cluster_cmd('fs', 'reset', self.fs.name, - '--yes-i-really-mean-it') - - self.fs.mds_restart() - - def get_state(mds_id): - info = self.mds_cluster.get_mds_info(mds_id) - return info['state'] if info is not None else None - - self.wait_until_true(lambda: self.is_marked_damaged(0), 60) - for mds_id in self.fs.mds_ids: - self.wait_until_equal( - lambda: get_state(mds_id), - "up:standby", - timeout=60) - - self.fs.table_tool([self.fs.name + ":0", "reset", "session"]) - self.fs.table_tool([self.fs.name + ":0", "reset", "snap"]) - self.fs.table_tool([self.fs.name + ":0", "reset", "inode"]) - - # Run the recovery procedure - if False: - with self.assertRaises(CommandFailedError): - # Normal reset should fail when no objects are present, we'll use --force instead - self.fs.journal_tool(["journal", "reset"]) - - self.fs.journal_tool(["journal", "reset", "--force"]) - self.fs.data_scan(["init"]) - self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()], worker_count=workers) - self.fs.data_scan(["scan_inodes", self.fs.get_data_pool_name()], worker_count=workers) - - # Mark the MDS repaired - self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0') - - # Start the MDS - self.fs.mds_restart() - self.fs.wait_for_daemons() - log.info(str(self.mds_cluster.status())) - - # Mount a client - self.mount_a.mount() - self.mount_a.wait_until_mounted() - - # See that the files are present and correct - errors = workload.validate() - if errors: - log.error("Validation errors found: {0}".format(len(errors))) - for e in errors: - log.error(e.exception) - log.error(e.backtrace) - raise AssertionError("Validation failed, first error: {0}\n{1}".format( - errors[0].exception, errors[0].backtrace - )) 
- - def test_rebuild_simple(self): - self._rebuild_metadata(SimpleWorkload(self.fs, self.mount_a)) - - def test_rebuild_moved_file(self): - self._rebuild_metadata(MovedFile(self.fs, self.mount_a)) - - def test_rebuild_backtraceless(self): - self._rebuild_metadata(BacktracelessFile(self.fs, self.mount_a)) - - def test_rebuild_moved_dir(self): - self._rebuild_metadata(MovedDir(self.fs, self.mount_a)) - - def test_rebuild_missing_zeroth(self): - self._rebuild_metadata(MissingZerothObject(self.fs, self.mount_a)) - - def test_rebuild_nondefault_layout(self): - self._rebuild_metadata(NonDefaultLayout(self.fs, self.mount_a)) - - def test_stashed_layout(self): - self._rebuild_metadata(StripedStashedLayout(self.fs, self.mount_a)) - - def _dirfrag_keys(self, object_id): - keys_str = self.fs.rados(["listomapkeys", object_id]) - if keys_str: - return keys_str.split("\n") - else: - return [] - - def test_fragmented_injection(self): - """ - That when injecting a dentry into a fragmented directory, we put it in the right fragment. - """ - - self.fs.set_allow_dirfrags(True) - - file_count = 100 - file_names = ["%s" % n for n in range(0, file_count)] - - # Create a directory of `file_count` files, each named after its - # decimal number and containing the string of its decimal number - self.mount_a.run_python(dedent(""" - import os - path = os.path.join("{path}", "subdir") - os.mkdir(path) - for n in range(0, {file_count}): - open(os.path.join(path, "%s" % n), 'w').write("%s" % n) - """.format( - path=self.mount_a.mountpoint, - file_count=file_count - ))) - - dir_ino = self.mount_a.path_to_ino("subdir") - - # Only one MDS should be active! - self.assertEqual(len(self.fs.get_active_names()), 1) - - # Ensure that one directory is fragmented - mds_id = self.fs.get_active_names()[0] - self.fs.mds_asok(["dirfrag", "split", "/subdir", "0/0", "1"], mds_id) - - # Flush journal and stop MDS - self.mount_a.umount_wait() - self.fs.mds_asok(["flush", "journal"], mds_id) - self.fs.mds_stop() - self.fs.mds_fail() - - # Pick a dentry and wipe out its key - # Because I did a 1 bit split, I know one frag will be named <inode>.01000000 - frag_obj_id = "{0:x}.01000000".format(dir_ino) - keys = self._dirfrag_keys(frag_obj_id) - victim_key = keys[7] # arbitrary choice - log.info("victim_key={0}".format(victim_key)) - victim_dentry = victim_key.split("_head")[0] - self.fs.rados(["rmomapkey", frag_obj_id, victim_key]) - - # Start filesystem back up, observe that the file appears to be gone in an `ls` - self.fs.mds_restart() - self.fs.wait_for_daemons() - self.mount_a.mount() - self.mount_a.wait_until_mounted() - files = self.mount_a.run_shell(["ls", "subdir/"]).stdout.getvalue().strip().split("\n") - self.assertListEqual(sorted(files), sorted(list(set(file_names) - set([victim_dentry])))) - - # Stop the filesystem - self.mount_a.umount_wait() - self.fs.mds_stop() - self.fs.mds_fail() - - # Run data-scan, observe that it inserts our dentry back into the correct fragment - # by checking the omap now has the dentry's key again - self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()]) - self.fs.data_scan(["scan_inodes", self.fs.get_data_pool_name()]) - self.assertIn(victim_key, self._dirfrag_keys(frag_obj_id)) - - # Start the filesystem and check that the dentry we deleted is now once again visible - # and points to the correct file data. 
- self.fs.mds_restart() - self.fs.wait_for_daemons() - self.mount_a.mount() - self.mount_a.wait_until_mounted() - out = self.mount_a.run_shell(["cat", "subdir/{0}".format(victim_dentry)]).stdout.getvalue().strip() - self.assertEqual(out, victim_dentry) - - # Finally, close the loop by checking our injected dentry survives a merge - mds_id = self.fs.get_active_names()[0] - self.mount_a.ls("subdir") # Do an ls to ensure both frags are in cache so the merge will work - self.fs.mds_asok(["dirfrag", "merge", "/subdir", "0/0"], mds_id) - self.fs.mds_asok(["flush", "journal"], mds_id) - frag_obj_id = "{0:x}.00000000".format(dir_ino) - keys = self._dirfrag_keys(frag_obj_id) - self.assertListEqual(sorted(keys), sorted(["%s_head" % f for f in file_names])) - - @for_teuthology - def test_parallel_execution(self): - self._rebuild_metadata(ManyFilesWorkload(self.fs, self.mount_a, 25), workers=7) - - def test_pg_files(self): - """ - That the pg files command tells us which files are associated with - a particular PG - """ - file_count = 20 - self.mount_a.run_shell(["mkdir", "mydir"]) - self.mount_a.create_n_files("mydir/myfile", file_count) - - # Some files elsewhere in the system that we will ignore - # to check that the tool is filtering properly - self.mount_a.run_shell(["mkdir", "otherdir"]) - self.mount_a.create_n_files("otherdir/otherfile", file_count) - - pgs_to_files = defaultdict(list) - # Rough (slow) reimplementation of the logic - for i in range(0, file_count): - file_path = "mydir/myfile_{0}".format(i) - ino = self.mount_a.path_to_ino(file_path) - obj = "{0:x}.{1:08x}".format(ino, 0) - pgid = json.loads(self.fs.mon_manager.raw_cluster_cmd( - "osd", "map", self.fs.get_data_pool_name(), obj, - "--format=json-pretty" - ))['pgid'] - pgs_to_files[pgid].append(file_path) - log.info("{0}: {1}".format(file_path, pgid)) - - pg_count = self.fs.get_pgs_per_fs_pool() - for pg_n in range(0, pg_count): - pg_str = "{0}.{1}".format(self.fs.get_data_pool_id(), pg_n) - out = self.fs.data_scan(["pg_files", "mydir", pg_str]) - lines = [l for l in out.split("\n") if l] - log.info("{0}: {1}".format(pg_str, lines)) - self.assertSetEqual(set(lines), set(pgs_to_files[pg_str])) - - def test_scan_links(self): - """ - The scan_links command fixes linkage errors - """ - self.mount_a.run_shell(["mkdir", "testdir1"]) - self.mount_a.run_shell(["mkdir", "testdir2"]) - dir1_ino = self.mount_a.path_to_ino("testdir1") - dir2_ino = self.mount_a.path_to_ino("testdir2") - dirfrag1_oid = "{0:x}.00000000".format(dir1_ino) - dirfrag2_oid = "{0:x}.00000000".format(dir2_ino) - - self.mount_a.run_shell(["touch", "testdir1/file1"]) - self.mount_a.run_shell(["ln", "testdir1/file1", "testdir1/link1"]) - self.mount_a.run_shell(["ln", "testdir1/file1", "testdir2/link2"]) - - mds_id = self.fs.get_active_names()[0] - self.fs.mds_asok(["flush", "journal"], mds_id) - - dirfrag1_keys = self._dirfrag_keys(dirfrag1_oid) - - # introduce duplicated primary link - file1_key = "file1_head" - self.assertIn(file1_key, dirfrag1_keys) - file1_omap_data = self.fs.rados(["getomapval", dirfrag1_oid, file1_key, '-']) - self.fs.rados(["setomapval", dirfrag2_oid, file1_key], stdin_data=file1_omap_data) - self.assertIn(file1_key, self._dirfrag_keys(dirfrag2_oid)) - - # remove a remote link, make inode link count incorrect - link1_key = 'link1_head' - self.assertIn(link1_key, dirfrag1_keys) - self.fs.rados(["rmomapkey", dirfrag1_oid, link1_key]) - - # increase good primary link's version - self.mount_a.run_shell(["touch", "testdir1/file1"]) - 
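The linkage damage in this test is injected by editing dirfrag objects' omap directly (setomapval/rmomapkey above), and the same objects can be inspected from the command line to watch scan_links do its repair. A minimal sketch, assuming the rados CLI and a placeholder metadata pool name; the helper itself is hypothetical:

    import subprocess

    def dirfrag_dentries(metadata_pool, dir_ino, frag="00000000"):
        # Dirfrag objects are named "<ino in hex>.<frag>"; their omap keys
        # are the dentries ("<name>_head"), as manipulated above.
        obj = "{0:x}.{1}".format(dir_ino, frag)
        out = subprocess.check_output(
            ["rados", "-p", metadata_pool, "listomapkeys", obj])
        return [k for k in out.decode("utf-8").split("\n") if k]

    # e.g. dirfrag_dentries("cephfs_metadata", dir1_ino) run before
    # scan_links should show 'file1_head' present in both testdir1's and
    # testdir2's dirfrags -- the duplicated primary link being repaired.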
self.mount_a.umount_wait() - - self.fs.mds_asok(["flush", "journal"], mds_id) - self.fs.mds_stop() - self.fs.mds_fail() - - # repair linkage errors - self.fs.data_scan(["scan_links"]) - - # primary link in testdir2 was deleted? - self.assertNotIn(file1_key, self._dirfrag_keys(dirfrag2_oid)) - - self.fs.mds_restart() - self.fs.wait_for_daemons() - - self.mount_a.mount() - self.mount_a.wait_until_mounted() - - # link count was adjusted? - file1_nlink = self.mount_a.path_to_nlink("testdir1/file1") - self.assertEqual(file1_nlink, 2) diff --git a/src/ceph/qa/tasks/cephfs/test_dump_tree.py b/src/ceph/qa/tasks/cephfs/test_dump_tree.py deleted file mode 100644 index 6d943f9..0000000 --- a/src/ceph/qa/tasks/cephfs/test_dump_tree.py +++ /dev/null @@ -1,66 +0,0 @@ -from tasks.cephfs.cephfs_test_case import CephFSTestCase -import random -import os - -class TestDumpTree(CephFSTestCase): - def get_paths_to_ino(self): - inos = {} - p = self.mount_a.run_shell(["find", "./"]) - paths = p.stdout.getvalue().strip().split() - for path in paths: - inos[path] = self.mount_a.path_to_ino(path, False) - - return inos - - def populate(self): - self.mount_a.run_shell(["git", "clone", - "https://github.com/ceph/ceph-qa-suite"]) - - def test_basic(self): - self.mount_a.run_shell(["mkdir", "parent"]) - self.mount_a.run_shell(["mkdir", "parent/child"]) - self.mount_a.run_shell(["touch", "parent/child/file"]) - self.mount_a.run_shell(["mkdir", "parent/child/grandchild"]) - self.mount_a.run_shell(["touch", "parent/child/grandchild/file"]) - - inos = self.get_paths_to_ino() - tree = self.fs.mds_asok(["dump", "tree", "/parent/child", "1"]) - - target_inos = [inos["./parent/child"], inos["./parent/child/file"], - inos["./parent/child/grandchild"]] - - for ino in tree: - del target_inos[target_inos.index(ino['ino'])] # don't catch! - - assert(len(target_inos) == 0) - - def test_random(self): - random.seed(0) - - self.populate() - inos = self.get_paths_to_ino() - target = random.choice(inos.keys()) - - if target != "./": - target = os.path.dirname(target) - - subtree = [path for path in inos.keys() if path.startswith(target)] - target_inos = [inos[path] for path in subtree] - tree = self.fs.mds_asok(["dump", "tree", target[1:]]) - - for ino in tree: - del target_inos[target_inos.index(ino['ino'])] # don't catch! - - assert(len(target_inos) == 0) - - target_depth = target.count('/') - maxdepth = max([path.count('/') for path in subtree]) - target_depth - depth = random.randint(0, maxdepth) - target_inos = [inos[path] for path in subtree \ - if path.count('/') <= depth + target_depth] - tree = self.fs.mds_asok(["dump", "tree", target[1:], str(depth)]) - - for ino in tree: - del target_inos[target_inos.index(ino['ino'])] # don't catch! 
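The del-then-assert idiom here (and in test_basic above) deliberately leaves the ValueError uncaught, so an inode in the dump that was never expected fails the test immediately, while the assert just below catches expected inodes the dump missed. The same check can be phrased as one multiset comparison; a hedged equivalent using a hypothetical helper:

    from collections import Counter

    def assert_same_inos(tree, target_inos):
        # Equivalent to deleting each dumped ino from target_inos and
        # asserting the list drains to empty: the dumped and expected
        # inode multisets must match exactly.
        dumped = Counter(entry['ino'] for entry in tree)
        expected = Counter(target_inos)
        assert dumped == expected, "extra: {0}, missing: {1}".format(
            list(dumped - expected), list(expected - dumped))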
- - assert(len(target_inos) == 0) diff --git a/src/ceph/qa/tasks/cephfs/test_exports.py b/src/ceph/qa/tasks/cephfs/test_exports.py deleted file mode 100644 index 913999d..0000000 --- a/src/ceph/qa/tasks/cephfs/test_exports.py +++ /dev/null @@ -1,107 +0,0 @@ -import logging -import time -from tasks.cephfs.fuse_mount import FuseMount -from tasks.cephfs.cephfs_test_case import CephFSTestCase - -log = logging.getLogger(__name__) - -class TestExports(CephFSTestCase): - MDSS_REQUIRED = 2 - - def _wait_subtrees(self, status, rank, test): - timeout = 30 - pause = 2 - test = sorted(test) - for i in range(timeout/pause): - subtrees = self.fs.mds_asok(["get", "subtrees"], mds_id=status.get_rank(self.fs.id, rank)['name']) - subtrees = filter(lambda s: s['dir']['path'].startswith('/'), subtrees) - filtered = sorted([(s['dir']['path'], s['auth_first']) for s in subtrees]) - log.info("%s =?= %s", filtered, test) - if filtered == test: - # Confirm export_pin in output is correct: - for s in subtrees: - self.assertTrue(s['export_pin'] == s['auth_first']) - return subtrees - time.sleep(pause) - raise RuntimeError("rank {0} failed to reach desired subtree state", rank) - - def test_export_pin(self): - self.fs.set_max_mds(2) - self.fs.wait_for_daemons() - - status = self.fs.status() - - self.mount_a.run_shell(["mkdir", "-p", "1/2/3"]) - self._wait_subtrees(status, 0, []) - - # NOP - self.mount_a.setfattr("1", "ceph.dir.pin", "-1") - self._wait_subtrees(status, 0, []) - - # NOP (rank < -1) - self.mount_a.setfattr("1", "ceph.dir.pin", "-2341") - self._wait_subtrees(status, 0, []) - - # pin /1 to rank 1 - self.mount_a.setfattr("1", "ceph.dir.pin", "1") - self._wait_subtrees(status, 1, [('/1', 1)]) - - # Check export_targets is set properly - status = self.fs.status() - log.info(status) - r0 = status.get_rank(self.fs.id, 0) - self.assertTrue(sorted(r0['export_targets']) == [1]) - - # redundant pin /1/2 to rank 1 - self.mount_a.setfattr("1/2", "ceph.dir.pin", "1") - self._wait_subtrees(status, 1, [('/1', 1), ('/1/2', 1)]) - - # change pin /1/2 to rank 0 - self.mount_a.setfattr("1/2", "ceph.dir.pin", "0") - self._wait_subtrees(status, 1, [('/1', 1), ('/1/2', 0)]) - self._wait_subtrees(status, 0, [('/1', 1), ('/1/2', 0)]) - - # change pin /1/2/3 to (presently) non-existent rank 2 - self.mount_a.setfattr("1/2/3", "ceph.dir.pin", "2") - self._wait_subtrees(status, 0, [('/1', 1), ('/1/2', 0)]) - self._wait_subtrees(status, 1, [('/1', 1), ('/1/2', 0)]) - - # change pin /1/2 back to rank 1 - self.mount_a.setfattr("1/2", "ceph.dir.pin", "1") - self._wait_subtrees(status, 1, [('/1', 1), ('/1/2', 1)]) - - # add another directory pinned to 1 - self.mount_a.run_shell(["mkdir", "-p", "1/4/5"]) - self.mount_a.setfattr("1/4/5", "ceph.dir.pin", "1") - self._wait_subtrees(status, 1, [('/1', 1), ('/1/2', 1), ('/1/4/5', 1)]) - - # change pin /1 to 0 - self.mount_a.setfattr("1", "ceph.dir.pin", "0") - self._wait_subtrees(status, 0, [('/1', 0), ('/1/2', 1), ('/1/4/5', 1)]) - - # change pin /1/2 to default (-1); does the subtree root properly respect it's parent pin? 
- self.mount_a.setfattr("1/2", "ceph.dir.pin", "-1") - self._wait_subtrees(status, 0, [('/1', 0), ('/1/4/5', 1)]) - - if len(list(status.get_standbys())): - self.fs.set_max_mds(3) - self.fs.wait_for_state('up:active', rank=2) - self._wait_subtrees(status, 0, [('/1', 0), ('/1/4/5', 1), ('/1/2/3', 2)]) - - # Check export_targets is set properly - status = self.fs.status() - log.info(status) - r0 = status.get_rank(self.fs.id, 0) - self.assertTrue(sorted(r0['export_targets']) == [1,2]) - r1 = status.get_rank(self.fs.id, 1) - self.assertTrue(sorted(r1['export_targets']) == [0]) - r2 = status.get_rank(self.fs.id, 2) - self.assertTrue(sorted(r2['export_targets']) == []) - - # Test rename - self.mount_a.run_shell(["mkdir", "-p", "a/b", "aa/bb"]) - self.mount_a.setfattr("a", "ceph.dir.pin", "1") - self.mount_a.setfattr("aa/bb", "ceph.dir.pin", "0") - self._wait_subtrees(status, 0, [('/1', 0), ('/1/4/5', 1), ('/1/2/3', 2), ('/a', 1), ('/aa/bb', 0)]) - self.mount_a.run_shell(["mv", "aa", "a/b/"]) - self._wait_subtrees(status, 0, [('/1', 0), ('/1/4/5', 1), ('/1/2/3', 2), ('/a', 1), ('/a/b/aa/bb', 0)]) diff --git a/src/ceph/qa/tasks/cephfs/test_failover.py b/src/ceph/qa/tasks/cephfs/test_failover.py deleted file mode 100644 index 9d3392c..0000000 --- a/src/ceph/qa/tasks/cephfs/test_failover.py +++ /dev/null @@ -1,645 +0,0 @@ -import json -import logging -from unittest import case, SkipTest - -from cephfs_test_case import CephFSTestCase -from teuthology.exceptions import CommandFailedError -from teuthology import misc as teuthology -from tasks.cephfs.fuse_mount import FuseMount - -log = logging.getLogger(__name__) - - -class TestFailover(CephFSTestCase): - CLIENTS_REQUIRED = 1 - MDSS_REQUIRED = 2 - - def test_simple(self): - """ - That when the active MDS is killed, a standby MDS is promoted into - its rank after the grace period. - - This is just a simple unit test, the harder cases are covered - in thrashing tests. - """ - - # Need all my standbys up as well as the active daemons - self.wait_for_daemon_start() - - (original_active, ) = self.fs.get_active_names() - original_standbys = self.mds_cluster.get_standby_daemons() - - # Kill the rank 0 daemon's physical process - self.fs.mds_stop(original_active) - - grace = float(self.fs.get_config("mds_beacon_grace", service_type="mon")) - - # Wait until the monitor promotes his replacement - def promoted(): - active = self.fs.get_active_names() - return active and active[0] in original_standbys - - log.info("Waiting for promotion of one of the original standbys {0}".format( - original_standbys)) - self.wait_until_true( - promoted, - timeout=grace*2) - - # Start the original rank 0 daemon up again, see that he becomes a standby - self.fs.mds_restart(original_active) - self.wait_until_true( - lambda: original_active in self.mds_cluster.get_standby_daemons(), - timeout=60 # Approximately long enough for MDS to start and mon to notice - ) - - def test_client_abort(self): - """ - That a client will respect fuse_require_active_mds and error out - when the cluster appears to be unavailable. 
- """ - - if not isinstance(self.mount_a, FuseMount): - raise SkipTest("Requires FUSE client to inject client metadata") - - require_active = self.fs.get_config("fuse_require_active_mds", service_type="mon").lower() == "true" - if not require_active: - raise case.SkipTest("fuse_require_active_mds is not set") - - grace = float(self.fs.get_config("mds_beacon_grace", service_type="mon")) - - # Check it's not laggy to begin with - (original_active, ) = self.fs.get_active_names() - self.assertNotIn("laggy_since", self.fs.mon_manager.get_mds_status(original_active)) - - self.mounts[0].umount_wait() - - # Control: that we can mount and unmount usually, while the cluster is healthy - self.mounts[0].mount() - self.mounts[0].wait_until_mounted() - self.mounts[0].umount_wait() - - # Stop the daemon processes - self.fs.mds_stop() - - # Wait for everyone to go laggy - def laggy(): - mdsmap = self.fs.get_mds_map() - for info in mdsmap['info'].values(): - if "laggy_since" not in info: - return False - - return True - - self.wait_until_true(laggy, grace * 2) - with self.assertRaises(CommandFailedError): - self.mounts[0].mount() - - def test_standby_count_wanted(self): - """ - That cluster health warnings are generated by insufficient standbys available. - """ - - # Need all my standbys up as well as the active daemons - self.wait_for_daemon_start() - - grace = float(self.fs.get_config("mds_beacon_grace", service_type="mon")) - - standbys = self.mds_cluster.get_standby_daemons() - self.assertGreaterEqual(len(standbys), 1) - self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', str(len(standbys))) - - # Kill a standby and check for warning - victim = standbys.pop() - self.fs.mds_stop(victim) - log.info("waiting for insufficient standby daemon warning") - self.wait_for_health("MDS_INSUFFICIENT_STANDBY", grace*2) - - # restart the standby, see that he becomes a standby, check health clears - self.fs.mds_restart(victim) - self.wait_until_true( - lambda: victim in self.mds_cluster.get_standby_daemons(), - timeout=60 # Approximately long enough for MDS to start and mon to notice - ) - self.wait_for_health_clear(timeout=30) - - # Set it one greater than standbys ever seen - standbys = self.mds_cluster.get_standby_daemons() - self.assertGreaterEqual(len(standbys), 1) - self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', str(len(standbys)+1)) - log.info("waiting for insufficient standby daemon warning") - self.wait_for_health("MDS_INSUFFICIENT_STANDBY", grace*2) - - # Set it to 0 - self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', '0') - self.wait_for_health_clear(timeout=30) - - - - -class TestStandbyReplay(CephFSTestCase): - MDSS_REQUIRED = 4 - REQUIRE_FILESYSTEM = False - - def set_standby_for(self, leader, follower, replay): - self.set_conf("mds.{0}".format(follower), "mds_standby_for_name", leader) - if replay: - self.set_conf("mds.{0}".format(follower), "mds_standby_replay", "true") - - def get_info_by_name(self, mds_name): - status = self.mds_cluster.status() - info = status.get_mds(mds_name) - if info is None: - log.warn(str(status)) - raise RuntimeError("MDS '{0}' not found".format(mds_name)) - else: - return info - - def test_standby_replay_unused(self): - # Pick out exactly 3 daemons to be run during test - use_daemons = sorted(self.mds_cluster.mds_ids[0:3]) - mds_a, mds_b, mds_c = use_daemons - log.info("Using MDS daemons: {0}".format(use_daemons)) - - # B and C should both follow A, but only one 
will - # really get into standby replay state. - self.set_standby_for(mds_a, mds_b, True) - self.set_standby_for(mds_a, mds_c, True) - - # Create FS and start A - fs_a = self.mds_cluster.newfs("alpha") - self.mds_cluster.mds_restart(mds_a) - fs_a.wait_for_daemons() - self.assertEqual(fs_a.get_active_names(), [mds_a]) - - # Start B, he should go into standby replay - self.mds_cluster.mds_restart(mds_b) - self.wait_for_daemon_start([mds_b]) - info_b = self.get_info_by_name(mds_b) - self.assertEqual(info_b['state'], "up:standby-replay") - self.assertEqual(info_b['standby_for_name'], mds_a) - self.assertEqual(info_b['rank'], 0) - - # Start C, he should go into standby (*not* replay) - self.mds_cluster.mds_restart(mds_c) - self.wait_for_daemon_start([mds_c]) - info_c = self.get_info_by_name(mds_c) - self.assertEqual(info_c['state'], "up:standby") - self.assertEqual(info_c['standby_for_name'], mds_a) - self.assertEqual(info_c['rank'], -1) - - # Kill B, C should go into standby replay - self.mds_cluster.mds_stop(mds_b) - self.mds_cluster.mds_fail(mds_b) - self.wait_until_equal( - lambda: self.get_info_by_name(mds_c)['state'], - "up:standby-replay", - 60) - info_c = self.get_info_by_name(mds_c) - self.assertEqual(info_c['state'], "up:standby-replay") - self.assertEqual(info_c['standby_for_name'], mds_a) - self.assertEqual(info_c['rank'], 0) - - def test_standby_failure(self): - """ - That the failure of a standby-replay daemon happens cleanly - and doesn't interrupt anything else. - """ - # Pick out exactly 2 daemons to be run during test - use_daemons = sorted(self.mds_cluster.mds_ids[0:2]) - mds_a, mds_b = use_daemons - log.info("Using MDS daemons: {0}".format(use_daemons)) - - # Configure two pairs of MDSs that are standby for each other - self.set_standby_for(mds_a, mds_b, True) - self.set_standby_for(mds_b, mds_a, False) - - # Create FS alpha and get mds_a to come up as active - fs_a = self.mds_cluster.newfs("alpha") - self.mds_cluster.mds_restart(mds_a) - fs_a.wait_for_daemons() - self.assertEqual(fs_a.get_active_names(), [mds_a]) - - # Start the standbys - self.mds_cluster.mds_restart(mds_b) - self.wait_for_daemon_start([mds_b]) - - # See the standby come up as the correct rank - info_b = self.get_info_by_name(mds_b) - self.assertEqual(info_b['state'], "up:standby-replay") - self.assertEqual(info_b['standby_for_name'], mds_a) - self.assertEqual(info_b['rank'], 0) - - # Kill the standby - self.mds_cluster.mds_stop(mds_b) - self.mds_cluster.mds_fail(mds_b) - - # See that the standby is gone and the active remains - self.assertEqual(fs_a.get_active_names(), [mds_a]) - mds_map = fs_a.get_mds_map() - self.assertEqual(len(mds_map['info']), 1) - self.assertEqual(mds_map['failed'], []) - self.assertEqual(mds_map['damaged'], []) - self.assertEqual(mds_map['stopped'], []) - - def test_rank_stopped(self): - """ - That when a rank is STOPPED, standby replays for - that rank get torn down - """ - # Pick out exactly 2 daemons to be run during test - use_daemons = sorted(self.mds_cluster.mds_ids[0:4]) - mds_a, mds_b, mds_a_s, mds_b_s = use_daemons - log.info("Using MDS daemons: {0}".format(use_daemons)) - - # a and b both get a standby - self.set_standby_for(mds_a, mds_a_s, True) - self.set_standby_for(mds_b, mds_b_s, True) - - # Create FS alpha and get mds_a to come up as active - fs_a = self.mds_cluster.newfs("alpha") - fs_a.set_max_mds(2) - - self.mds_cluster.mds_restart(mds_a) - self.wait_until_equal(lambda: fs_a.get_active_names(), [mds_a], 30) - self.mds_cluster.mds_restart(mds_b) - 
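set_standby_for() in this class drives standby-replay purely through the two mds options it writes with set_conf. Rendered as configuration, the pairing logic looks like the sketch below; the helper is hypothetical, but it uses only the options the tests themselves set:

    try:
        from configparser import ConfigParser          # Python 3
    except ImportError:
        from ConfigParser import ConfigParser          # Python 2

    def standby_conf(pairs):
        # pairs: (leader, follower, replay) tuples, as in set_standby_for().
        conf = ConfigParser()
        for leader, follower, replay in pairs:
            section = "mds.{0}".format(follower)
            conf.add_section(section)
            conf.set(section, "mds_standby_for_name", leader)
            if replay:
                # follow the leader's journal live instead of sitting idle
                conf.set(section, "mds_standby_replay", "true")
        return conf

    # e.g. standby_conf([("a", "b", True), ("b", "a", False)]) reproduces
    # the mutual-standby layout used by test_standby_failure above.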
fs_a.wait_for_daemons() - self.assertEqual(sorted(fs_a.get_active_names()), [mds_a, mds_b]) - - # Start the standbys - self.mds_cluster.mds_restart(mds_b_s) - self.wait_for_daemon_start([mds_b_s]) - self.mds_cluster.mds_restart(mds_a_s) - self.wait_for_daemon_start([mds_a_s]) - info_b_s = self.get_info_by_name(mds_b_s) - self.assertEqual(info_b_s['state'], "up:standby-replay") - info_a_s = self.get_info_by_name(mds_a_s) - self.assertEqual(info_a_s['state'], "up:standby-replay") - - # Shrink the cluster - fs_a.set_max_mds(1) - fs_a.mon_manager.raw_cluster_cmd("mds", "stop", "{0}:1".format(fs_a.name)) - self.wait_until_equal( - lambda: fs_a.get_active_names(), [mds_a], - 60 - ) - - # Both 'b' and 'b_s' should go back to being standbys - self.wait_until_equal( - lambda: self.mds_cluster.get_standby_daemons(), {mds_b, mds_b_s}, - 60 - ) - - -class TestMultiFilesystems(CephFSTestCase): - CLIENTS_REQUIRED = 2 - MDSS_REQUIRED = 4 - - # We'll create our own filesystems and start our own daemons - REQUIRE_FILESYSTEM = False - - def setUp(self): - super(TestMultiFilesystems, self).setUp() - self.mds_cluster.mon_manager.raw_cluster_cmd("fs", "flag", "set", - "enable_multiple", "true", - "--yes-i-really-mean-it") - - def _setup_two(self): - fs_a = self.mds_cluster.newfs("alpha") - fs_b = self.mds_cluster.newfs("bravo") - - self.mds_cluster.mds_restart() - - # Wait for both filesystems to go healthy - fs_a.wait_for_daemons() - fs_b.wait_for_daemons() - - # Reconfigure client auth caps - for mount in self.mounts: - self.mds_cluster.mon_manager.raw_cluster_cmd_result( - 'auth', 'caps', "client.{0}".format(mount.client_id), - 'mds', 'allow', - 'mon', 'allow r', - 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( - fs_a.get_data_pool_name(), fs_b.get_data_pool_name())) - - return fs_a, fs_b - - def test_clients(self): - fs_a, fs_b = self._setup_two() - - # Mount a client on fs_a - self.mount_a.mount(mount_fs_name=fs_a.name) - self.mount_a.write_n_mb("pad.bin", 1) - self.mount_a.write_n_mb("test.bin", 2) - a_created_ino = self.mount_a.path_to_ino("test.bin") - self.mount_a.create_files() - - # Mount a client on fs_b - self.mount_b.mount(mount_fs_name=fs_b.name) - self.mount_b.write_n_mb("test.bin", 1) - b_created_ino = self.mount_b.path_to_ino("test.bin") - self.mount_b.create_files() - - # Check that a non-default filesystem mount survives an MDS - # failover (i.e. that map subscription is continuous, not - # just the first time), reproduces #16022 - old_fs_b_mds = fs_b.get_active_names()[0] - self.mds_cluster.mds_stop(old_fs_b_mds) - self.mds_cluster.mds_fail(old_fs_b_mds) - fs_b.wait_for_daemons() - background = self.mount_b.write_background() - # Raise exception if the write doesn't finish (i.e. 
if client - # has not kept up with MDS failure) - try: - self.wait_until_true(lambda: background.finished, timeout=30) - except RuntimeError: - # The mount is stuck, we'll have to force it to fail cleanly - background.stdin.close() - self.mount_b.umount_wait(force=True) - raise - - self.mount_a.umount_wait() - self.mount_b.umount_wait() - - # See that the client's files went into the correct pool - self.assertTrue(fs_a.data_objects_present(a_created_ino, 1024 * 1024)) - self.assertTrue(fs_b.data_objects_present(b_created_ino, 1024 * 1024)) - - def test_standby(self): - fs_a, fs_b = self._setup_two() - - # Assert that the remaining two MDS daemons are now standbys - a_daemons = fs_a.get_active_names() - b_daemons = fs_b.get_active_names() - self.assertEqual(len(a_daemons), 1) - self.assertEqual(len(b_daemons), 1) - original_a = a_daemons[0] - original_b = b_daemons[0] - expect_standby_daemons = set(self.mds_cluster.mds_ids) - (set(a_daemons) | set(b_daemons)) - - # Need all my standbys up as well as the active daemons - self.wait_for_daemon_start() - self.assertEqual(expect_standby_daemons, self.mds_cluster.get_standby_daemons()) - - # Kill fs_a's active MDS, see a standby take over - self.mds_cluster.mds_stop(original_a) - self.mds_cluster.mon_manager.raw_cluster_cmd("mds", "fail", original_a) - self.wait_until_equal(lambda: len(fs_a.get_active_names()), 1, 30, - reject_fn=lambda v: v > 1) - # Assert that it's a *different* daemon that has now appeared in the map for fs_a - self.assertNotEqual(fs_a.get_active_names()[0], original_a) - - # Kill fs_b's active MDS, see a standby take over - self.mds_cluster.mds_stop(original_b) - self.mds_cluster.mon_manager.raw_cluster_cmd("mds", "fail", original_b) - self.wait_until_equal(lambda: len(fs_b.get_active_names()), 1, 30, - reject_fn=lambda v: v > 1) - # Assert that it's a *different* daemon that has now appeared in the map for fs_a - self.assertNotEqual(fs_b.get_active_names()[0], original_b) - - # Both of the original active daemons should be gone, and all standbys used up - self.assertEqual(self.mds_cluster.get_standby_daemons(), set()) - - # Restart the ones I killed, see them reappear as standbys - self.mds_cluster.mds_restart(original_a) - self.mds_cluster.mds_restart(original_b) - self.wait_until_true( - lambda: {original_a, original_b} == self.mds_cluster.get_standby_daemons(), - timeout=30 - ) - - def test_grow_shrink(self): - # Usual setup... - fs_a, fs_b = self._setup_two() - - # Increase max_mds on fs_b, see a standby take up the role - fs_b.set_max_mds(2) - self.wait_until_equal(lambda: len(fs_b.get_active_names()), 2, 30, - reject_fn=lambda v: v > 2 or v < 1) - - # Increase max_mds on fs_a, see a standby take up the role - fs_a.set_max_mds(2) - self.wait_until_equal(lambda: len(fs_a.get_active_names()), 2, 30, - reject_fn=lambda v: v > 2 or v < 1) - - # Shrink fs_b back to 1, see a daemon go back to standby - fs_b.set_max_mds(1) - fs_b.deactivate(1) - self.wait_until_equal(lambda: len(fs_b.get_active_names()), 1, 30, - reject_fn=lambda v: v > 2 or v < 1) - - # Grow fs_a up to 3, see the former fs_b daemon join it. 
- fs_a.set_max_mds(3) - self.wait_until_equal(lambda: len(fs_a.get_active_names()), 3, 60, - reject_fn=lambda v: v > 3 or v < 2) - - def test_standby_for_name(self): - # Pick out exactly 4 daemons to be run during test - use_daemons = sorted(self.mds_cluster.mds_ids[0:4]) - mds_a, mds_b, mds_c, mds_d = use_daemons - log.info("Using MDS daemons: {0}".format(use_daemons)) - - def set_standby_for(leader, follower, replay): - self.set_conf("mds.{0}".format(follower), "mds_standby_for_name", leader) - if replay: - self.set_conf("mds.{0}".format(follower), "mds_standby_replay", "true") - - # Configure two pairs of MDSs that are standby for each other - set_standby_for(mds_a, mds_b, True) - set_standby_for(mds_b, mds_a, False) - set_standby_for(mds_c, mds_d, True) - set_standby_for(mds_d, mds_c, False) - - # Create FS alpha and get mds_a to come up as active - fs_a = self.mds_cluster.newfs("alpha") - self.mds_cluster.mds_restart(mds_a) - fs_a.wait_for_daemons() - self.assertEqual(fs_a.get_active_names(), [mds_a]) - - # Create FS bravo and get mds_c to come up as active - fs_b = self.mds_cluster.newfs("bravo") - self.mds_cluster.mds_restart(mds_c) - fs_b.wait_for_daemons() - self.assertEqual(fs_b.get_active_names(), [mds_c]) - - # Start the standbys - self.mds_cluster.mds_restart(mds_b) - self.mds_cluster.mds_restart(mds_d) - self.wait_for_daemon_start([mds_b, mds_d]) - - def get_info_by_name(fs, mds_name): - mds_map = fs.get_mds_map() - for gid_str, info in mds_map['info'].items(): - if info['name'] == mds_name: - return info - - log.warn(json.dumps(mds_map, indent=2)) - raise RuntimeError("MDS '{0}' not found in filesystem MDSMap".format(mds_name)) - - # See both standbys come up as standby replay for the correct ranks - # mds_b should be in filesystem alpha following mds_a - info_b = get_info_by_name(fs_a, mds_b) - self.assertEqual(info_b['state'], "up:standby-replay") - self.assertEqual(info_b['standby_for_name'], mds_a) - self.assertEqual(info_b['rank'], 0) - # mds_d should be in filesystem alpha following mds_c - info_d = get_info_by_name(fs_b, mds_d) - self.assertEqual(info_d['state'], "up:standby-replay") - self.assertEqual(info_d['standby_for_name'], mds_c) - self.assertEqual(info_d['rank'], 0) - - # Kill both active daemons - self.mds_cluster.mds_stop(mds_a) - self.mds_cluster.mds_fail(mds_a) - self.mds_cluster.mds_stop(mds_c) - self.mds_cluster.mds_fail(mds_c) - - # Wait for standbys to take over - fs_a.wait_for_daemons() - self.assertEqual(fs_a.get_active_names(), [mds_b]) - fs_b.wait_for_daemons() - self.assertEqual(fs_b.get_active_names(), [mds_d]) - - # Start the original active daemons up again - self.mds_cluster.mds_restart(mds_a) - self.mds_cluster.mds_restart(mds_c) - self.wait_for_daemon_start([mds_a, mds_c]) - - self.assertEqual(set(self.mds_cluster.get_standby_daemons()), - {mds_a, mds_c}) - - def test_standby_for_rank(self): - use_daemons = sorted(self.mds_cluster.mds_ids[0:4]) - mds_a, mds_b, mds_c, mds_d = use_daemons - log.info("Using MDS daemons: {0}".format(use_daemons)) - - def set_standby_for(leader_rank, leader_fs, follower_id): - self.set_conf("mds.{0}".format(follower_id), - "mds_standby_for_rank", leader_rank) - - fscid = leader_fs.get_namespace_id() - self.set_conf("mds.{0}".format(follower_id), - "mds_standby_for_fscid", fscid) - - fs_a = self.mds_cluster.newfs("alpha") - fs_b = self.mds_cluster.newfs("bravo") - set_standby_for(0, fs_a, mds_a) - set_standby_for(0, fs_a, mds_b) - set_standby_for(0, fs_b, mds_c) - set_standby_for(0, fs_b, mds_d) - - 
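# Each set_standby_for() call above amounts to two ceph.conf settings on
# the follower daemon. Rendered as a conf stanza it would look roughly
# like the output of this sketch (section naming inferred from the
# set_conf calls above; fscid values are per-cluster):

def render_standby_stanza(follower_id, leader_rank, leader_fscid):
    return ("[mds.{0}]\n"
            "    mds_standby_for_rank = {1}\n"
            "    mds_standby_for_fscid = {2}\n").format(
                follower_id, leader_rank, leader_fscid)

# render_standby_stanza("a", 0, 1) ->
# [mds.a]
#     mds_standby_for_rank = 0
#     mds_standby_for_fscid = 1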
self.mds_cluster.mds_restart(mds_a) - fs_a.wait_for_daemons() - self.assertEqual(fs_a.get_active_names(), [mds_a]) - - self.mds_cluster.mds_restart(mds_c) - fs_b.wait_for_daemons() - self.assertEqual(fs_b.get_active_names(), [mds_c]) - - self.mds_cluster.mds_restart(mds_b) - self.mds_cluster.mds_restart(mds_d) - self.wait_for_daemon_start([mds_b, mds_d]) - - self.mds_cluster.mds_stop(mds_a) - self.mds_cluster.mds_fail(mds_a) - self.mds_cluster.mds_stop(mds_c) - self.mds_cluster.mds_fail(mds_c) - - fs_a.wait_for_daemons() - self.assertEqual(fs_a.get_active_names(), [mds_b]) - fs_b.wait_for_daemons() - self.assertEqual(fs_b.get_active_names(), [mds_d]) - - def test_standby_for_fscid(self): - """ - That I can set a standby FSCID with no rank, and the result is - that daemons join any rank for that filesystem. - """ - use_daemons = sorted(self.mds_cluster.mds_ids[0:4]) - mds_a, mds_b, mds_c, mds_d = use_daemons - - log.info("Using MDS daemons: {0}".format(use_daemons)) - - def set_standby_for(leader_fs, follower_id): - fscid = leader_fs.get_namespace_id() - self.set_conf("mds.{0}".format(follower_id), - "mds_standby_for_fscid", fscid) - - # Create two filesystems which should have two ranks each - fs_a = self.mds_cluster.newfs("alpha") - - fs_b = self.mds_cluster.newfs("bravo") - - fs_a.set_max_mds(2) - fs_b.set_max_mds(2) - - # Set all the daemons to have a FSCID assignment but no other - # standby preferences. - set_standby_for(fs_a, mds_a) - set_standby_for(fs_a, mds_b) - set_standby_for(fs_b, mds_c) - set_standby_for(fs_b, mds_d) - - # Now when we start all daemons at once, they should fall into - # ranks in the right filesystem - self.mds_cluster.mds_restart(mds_a) - self.mds_cluster.mds_restart(mds_b) - self.mds_cluster.mds_restart(mds_c) - self.mds_cluster.mds_restart(mds_d) - self.wait_for_daemon_start([mds_a, mds_b, mds_c, mds_d]) - fs_a.wait_for_daemons() - fs_b.wait_for_daemons() - self.assertEqual(set(fs_a.get_active_names()), {mds_a, mds_b}) - self.assertEqual(set(fs_b.get_active_names()), {mds_c, mds_d}) - - def test_standby_for_invalid_fscid(self): - """ - That an invalid standby_fscid does not cause a mon crash - """ - use_daemons = sorted(self.mds_cluster.mds_ids[0:3]) - mds_a, mds_b, mds_c = use_daemons - log.info("Using MDS daemons: {0}".format(use_daemons)) - - def set_standby_for_rank(leader_rank, follower_id): - self.set_conf("mds.{0}".format(follower_id), - "mds_standby_for_rank", leader_rank) - - # Create one fs - fs_a = self.mds_cluster.newfs("cephfs") - - # Get configured mons in the cluster, so we can see if any - # crashed later. - configured_mons = fs_a.mon_manager.get_mon_quorum() - - # Set all the daemons to have a rank assignment but no other - # standby preferences. 
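# get_namespace_id() above resolves a filesystem name to its FSCID. The
# same lookup can be done against the FSMap dump; a sketch using the CLI
# (command and JSON field names are assumptions here, the tests go
# through a Python wrapper instead):

import json
import subprocess

def get_fscid(fs_name):
    dump = json.loads(subprocess.check_output(
        ["ceph", "fs", "dump", "--format=json"]))
    for fs in dump["filesystems"]:
        if fs["mdsmap"]["fs_name"] == fs_name:
            return fs["id"]
    raise RuntimeError("no filesystem named {0}".format(fs_name))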
- set_standby_for_rank(0, mds_a)
- set_standby_for_rank(0, mds_b)
-
- # Set the third daemon to have an invalid fscid assignment and no
- # other standby preferences
- invalid_fscid = 123
- self.set_conf("mds.{0}".format(mds_c), "mds_standby_for_fscid", invalid_fscid)
-
- # Restart all the daemons so that the standby preferences take effect
- self.mds_cluster.mds_restart(mds_a)
- self.mds_cluster.mds_restart(mds_b)
- self.mds_cluster.mds_restart(mds_c)
- self.wait_for_daemon_start([mds_a, mds_b, mds_c])
-
- # Stop the active MDS daemon of the filesystem
- if fs_a.get_active_names() == [mds_a]:
- self.mds_cluster.mds_stop(mds_a)
- self.mds_cluster.mds_fail(mds_a)
- fs_a.wait_for_daemons()
- else:
- self.mds_cluster.mds_stop(mds_b)
- self.mds_cluster.mds_fail(mds_b)
- fs_a.wait_for_daemons()
-
- # Get the active mons from the cluster
- active_mons = fs_a.mon_manager.get_mon_quorum()
-
- # Check that the active quorum matches the configured mon set
- self.assertEqual(active_mons, configured_mons,
- "Not all mons are in quorum; the invalid standby fscid test failed!")
diff --git a/src/ceph/qa/tasks/cephfs/test_flush.py b/src/ceph/qa/tasks/cephfs/test_flush.py
deleted file mode 100644
index 1f84e42..0000000
--- a/src/ceph/qa/tasks/cephfs/test_flush.py
+++ /dev/null
@@ -1,113 +0,0 @@
-
-from textwrap import dedent
-from tasks.cephfs.cephfs_test_case import CephFSTestCase
-from tasks.cephfs.filesystem import ObjectNotFound, ROOT_INO
-
-
-class TestFlush(CephFSTestCase):
- def test_flush(self):
- self.mount_a.run_shell(["mkdir", "mydir"])
- self.mount_a.run_shell(["touch", "mydir/alpha"])
- dir_ino = self.mount_a.path_to_ino("mydir")
- file_ino = self.mount_a.path_to_ino("mydir/alpha")
-
- # Unmount the client so that it isn't still holding caps
- self.mount_a.umount_wait()
-
- # Before flush, the dirfrag object does not exist
- with self.assertRaises(ObjectNotFound):
- self.fs.list_dirfrag(dir_ino)
-
- # Before flush, the file's backtrace has not been written
- with self.assertRaises(ObjectNotFound):
- self.fs.read_backtrace(file_ino)
-
- # Before flush, there are no dentries in the root
- self.assertEqual(self.fs.list_dirfrag(ROOT_INO), [])
-
- # Execute flush
- flush_data = self.fs.mds_asok(["flush", "journal"])
- self.assertEqual(flush_data['return_code'], 0)
-
- # After flush, the dirfrag object has been created
- dir_list = self.fs.list_dirfrag(dir_ino)
- self.assertEqual(dir_list, ["alpha_head"])
-
- # And the 'mydir' dentry is in the root
- self.assertEqual(self.fs.list_dirfrag(ROOT_INO), ['mydir_head'])
-
- # ...and the data object has its backtrace
- backtrace = self.fs.read_backtrace(file_ino)
- self.assertEqual(['alpha', 'mydir'], [a['dname'] for a in backtrace['ancestors']])
- self.assertEqual([dir_ino, 1], [a['dirino'] for a in backtrace['ancestors']])
- self.assertEqual(file_ino, backtrace['ino'])
-
- # ...and the journal is truncated to just a single subtreemap from the
- # newly created segment
- summary_output = self.fs.journal_tool(["event", "get", "summary"])
- try:
- self.assertEqual(summary_output,
- dedent(
- """
- Events by type:
- SUBTREEMAP: 1
- Errors: 0
- """
- ).strip())
- except AssertionError:
- # In some states, flushing the journal will leave you an extra
- # event from locks that a client held. This is correct behaviour:
- # the MDS is flushing the journal, it's just that new events are
- # getting added too. In this case, we should nevertheless see a
- # fully empty journal after a second flush.
- self.assertEqual(summary_output, - dedent( - """ - Events by type: - SUBTREEMAP: 1 - UPDATE: 1 - Errors: 0 - """ - ).strip()) - flush_data = self.fs.mds_asok(["flush", "journal"]) - self.assertEqual(flush_data['return_code'], 0) - self.assertEqual(self.fs.journal_tool(["event", "get", "summary"]), - dedent( - """ - Events by type: - SUBTREEMAP: 1 - Errors: 0 - """ - ).strip()) - - # Now for deletion! - # We will count the RADOS deletions and MDS file purges, to verify that - # the expected behaviour is happening as a result of the purge - initial_dels = self.fs.mds_asok(['perf', 'dump', 'objecter'])['objecter']['osdop_delete'] - initial_purges = self.fs.mds_asok(['perf', 'dump', 'mds_cache'])['mds_cache']['strays_enqueued'] - - # Use a client to delete a file - self.mount_a.mount() - self.mount_a.wait_until_mounted() - self.mount_a.run_shell(["rm", "-rf", "mydir"]) - - # Flush the journal so that the directory inode can be purged - flush_data = self.fs.mds_asok(["flush", "journal"]) - self.assertEqual(flush_data['return_code'], 0) - - # We expect to see a single file purge - self.wait_until_true( - lambda: self.fs.mds_asok(['perf', 'dump', 'mds_cache'])['mds_cache']['strays_enqueued'] - initial_purges >= 2, - 60) - - # We expect two deletions, one of the dirfrag and one of the backtrace - self.wait_until_true( - lambda: self.fs.mds_asok(['perf', 'dump', 'objecter'])['objecter']['osdop_delete'] - initial_dels >= 2, - 60) # timeout is fairly long to allow for tick+rados latencies - - with self.assertRaises(ObjectNotFound): - self.fs.list_dirfrag(dir_ino) - with self.assertRaises(ObjectNotFound): - self.fs.read_backtrace(file_ino) - self.assertEqual(self.fs.list_dirfrag(ROOT_INO), []) diff --git a/src/ceph/qa/tasks/cephfs/test_forward_scrub.py b/src/ceph/qa/tasks/cephfs/test_forward_scrub.py deleted file mode 100644 index ac912dd..0000000 --- a/src/ceph/qa/tasks/cephfs/test_forward_scrub.py +++ /dev/null @@ -1,291 +0,0 @@ - -""" -Test that the forward scrub functionality can traverse metadata and apply -requested tags, on well formed metadata. - -This is *not* the real testing for forward scrub, which will need to test -how the functionality responds to damaged metadata. 
- -""" -import json - -import logging -from collections import namedtuple -from textwrap import dedent - -from teuthology.orchestra.run import CommandFailedError -from tasks.cephfs.cephfs_test_case import CephFSTestCase - -import struct - -log = logging.getLogger(__name__) - - -ValidationError = namedtuple("ValidationError", ["exception", "backtrace"]) - - -class TestForwardScrub(CephFSTestCase): - MDSS_REQUIRED = 1 - - def _read_str_xattr(self, pool, obj, attr): - """ - Read a ceph-encoded string from a rados xattr - """ - output = self.fs.rados(["getxattr", obj, attr], pool=pool) - strlen = struct.unpack('i', output[0:4])[0] - return output[4:(4 + strlen)] - - def _get_paths_to_ino(self): - inos = {} - p = self.mount_a.run_shell(["find", "./"]) - paths = p.stdout.getvalue().strip().split() - for path in paths: - inos[path] = self.mount_a.path_to_ino(path) - - return inos - - def test_apply_tag(self): - self.mount_a.run_shell(["mkdir", "parentdir"]) - self.mount_a.run_shell(["mkdir", "parentdir/childdir"]) - self.mount_a.run_shell(["touch", "rfile"]) - self.mount_a.run_shell(["touch", "parentdir/pfile"]) - self.mount_a.run_shell(["touch", "parentdir/childdir/cfile"]) - - # Build a structure mapping path to inode, as we will later want - # to check object by object and objects are named after ino number - inos = self._get_paths_to_ino() - - # Flush metadata: this is a friendly test of forward scrub so we're skipping - # the part where it's meant to cope with dirty metadata - self.mount_a.umount_wait() - self.fs.mds_asok(["flush", "journal"]) - - tag = "mytag" - - # Execute tagging forward scrub - self.fs.mds_asok(["tag", "path", "/parentdir", tag]) - # Wait for completion - import time - time.sleep(10) - # FIXME watching clog isn't a nice mechanism for this, once we have a ScrubMap we'll - # watch that instead - - # Check that dirs were tagged - for dirpath in ["./parentdir", "./parentdir/childdir"]: - self.assertTagged(inos[dirpath], tag, self.fs.get_metadata_pool_name()) - - # Check that files were tagged - for filepath in ["./parentdir/pfile", "./parentdir/childdir/cfile"]: - self.assertTagged(inos[filepath], tag, self.fs.get_data_pool_name()) - - # This guy wasn't in the tag path, shouldn't have been tagged - self.assertUntagged(inos["./rfile"]) - - def assertUntagged(self, ino): - file_obj_name = "{0:x}.00000000".format(ino) - with self.assertRaises(CommandFailedError): - self._read_str_xattr( - self.fs.get_data_pool_name(), - file_obj_name, - "scrub_tag" - ) - - def assertTagged(self, ino, tag, pool): - file_obj_name = "{0:x}.00000000".format(ino) - wrote = self._read_str_xattr( - pool, - file_obj_name, - "scrub_tag" - ) - self.assertEqual(wrote, tag) - - def _validate_linkage(self, expected): - inos = self._get_paths_to_ino() - try: - self.assertDictEqual(inos, expected) - except AssertionError: - log.error("Expected: {0}".format(json.dumps(expected, indent=2))) - log.error("Actual: {0}".format(json.dumps(inos, indent=2))) - raise - - def test_orphan_scan(self): - # Create some files whose metadata we will flush - self.mount_a.run_python(dedent(""" - import os - mount_point = "{mount_point}" - parent = os.path.join(mount_point, "parent") - os.mkdir(parent) - flushed = os.path.join(parent, "flushed") - os.mkdir(flushed) - for f in ["alpha", "bravo", "charlie"]: - open(os.path.join(flushed, f), 'w').write(f) - """.format(mount_point=self.mount_a.mountpoint))) - - inos = self._get_paths_to_ino() - - # Flush journal - # Umount before flush to avoid cap releases putting - # things we 
don't want in the journal later. - self.mount_a.umount_wait() - self.fs.mds_asok(["flush", "journal"]) - - # Create a new inode that's just in the log, i.e. would - # look orphaned to backward scan if backward scan wisnae - # respectin' tha scrub_tag xattr. - self.mount_a.mount() - self.mount_a.run_shell(["mkdir", "parent/unflushed"]) - self.mount_a.run_shell(["dd", "if=/dev/urandom", - "of=./parent/unflushed/jfile", - "bs=1M", "count=8"]) - inos["./parent/unflushed"] = self.mount_a.path_to_ino("./parent/unflushed") - inos["./parent/unflushed/jfile"] = self.mount_a.path_to_ino("./parent/unflushed/jfile") - self.mount_a.umount_wait() - - # Orphan an inode by deleting its dentry - # Our victim will be.... bravo. - self.mount_a.umount_wait() - self.fs.mds_stop() - self.fs.mds_fail() - self.fs.set_ceph_conf('mds', 'mds verify scatter', False) - self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False) - frag_obj_id = "{0:x}.00000000".format(inos["./parent/flushed"]) - self.fs.rados(["rmomapkey", frag_obj_id, "bravo_head"]) - - self.fs.mds_restart() - self.fs.wait_for_daemons() - - # See that the orphaned file is indeed missing from a client's POV - self.mount_a.mount() - damaged_state = self._get_paths_to_ino() - self.assertNotIn("./parent/flushed/bravo", damaged_state) - self.mount_a.umount_wait() - - # Run a tagging forward scrub - tag = "mytag123" - self.fs.mds_asok(["tag", "path", "/parent", tag]) - - # See that the orphan wisnae tagged - self.assertUntagged(inos['./parent/flushed/bravo']) - - # See that the flushed-metadata-and-still-present files are tagged - self.assertTagged(inos['./parent/flushed/alpha'], tag, self.fs.get_data_pool_name()) - self.assertTagged(inos['./parent/flushed/charlie'], tag, self.fs.get_data_pool_name()) - - # See that journalled-but-not-flushed file *was* tagged - self.assertTagged(inos['./parent/unflushed/jfile'], tag, self.fs.get_data_pool_name()) - - # Run cephfs-data-scan targeting only orphans - self.fs.mds_stop() - self.fs.mds_fail() - self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()]) - self.fs.data_scan([ - "scan_inodes", - "--filter-tag", tag, - self.fs.get_data_pool_name() - ]) - - # After in-place injection stats should be kosher again - self.fs.set_ceph_conf('mds', 'mds verify scatter', True) - self.fs.set_ceph_conf('mds', 'mds debug scatterstat', True) - - # And we should have all the same linkage we started with, - # and no lost+found, and no extra inodes! 
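# The scrub_tag xattr checked via _read_str_xattr() earlier is a
# ceph-encoded string: a 32-bit length followed by that many bytes. A
# self-contained sketch of the decode (little-endian assumed here to
# match the x86 test nodes; the original uses native byte order):

import struct

def decode_ceph_string(blob):
    (strlen,) = struct.unpack("<i", blob[0:4])
    return blob[4:4 + strlen]

assert decode_ceph_string(struct.pack("<i", 5) + b"mytag") == b"mytag"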
- self.fs.mds_restart() - self.fs.wait_for_daemons() - self.mount_a.mount() - self._validate_linkage(inos) - - def _stash_inotable(self): - # Get all active ranks - ranks = self.fs.get_all_mds_rank() - - inotable_dict = {} - for rank in ranks: - inotable_oid = "mds{rank:d}_".format(rank=rank) + "inotable" - print "Trying to fetch inotable object: " + inotable_oid - - #self.fs.get_metadata_object("InoTable", "mds0_inotable") - inotable_raw = self.fs.get_metadata_object_raw(inotable_oid) - inotable_dict[inotable_oid] = inotable_raw - return inotable_dict - - def test_inotable_sync(self): - self.mount_a.write_n_mb("file1_sixmegs", 6) - - # Flush journal - self.mount_a.umount_wait() - self.fs.mds_asok(["flush", "journal"]) - - inotable_copy = self._stash_inotable() - - self.mount_a.mount() - - self.mount_a.write_n_mb("file2_sixmegs", 6) - self.mount_a.write_n_mb("file3_sixmegs", 6) - - inos = self._get_paths_to_ino() - - # Flush journal - self.mount_a.umount_wait() - self.fs.mds_asok(["flush", "journal"]) - - self.mount_a.umount_wait() - - with self.assert_cluster_log("inode table repaired", invert_match=True): - self.fs.mds_asok(["scrub_path", "/", "repair", "recursive"]) - - self.mds_cluster.mds_stop() - self.mds_cluster.mds_fail() - - # Truncate the journal (to ensure the inotable on disk - # is all that will be in the InoTable in memory) - - self.fs.journal_tool(["event", "splice", - "--inode={0}".format(inos["./file2_sixmegs"]), "summary"]) - - self.fs.journal_tool(["event", "splice", - "--inode={0}".format(inos["./file3_sixmegs"]), "summary"]) - - # Revert to old inotable. - for key, value in inotable_copy.iteritems(): - self.fs.put_metadata_object_raw(key, value) - - self.mds_cluster.mds_restart() - self.fs.wait_for_daemons() - - with self.assert_cluster_log("inode table repaired"): - self.fs.mds_asok(["scrub_path", "/", "repair", "recursive"]) - - self.mds_cluster.mds_stop() - table_text = self.fs.table_tool(["0", "show", "inode"]) - table = json.loads(table_text) - self.assertGreater( - table['0']['data']['inotable']['free'][0]['start'], - inos['./file3_sixmegs']) - - def test_backtrace_repair(self): - """ - That the MDS can repair an inodes backtrace in the data pool - if it is found to be damaged. 
- """ - # Create a file for subsequent checks - self.mount_a.run_shell(["mkdir", "parent_a"]) - self.mount_a.run_shell(["touch", "parent_a/alpha"]) - file_ino = self.mount_a.path_to_ino("parent_a/alpha") - - # That backtrace and layout are written after initial flush - self.fs.mds_asok(["flush", "journal"]) - backtrace = self.fs.read_backtrace(file_ino) - self.assertEqual(['alpha', 'parent_a'], - [a['dname'] for a in backtrace['ancestors']]) - - # Go corrupt the backtrace - self.fs._write_data_xattr(file_ino, "parent", - "oh i'm sorry did i overwrite your xattr?") - - with self.assert_cluster_log("bad backtrace on inode"): - self.fs.mds_asok(["scrub_path", "/", "repair", "recursive"]) - self.fs.mds_asok(["flush", "journal"]) - backtrace = self.fs.read_backtrace(file_ino) - self.assertEqual(['alpha', 'parent_a'], - [a['dname'] for a in backtrace['ancestors']]) diff --git a/src/ceph/qa/tasks/cephfs/test_fragment.py b/src/ceph/qa/tasks/cephfs/test_fragment.py deleted file mode 100644 index a62ef74..0000000 --- a/src/ceph/qa/tasks/cephfs/test_fragment.py +++ /dev/null @@ -1,232 +0,0 @@ - - -from tasks.cephfs.cephfs_test_case import CephFSTestCase -from teuthology.orchestra import run - -import logging -log = logging.getLogger(__name__) - - -class TestFragmentation(CephFSTestCase): - CLIENTS_REQUIRED = 1 - MDSS_REQUIRED = 1 - - def get_splits(self): - return self.fs.mds_asok(['perf', 'dump', 'mds'])['mds']['dir_split'] - - def get_merges(self): - return self.fs.mds_asok(['perf', 'dump', 'mds'])['mds']['dir_merge'] - - def get_dir_ino(self, path): - dir_cache = self.fs.read_cache(path, 0) - dir_ino = None - dir_inono = self.mount_a.path_to_ino(path.strip("/")) - for ino in dir_cache: - if ino['ino'] == dir_inono: - dir_ino = ino - break - self.assertIsNotNone(dir_ino) - return dir_ino - - def _configure(self, **kwargs): - """ - Apply kwargs as MDS configuration settings, enable dirfrags - and restart the MDSs. - """ - kwargs['mds_bal_frag'] = "true" - - for k, v in kwargs.items(): - self.ceph_cluster.set_ceph_conf("mds", k, v.__str__()) - - self.fs.set_allow_dirfrags(True) - - self.mds_cluster.mds_fail_restart() - self.fs.wait_for_daemons() - - def test_oversize(self): - """ - That a directory is split when it becomes too large. - """ - - split_size = 20 - merge_size = 5 - - self._configure( - mds_bal_split_size=split_size, - mds_bal_merge_size=merge_size, - mds_bal_split_bits=1 - ) - - self.assertEqual(self.get_splits(), 0) - - self.mount_a.create_n_files("splitdir/file", split_size + 1) - - self.wait_until_true( - lambda: self.get_splits() == 1, - timeout=30 - ) - - frags = self.get_dir_ino("/splitdir")['dirfrags'] - self.assertEqual(len(frags), 2) - self.assertEqual(frags[0]['dirfrag'], "0x10000000000.0*") - self.assertEqual(frags[1]['dirfrag'], "0x10000000000.1*") - self.assertEqual( - sum([len(f['dentries']) for f in frags]), - split_size + 1 - ) - - self.assertEqual(self.get_merges(), 0) - - self.mount_a.run_shell(["rm", "-f", run.Raw("splitdir/file*")]) - - self.wait_until_true( - lambda: self.get_merges() == 1, - timeout=30 - ) - - self.assertEqual(len(self.get_dir_ino("/splitdir")["dirfrags"]), 1) - - def test_rapid_creation(self): - """ - That the fast-splitting limit of 1.5x normal limit is - applied when creating dentries quickly. 
- """ - - split_size = 100 - merge_size = 1 - - self._configure( - mds_bal_split_size=split_size, - mds_bal_merge_size=merge_size, - mds_bal_split_bits=3, - mds_bal_fragment_size_max=int(split_size * 1.5 + 2) - ) - - # We test this only at a single split level. If a client was sending - # IO so fast that it hit a second split before the first split - # was complete, it could violate mds_bal_fragment_size_max -- there - # is a window where the child dirfrags of a split are unfrozen - # (so they can grow), but still have STATE_FRAGMENTING (so they - # can't be split). - - # By writing 4x the split size when the split bits are set - # to 3 (i.e. 4-ways), I am reasonably sure to see precisely - # one split. The test is to check whether that split - # happens soon enough that the client doesn't exceed - # 2x the split_size (the "immediate" split mode should - # kick in at 1.5x the split size). - - self.assertEqual(self.get_splits(), 0) - self.mount_a.create_n_files("splitdir/file", split_size * 4) - self.wait_until_equal( - self.get_splits, - 1, - reject_fn=lambda s: s > 1, - timeout=30 - ) - - def test_deep_split(self): - """ - That when the directory grows many times larger than split size, - the fragments get split again. - """ - - split_size = 100 - merge_size = 1 # i.e. don't merge frag unless its empty - split_bits = 1 - - branch_factor = 2**split_bits - - # Arbitrary: how many levels shall we try fragmenting before - # ending the test? - max_depth = 5 - - self._configure( - mds_bal_split_size=split_size, - mds_bal_merge_size=merge_size, - mds_bal_split_bits=split_bits - ) - - # Each iteration we will create another level of fragments. The - # placement of dentries into fragments is by hashes (i.e. pseudo - # random), so we rely on statistics to get the behaviour that - # by writing about 1.5x as many dentries as the split_size times - # the number of frags, we will get them all to exceed their - # split size and trigger a split. - depth = 0 - files_written = 0 - splits_expected = 0 - while depth < max_depth: - log.info("Writing files for depth {0}".format(depth)) - target_files = branch_factor**depth * int(split_size * 1.5) - create_files = target_files - files_written - - self.ceph_cluster.mon_manager.raw_cluster_cmd("log", - "{0} Writing {1} files (depth={2})".format( - self.__class__.__name__, create_files, depth - )) - self.mount_a.create_n_files("splitdir/file_{0}".format(depth), - create_files) - self.ceph_cluster.mon_manager.raw_cluster_cmd("log", - "{0} Done".format(self.__class__.__name__)) - - files_written += create_files - log.info("Now have {0} files".format(files_written)) - - splits_expected += branch_factor**depth - log.info("Waiting to see {0} splits".format(splits_expected)) - try: - self.wait_until_equal( - self.get_splits, - splits_expected, - timeout=30, - reject_fn=lambda x: x > splits_expected - ) - - frags = self.get_dir_ino("/splitdir")['dirfrags'] - self.assertEqual(len(frags), branch_factor**(depth+1)) - self.assertEqual( - sum([len(f['dentries']) for f in frags]), - target_files - ) - except: - # On failures, log what fragmentation we actually ended - # up with. This block is just for logging, at the end - # we raise the exception again. 
- frags = self.get_dir_ino("/splitdir")['dirfrags'] - log.info("depth={0} splits_expected={1} files_written={2}".format( - depth, splits_expected, files_written - )) - log.info("Dirfrags:") - for f in frags: - log.info("{0}: {1}".format( - f['dirfrag'], len(f['dentries']) - )) - raise - - depth += 1 - - # Remember the inode number because we will be checking for - # objects later. - dir_inode_no = self.mount_a.path_to_ino("splitdir") - - self.mount_a.run_shell(["rm", "-rf", "splitdir/"]) - self.mount_a.umount_wait() - - self.fs.mds_asok(['flush', 'journal']) - - # Wait for all strays to purge - self.wait_until_equal( - lambda: self.fs.mds_asok(['perf', 'dump', 'mds_cache'] - )['mds_cache']['num_strays'], - 0, - timeout=1200 - ) - # Check that the metadata pool objects for all the myriad - # child fragments are gone - metadata_objs = self.fs.rados(["ls"]) - frag_objs = [] - for o in metadata_objs: - if o.startswith("{0:x}.".format(dir_inode_no)): - frag_objs.append(o) - self.assertListEqual(frag_objs, []) diff --git a/src/ceph/qa/tasks/cephfs/test_full.py b/src/ceph/qa/tasks/cephfs/test_full.py deleted file mode 100644 index e69ccb3..0000000 --- a/src/ceph/qa/tasks/cephfs/test_full.py +++ /dev/null @@ -1,414 +0,0 @@ - - -import json -import logging -import os -from textwrap import dedent -import time -from teuthology.orchestra.run import CommandFailedError -from tasks.cephfs.fuse_mount import FuseMount -from tasks.cephfs.cephfs_test_case import CephFSTestCase - - -log = logging.getLogger(__name__) - - -class FullnessTestCase(CephFSTestCase): - CLIENTS_REQUIRED = 2 - - # Subclasses define whether they're filling whole cluster or just data pool - data_only = False - - # Subclasses define how many bytes should be written to achieve fullness - pool_capacity = None - fill_mb = None - - # Subclasses define what fullness means to them - def is_full(self): - raise NotImplementedError() - - def setUp(self): - CephFSTestCase.setUp(self) - - # These tests just use a single active MDS throughout, so remember its ID - # for use in mds_asok calls - self.active_mds_id = self.fs.get_active_names()[0] - - # Capture the initial OSD map epoch for later use - self.initial_osd_epoch = json.loads( - self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json").strip() - )['epoch'] - - # Check the initial barrier epoch on the MDS: this should be - # set to the latest map at MDS startup. We do this check in - # setUp to get in there before subclasses might touch things - # in their own setUp functions. - self.assertGreaterEqual(self.fs.mds_asok(["status"], mds_id=self.active_mds_id)['osdmap_epoch_barrier'], - self.initial_osd_epoch) - - def test_barrier(self): - """ - That when an OSD epoch barrier is set on an MDS, subsequently - issued capabilities cause clients to update their OSD map to that - epoch. - """ - - # Sync up clients with initial MDS OSD map barrier - self.mount_a.open_no_data("foo") - self.mount_b.open_no_data("bar") - - # Grab mounts' initial OSD epochs: later we will check that - # it hasn't advanced beyond this point. 
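# The barrier mechanism under test boils down to: a client must not issue
# OSD I/O using a newly granted capability until its own OSD map epoch
# has reached the barrier the MDS attached to that grant. A toy model of
# that rule (assumed semantics for illustration, not the real client
# code; fetch_osd_map stands in for Objecter::_maybe_request_map()):

class ToyClient(object):
    def __init__(self, osd_epoch):
        self.osd_epoch = osd_epoch

    def handle_cap_grant(self, epoch_barrier):
        if self.osd_epoch < epoch_barrier:
            self.osd_epoch = self.fetch_osd_map(epoch_barrier)

    def fetch_osd_map(self, at_least):
        return at_least

client = ToyClient(osd_epoch=10)
client.handle_cap_grant(epoch_barrier=12)
assert client.osd_epoch == 12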
- mount_a_initial_epoch = self.mount_a.get_osd_epoch()[0] - mount_b_initial_epoch = self.mount_b.get_osd_epoch()[0] - - # Freshly mounted at start of test, should be up to date with OSD map - self.assertGreaterEqual(mount_a_initial_epoch, self.initial_osd_epoch) - self.assertGreaterEqual(mount_b_initial_epoch, self.initial_osd_epoch) - - # Set and unset a flag to cause OSD epoch to increment - self.fs.mon_manager.raw_cluster_cmd("osd", "set", "pause") - self.fs.mon_manager.raw_cluster_cmd("osd", "unset", "pause") - - out = self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json").strip() - new_epoch = json.loads(out)['epoch'] - self.assertNotEqual(self.initial_osd_epoch, new_epoch) - - # Do a metadata operation on clients, witness that they end up with - # the old OSD map from startup time (nothing has prompted client - # to update its map) - self.mount_a.open_no_data("alpha") - self.mount_b.open_no_data("bravo1") - - # Sleep long enough that if the OSD map was propagating it would - # have done so (this is arbitrary because we are 'waiting' for something - # to *not* happen). - time.sleep(30) - - mount_a_epoch, mount_a_barrier = self.mount_a.get_osd_epoch() - self.assertEqual(mount_a_epoch, mount_a_initial_epoch) - mount_b_epoch, mount_b_barrier = self.mount_b.get_osd_epoch() - self.assertEqual(mount_b_epoch, mount_b_initial_epoch) - - # Set a barrier on the MDS - self.fs.mds_asok(["osdmap", "barrier", new_epoch.__str__()], mds_id=self.active_mds_id) - - # Do an operation on client B, witness that it ends up with - # the latest OSD map from the barrier. This shouldn't generate any - # cap revokes to A because B was already the last one to touch - # a file in root. - self.mount_b.run_shell(["touch", "bravo2"]) - self.mount_b.open_no_data("bravo2") - - # Some time passes here because the metadata part of the operation - # completes immediately, while the resulting OSD map update happens - # asynchronously (it's an Objecter::_maybe_request_map) as a result - # of seeing the new epoch barrier. - self.wait_until_equal( - lambda: self.mount_b.get_osd_epoch(), - (new_epoch, new_epoch), - 30, - lambda x: x[0] > new_epoch or x[1] > new_epoch) - - # ...and none of this should have affected the oblivious mount a, - # because it wasn't doing any data or metadata IO - mount_a_epoch, mount_a_barrier = self.mount_a.get_osd_epoch() - self.assertEqual(mount_a_epoch, mount_a_initial_epoch) - - def _data_pool_name(self): - data_pool_names = self.fs.get_data_pool_names() - if len(data_pool_names) > 1: - raise RuntimeError("This test can't handle multiple data pools") - else: - return data_pool_names[0] - - def _test_full(self, easy_case): - """ - - That a client trying to write data to a file is prevented - from doing so with an -EFULL result - - That they are also prevented from creating new files by the MDS. - - That they may delete another file to get the system healthy again - - :param easy_case: if true, delete a successfully written file to - free up space. else, delete the file that experienced - the failed write. - """ - - osd_mon_report_interval_max = int(self.fs.get_config("osd_mon_report_interval_max", service_type='osd')) - - log.info("Writing {0}MB should fill this cluster".format(self.fill_mb)) - - # Fill up the cluster. 
This dd may or may not fail, as it depends on - # how soon the cluster recognises its own fullness - self.mount_a.write_n_mb("large_file_a", self.fill_mb / 2) - try: - self.mount_a.write_n_mb("large_file_b", self.fill_mb / 2) - except CommandFailedError: - log.info("Writing file B failed (full status happened already)") - assert self.is_full() - else: - log.info("Writing file B succeeded (full status will happen soon)") - self.wait_until_true(lambda: self.is_full(), - timeout=osd_mon_report_interval_max * 5) - - # Attempting to write more data should give me ENOSPC - with self.assertRaises(CommandFailedError) as ar: - self.mount_a.write_n_mb("large_file_b", 50, seek=self.fill_mb / 2) - self.assertEqual(ar.exception.exitstatus, 1) # dd returns 1 on "No space" - - # Wait for the MDS to see the latest OSD map so that it will reliably - # be applying the policy of rejecting non-deletion metadata operations - # while in the full state. - osd_epoch = json.loads(self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['epoch'] - self.wait_until_true( - lambda: self.fs.mds_asok(['status'], mds_id=self.active_mds_id)['osdmap_epoch'] >= osd_epoch, - timeout=10) - - if not self.data_only: - with self.assertRaises(CommandFailedError): - self.mount_a.write_n_mb("small_file_1", 0) - - # Clear out some space - if easy_case: - self.mount_a.run_shell(['rm', '-f', 'large_file_a']) - self.mount_a.run_shell(['rm', '-f', 'large_file_b']) - else: - # In the hard case it is the file that filled the system. - # Before the new #7317 (ENOSPC, epoch barrier) changes, this - # would fail because the last objects written would be - # stuck in the client cache as objecter operations. - self.mount_a.run_shell(['rm', '-f', 'large_file_b']) - self.mount_a.run_shell(['rm', '-f', 'large_file_a']) - - # Here we are waiting for two things to happen: - # * The MDS to purge the stray folder and execute object deletions - # * The OSDs to inform the mon that they are no longer full - self.wait_until_true(lambda: not self.is_full(), - timeout=osd_mon_report_interval_max * 5) - - # Wait for the MDS to see the latest OSD map so that it will reliably - # be applying the free space policy - osd_epoch = json.loads(self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['epoch'] - self.wait_until_true( - lambda: self.fs.mds_asok(['status'], mds_id=self.active_mds_id)['osdmap_epoch'] >= osd_epoch, - timeout=10) - - # Now I should be able to write again - self.mount_a.write_n_mb("large_file", 50, seek=0) - - # Ensure that the MDS keeps its OSD epoch barrier across a restart - - def test_full_different_file(self): - self._test_full(True) - - def test_full_same_file(self): - self._test_full(False) - - def _remote_write_test(self, template): - """ - Run some remote python in a way that's useful for - testing free space behaviour (see test_* methods using this) - """ - file_path = os.path.join(self.mount_a.mountpoint, "full_test_file") - - # Enough to trip the full flag - osd_mon_report_interval_max = int(self.fs.get_config("osd_mon_report_interval_max", service_type='osd')) - mon_tick_interval = int(self.fs.get_config("mon_tick_interval", service_type="mon")) - - # Sufficient data to cause RADOS cluster to go 'full' - log.info("pool capacity {0}, {1}MB should be enough to fill it".format(self.pool_capacity, self.fill_mb)) - - # Long enough for RADOS cluster to notice it is full and set flag on mons - # (report_interval for mon to learn PG stats, tick interval for it to update OSD map, - # factor of 1.5 
for I/O + network latency in committing OSD map and distributing it - # to the OSDs) - full_wait = (osd_mon_report_interval_max + mon_tick_interval) * 1.5 - - # Configs for this test should bring this setting down in order to - # run reasonably quickly - if osd_mon_report_interval_max > 10: - log.warn("This test may run rather slowly unless you decrease" - "osd_mon_report_interval_max (5 is a good setting)!") - - self.mount_a.run_python(template.format( - fill_mb=self.fill_mb, - file_path=file_path, - full_wait=full_wait, - is_fuse=isinstance(self.mount_a, FuseMount) - )) - - def test_full_fclose(self): - # A remote script which opens a file handle, fills up the filesystem, and then - # checks that ENOSPC errors on buffered writes appear correctly as errors in fsync - remote_script = dedent(""" - import time - import datetime - import subprocess - import os - - # Write some buffered data through before going full, all should be well - print "writing some data through which we expect to succeed" - bytes = 0 - f = os.open("{file_path}", os.O_WRONLY | os.O_CREAT) - bytes += os.write(f, 'a' * 4096) - os.fsync(f) - print "fsync'ed data successfully, will now attempt to fill fs" - - # Okay, now we're going to fill up the filesystem, and then keep - # writing until we see an error from fsync. As long as we're doing - # buffered IO, the error should always only appear from fsync and not - # from write - full = False - - for n in range(0, {fill_mb}): - bytes += os.write(f, 'x' * 1024 * 1024) - print "wrote bytes via buffered write, may repeat" - print "done writing bytes" - - # OK, now we should sneak in under the full condition - # due to the time it takes the OSDs to report to the - # mons, and get a successful fsync on our full-making data - os.fsync(f) - print "successfully fsync'ed prior to getting full state reported" - - # Now wait for the full flag to get set so that our - # next flush IO will fail - time.sleep(30) - - # A buffered IO, should succeed - print "starting buffered write we expect to succeed" - os.write(f, 'x' * 4096) - print "wrote, now waiting 30s and then doing a close we expect to fail" - - # Wait long enough for a background flush that should fail - time.sleep(30) - - if {is_fuse}: - # ...and check that the failed background flush is reflected in fclose - try: - os.close(f) - except OSError: - print "close() returned an error as expected" - else: - raise RuntimeError("close() failed to raise error") - else: - # The kernel cephfs client does not raise errors on fclose - os.close(f) - - os.unlink("{file_path}") - """) - self._remote_write_test(remote_script) - - def test_full_fsync(self): - """ - That when the full flag is encountered during asynchronous - flushes, such that an fwrite() succeeds but an fsync/fclose() - should return the ENOSPC error. - """ - - # A remote script which opens a file handle, fills up the filesystem, and then - # checks that ENOSPC errors on buffered writes appear correctly as errors in fsync - remote_script = dedent(""" - import time - import datetime - import subprocess - import os - - # Write some buffered data through before going full, all should be well - print "writing some data through which we expect to succeed" - bytes = 0 - f = os.open("{file_path}", os.O_WRONLY | os.O_CREAT) - bytes += os.write(f, 'a' * 4096) - os.fsync(f) - print "fsync'ed data successfully, will now attempt to fill fs" - - # Okay, now we're going to fill up the filesystem, and then keep - # writing until we see an error from fsync. 
As long as we're doing - # buffered IO, the error should always only appear from fsync and not - # from write - full = False - - for n in range(0, {fill_mb} + 1): - try: - bytes += os.write(f, 'x' * 1024 * 1024) - print "wrote bytes via buffered write, moving on to fsync" - except OSError as e: - print "Unexpected error %s from write() instead of fsync()" % e - raise - - try: - os.fsync(f) - print "fsync'ed successfully" - except OSError as e: - print "Reached fullness after %.2f MB" % (bytes / (1024.0 * 1024.0)) - full = True - break - else: - print "Not full yet after %.2f MB" % (bytes / (1024.0 * 1024.0)) - - if n > {fill_mb} * 0.8: - # Be cautious in the last region where we expect to hit - # the full condition, so that we don't overshoot too dramatically - print "sleeping a bit as we've exceeded 80% of our expected full ratio" - time.sleep({full_wait}) - - if not full: - raise RuntimeError("Failed to reach fullness after writing %d bytes" % bytes) - - # close() should not raise an error because we already caught it in - # fsync. There shouldn't have been any more writeback errors - # since then because all IOs got cancelled on the full flag. - print "calling close" - os.close(f) - print "close() did not raise error" - - os.unlink("{file_path}") - """) - - self._remote_write_test(remote_script) - - -class TestQuotaFull(FullnessTestCase): - """ - Test per-pool fullness, which indicates quota limits exceeded - """ - pool_capacity = 1024 * 1024 * 32 # arbitrary low-ish limit - fill_mb = pool_capacity / (1024 * 1024) - - # We are only testing quota handling on the data pool, not the metadata - # pool. - data_only = True - - def setUp(self): - super(TestQuotaFull, self).setUp() - - pool_name = self.fs.get_data_pool_name() - self.fs.mon_manager.raw_cluster_cmd("osd", "pool", "set-quota", pool_name, - "max_bytes", "{0}".format(self.pool_capacity)) - - def is_full(self): - return self.fs.is_pool_full(self.fs.get_data_pool_name()) - - -class TestClusterFull(FullnessTestCase): - """ - Test cluster-wide fullness, which indicates that an OSD has become too full - """ - pool_capacity = None - REQUIRE_MEMSTORE = True - - def setUp(self): - super(TestClusterFull, self).setUp() - - if self.pool_capacity is None: - # This is a hack to overcome weird fluctuations in the reported - # `max_avail` attribute of pools that sometimes occurs in between - # tests (reason as yet unclear, but this dodges the issue) - TestClusterFull.pool_capacity = self.fs.get_pool_df(self._data_pool_name())['max_avail'] - TestClusterFull.fill_mb = int(1.05 * (self.pool_capacity / (1024.0 * 1024.0))) - - def is_full(self): - return self.fs.is_full() - -# Hide the parent class so that unittest.loader doesn't try to run it. 
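# An equivalent way to keep an abstract base out of unittest discovery,
# shown for contrast with the del-from-globals trick used below (a plain
# mixin that is not itself a TestCase; illustrative, not what this file
# does):

import unittest

class _FullnessMixin(object):
    # Not a TestCase subclass, so the loader never collects it directly.
    fill_mb = None

    def check_configured(self):
        assert self.fill_mb is not None

class TestConfigured(_FullnessMixin, unittest.TestCase):
    fill_mb = 32

    def test_configured(self):
        self.check_configured()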
-del globals()['FullnessTestCase']
diff --git a/src/ceph/qa/tasks/cephfs/test_journal_migration.py b/src/ceph/qa/tasks/cephfs/test_journal_migration.py
deleted file mode 100644
index 64fe939..0000000
--- a/src/ceph/qa/tasks/cephfs/test_journal_migration.py
+++ /dev/null
@@ -1,118 +0,0 @@
-
-from StringIO import StringIO
-from tasks.cephfs.cephfs_test_case import CephFSTestCase
-from tasks.workunit import task as workunit
-
-JOURNAL_FORMAT_LEGACY = 0
-JOURNAL_FORMAT_RESILIENT = 1
-
-
-class TestJournalMigration(CephFSTestCase):
- CLIENTS_REQUIRED = 1
- MDSS_REQUIRED = 2
-
- def test_journal_migration(self):
- old_journal_version = JOURNAL_FORMAT_LEGACY
- new_journal_version = JOURNAL_FORMAT_RESILIENT
-
- # Pick out two daemons to use
- mds_a, mds_b = sorted(self.mds_cluster.mds_ids[0:2])
-
- self.mount_a.umount_wait()
- self.fs.mds_stop()
-
- # Enable standby replay, to cover the bug case #8811 where
- # a standby replay might mistakenly end up trying to rewrite
- # the journal at the same time as an active daemon.
- self.fs.set_ceph_conf('mds', 'mds standby replay', "true")
- self.fs.set_ceph_conf('mds', 'mds standby for rank', "0")
-
- # Create a filesystem using the older journal format.
- self.fs.set_ceph_conf('mds', 'mds journal format', old_journal_version)
- self.fs.recreate()
- self.fs.mds_restart(mds_id=mds_a)
- self.fs.wait_for_daemons()
- self.assertEqual(self.fs.get_active_names(), [mds_a])
-
- def replay_names():
- return [s['name']
- for s in self.fs.status().get_replays(fscid = self.fs.id)]
-
- # Start the standby and wait for it to come up
- self.fs.mds_restart(mds_id=mds_b)
- self.wait_until_equal(
- replay_names,
- [mds_b],
- timeout = 30)
-
- # Do some client work so that the log is populated with something.
- with self.mount_a.mounted():
- self.mount_a.create_files()
- self.mount_a.check_files() # sanity, this should always pass
-
- # Run a more substantial workunit so that the length of the log to be
- # converted spans at least a few segments
- workunit(self.ctx, {
- 'clients': {
- "client.{0}".format(self.mount_a.client_id): ["suites/fsstress.sh"],
- },
- "timeout": "3h"
- })
-
- # Modify the ceph.conf to ask the MDS to use the new journal format.
- self.fs.set_ceph_conf('mds', 'mds journal format', new_journal_version)
-
- # Restart the MDS.
- self.fs.mds_fail_restart(mds_id=mds_a)
- self.fs.mds_fail_restart(mds_id=mds_b)
-
- # This ensures that all daemons come up into a valid state
- self.fs.wait_for_daemons()
-
- # Check that files created in the initial client workload are still visible
- # in a client mount.
- with self.mount_a.mounted():
- self.mount_a.check_files()
-
- # Verify that the journal really has been rewritten.
- journal_version = self.fs.get_journal_version()
- if journal_version != new_journal_version:
- raise RuntimeError("Journal was not upgraded, version should be {0} but is {1}".format(
- new_journal_version, journal_version
- ))
-
- # Verify that cephfs-journal-tool can now read the rewritten journal
- inspect_out = self.fs.journal_tool(["journal", "inspect"])
- if not inspect_out.endswith(": OK"):
- raise RuntimeError("Unexpected journal-tool result: '{0}'".format(
- inspect_out
- ))
-
- self.fs.journal_tool(["event", "get", "json", "--path", "/tmp/journal.json"])
- p = self.fs.tool_remote.run(
- args=[
- "python",
- "-c",
- "import json; print len(json.load(open('/tmp/journal.json')))"
- ],
- stdout=StringIO())
- event_count = int(p.stdout.getvalue().strip())
- if event_count < 1000:
- # Approximate value of "lots", expected from having run fsstress
- raise RuntimeError("Unexpectedly few journal events: {0}".format(event_count))
-
- # Do some client work to check that writing the log is still working
- with self.mount_a.mounted():
- workunit(self.ctx, {
- 'clients': {
- "client.{0}".format(self.mount_a.client_id): ["fs/misc/trivial_sync.sh"],
- },
- "timeout": "3h"
- })
-
- # Check that both an active and a standby replay are still up
- self.assertEqual(len(replay_names()), 1)
- self.assertEqual(len(self.fs.get_active_names()), 1)
- self.assertTrue(self.mds_cluster.mds_daemons[mds_a].running())
- self.assertTrue(self.mds_cluster.mds_daemons[mds_b].running())
-
diff --git a/src/ceph/qa/tasks/cephfs/test_journal_repair.py b/src/ceph/qa/tasks/cephfs/test_journal_repair.py
deleted file mode 100644
index 62cbbb0..0000000
--- a/src/ceph/qa/tasks/cephfs/test_journal_repair.py
+++ /dev/null
@@ -1,443 +0,0 @@
-
-"""
-Test our tools for recovering the content of damaged journals
-"""
-
-import json
-import logging
-from textwrap import dedent
-import time
-
-from teuthology.exceptions import CommandFailedError, ConnectionLostError
-from tasks.cephfs.filesystem import ObjectNotFound, ROOT_INO
-from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology
-from tasks.workunit import task as workunit
-
-log = logging.getLogger(__name__)
-
-
-class TestJournalRepair(CephFSTestCase):
- MDSS_REQUIRED = 2
-
- def test_inject_to_empty(self):
- """
- That when some dentries are in the journal but nothing is in
- the backing store, we correctly populate the backing store
- from the journalled dentries.
- """ - - # Inject metadata operations - self.mount_a.run_shell(["touch", "rootfile"]) - self.mount_a.run_shell(["mkdir", "subdir"]) - self.mount_a.run_shell(["touch", "subdir/subdirfile"]) - # There are several different paths for handling hardlinks, depending - # on whether an existing dentry (being overwritten) is also a hardlink - self.mount_a.run_shell(["mkdir", "linkdir"]) - - # Test inode -> remote transition for a dentry - self.mount_a.run_shell(["touch", "linkdir/link0"]) - self.mount_a.run_shell(["rm", "-f", "linkdir/link0"]) - self.mount_a.run_shell(["ln", "subdir/subdirfile", "linkdir/link0"]) - - # Test nothing -> remote transition - self.mount_a.run_shell(["ln", "subdir/subdirfile", "linkdir/link1"]) - - # Test remote -> inode transition - self.mount_a.run_shell(["ln", "subdir/subdirfile", "linkdir/link2"]) - self.mount_a.run_shell(["rm", "-f", "linkdir/link2"]) - self.mount_a.run_shell(["touch", "linkdir/link2"]) - - # Test remote -> diff remote transition - self.mount_a.run_shell(["ln", "subdir/subdirfile", "linkdir/link3"]) - self.mount_a.run_shell(["rm", "-f", "linkdir/link3"]) - self.mount_a.run_shell(["ln", "rootfile", "linkdir/link3"]) - - # Test an empty directory - self.mount_a.run_shell(["mkdir", "subdir/subsubdir"]) - self.mount_a.run_shell(["sync"]) - - # Before we unmount, make a note of the inode numbers, later we will - # check that they match what we recover from the journal - rootfile_ino = self.mount_a.path_to_ino("rootfile") - subdir_ino = self.mount_a.path_to_ino("subdir") - linkdir_ino = self.mount_a.path_to_ino("linkdir") - subdirfile_ino = self.mount_a.path_to_ino("subdir/subdirfile") - subsubdir_ino = self.mount_a.path_to_ino("subdir/subsubdir") - - self.mount_a.umount_wait() - - # Stop the MDS - self.fs.mds_stop() - self.fs.mds_fail() - - # Now, the journal should contain the operations, but the backing - # store shouldn't - with self.assertRaises(ObjectNotFound): - self.fs.list_dirfrag(subdir_ino) - self.assertEqual(self.fs.list_dirfrag(ROOT_INO), []) - - # Execute the dentry recovery, this should populate the backing store - self.fs.journal_tool(['event', 'recover_dentries', 'list']) - - # Dentries in ROOT_INO are present - self.assertEqual(sorted(self.fs.list_dirfrag(ROOT_INO)), sorted(['rootfile_head', 'subdir_head', 'linkdir_head'])) - self.assertEqual(self.fs.list_dirfrag(subdir_ino), ['subdirfile_head', 'subsubdir_head']) - self.assertEqual(sorted(self.fs.list_dirfrag(linkdir_ino)), - sorted(['link0_head', 'link1_head', 'link2_head', 'link3_head'])) - - # Now check the MDS can read what we wrote: truncate the journal - # and start the mds. - self.fs.journal_tool(['journal', 'reset']) - self.fs.mds_fail_restart() - self.fs.wait_for_daemons() - - # List files - self.mount_a.mount() - self.mount_a.wait_until_mounted() - - # First ls -R to populate MDCache, such that hardlinks will - # resolve properly (recover_dentries does not create backtraces, - # so ordinarily hardlinks to inodes that happen not to have backtraces - # will be invisible in readdir). 
- # FIXME: hook in forward scrub here to regenerate backtraces - proc = self.mount_a.run_shell(['ls', '-R']) - self.mount_a.umount_wait() # remount to clear client cache before our second ls - self.mount_a.mount() - self.mount_a.wait_until_mounted() - - proc = self.mount_a.run_shell(['ls', '-R']) - self.assertEqual(proc.stdout.getvalue().strip(), - dedent(""" - .: - linkdir - rootfile - subdir - - ./linkdir: - link0 - link1 - link2 - link3 - - ./subdir: - subdirfile - subsubdir - - ./subdir/subsubdir: - """).strip()) - - # Check the correct inos were preserved by path - self.assertEqual(rootfile_ino, self.mount_a.path_to_ino("rootfile")) - self.assertEqual(subdir_ino, self.mount_a.path_to_ino("subdir")) - self.assertEqual(subdirfile_ino, self.mount_a.path_to_ino("subdir/subdirfile")) - self.assertEqual(subsubdir_ino, self.mount_a.path_to_ino("subdir/subsubdir")) - - # Check that the hard link handling came out correctly - self.assertEqual(self.mount_a.path_to_ino("linkdir/link0"), subdirfile_ino) - self.assertEqual(self.mount_a.path_to_ino("linkdir/link1"), subdirfile_ino) - self.assertNotEqual(self.mount_a.path_to_ino("linkdir/link2"), subdirfile_ino) - self.assertEqual(self.mount_a.path_to_ino("linkdir/link3"), rootfile_ino) - - # Create a new file, ensure it is not issued the same ino as one of the - # recovered ones - self.mount_a.run_shell(["touch", "afterwards"]) - new_ino = self.mount_a.path_to_ino("afterwards") - self.assertNotIn(new_ino, [rootfile_ino, subdir_ino, subdirfile_ino]) - - # Check that we can do metadata ops in the recovered directory - self.mount_a.run_shell(["touch", "subdir/subsubdir/subsubdirfile"]) - - @for_teuthology # 308s - def test_reset(self): - """ - That after forcibly modifying the backing store, we can get back into - a good state by resetting the MDSMap. - - The scenario is that we have two active MDSs, and we lose the journals. Once - we have completely lost confidence in the integrity of the metadata, we want to - return the system to a single-MDS state to go into a scrub to recover what we - can. - """ - - # Set max_mds to 2 - self.fs.set_max_mds(2) - - # See that we have two active MDSs - self.wait_until_equal(lambda: len(self.fs.get_active_names()), 2, 30, - reject_fn=lambda v: v > 2 or v < 1) - active_mds_names = self.fs.get_active_names() - - # Switch off any unneeded MDS daemons - for unneeded_mds in set(self.mds_cluster.mds_ids) - set(active_mds_names): - self.mds_cluster.mds_stop(unneeded_mds) - self.mds_cluster.mds_fail(unneeded_mds) - - # Create a dir on each rank - self.mount_a.run_shell(["mkdir", "alpha"]) - self.mount_a.run_shell(["mkdir", "bravo"]) - self.mount_a.setfattr("alpha/", "ceph.dir.pin", "0") - self.mount_a.setfattr("bravo/", "ceph.dir.pin", "1") - - def subtrees_assigned(): - got_subtrees = self.fs.mds_asok(["get", "subtrees"], mds_id=active_mds_names[0]) - - for s in got_subtrees: - if s['dir']['path'] == '/bravo': - if s['auth_first'] == 1: - return True - else: - # Should not happen - raise RuntimeError("/bravo is subtree but not rank 1!") - - return False - - # Ensure the pinning has taken effect and the /bravo dir is now - # migrated to rank 1. - self.wait_until_true(subtrees_assigned, 30) - - # Do some IO (this should be split across ranks according to - # the rank-pinned dirs) - self.mount_a.create_n_files("alpha/file", 1000) - self.mount_a.create_n_files("bravo/file", 1000) - - # Flush the journals so that we have some backing store data - # belonging to one MDS, and some to the other MDS. 
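# The disaster-recovery sequence this test drives further below maps to
# standalone CLI invocations roughly like the following (tool names are
# taken from the test wrappers; the exact rank-selection flags are an
# assumption here, and the fs name is an example):

import subprocess

RECOVERY_STEPS = [
    ["cephfs-journal-tool", "--rank=0", "event", "recover_dentries", "summary"],
    ["cephfs-journal-tool", "--rank=1", "event", "recover_dentries", "summary"],
    ["cephfs-table-tool", "0", "reset", "session"],
    ["cephfs-journal-tool", "--rank=0", "journal", "reset"],
    ["ceph", "fs", "reset", "cephfs", "--yes-i-really-mean-it"],
]

def run_recovery(dry_run=True):
    for step in RECOVERY_STEPS:
        print(" ".join(step))
        if not dry_run:
            subprocess.check_call(step)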
- for mds_name in active_mds_names: - self.fs.mds_asok(["flush", "journal"], mds_name) - - # Stop (hard) the second MDS daemon - self.fs.mds_stop(active_mds_names[1]) - - # Wipe out the tables for MDS rank 1 so that it is broken and can't start - # (this is the simulated failure that we will demonstrate that the disaster - # recovery tools can get us back from) - self.fs.erase_metadata_objects(prefix="mds1_") - - # Try to access files from the client - blocked_ls = self.mount_a.run_shell(["ls", "-R"], wait=False) - - # Check that this "ls -R" blocked rather than completing: indicates - # it got stuck trying to access subtrees which were on the now-dead MDS. - log.info("Sleeping to check ls is blocked...") - time.sleep(60) - self.assertFalse(blocked_ls.finished) - - # This mount is now useless because it will depend on MDS rank 1, and MDS rank 1 - # is not coming back. Kill it. - log.info("Killing mount, it's blocked on the MDS we killed") - self.mount_a.kill() - self.mount_a.kill_cleanup() - try: - # Now that the mount is dead, the ls -R should error out. - blocked_ls.wait() - except (CommandFailedError, ConnectionLostError): - # The ConnectionLostError case is for kernel client, where - # killing the mount also means killing the node. - pass - - # See that the second MDS will crash when it starts and tries to - # acquire rank 1 - damaged_id = active_mds_names[1] - self.fs.mds_restart(damaged_id) - - # The daemon taking the damaged rank should start starting, then - # restart back into standby after asking the mon to mark the rank - # damaged. - def is_marked_damaged(): - mds_map = self.fs.get_mds_map() - return 1 in mds_map['damaged'] - - self.wait_until_true(is_marked_damaged, 60) - - def get_state(): - info = self.mds_cluster.get_mds_info(damaged_id) - return info['state'] if info is not None else None - - self.wait_until_equal( - get_state, - "up:standby", - timeout=60) - - self.fs.mds_stop(damaged_id) - self.fs.mds_fail(damaged_id) - - # Now give up and go through a disaster recovery procedure - self.fs.mds_stop(active_mds_names[0]) - self.fs.mds_fail(active_mds_names[0]) - # Invoke recover_dentries quietly, because otherwise log spews millions of lines - self.fs.journal_tool(["event", "recover_dentries", "summary"], rank=0, quiet=True) - self.fs.journal_tool(["event", "recover_dentries", "summary"], rank=1, quiet=True) - self.fs.table_tool(["0", "reset", "session"]) - self.fs.journal_tool(["journal", "reset"], rank=0) - self.fs.erase_mds_objects(1) - self.fs.mon_manager.raw_cluster_cmd('fs', 'reset', self.fs.name, - '--yes-i-really-mean-it') - - # Bring an MDS back online, mount a client, and see that we can walk the full - # filesystem tree again - self.fs.mds_fail_restart(active_mds_names[0]) - self.wait_until_equal(lambda: self.fs.get_active_names(), [active_mds_names[0]], 30, - reject_fn=lambda v: len(v) > 1) - self.mount_a.mount() - self.mount_a.run_shell(["ls", "-R"], wait=True) - - def test_table_tool(self): - active_mdss = self.fs.get_active_names() - self.assertEqual(len(active_mdss), 1) - mds_name = active_mdss[0] - - self.mount_a.run_shell(["touch", "foo"]) - self.fs.mds_asok(["flush", "journal"], mds_name) - - log.info(self.fs.table_tool(["all", "show", "inode"])) - log.info(self.fs.table_tool(["all", "show", "snap"])) - log.info(self.fs.table_tool(["all", "show", "session"])) - - # Inode table should always be the same because initial state - # and choice of inode are deterministic. 
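# The inotable JSON below is a set of free [start, start+len) ranges.
# 1099511627776 is 0x10000000000, the first non-reserved ino; the
# post-test free range starting 1001 inos later reflects the one file
# created plus what is presumably the MDS's per-session preallocation
# window. The range arithmetic, as a sketch:

RANGE_START = 1099511627776  # 0x10000000000
RANGE_LEN = 1099511627776

def inos_consumed(free_start, free_len):
    used_front = free_start - RANGE_START
    used_back = RANGE_LEN - free_len - used_front
    return used_front + used_back

assert inos_consumed(1099511628777, 1099511626775) == 1001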
- # Should see one inode consumed - self.assertEqual( - json.loads(self.fs.table_tool(["all", "show", "inode"])), - {"0": { - "data": { - "version": 2, - "inotable": { - "projected_free": [ - {"start": 1099511628777, - "len": 1099511626775}], - "free": [ - {"start": 1099511628777, - "len": 1099511626775}]}}, - "result": 0}} - - ) - - # Should see one session - session_data = json.loads(self.fs.table_tool( - ["all", "show", "session"])) - self.assertEqual(len(session_data["0"]["data"]["Sessions"]), 1) - self.assertEqual(session_data["0"]["result"], 0) - - # Should see no snaps - self.assertEqual( - json.loads(self.fs.table_tool(["all", "show", "snap"])), - {"version": 0, - "snapserver": {"last_snap": 1, - "pending_noop": [], - "snaps": [], - "need_to_purge": {}, - "pending_update": [], - "pending_destroy": []}, - "result": 0} - ) - - # Reset everything - for table in ["session", "inode", "snap"]: - self.fs.table_tool(["all", "reset", table]) - - log.info(self.fs.table_tool(["all", "show", "inode"])) - log.info(self.fs.table_tool(["all", "show", "snap"])) - log.info(self.fs.table_tool(["all", "show", "session"])) - - # Should see 0 sessions - session_data = json.loads(self.fs.table_tool( - ["all", "show", "session"])) - self.assertEqual(len(session_data["0"]["data"]["Sessions"]), 0) - self.assertEqual(session_data["0"]["result"], 0) - - # Should see entire inode range now marked free - self.assertEqual( - json.loads(self.fs.table_tool(["all", "show", "inode"])), - {"0": {"data": {"version": 1, - "inotable": {"projected_free": [ - {"start": 1099511627776, - "len": 1099511627776}], - "free": [ - {"start": 1099511627776, - "len": 1099511627776}]}}, - "result": 0}} - ) - - # Should see no snaps - self.assertEqual( - json.loads(self.fs.table_tool(["all", "show", "snap"])), - {"version": 1, - "snapserver": {"last_snap": 1, - "pending_noop": [], - "snaps": [], - "need_to_purge": {}, - "pending_update": [], - "pending_destroy": []}, - "result": 0} - ) - - def test_table_tool_take_inos(self): - initial_range_start = 1099511627776 - initial_range_len = 1099511627776 - # Initially a completely clear range - self.assertEqual( - json.loads(self.fs.table_tool(["all", "show", "inode"])), - {"0": {"data": {"version": 0, - "inotable": {"projected_free": [ - {"start": initial_range_start, - "len": initial_range_len}], - "free": [ - {"start": initial_range_start, - "len": initial_range_len}]}}, - "result": 0}} - ) - - # Remove some - self.assertEqual( - json.loads(self.fs.table_tool(["all", "take_inos", "{0}".format(initial_range_start + 100)])), - {"0": {"data": {"version": 1, - "inotable": {"projected_free": [ - {"start": initial_range_start + 101, - "len": initial_range_len - 101}], - "free": [ - {"start": initial_range_start + 101, - "len": initial_range_len - 101}]}}, - "result": 0}} - ) - - @for_teuthology # Hack: "for_teuthology" because .sh doesn't work outside teuth - def test_journal_smoke(self): - workunit(self.ctx, { - 'clients': { - "client.{0}".format(self.mount_a.client_id): [ - "fs/misc/trivial_sync.sh"], - }, - "timeout": "1h" - }) - - for mount in self.mounts: - mount.umount_wait() - - self.fs.mds_stop() - self.fs.mds_fail() - - # journal tool smoke - workunit(self.ctx, { - 'clients': { - "client.{0}".format(self.mount_a.client_id): [ - "suites/cephfs_journal_tool_smoke.sh"], - }, - "timeout": "1h" - }) - - self.fs.mds_restart() - self.fs.wait_for_daemons() - - self.mount_a.mount() - - # trivial sync on mount_a - workunit(self.ctx, { - 'clients': - 
"client.{0}".format(self.mount_a.client_id): [ - "fs/misc/trivial_sync.sh"], - }, - "timeout": "1h" - }) - diff --git a/src/ceph/qa/tasks/cephfs/test_mantle.py b/src/ceph/qa/tasks/cephfs/test_mantle.py deleted file mode 100644 index 6cd86ad..0000000 --- a/src/ceph/qa/tasks/cephfs/test_mantle.py +++ /dev/null @@ -1,109 +0,0 @@ -from tasks.cephfs.cephfs_test_case import CephFSTestCase -import json -import logging - -log = logging.getLogger(__name__) -failure = "using old balancer; mantle failed for balancer=" -success = "mantle balancer version changed: " - -class TestMantle(CephFSTestCase): - def start_mantle(self): - self.wait_for_health_clear(timeout=30) - self.fs.set_max_mds(2) - self.wait_until_equal(lambda: len(self.fs.get_active_names()), 2, 30, - reject_fn=lambda v: v > 2 or v < 1) - - for m in self.fs.get_active_names(): - self.fs.mds_asok(['config', 'set', 'debug_objecter', '20'], mds_id=m) - self.fs.mds_asok(['config', 'set', 'debug_ms', '0'], mds_id=m) - self.fs.mds_asok(['config', 'set', 'debug_mds', '0'], mds_id=m) - self.fs.mds_asok(['config', 'set', 'debug_mds_balancer', '5'], mds_id=m) - - def push_balancer(self, obj, lua_code, expect): - self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', obj) - self.fs.rados(["put", obj, "-"], stdin_data=lua_code) - with self.assert_cluster_log(failure + obj + " " + expect): - log.info("run a " + obj + " balancer that expects=" + expect) - - def test_version_empty(self): - self.start_mantle() - expect = " : (2) No such file or directory" - - ret = self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer') - assert(ret == 22) # EINVAL - - self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', " ") - with self.assert_cluster_log(failure + " " + expect): pass - - def test_version_not_in_rados(self): - self.start_mantle() - expect = failure + "ghost.lua : (2) No such file or directory" - self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', "ghost.lua") - with self.assert_cluster_log(expect): pass - - def test_balancer_invalid(self): - self.start_mantle() - expect = ": (22) Invalid argument" - - lua_code = "this is invalid lua code!" 
- self.push_balancer("invalid.lua", lua_code, expect) - - lua_code = "BAL_LOG()" - self.push_balancer("invalid_log.lua", lua_code, expect) - - lua_code = "BAL_LOG(0)" - self.push_balancer("invalid_log_again.lua", lua_code, expect) - - def test_balancer_valid(self): - self.start_mantle() - lua_code = "BAL_LOG(0, \"test\")\nreturn {3, 4}" - self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', "valid.lua") - self.fs.rados(["put", "valid.lua", "-"], stdin_data=lua_code) - with self.assert_cluster_log(success + "valid.lua"): - log.info("run a valid.lua balancer") - - def test_return_invalid(self): - self.start_mantle() - expect = ": (22) Invalid argument" - - lua_code = "return \"hello\"" - self.push_balancer("string.lua", lua_code, expect) - - lua_code = "return 3" - self.push_balancer("number.lua", lua_code, expect) - - lua_code = "return {}" - self.push_balancer("dict_empty.lua", lua_code, expect) - - lua_code = "return {\"this\", \"is\", \"a\", \"test\"}" - self.push_balancer("dict_of_strings.lua", lua_code, expect) - - lua_code = "return {3, \"test\"}" - self.push_balancer("dict_of_mixed.lua", lua_code, expect) - - lua_code = "return {3}" - self.push_balancer("not_enough_numbers.lua", lua_code, expect) - - lua_code = "return {3, 4, 5, 6, 7, 8, 9}" - self.push_balancer("too_many_numbers.lua", lua_code, expect) - - def test_dead_osd(self): - self.start_mantle() - expect = " : (110) Connection timed out" - - # kill the OSDs so that the balancer pull from RADOS times out - osd_map = json.loads(self.fs.mon_manager.raw_cluster_cmd('osd', 'dump', '--format=json-pretty')) - for i in range(0, len(osd_map['osds'])): - self.fs.mon_manager.raw_cluster_cmd_result('osd', 'down', str(i)) - self.fs.mon_manager.raw_cluster_cmd_result('osd', 'out', str(i)) - - # trigger a pull from RADOS - self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', "valid.lua") - - # make the timeout a little longer since dead OSDs spam ceph -w - with self.assert_cluster_log(failure + "valid.lua" + expect, timeout=30): - log.info("run a balancer that should timeout") - - # cleanup - for i in range(0, len(osd_map['osds'])): - self.fs.mon_manager.raw_cluster_cmd_result('osd', 'in', str(i)) diff --git a/src/ceph/qa/tasks/cephfs/test_misc.py b/src/ceph/qa/tasks/cephfs/test_misc.py deleted file mode 100644 index d857cfd..0000000 --- a/src/ceph/qa/tasks/cephfs/test_misc.py +++ /dev/null @@ -1,149 +0,0 @@ - -from unittest import SkipTest -from tasks.cephfs.fuse_mount import FuseMount -from tasks.cephfs.cephfs_test_case import CephFSTestCase -from teuthology.orchestra.run import CommandFailedError -import errno -import time -import json - - -class TestMisc(CephFSTestCase): - CLIENTS_REQUIRED = 2 - - LOAD_SETTINGS = ["mds_session_autoclose"] - mds_session_autoclose = None - - def test_getattr_caps(self): - """ - Check if MDS recognizes the 'mask' parameter of open request. - The paramter allows client to request caps when opening file - """ - - if not isinstance(self.mount_a, FuseMount): - raise SkipTest("Require FUSE client") - - # Enable debug. Client will requests CEPH_CAP_XATTR_SHARED - # on lookup/open - self.mount_b.umount_wait() - self.set_conf('client', 'client debug getattr caps', 'true') - self.mount_b.mount() - self.mount_b.wait_until_mounted() - - # create a file and hold it open. MDS will issue CEPH_CAP_EXCL_* - # to mount_a - p = self.mount_a.open_background("testfile") - self.mount_b.wait_for_visible("testfile") - - # this tiggers a lookup request and an open request. 
The debug - # code will check if lookup/open reply contains xattrs - self.mount_b.run_shell(["cat", "testfile"]) - - self.mount_a.kill_background(p) - - def test_fs_new(self): - data_pool_name = self.fs.get_data_pool_name() - - self.fs.mds_stop() - self.fs.mds_fail() - - self.fs.mon_manager.raw_cluster_cmd('fs', 'rm', self.fs.name, - '--yes-i-really-mean-it') - - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'delete', - self.fs.metadata_pool_name, - self.fs.metadata_pool_name, - '--yes-i-really-really-mean-it') - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', - self.fs.metadata_pool_name, - self.fs.get_pgs_per_fs_pool().__str__()) - - dummyfile = '/etc/fstab' - - self.fs.put_metadata_object_raw("key", dummyfile) - - def get_pool_df(fs, name): - try: - return fs.get_pool_df(name)['objects'] > 0 - except RuntimeError as e: - return False - - self.wait_until_true(lambda: get_pool_df(self.fs, self.fs.metadata_pool_name), timeout=30) - - try: - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', self.fs.name, - self.fs.metadata_pool_name, - data_pool_name) - except CommandFailedError as e: - self.assertEqual(e.exitstatus, errno.EINVAL) - else: - raise AssertionError("Expected EINVAL") - - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', self.fs.name, - self.fs.metadata_pool_name, - data_pool_name, "--force") - - self.fs.mon_manager.raw_cluster_cmd('fs', 'rm', self.fs.name, - '--yes-i-really-mean-it') - - - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'delete', - self.fs.metadata_pool_name, - self.fs.metadata_pool_name, - '--yes-i-really-really-mean-it') - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', - self.fs.metadata_pool_name, - self.fs.get_pgs_per_fs_pool().__str__()) - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', self.fs.name, - self.fs.metadata_pool_name, - data_pool_name) - - def test_evict_client(self): - """ - Check that a slow client session won't get evicted if it's the - only session - """ - - self.mount_b.umount_wait() - ls_data = self.fs.mds_asok(['session', 'ls']) - self.assert_session_count(1, ls_data) - - self.mount_a.kill() - self.mount_a.kill_cleanup() - - time.sleep(self.mds_session_autoclose * 1.5) - ls_data = self.fs.mds_asok(['session', 'ls']) - self.assert_session_count(1, ls_data) - - self.mount_a.mount() - self.mount_a.wait_until_mounted() - self.mount_b.mount() - self.mount_b.wait_until_mounted() - - ls_data = self._session_list() - self.assert_session_count(2, ls_data) - - self.mount_a.kill() - self.mount_a.kill_cleanup() - - time.sleep(self.mds_session_autoclose * 1.5) - ls_data = self.fs.mds_asok(['session', 'ls']) - self.assert_session_count(1, ls_data) - - def test_filtered_df(self): - pool_name = self.fs.get_data_pool_name() - raw_df = self.fs.get_pool_df(pool_name) - raw_avail = float(raw_df["max_avail"]) - out = self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'get', - pool_name, 'size', - '-f', 'json-pretty') - j = json.loads(out) - pool_size = int(j['size']) - - proc = self.mount_a.run_shell(['df', '.']) - output = proc.stdout.getvalue() - fs_avail = output.split('\n')[1].split()[3] - fs_avail = float(fs_avail) * 1024 - - ratio = raw_avail / fs_avail - assert 0.9 < ratio < 1.1 diff --git a/src/ceph/qa/tasks/cephfs/test_pool_perm.py b/src/ceph/qa/tasks/cephfs/test_pool_perm.py deleted file mode 100644 index 22775e7..0000000 --- a/src/ceph/qa/tasks/cephfs/test_pool_perm.py +++ /dev/null @@ -1,113 +0,0 @@ -from textwrap import dedent -from teuthology.exceptions import CommandFailedError -from 
tasks.cephfs.cephfs_test_case import CephFSTestCase -import os - - -class TestPoolPerm(CephFSTestCase): - def test_pool_perm(self): - self.mount_a.run_shell(["touch", "test_file"]) - - file_path = os.path.join(self.mount_a.mountpoint, "test_file") - - remote_script = dedent(""" - import os - import errno - - fd = os.open("{path}", os.O_RDWR) - try: - if {check_read}: - ret = os.read(fd, 1024) - else: - os.write(fd, 'content') - except OSError, e: - if e.errno != errno.EPERM: - raise - else: - raise RuntimeError("client does not check permission of data pool") - """) - - client_name = "client.{0}".format(self.mount_a.client_id) - - # set data pool read only - self.fs.mon_manager.raw_cluster_cmd_result( - 'auth', 'caps', client_name, 'mds', 'allow', 'mon', 'allow r', 'osd', - 'allow r pool={0}'.format(self.fs.get_data_pool_name())) - - self.mount_a.umount_wait() - self.mount_a.mount() - self.mount_a.wait_until_mounted() - - # write should fail - self.mount_a.run_python(remote_script.format(path=file_path, check_read=str(False))) - - # set data pool write only - self.fs.mon_manager.raw_cluster_cmd_result( - 'auth', 'caps', client_name, 'mds', 'allow', 'mon', 'allow r', 'osd', - 'allow w pool={0}'.format(self.fs.get_data_pool_name())) - - self.mount_a.umount_wait() - self.mount_a.mount() - self.mount_a.wait_until_mounted() - - # read should fail - self.mount_a.run_python(remote_script.format(path=file_path, check_read=str(True))) - - def test_forbidden_modification(self): - """ - That a client who does not have the capability for setting - layout pools is prevented from doing so. - """ - - # Set up - client_name = "client.{0}".format(self.mount_a.client_id) - new_pool_name = "data_new" - self.fs.add_data_pool(new_pool_name) - - self.mount_a.run_shell(["touch", "layoutfile"]) - self.mount_a.run_shell(["mkdir", "layoutdir"]) - - # Set MDS 'rw' perms: missing 'p' means no setting pool layouts - self.fs.mon_manager.raw_cluster_cmd_result( - 'auth', 'caps', client_name, 'mds', 'allow rw', 'mon', 'allow r', - 'osd', - 'allow rw pool={0},allow rw pool={1}'.format( - self.fs.get_data_pool_names()[0], - self.fs.get_data_pool_names()[1], - )) - - self.mount_a.umount_wait() - self.mount_a.mount() - self.mount_a.wait_until_mounted() - - with self.assertRaises(CommandFailedError): - self.mount_a.setfattr("layoutfile", "ceph.file.layout.pool", - new_pool_name) - with self.assertRaises(CommandFailedError): - self.mount_a.setfattr("layoutdir", "ceph.dir.layout.pool", - new_pool_name) - self.mount_a.umount_wait() - - # Set MDS 'rwp' perms: should now be able to set layouts - self.fs.mon_manager.raw_cluster_cmd_result( - 'auth', 'caps', client_name, 'mds', 'allow rwp', 'mon', 'allow r', - 'osd', - 'allow rw pool={0},allow rw pool={1}'.format( - self.fs.get_data_pool_names()[0], - self.fs.get_data_pool_names()[1], - )) - self.mount_a.mount() - self.mount_a.wait_until_mounted() - self.mount_a.setfattr("layoutfile", "ceph.file.layout.pool", - new_pool_name) - self.mount_a.setfattr("layoutdir", "ceph.dir.layout.pool", - new_pool_name) - self.mount_a.umount_wait() - - def tearDown(self): - self.fs.mon_manager.raw_cluster_cmd_result( - 'auth', 'caps', "client.{0}".format(self.mount_a.client_id), - 'mds', 'allow', 'mon', 'allow r', 'osd', - 'allow rw pool={0}'.format(self.fs.get_data_pool_names()[0])) - super(TestPoolPerm, self).tearDown() - diff --git a/src/ceph/qa/tasks/cephfs/test_quota.py b/src/ceph/qa/tasks/cephfs/test_quota.py deleted file mode 100644 index ee11c58..0000000 --- 
a/src/ceph/qa/tasks/cephfs/test_quota.py +++ /dev/null @@ -1,106 +0,0 @@ - -from cephfs_test_case import CephFSTestCase - -from teuthology.exceptions import CommandFailedError - -class TestQuota(CephFSTestCase): - CLIENTS_REQUIRED = 2 - MDSS_REQUIRED = 1 - - def test_remote_update_getfattr(self): - """ - That quota changes made from one client are visible to another - client looking at ceph.quota xattrs - """ - self.mount_a.run_shell(["mkdir", "subdir"]) - - self.assertEqual( - self.mount_a.getfattr("./subdir", "ceph.quota.max_files"), - None) - self.assertEqual( - self.mount_b.getfattr("./subdir", "ceph.quota.max_files"), - None) - - self.mount_a.setfattr("./subdir", "ceph.quota.max_files", "10") - self.assertEqual( - self.mount_a.getfattr("./subdir", "ceph.quota.max_files"), - "10") - - # Should be visible as soon as setxattr operation completes on - # mds (we get here sooner because setfattr gets an early reply) - self.wait_until_equal( - lambda: self.mount_b.getfattr("./subdir", "ceph.quota.max_files"), - "10", timeout=10) - - def test_remote_update_df(self): - """ - That when a client modifies the quota on a directory used - as another client's root, the other client sees the change - reflected in their statfs output. - """ - - self.mount_b.umount_wait() - - self.mount_a.run_shell(["mkdir", "subdir"]) - - size_before = 1024 * 1024 * 128 - self.mount_a.setfattr("./subdir", "ceph.quota.max_bytes", - "%s" % size_before) - - self.mount_b.mount(mount_path="/subdir") - - self.assertDictEqual( - self.mount_b.df(), - { - "total": size_before, - "used": 0, - "available": size_before - }) - - size_after = 1024 * 1024 * 256 - self.mount_a.setfattr("./subdir", "ceph.quota.max_bytes", - "%s" % size_after) - - # Should be visible as soon as setxattr operation completes on - # mds (we get here sooner because setfattr gets an early reply) - self.wait_until_equal( - lambda: self.mount_b.df(), - { - "total": size_after, - "used": 0, - "available": size_after - }, - timeout=10 - ) - - def test_remote_update_write(self): - """ - That when a client modifies the quota on a directory used - as another client's root, the other client sees the effect - of the change when writing data. 
- """ - - self.mount_a.run_shell(["mkdir", "subdir_files"]) - self.mount_a.run_shell(["mkdir", "subdir_data"]) - - # Set some nice high quotas that mount_b's initial operations - # will be well within - self.mount_a.setfattr("./subdir_files", "ceph.quota.max_files", "100") - self.mount_a.setfattr("./subdir_data", "ceph.quota.max_bytes", "104857600") - - # Do some writes within my quota - self.mount_b.create_n_files("subdir_files/file", 20) - self.mount_b.write_n_mb("subdir_data/file", 20) - - # Set quotas lower than what mount_b already wrote, it should - # refuse to write more once it's seen them - self.mount_a.setfattr("./subdir_files", "ceph.quota.max_files", "10") - self.mount_a.setfattr("./subdir_data", "ceph.quota.max_bytes", "1048576") - - # Do some writes that would have been okay within the old quota, - # but are forbidden under the new quota - with self.assertRaises(CommandFailedError): - self.mount_b.create_n_files("subdir_files/file", 40) - with self.assertRaises(CommandFailedError): - self.mount_b.write_n_mb("subdir_data/file", 40) - diff --git a/src/ceph/qa/tasks/cephfs/test_readahead.py b/src/ceph/qa/tasks/cephfs/test_readahead.py deleted file mode 100644 index 31e7bf1..0000000 --- a/src/ceph/qa/tasks/cephfs/test_readahead.py +++ /dev/null @@ -1,31 +0,0 @@ -import logging -from tasks.cephfs.fuse_mount import FuseMount -from tasks.cephfs.cephfs_test_case import CephFSTestCase - -log = logging.getLogger(__name__) - - -class TestReadahead(CephFSTestCase): - def test_flush(self): - if not isinstance(self.mount_a, FuseMount): - self.skipTest("FUSE needed for measuring op counts") - - # Create 32MB file - self.mount_a.run_shell(["dd", "if=/dev/urandom", "of=foo", "bs=1M", "count=32"]) - - # Unmount and remount the client to flush cache - self.mount_a.umount_wait() - self.mount_a.mount() - self.mount_a.wait_until_mounted() - - initial_op_r = self.mount_a.admin_socket(['perf', 'dump', 'objecter'])['objecter']['op_r'] - self.mount_a.run_shell(["dd", "if=foo", "of=/dev/null", "bs=128k", "count=32"]) - op_r = self.mount_a.admin_socket(['perf', 'dump', 'objecter'])['objecter']['op_r'] - assert op_r >= initial_op_r - op_r -= initial_op_r - log.info("read operations: {0}".format(op_r)) - - # with exponentially increasing readahead, we should see fewer than 10 operations - # but this test simply checks if the client is doing a remote read for each local read - if op_r >= 32: - raise RuntimeError("readahead not working") diff --git a/src/ceph/qa/tasks/cephfs/test_recovery_pool.py b/src/ceph/qa/tasks/cephfs/test_recovery_pool.py deleted file mode 100644 index 097342a..0000000 --- a/src/ceph/qa/tasks/cephfs/test_recovery_pool.py +++ /dev/null @@ -1,220 +0,0 @@ - -""" -Test our tools for recovering metadata from the data pool into an alternate pool -""" -import json - -import logging -import os -from textwrap import dedent -import traceback -from collections import namedtuple, defaultdict - -from teuthology.orchestra.run import CommandFailedError -from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology - -log = logging.getLogger(__name__) - - -ValidationError = namedtuple("ValidationError", ["exception", "backtrace"]) - - -class OverlayWorkload(object): - def __init__(self, orig_fs, recovery_fs, orig_mount, recovery_mount): - self._orig_fs = orig_fs - self._recovery_fs = recovery_fs - self._orig_mount = orig_mount - self._recovery_mount = recovery_mount - self._initial_state = None - - # Accumulate backtraces for every failed validation, and return them. 
Backtraces - # are rather verbose, but we only see them when something breaks, and they - # let us see which check failed without having to decorate each check with - # a string - self._errors = [] - - def assert_equal(self, a, b): - try: - if a != b: - raise AssertionError("{0} != {1}".format(a, b)) - except AssertionError as e: - self._errors.append( - ValidationError(e, traceback.format_exc(3)) - ) - - def write(self): - """ - Write the workload files to the mount - """ - raise NotImplementedError() - - def validate(self): - """ - Read from the mount and validate that the workload files are present (i.e. have - survived or been reconstructed from the test scenario) - """ - raise NotImplementedError() - - def damage(self): - """ - Damage the filesystem pools in ways that will be interesting to recover from. By - default just wipe everything in the metadata pool - """ - # Delete every object in the metadata pool - objects = self._orig_fs.rados(["ls"]).split("\n") - for o in objects: - self._orig_fs.rados(["rm", o]) - - def flush(self): - """ - Called after client unmount, after write: flush whatever you want - """ - self._orig_fs.mds_asok(["flush", "journal"]) - self._recovery_fs.mds_asok(["flush", "journal"]) - - -class SimpleOverlayWorkload(OverlayWorkload): - """ - Single file, single directory, check that it gets recovered and so does its size - """ - def write(self): - self._orig_mount.run_shell(["mkdir", "subdir"]) - self._orig_mount.write_n_mb("subdir/sixmegs", 6) - self._initial_state = self._orig_mount.stat("subdir/sixmegs") - - def validate(self): - self._recovery_mount.run_shell(["ls", "subdir"]) - st = self._recovery_mount.stat("subdir/sixmegs") - self.assert_equal(st['st_size'], self._initial_state['st_size']) - return self._errors - -class TestRecoveryPool(CephFSTestCase): - MDSS_REQUIRED = 2 - CLIENTS_REQUIRED = 2 - REQUIRE_RECOVERY_FILESYSTEM = True - - def is_marked_damaged(self, rank): - mds_map = self.fs.get_mds_map() - return rank in mds_map['damaged'] - - def _rebuild_metadata(self, workload, other_pool=None, workers=1): - """ - That when all objects in metadata pool are removed, we can rebuild a metadata pool - based on the contents of a data pool, and a client can see and read our files. 
- """ - - # First, inject some files - - workload.write() - - # Unmount the client and flush the journal: the tool should also cope with - # situations where there is dirty metadata, but we'll test that separately - self.mount_a.umount_wait() - self.mount_b.umount_wait() - workload.flush() - - # Create the alternate pool if requested - recovery_fs = self.recovery_fs.name - recovery_pool = self.recovery_fs.get_metadata_pool_name() - self.recovery_fs.data_scan(['init', '--force-init', - '--filesystem', recovery_fs, - '--alternate-pool', recovery_pool]) - self.recovery_fs.mon_manager.raw_cluster_cmd('-s') - self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "session"]) - self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "snap"]) - self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "inode"]) - - # Stop the MDS - self.fs.mds_stop() - self.fs.mds_fail() - - # After recovery, we need the MDS to not be strict about stats (in production these options - # are off by default, but in QA we need to explicitly disable them) - self.fs.set_ceph_conf('mds', 'mds verify scatter', False) - self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False) - - # Apply any data damage the workload wants - workload.damage() - - # Reset the MDS map in case multiple ranks were in play: recovery procedure - # only understands how to rebuild metadata under rank 0 - self.fs.mon_manager.raw_cluster_cmd('fs', 'reset', self.fs.name, - '--yes-i-really-mean-it') - - def get_state(mds_id): - info = self.mds_cluster.get_mds_info(mds_id) - return info['state'] if info is not None else None - - self.fs.table_tool([self.fs.name + ":0", "reset", "session"]) - self.fs.table_tool([self.fs.name + ":0", "reset", "snap"]) - self.fs.table_tool([self.fs.name + ":0", "reset", "inode"]) - - # Run the recovery procedure - if False: - with self.assertRaises(CommandFailedError): - # Normal reset should fail when no objects are present, we'll use --force instead - self.fs.journal_tool(["journal", "reset"]) - - self.fs.mds_stop() - self.fs.data_scan(['scan_extents', '--alternate-pool', - recovery_pool, '--filesystem', self.fs.name, - self.fs.get_data_pool_name()]) - self.fs.data_scan(['scan_inodes', '--alternate-pool', - recovery_pool, '--filesystem', self.fs.name, - '--force-corrupt', '--force-init', - self.fs.get_data_pool_name()]) - self.fs.journal_tool(['--rank=' + self.fs.name + ":0", 'event', - 'recover_dentries', 'list', - '--alternate-pool', recovery_pool]) - - self.fs.data_scan(['init', '--force-init', '--filesystem', - self.fs.name]) - self.fs.data_scan(['scan_inodes', '--filesystem', self.fs.name, - '--force-corrupt', '--force-init', - self.fs.get_data_pool_name()]) - self.fs.journal_tool(['--rank=' + self.fs.name + ":0", 'event', - 'recover_dentries', 'list']) - - self.fs.journal_tool(['--rank=' + recovery_fs + ":0", 'journal', - 'reset', '--force']) - self.fs.journal_tool(['--rank=' + self.fs.name + ":0", 'journal', - 'reset', '--force']) - self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', - recovery_fs + ":0") - - # Mark the MDS repaired - self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0') - - # Start the MDS - self.fs.mds_restart() - self.recovery_fs.mds_restart() - self.fs.wait_for_daemons() - self.recovery_fs.wait_for_daemons() - for mds_id in self.recovery_fs.mds_ids: - self.fs.mon_manager.raw_cluster_cmd('tell', "mds." + mds_id, - 'injectargs', '--debug-mds=20') - self.fs.mon_manager.raw_cluster_cmd('daemon', "mds." 
+ mds_id, - 'scrub_path', '/', - 'recursive', 'repair') - log.info(str(self.mds_cluster.status())) - - # Mount a client - self.mount_a.mount() - self.mount_b.mount(mount_fs_name=recovery_fs) - self.mount_a.wait_until_mounted() - self.mount_b.wait_until_mounted() - - # See that the files are present and correct - errors = workload.validate() - if errors: - log.error("Validation errors found: {0}".format(len(errors))) - for e in errors: - log.error(e.exception) - log.error(e.backtrace) - raise AssertionError("Validation failed, first error: {0}\n{1}".format( - errors[0].exception, errors[0].backtrace - )) - - def test_rebuild_simple(self): - self._rebuild_metadata(SimpleOverlayWorkload(self.fs, self.recovery_fs, - self.mount_a, self.mount_b)) diff --git a/src/ceph/qa/tasks/cephfs/test_scrub_checks.py b/src/ceph/qa/tasks/cephfs/test_scrub_checks.py deleted file mode 100644 index a2de527..0000000 --- a/src/ceph/qa/tasks/cephfs/test_scrub_checks.py +++ /dev/null @@ -1,245 +0,0 @@ -""" -MDS admin socket scrubbing-related tests. -""" -import json -import logging -import errno -import time -from teuthology.exceptions import CommandFailedError -import os -from tasks.cephfs.cephfs_test_case import CephFSTestCase - -log = logging.getLogger(__name__) - - -class TestScrubChecks(CephFSTestCase): - """ - Run flush and scrub commands on the specified files in the filesystem. This - task will run through a sequence of operations, but it is not comprehensive - on its own -- it doesn't manipulate the mds cache state to test on both - in- and out-of-memory parts of the hierarchy. So it's designed to be run - multiple times within a single test run, so that the test can manipulate - memory state. - - Usage: - mds_scrub_checks: - mds_rank: 0 - path: path/to/test/dir - client: 0 - run_seq: [0-9]+ - - Increment the run_seq on subsequent invocations within a single test run; - it uses that value to generate unique folder and file names. 
- """ - - MDSS_REQUIRED = 1 - CLIENTS_REQUIRED = 1 - - def test_scrub_checks(self): - self._checks(0) - self._checks(1) - - def _checks(self, run_seq): - mds_rank = 0 - test_dir = "scrub_test_path" - - abs_test_path = "/{0}".format(test_dir) - - log.info("mountpoint: {0}".format(self.mount_a.mountpoint)) - client_path = os.path.join(self.mount_a.mountpoint, test_dir) - log.info("client_path: {0}".format(client_path)) - - log.info("Cloning repo into place") - repo_path = self.clone_repo(self.mount_a, client_path) - - log.info("Initiating mds_scrub_checks on mds.{id_}, " + - "test_path {path}, run_seq {seq}".format( - id_=mds_rank, path=abs_test_path, seq=run_seq) - ) - - - success_validator = lambda j, r: self.json_validator(j, r, "return_code", 0) - - nep = "{test_path}/i/dont/exist".format(test_path=abs_test_path) - self.asok_command(mds_rank, "flush_path {nep}".format(nep=nep), - lambda j, r: self.json_validator(j, r, "return_code", -errno.ENOENT)) - self.asok_command(mds_rank, "scrub_path {nep}".format(nep=nep), - lambda j, r: self.json_validator(j, r, "return_code", -errno.ENOENT)) - - test_repo_path = "{test_path}/ceph-qa-suite".format(test_path=abs_test_path) - dirpath = "{repo_path}/suites".format(repo_path=test_repo_path) - - if run_seq == 0: - log.info("First run: flushing {dirpath}".format(dirpath=dirpath)) - command = "flush_path {dirpath}".format(dirpath=dirpath) - self.asok_command(mds_rank, command, success_validator) - command = "scrub_path {dirpath}".format(dirpath=dirpath) - self.asok_command(mds_rank, command, success_validator) - - filepath = "{repo_path}/suites/fs/verify/validater/valgrind.yaml".format( - repo_path=test_repo_path) - if run_seq == 0: - log.info("First run: flushing {filepath}".format(filepath=filepath)) - command = "flush_path {filepath}".format(filepath=filepath) - self.asok_command(mds_rank, command, success_validator) - command = "scrub_path {filepath}".format(filepath=filepath) - self.asok_command(mds_rank, command, success_validator) - - filepath = "{repo_path}/suites/fs/basic/clusters/fixed-3-cephfs.yaml". 
\ - format(repo_path=test_repo_path) - command = "scrub_path {filepath}".format(filepath=filepath) - self.asok_command(mds_rank, command, - lambda j, r: self.json_validator(j, r, "performed_validation", - False)) - - if run_seq == 0: - log.info("First run: flushing base dir /") - command = "flush_path /" - self.asok_command(mds_rank, command, success_validator) - command = "scrub_path /" - self.asok_command(mds_rank, command, success_validator) - - new_dir = "{repo_path}/new_dir_{i}".format(repo_path=repo_path, i=run_seq) - test_new_dir = "{repo_path}/new_dir_{i}".format(repo_path=test_repo_path, - i=run_seq) - self.mount_a.run_shell(["mkdir", new_dir]) - command = "flush_path {dir}".format(dir=test_new_dir) - self.asok_command(mds_rank, command, success_validator) - - new_file = "{repo_path}/new_file_{i}".format(repo_path=repo_path, - i=run_seq) - test_new_file = "{repo_path}/new_file_{i}".format(repo_path=test_repo_path, - i=run_seq) - self.mount_a.write_n_mb(new_file, 1) - - command = "flush_path {file}".format(file=test_new_file) - self.asok_command(mds_rank, command, success_validator) - - # check that scrub fails on errors - ino = self.mount_a.path_to_ino(new_file) - rados_obj_name = "{ino:x}.00000000".format(ino=ino) - command = "scrub_path {file}".format(file=test_new_file) - - # Missing parent xattr -> ENODATA - self.fs.rados(["rmxattr", rados_obj_name, "parent"], pool=self.fs.get_data_pool_name()) - self.asok_command(mds_rank, command, - lambda j, r: self.json_validator(j, r, "return_code", -errno.ENODATA)) - - # Missing object -> ENOENT - self.fs.rados(["rm", rados_obj_name], pool=self.fs.get_data_pool_name()) - self.asok_command(mds_rank, command, - lambda j, r: self.json_validator(j, r, "return_code", -errno.ENOENT)) - - command = "flush_path /" - self.asok_command(mds_rank, command, success_validator) - - def test_scrub_repair(self): - mds_rank = 0 - test_dir = "scrub_repair_path" - - self.mount_a.run_shell(["sudo", "mkdir", test_dir]) - self.mount_a.run_shell(["sudo", "touch", "{0}/file".format(test_dir)]) - dir_objname = "{:x}.00000000".format(self.mount_a.path_to_ino(test_dir)) - - self.mount_a.umount_wait() - - # flush journal entries to dirfrag objects, and expire journal - self.fs.mds_asok(['flush', 'journal']) - self.fs.mds_stop() - - # remove the dentry from the dirfrag, causing an incorrect fragstat/rstat - self.fs.rados(["rmomapkey", dir_objname, "file_head"], - pool=self.fs.get_metadata_pool_name()) - - self.fs.mds_fail_restart() - self.fs.wait_for_daemons() - - self.mount_a.mount() - self.mount_a.wait_until_mounted() - - # fragstat indicates the directory is not empty, so rmdir should fail - with self.assertRaises(CommandFailedError) as ar: - self.mount_a.run_shell(["sudo", "rmdir", test_dir]) - self.assertEqual(ar.exception.exitstatus, 1) - - self.asok_command(mds_rank, "scrub_path /{0} repair".format(test_dir), - lambda j, r: self.json_validator(j, r, "return_code", 0)) - - # wait a few seconds for the background repair - time.sleep(10) - - # fragstat should be fixed - self.mount_a.run_shell(["sudo", "rmdir", test_dir]) - - @staticmethod - def json_validator(json_out, rc, element, expected_value): - if rc != 0: - return False, "asok command returned error {rc}".format(rc=rc) - element_value = json_out.get(element) - if element_value != expected_value: - return False, "unexpectedly got {jv} instead of {ev}!".format( - jv=element_value, ev=expected_value) - return True, "Succeeded" - - def asok_command(self, mds_rank, command, validator): - log.info("Running command 
'{command}'".format(command=command)) - - command_list = command.split() - - # we just assume there's an active mds for every rank - mds_id = self.fs.get_active_names()[mds_rank] - proc = self.fs.mon_manager.admin_socket('mds', mds_id, - command_list, check_status=False) - rout = proc.exitstatus - sout = proc.stdout.getvalue() - - if sout.strip(): - jout = json.loads(sout) - else: - jout = None - - log.info("command '{command}' got response code " + - "'{rout}' and stdout '{sout}'".format( - command=command, rout=rout, sout=sout)) - - success, errstring = validator(jout, rout) - - if not success: - raise AsokCommandFailedError(command, rout, jout, errstring) - - return jout - - def clone_repo(self, client_mount, path): - repo = "ceph-qa-suite" - repo_path = os.path.join(path, repo) - client_mount.run_shell(["mkdir", "-p", path]) - - try: - client_mount.stat(repo_path) - except CommandFailedError: - client_mount.run_shell([ - "git", "clone", '--branch', 'giant', - "http://github.com/ceph/{repo}".format(repo=repo), - "{path}/{repo}".format(path=path, repo=repo) - ]) - - return repo_path - - -class AsokCommandFailedError(Exception): - """ - Exception thrown when we get an unexpected response - on an admin socket command - """ - - def __init__(self, command, rc, json_out, errstring): - self.command = command - self.rc = rc - self.json = json_out - self.errstring = errstring - - def __str__(self): - return "Admin socket: {command} failed with rc={rc}," + \ - "json output={json}, because '{es}'".format( - command=self.command, rc=self.rc, - json=self.json, es=self.errstring) diff --git a/src/ceph/qa/tasks/cephfs/test_sessionmap.py b/src/ceph/qa/tasks/cephfs/test_sessionmap.py deleted file mode 100644 index 9d12ab6..0000000 --- a/src/ceph/qa/tasks/cephfs/test_sessionmap.py +++ /dev/null @@ -1,235 +0,0 @@ -from StringIO import StringIO -import json -import logging -from unittest import SkipTest - -from tasks.cephfs.fuse_mount import FuseMount -from teuthology.exceptions import CommandFailedError -from tasks.cephfs.cephfs_test_case import CephFSTestCase - -log = logging.getLogger(__name__) - - -class TestSessionMap(CephFSTestCase): - CLIENTS_REQUIRED = 2 - MDSS_REQUIRED = 2 - - def test_tell_session_drop(self): - """ - That when a `tell` command is sent using the python CLI, - its MDS session is gone after it terminates - """ - self.mount_a.umount_wait() - self.mount_b.umount_wait() - - mds_id = self.fs.get_lone_mds_id() - self.fs.mon_manager.raw_cluster_cmd("tell", "mds.{0}".format(mds_id), "session", "ls") - - ls_data = self.fs.mds_asok(['session', 'ls']) - self.assertEqual(len(ls_data), 0) - - def _get_thread_count(self, mds_id): - remote = self.fs.mds_daemons[mds_id].remote - - ps_txt = remote.run( - args=["ps", "-ww", "axo", "nlwp,cmd"], - stdout=StringIO() - ).stdout.getvalue().strip() - lines = ps_txt.split("\n")[1:] - - for line in lines: - if "ceph-mds" in line and not "daemon-helper" in line: - if line.find("-i {0}".format(mds_id)) != -1: - log.info("Found ps line for daemon: {0}".format(line)) - return int(line.split()[0]) - - raise RuntimeError("No process found in ps output for MDS {0}: {1}".format( - mds_id, ps_txt - )) - - def test_tell_conn_close(self): - """ - That when a `tell` command is sent using the python CLI, - the thread count goes back to where it started (i.e. 
we aren't - leaving connections open) - """ - self.mount_a.umount_wait() - self.mount_b.umount_wait() - - mds_id = self.fs.get_lone_mds_id() - - initial_thread_count = self._get_thread_count(mds_id) - self.fs.mon_manager.raw_cluster_cmd("tell", "mds.{0}".format(mds_id), "session", "ls") - final_thread_count = self._get_thread_count(mds_id) - - self.assertEqual(initial_thread_count, final_thread_count) - - def test_mount_conn_close(self): - """ - That when a client unmounts, the thread count on the MDS goes back - to what it was before the client mounted - """ - self.mount_a.umount_wait() - self.mount_b.umount_wait() - - mds_id = self.fs.get_lone_mds_id() - - initial_thread_count = self._get_thread_count(mds_id) - self.mount_a.mount() - self.mount_a.wait_until_mounted() - self.assertGreater(self._get_thread_count(mds_id), initial_thread_count) - self.mount_a.umount_wait() - final_thread_count = self._get_thread_count(mds_id) - - self.assertEqual(initial_thread_count, final_thread_count) - - def test_version_splitting(self): - """ - That when many sessions are updated, they are correctly - split into multiple versions to obey mds_sessionmap_keys_per_op - """ - - # Start umounted - self.mount_a.umount_wait() - self.mount_b.umount_wait() - - # Configure MDS to write one OMAP key at once - self.set_conf('mds', 'mds_sessionmap_keys_per_op', 1) - self.fs.mds_fail_restart() - self.fs.wait_for_daemons() - - # I would like two MDSs, so that I can do an export dir later - self.fs.set_max_mds(2) - self.fs.wait_for_daemons() - - active_mds_names = self.fs.get_active_names() - rank_0_id = active_mds_names[0] - rank_1_id = active_mds_names[1] - log.info("Ranks 0 and 1 are {0} and {1}".format( - rank_0_id, rank_1_id)) - - # Bring the clients back - self.mount_a.mount() - self.mount_b.mount() - self.mount_a.create_files() # Kick the client into opening sessions - self.mount_b.create_files() - - # See that they've got sessions - self.assert_session_count(2, mds_id=rank_0_id) - - # See that we persist their sessions - self.fs.mds_asok(["flush", "journal"], rank_0_id) - table_json = json.loads(self.fs.table_tool(["0", "show", "session"])) - log.info("SessionMap: {0}".format(json.dumps(table_json, indent=2))) - self.assertEqual(table_json['0']['result'], 0) - self.assertEqual(len(table_json['0']['data']['Sessions']), 2) - - # Now, induce a "force_open_sessions" event by exporting a dir - self.mount_a.run_shell(["mkdir", "bravo"]) - self.mount_a.run_shell(["touch", "bravo/file"]) - self.mount_b.run_shell(["ls", "-l", "bravo/file"]) - - def get_omap_wrs(): - return self.fs.mds_asok(['perf', 'dump', 'objecter'], rank_1_id)['objecter']['omap_wr'] - - # Flush so that there are no dirty sessions on rank 1 - self.fs.mds_asok(["flush", "journal"], rank_1_id) - - # Export so that we get a force_open to rank 1 for the two sessions from rank 0 - initial_omap_wrs = get_omap_wrs() - self.fs.mds_asok(['export', 'dir', '/bravo', '1'], rank_0_id) - - # This is the critical (if rather subtle) check: that in the process of doing an export dir, - # we hit force_open_sessions, and as a result we end up writing out the sessionmap. There - # will be two sessions dirtied here, and because we have set keys_per_op to 1, we should see - # a single session get written out (the first of the two, triggered by the second getting marked - # dirty) - # The number of writes is two per session, because the header (sessionmap version) update and - # KV write both count. 
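# [Editor's note] A worked example of the accounting verified below, assuming
# (as the comment above says) that one SessionMap save costs one omap write
# for the version header plus one per session key written:
#
#   keys_per_op = 1                  # mds_sessionmap_keys_per_op, set above
#   sessions_written = 1             # the first dirty session is flushed
#                                    # when the second one goes dirty
#   omap_wr_delta = 1 + keys_per_op * sessions_written  # == 2, as asserted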
- self.wait_until_true( - lambda: get_omap_wrs() - initial_omap_wrs == 2, - timeout=10 # Long enough for an export to get acked - ) - - # Now end our sessions and check the backing sessionmap is updated correctly - self.mount_a.umount_wait() - self.mount_b.umount_wait() - - # In-memory sessionmap check - self.assert_session_count(0, mds_id=rank_0_id) - - # On-disk sessionmap check - self.fs.mds_asok(["flush", "journal"], rank_0_id) - table_json = json.loads(self.fs.table_tool(["0", "show", "session"])) - log.info("SessionMap: {0}".format(json.dumps(table_json, indent=2))) - self.assertEqual(table_json['0']['result'], 0) - self.assertEqual(len(table_json['0']['data']['Sessions']), 0) - - def _sudo_write_file(self, remote, path, data): - """ - Write data to a remote file as super user - - :param remote: Remote site. - :param path: Path on the remote being written to. - :param data: Data to be written. - """ - remote.run( - args=[ - 'sudo', - 'python', - '-c', - 'import shutil, sys; shutil.copyfileobj(sys.stdin, file(sys.argv[1], "wb"))', - path, - ], - stdin=data, - ) - - def _configure_auth(self, mount, id_name, mds_caps, osd_caps=None, mon_caps=None): - """ - Set up auth credentials for a client mount, and write out the keyring - for the client to use. - """ - - if osd_caps is None: - osd_caps = "allow rw" - - if mon_caps is None: - mon_caps = "allow r" - - out = self.fs.mon_manager.raw_cluster_cmd( - "auth", "get-or-create", "client.{name}".format(name=id_name), - "mds", mds_caps, - "osd", osd_caps, - "mon", mon_caps - ) - mount.client_id = id_name - self._sudo_write_file(mount.client_remote, mount.get_keyring_path(), out) - self.set_conf("client.{name}".format(name=id_name), "keyring", mount.get_keyring_path()) - - def test_session_reject(self): - if not isinstance(self.mount_a, FuseMount): - raise SkipTest("Requires FUSE client to inject client metadata") - - self.mount_a.run_shell(["mkdir", "foo"]) - self.mount_a.run_shell(["mkdir", "foo/bar"]) - self.mount_a.umount_wait() - - # Mount B will be my rejected client - self.mount_b.umount_wait() - - # Configure a client that is limited to /foo/bar - self._configure_auth(self.mount_b, "badguy", "allow rw path=/foo/bar") - # Check that it can mount that dir and do IO - self.mount_b.mount(mount_path="/foo/bar") - self.mount_b.wait_until_mounted() - self.mount_b.create_destroy() - self.mount_b.umount_wait() - - # Configure the client to claim that its mount point metadata is /baz - self.set_conf("client.badguy", "client_metadata", "root=/baz") - # Try to mount the client, see that it fails - with self.assert_cluster_log("client session with invalid root '/baz' denied"): - with self.assertRaises(CommandFailedError): - self.mount_b.mount(mount_path="/foo/bar") diff --git a/src/ceph/qa/tasks/cephfs/test_strays.py b/src/ceph/qa/tasks/cephfs/test_strays.py deleted file mode 100644 index b64f3e9..0000000 --- a/src/ceph/qa/tasks/cephfs/test_strays.py +++ /dev/null @@ -1,1049 +0,0 @@ -import json -import time -import logging -from textwrap import dedent -import datetime -import gevent - -from teuthology.orchestra.run import CommandFailedError, Raw -from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology - -log = logging.getLogger(__name__) - - -class TestStrays(CephFSTestCase): - MDSS_REQUIRED = 2 - - OPS_THROTTLE = 1 - FILES_THROTTLE = 2 - - # Range of different file sizes used in throttle test's workload - throttle_workload_size_range = 16 - - @for_teuthology - def 
test_ops_throttle(self): - self._test_throttling(self.OPS_THROTTLE) - - @for_teuthology - def test_files_throttle(self): - self._test_throttling(self.FILES_THROTTLE) - - def test_dir_deletion(self): - """ - That when deleting a bunch of dentries and the containing - directory, everything gets purged. - Catches cases where the client might e.g. fail to trim - the unlinked dir from its cache. - """ - file_count = 1000 - create_script = dedent(""" - import os - - mount_path = "{mount_path}" - subdir = "delete_me" - size = {size} - file_count = {file_count} - os.mkdir(os.path.join(mount_path, subdir)) - for i in xrange(0, file_count): - filename = "{{0}}_{{1}}.bin".format(i, size) - f = open(os.path.join(mount_path, subdir, filename), 'w') - f.write(size * 'x') - f.close() - """.format( - mount_path=self.mount_a.mountpoint, - size=1024, - file_count=file_count - )) - - self.mount_a.run_python(create_script) - - # That the dirfrag object is created - self.fs.mds_asok(["flush", "journal"]) - dir_ino = self.mount_a.path_to_ino("delete_me") - self.assertTrue(self.fs.dirfrag_exists(dir_ino, 0)) - - # Remove everything - self.mount_a.run_shell(["rm", "-rf", "delete_me"]) - self.fs.mds_asok(["flush", "journal"]) - - # That all the removed files get created as strays - strays = self.get_mdc_stat("strays_created") - self.assertEqual(strays, file_count + 1) - - # That the strays all get enqueued for purge - self.wait_until_equal( - lambda: self.get_mdc_stat("strays_enqueued"), - strays, - timeout=600 - - ) - - # That all the purge operations execute - self.wait_until_equal( - lambda: self.get_stat("purge_queue", "pq_executed"), - strays, - timeout=600 - ) - - # That finally, the directory metadata object is gone - self.assertFalse(self.fs.dirfrag_exists(dir_ino, 0)) - - # That finally, the data objects are all gone - self.await_data_pool_empty() - - def _test_throttling(self, throttle_type): - self.data_log = [] - try: - return self._do_test_throttling(throttle_type) - except: - for l in self.data_log: - log.info(",".join([l_.__str__() for l_ in l])) - raise - - def _do_test_throttling(self, throttle_type): - """ - That the mds_max_purge_ops setting is respected - """ - - def set_throttles(files, ops): - """ - Helper for updating ops/files limits, and calculating effective - ops_per_pg setting to give the same ops limit. - """ - self.set_conf('mds', 'mds_max_purge_files', "%d" % files) - self.set_conf('mds', 'mds_max_purge_ops', "%d" % ops) - - pgs = self.fs.mon_manager.get_pool_property( - self.fs.get_data_pool_name(), - "pg_num" - ) - ops_per_pg = float(ops) / pgs - self.set_conf('mds', 'mds_max_purge_ops_per_pg', "%s" % ops_per_pg) - - # Test conditions depend on what we're going to be exercising. - # * Lift the threshold on whatever throttle we are *not* testing, so - # that the throttle of interest is the one that will be the bottleneck - # * Create either many small files (test file count throttling) or fewer - # large files (test op throttling) - if throttle_type == self.OPS_THROTTLE: - set_throttles(files=100000000, ops=16) - size_unit = 1024 * 1024 # big files, generate lots of ops - file_multiplier = 100 - elif throttle_type == self.FILES_THROTTLE: - # The default value of file limit is pretty permissive, so to avoid - # the test running too fast, create lots of files and set the limit - # pretty low. 
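# [Editor's note] The set_throttles() helper above derives an effective
# per-PG setting so that mds_max_purge_ops_per_pg yields the same absolute
# ops limit. A worked example with hypothetical values (pg_num depends on
# how the test cluster created the data pool):
#
#   ops = 16                        # desired absolute purge-op limit
#   pgs = 8                         # data pool pg_num, for example
#   ops_per_pg = float(ops) / pgs   # 2.0 -> mds_max_purge_ops_per_pg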
- set_throttles(ops=100000000, files=6) - size_unit = 1024 # small, numerous files - file_multiplier = 200 - else: - raise NotImplementedError(throttle_type) - - # Pick up config changes - self.fs.mds_fail_restart() - self.fs.wait_for_daemons() - - create_script = dedent(""" - import os - - mount_path = "{mount_path}" - subdir = "delete_me" - size_unit = {size_unit} - file_multiplier = {file_multiplier} - os.mkdir(os.path.join(mount_path, subdir)) - for i in xrange(0, file_multiplier): - for size in xrange(0, {size_range}*size_unit, size_unit): - filename = "{{0}}_{{1}}.bin".format(i, size / size_unit) - f = open(os.path.join(mount_path, subdir, filename), 'w') - f.write(size * 'x') - f.close() - """.format( - mount_path=self.mount_a.mountpoint, - size_unit=size_unit, - file_multiplier=file_multiplier, - size_range=self.throttle_workload_size_range - )) - - self.mount_a.run_python(create_script) - - # We will run the deletion in the background, to reduce the risk of it completing before - # we have started monitoring the stray statistics. - def background(): - self.mount_a.run_shell(["rm", "-rf", "delete_me"]) - self.fs.mds_asok(["flush", "journal"]) - - background_thread = gevent.spawn(background) - - total_inodes = file_multiplier * self.throttle_workload_size_range + 1 - mds_max_purge_ops = int(self.fs.get_config("mds_max_purge_ops", 'mds')) - mds_max_purge_files = int(self.fs.get_config("mds_max_purge_files", 'mds')) - - # During this phase we look for the concurrent ops to exceed half - # the limit (a heuristic) and not exceed the limit (a correctness - # condition). - purge_timeout = 600 - elapsed = 0 - files_high_water = 0 - ops_high_water = 0 - - while True: - stats = self.fs.mds_asok(['perf', 'dump']) - mdc_stats = stats['mds_cache'] - pq_stats = stats['purge_queue'] - if elapsed >= purge_timeout: - raise RuntimeError("Timeout waiting for {0} inodes to purge, stats:{1}".format(total_inodes, mdc_stats)) - - num_strays = mdc_stats['num_strays'] - num_strays_purging = pq_stats['pq_executing'] - num_purge_ops = pq_stats['pq_executing_ops'] - - self.data_log.append([datetime.datetime.now(), num_strays, num_strays_purging, num_purge_ops]) - - files_high_water = max(files_high_water, num_strays_purging) - ops_high_water = max(ops_high_water, num_purge_ops) - - total_strays_created = mdc_stats['strays_created'] - total_strays_purged = pq_stats['pq_executed'] - - if total_strays_purged == total_inodes: - log.info("Complete purge in {0} seconds".format(elapsed)) - break - elif total_strays_purged > total_inodes: - raise RuntimeError("Saw more strays than expected, mdc stats: {0}".format(mdc_stats)) - else: - if throttle_type == self.OPS_THROTTLE: - # 11 is filer_max_purge_ops plus one for the backtrace: - # limit is allowed to be overshot by this much. - if num_purge_ops > mds_max_purge_ops + 11: - raise RuntimeError("num_purge_ops violates threshold {0}/{1}".format( - num_purge_ops, mds_max_purge_ops - )) - elif throttle_type == self.FILES_THROTTLE: - if num_strays_purging > mds_max_purge_files: - raise RuntimeError("num_strays_purging violates threshold {0}/{1}".format( - num_strays_purging, mds_max_purge_files - )) - else: - raise NotImplementedError(throttle_type) - - log.info("Waiting for purge to complete {0}/{1}, {2}/{3}".format( - num_strays_purging, num_strays, - total_strays_purged, total_strays_created - )) - time.sleep(1) - elapsed += 1 - - background_thread.join() - - # Check that we got up to a respectable rate during the purge. 
This is totally - # racy, but should be safeish unless the cluster is pathologically slow, or - # insanely fast such that the deletions all pass before we have polled the - # statistics. - if throttle_type == self.OPS_THROTTLE: - if ops_high_water < mds_max_purge_ops / 2: - raise RuntimeError("Ops in flight high water is unexpectedly low ({0} / {1})".format( - ops_high_water, mds_max_purge_ops - )) - elif throttle_type == self.FILES_THROTTLE: - if files_high_water < mds_max_purge_files / 2: - raise RuntimeError("Files in flight high water is unexpectedly low ({0} / {1})".format( - files_high_water, mds_max_purge_files - )) - - # Sanity check all MDC stray stats - stats = self.fs.mds_asok(['perf', 'dump']) - mdc_stats = stats['mds_cache'] - pq_stats = stats['purge_queue'] - self.assertEqual(mdc_stats['num_strays'], 0) - self.assertEqual(mdc_stats['num_strays_delayed'], 0) - self.assertEqual(pq_stats['pq_executing'], 0) - self.assertEqual(pq_stats['pq_executing_ops'], 0) - self.assertEqual(mdc_stats['strays_created'], total_inodes) - self.assertEqual(mdc_stats['strays_enqueued'], total_inodes) - self.assertEqual(pq_stats['pq_executed'], total_inodes) - - def get_mdc_stat(self, name, mds_id=None): - return self.get_stat("mds_cache", name, mds_id) - - def get_stat(self, subsys, name, mds_id=None): - return self.fs.mds_asok(['perf', 'dump', subsys, name], - mds_id=mds_id)[subsys][name] - - def _wait_for_counter(self, subsys, counter, expect_val, timeout=60, - mds_id=None): - self.wait_until_equal( - lambda: self.get_stat(subsys, counter, mds_id), - expect_val=expect_val, timeout=timeout, - reject_fn=lambda x: x > expect_val - ) - - def test_open_inode(self): - """ - That the case of a dentry unlinked while a client holds an - inode open is handled correctly. - - The inode should be moved into a stray dentry, while the original - dentry and directory should be purged. - - The inode's data should be purged when the client eventually closes - it. 
- """ - mount_a_client_id = self.mount_a.get_global_id() - - # Write some bytes to a file - size_mb = 8 - - # Hold the file open - p = self.mount_a.open_background("open_file") - self.mount_a.write_n_mb("open_file", size_mb) - open_file_ino = self.mount_a.path_to_ino("open_file") - - self.assertEqual(self.get_session(mount_a_client_id)['num_caps'], 2) - - # Unlink the dentry - self.mount_a.run_shell(["rm", "-f", "open_file"]) - - # Wait to see the stray count increment - self.wait_until_equal( - lambda: self.get_mdc_stat("num_strays"), - expect_val=1, timeout=60, reject_fn=lambda x: x > 1) - - # See that while the stray count has incremented, none have passed - # on to the purge queue - self.assertEqual(self.get_mdc_stat("strays_created"), 1) - self.assertEqual(self.get_mdc_stat("strays_enqueued"), 0) - - # See that the client still holds 2 caps - self.assertEqual(self.get_session(mount_a_client_id)['num_caps'], 2) - - # See that the data objects remain in the data pool - self.assertTrue(self.fs.data_objects_present(open_file_ino, size_mb * 1024 * 1024)) - - # Now close the file - self.mount_a.kill_background(p) - - # Wait to see the client cap count decrement - self.wait_until_equal( - lambda: self.get_session(mount_a_client_id)['num_caps'], - expect_val=1, timeout=60, reject_fn=lambda x: x > 2 or x < 1 - ) - # Wait to see the purge counter increment, stray count go to zero - self._wait_for_counter("mds_cache", "strays_enqueued", 1) - self.wait_until_equal( - lambda: self.get_mdc_stat("num_strays"), - expect_val=0, timeout=6, reject_fn=lambda x: x > 1 - ) - self._wait_for_counter("purge_queue", "pq_executed", 1) - - # See that the data objects no longer exist - self.assertTrue(self.fs.data_objects_absent(open_file_ino, size_mb * 1024 * 1024)) - - self.await_data_pool_empty() - - def test_hardlink_reintegration(self): - """ - That removal of primary dentry of hardlinked inode results - in reintegration of inode into the previously-remote dentry, - rather than lingering as a stray indefinitely. - """ - # Write some bytes to file_a - size_mb = 8 - self.mount_a.run_shell(["mkdir", "dir_1"]) - self.mount_a.write_n_mb("dir_1/file_a", size_mb) - ino = self.mount_a.path_to_ino("dir_1/file_a") - - # Create a hardlink named file_b - self.mount_a.run_shell(["mkdir", "dir_2"]) - self.mount_a.run_shell(["ln", "dir_1/file_a", "dir_2/file_b"]) - self.assertEqual(self.mount_a.path_to_ino("dir_2/file_b"), ino) - - # Flush journal - self.fs.mds_asok(['flush', 'journal']) - - # See that backtrace for the file points to the file_a path - pre_unlink_bt = self.fs.read_backtrace(ino) - self.assertEqual(pre_unlink_bt['ancestors'][0]['dname'], "file_a") - - # empty mds cache. otherwise mds reintegrates stray when unlink finishes - self.mount_a.umount_wait() - self.fs.mds_asok(['flush', 'journal']) - self.fs.mds_fail_restart() - self.fs.wait_for_daemons() - self.mount_a.mount() - - # Unlink file_a - self.mount_a.run_shell(["rm", "-f", "dir_1/file_a"]) - - # See that a stray was created - self.assertEqual(self.get_mdc_stat("num_strays"), 1) - self.assertEqual(self.get_mdc_stat("strays_created"), 1) - - # Wait, see that data objects are still present (i.e. 
that the - # stray did not advance to purging given time) - time.sleep(30) - self.assertTrue(self.fs.data_objects_present(ino, size_mb * 1024 * 1024)) - self.assertEqual(self.get_mdc_stat("strays_enqueued"), 0) - - # See that before reintegration, the inode's backtrace points to a stray dir - self.fs.mds_asok(['flush', 'journal']) - self.assertTrue(self.get_backtrace_path(ino).startswith("stray")) - - last_reintegrated = self.get_mdc_stat("strays_reintegrated") - - # Do a metadata operation on the remaining link (mv is heavy-handed, but - # others like touch may be satisfied from caps without poking MDS) - self.mount_a.run_shell(["mv", "dir_2/file_b", "dir_2/file_c"]) - - # Stray reintegration should happen as a result of the eval_remote call - # on responding to a client request. - self.wait_until_equal( - lambda: self.get_mdc_stat("num_strays"), - expect_val=0, - timeout=60 - ) - - # See the reintegration counter increment - curr_reintegrated = self.get_mdc_stat("strays_reintegrated") - self.assertGreater(curr_reintegrated, last_reintegrated) - last_reintegrated = curr_reintegrated - - # Flush the journal - self.fs.mds_asok(['flush', 'journal']) - - # See that the backtrace for the file points to the remaining link's path - post_reint_bt = self.fs.read_backtrace(ino) - self.assertEqual(post_reint_bt['ancestors'][0]['dname'], "file_c") - - # the MDS should reintegrate the stray when the unlink finishes - self.mount_a.run_shell(["ln", "dir_2/file_c", "dir_2/file_d"]) - self.mount_a.run_shell(["rm", "-f", "dir_2/file_c"]) - - # Stray reintegration should happen as a result of the notify_stray call - # on completion of unlink - self.wait_until_equal( - lambda: self.get_mdc_stat("num_strays"), - expect_val=0, - timeout=60 - ) - - # See the reintegration counter increment - curr_reintegrated = self.get_mdc_stat("strays_reintegrated") - self.assertGreater(curr_reintegrated, last_reintegrated) - last_reintegrated = curr_reintegrated - - # Flush the journal - self.fs.mds_asok(['flush', 'journal']) - - # See that the backtrace for the file points to the newest link's path - post_reint_bt = self.fs.read_backtrace(ino) - self.assertEqual(post_reint_bt['ancestors'][0]['dname'], "file_d") - - # Now really delete it - self.mount_a.run_shell(["rm", "-f", "dir_2/file_d"]) - self._wait_for_counter("mds_cache", "strays_enqueued", 1) - self._wait_for_counter("purge_queue", "pq_executed", 1) - - self.assert_purge_idle() - self.assertTrue(self.fs.data_objects_absent(ino, size_mb * 1024 * 1024)) - - # We caused the inode to go stray 3 times - self.assertEqual(self.get_mdc_stat("strays_created"), 3) - # We purged it only at the end - self.assertEqual(self.get_mdc_stat("strays_enqueued"), 1) - - def test_mv_hardlink_cleanup(self): - """ - That when doing a rename from A to B, and B has hardlinks, - then we make a stray for B which is then reintegrated - into one of its hardlinks.
- """ - # Create file_a, file_b, and a hardlink to file_b - size_mb = 8 - self.mount_a.write_n_mb("file_a", size_mb) - file_a_ino = self.mount_a.path_to_ino("file_a") - - self.mount_a.write_n_mb("file_b", size_mb) - file_b_ino = self.mount_a.path_to_ino("file_b") - - self.mount_a.run_shell(["ln", "file_b", "linkto_b"]) - self.assertEqual(self.mount_a.path_to_ino("linkto_b"), file_b_ino) - - # mv file_a file_b - self.mount_a.run_shell(["mv", "file_a", "file_b"]) - - # Stray reintegration should happen as a result of the notify_stray call on - # completion of rename - self.wait_until_equal( - lambda: self.get_mdc_stat("num_strays"), - expect_val=0, - timeout=60 - ) - - self.assertEqual(self.get_mdc_stat("strays_created"), 1) - self.assertGreaterEqual(self.get_mdc_stat("strays_reintegrated"), 1) - - # No data objects should have been deleted, as both files still have linkage. - self.assertTrue(self.fs.data_objects_present(file_a_ino, size_mb * 1024 * 1024)) - self.assertTrue(self.fs.data_objects_present(file_b_ino, size_mb * 1024 * 1024)) - - self.fs.mds_asok(['flush', 'journal']) - - post_reint_bt = self.fs.read_backtrace(file_b_ino) - self.assertEqual(post_reint_bt['ancestors'][0]['dname'], "linkto_b") - - def _setup_two_ranks(self): - # Set up two MDSs - self.fs.set_max_mds(2) - - # See that we have two active MDSs - self.wait_until_equal(lambda: len(self.fs.get_active_names()), 2, 30, - reject_fn=lambda v: v > 2 or v < 1) - - active_mds_names = self.fs.get_active_names() - rank_0_id = active_mds_names[0] - rank_1_id = active_mds_names[1] - log.info("Ranks 0 and 1 are {0} and {1}".format( - rank_0_id, rank_1_id)) - - # Get rid of other MDS daemons so that it's easier to know which - # daemons to expect in which ranks after restarts - for unneeded_mds in set(self.mds_cluster.mds_ids) - {rank_0_id, rank_1_id}: - self.mds_cluster.mds_stop(unneeded_mds) - self.mds_cluster.mds_fail(unneeded_mds) - - return rank_0_id, rank_1_id - - def _force_migrate(self, to_id, path, watch_ino): - """ - :param to_id: MDS id to move it to - :param path: Filesystem path (string) to move - :param watch_ino: Inode number to look for at destination to confirm move - :return: None - """ - self.mount_a.run_shell(["setfattr", "-n", "ceph.dir.pin", "-v", "1", path]) - - # Poll the MDS cache dump to watch for the export completing - migrated = False - migrate_timeout = 60 - migrate_elapsed = 0 - while not migrated: - data = self.fs.mds_asok(["dump", "cache"], to_id) - for inode_data in data: - if inode_data['ino'] == watch_ino: - log.debug("Found ino in cache: {0}".format(json.dumps(inode_data, indent=2))) - if inode_data['is_auth'] is True: - migrated = True - break - - if not migrated: - if migrate_elapsed > migrate_timeout: - raise RuntimeError("Migration hasn't happened after {0}s!".format(migrate_elapsed)) - else: - migrate_elapsed += 1 - time.sleep(1) - - def _is_stopped(self, rank): - mds_map = self.fs.get_mds_map() - return rank not in [i['rank'] for i in mds_map['info'].values()] - - def test_purge_on_shutdown(self): - """ - That when an MDS rank is shut down, its purge queue is - drained in the process. 
- """ - rank_0_id, rank_1_id = self._setup_two_ranks() - - self.set_conf("mds.{0}".format(rank_1_id), 'mds_max_purge_files', "0") - self.mds_cluster.mds_fail_restart(rank_1_id) - self.fs.wait_for_daemons() - - file_count = 5 - - self.mount_a.create_n_files("delete_me/file", file_count) - - self._force_migrate(rank_1_id, "delete_me", - self.mount_a.path_to_ino("delete_me/file_0")) - - self.mount_a.run_shell(["rm", "-rf", Raw("delete_me/*")]) - self.mount_a.umount_wait() - - # See all the strays go into purge queue - self._wait_for_counter("mds_cache", "strays_created", file_count, mds_id=rank_1_id) - self._wait_for_counter("mds_cache", "strays_enqueued", file_count, mds_id=rank_1_id) - self.assertEqual(self.get_stat("mds_cache", "num_strays", mds_id=rank_1_id), 0) - - # See nothing get purged from the purge queue (yet) - time.sleep(10) - self.assertEqual(self.get_stat("purge_queue", "pq_executed", mds_id=rank_1_id), 0) - - # Shut down rank 1 - self.fs.set_max_mds(1) - self.fs.deactivate(1) - - # It shouldn't proceed past stopping because its still not allowed - # to purge - time.sleep(10) - self.assertEqual(self.get_stat("purge_queue", "pq_executed", mds_id=rank_1_id), 0) - self.assertFalse(self._is_stopped(1)) - - # Permit the daemon to start purging again - self.fs.mon_manager.raw_cluster_cmd('tell', 'mds.{0}'.format(rank_1_id), - 'injectargs', - "--mds_max_purge_files 100") - - # It should now proceed through shutdown - self.wait_until_true( - lambda: self._is_stopped(1), - timeout=60 - ) - - # ...and in the process purge all that data - self.await_data_pool_empty() - - def test_migration_on_shutdown(self): - """ - That when an MDS rank is shut down, any non-purgeable strays - get migrated to another rank. - """ - - rank_0_id, rank_1_id = self._setup_two_ranks() - - # Create a non-purgeable stray in a ~mds1 stray directory - # by doing a hard link and deleting the original file - self.mount_a.run_shell(["mkdir", "dir_1", "dir_2"]) - self.mount_a.run_shell(["touch", "dir_1/original"]) - self.mount_a.run_shell(["ln", "dir_1/original", "dir_2/linkto"]) - - self._force_migrate(rank_1_id, "dir_1", - self.mount_a.path_to_ino("dir_1/original")) - - # empty mds cache. otherwise mds reintegrates stray when unlink finishes - self.mount_a.umount_wait() - self.fs.mds_asok(['flush', 'journal'], rank_0_id) - self.fs.mds_asok(['flush', 'journal'], rank_1_id) - self.fs.mds_fail_restart() - self.fs.wait_for_daemons() - - active_mds_names = self.fs.get_active_names() - rank_0_id = active_mds_names[0] - rank_1_id = active_mds_names[1] - - self.mount_a.mount() - - self.mount_a.run_shell(["rm", "-f", "dir_1/original"]) - self.mount_a.umount_wait() - - self._wait_for_counter("mds_cache", "strays_created", 1, - mds_id=rank_1_id) - - # Shut down rank 1 - self.fs.mon_manager.raw_cluster_cmd_result('mds', 'set', "max_mds", "1") - self.fs.mon_manager.raw_cluster_cmd_result('mds', 'deactivate', "1") - - # Wait til we get to a single active MDS mdsmap state - self.wait_until_true(lambda: self._is_stopped(1), timeout=120) - - # See that the stray counter on rank 0 has incremented - self.assertEqual(self.get_mdc_stat("strays_created", rank_0_id), 1) - - def assert_backtrace(self, ino, expected_path): - """ - Assert that the backtrace in the data pool for an inode matches - an expected /foo/bar path. 
- """ - expected_elements = expected_path.strip("/").split("/") - bt = self.fs.read_backtrace(ino) - actual_elements = list(reversed([dn['dname'] for dn in bt['ancestors']])) - self.assertListEqual(expected_elements, actual_elements) - - def get_backtrace_path(self, ino): - bt = self.fs.read_backtrace(ino) - elements = reversed([dn['dname'] for dn in bt['ancestors']]) - return "/".join(elements) - - def assert_purge_idle(self): - """ - Assert that the MDS perf counters indicate no strays exist and - no ongoing purge activity. Sanity check for when PurgeQueue should - be idle. - """ - mdc_stats = self.fs.mds_asok(['perf', 'dump', "mds_cache"])['mds_cache'] - pq_stats = self.fs.mds_asok(['perf', 'dump', "purge_queue"])['purge_queue'] - self.assertEqual(mdc_stats["num_strays"], 0) - self.assertEqual(mdc_stats["num_strays_delayed"], 0) - self.assertEqual(pq_stats["pq_executing"], 0) - self.assertEqual(pq_stats["pq_executing_ops"], 0) - - def test_mv_cleanup(self): - """ - That when doing a rename from A to B, and B has no hardlinks, - then we make a stray for B and purge him. - """ - # Create file_a and file_b, write some to both - size_mb = 8 - self.mount_a.write_n_mb("file_a", size_mb) - file_a_ino = self.mount_a.path_to_ino("file_a") - self.mount_a.write_n_mb("file_b", size_mb) - file_b_ino = self.mount_a.path_to_ino("file_b") - - self.fs.mds_asok(['flush', 'journal']) - self.assert_backtrace(file_a_ino, "file_a") - self.assert_backtrace(file_b_ino, "file_b") - - # mv file_a file_b - self.mount_a.run_shell(['mv', 'file_a', 'file_b']) - - # See that stray counter increments - self.assertEqual(self.get_mdc_stat("strays_created"), 1) - # Wait for purge counter to increment - self._wait_for_counter("mds_cache", "strays_enqueued", 1) - self._wait_for_counter("purge_queue", "pq_executed", 1) - - self.assert_purge_idle() - - # file_b should have been purged - self.assertTrue(self.fs.data_objects_absent(file_b_ino, size_mb * 1024 * 1024)) - - # Backtrace should have updated from file_a to file_b - self.fs.mds_asok(['flush', 'journal']) - self.assert_backtrace(file_a_ino, "file_b") - - # file_a's data should still exist - self.assertTrue(self.fs.data_objects_present(file_a_ino, size_mb * 1024 * 1024)) - - def _pool_df(self, pool_name): - """ - Return a dict like - { - "kb_used": 0, - "bytes_used": 0, - "max_avail": 19630292406, - "objects": 0 - } - - :param pool_name: Which pool (must exist) - """ - out = self.fs.mon_manager.raw_cluster_cmd("df", "--format=json-pretty") - for p in json.loads(out)['pools']: - if p['name'] == pool_name: - return p['stats'] - - raise RuntimeError("Pool '{0}' not found".format(pool_name)) - - def await_data_pool_empty(self): - self.wait_until_true( - lambda: self._pool_df( - self.fs.get_data_pool_name() - )['objects'] == 0, - timeout=60) - - def test_snapshot_remove(self): - """ - That removal of a snapshot that references a now-unlinked file results - in purging on the stray for the file. 
- """ - # Enable snapshots - self.fs.mon_manager.raw_cluster_cmd("mds", "set", "allow_new_snaps", "true", - "--yes-i-really-mean-it") - - # Create a dir with a file in it - size_mb = 8 - self.mount_a.run_shell(["mkdir", "snapdir"]) - self.mount_a.run_shell(["mkdir", "snapdir/subdir"]) - self.mount_a.write_test_pattern("snapdir/subdir/file_a", size_mb * 1024 * 1024) - file_a_ino = self.mount_a.path_to_ino("snapdir/subdir/file_a") - - # Snapshot the dir - self.mount_a.run_shell(["mkdir", "snapdir/.snap/snap1"]) - - # Cause the head revision to deviate from the snapshot - self.mount_a.write_n_mb("snapdir/subdir/file_a", size_mb) - - # Flush the journal so that backtraces, dirfrag objects will actually be written - self.fs.mds_asok(["flush", "journal"]) - - # Unlink the file - self.mount_a.run_shell(["rm", "-f", "snapdir/subdir/file_a"]) - self.mount_a.run_shell(["rmdir", "snapdir/subdir"]) - - # Unmount the client because when I come back to check the data is still - # in the file I don't want to just see what's in the page cache. - self.mount_a.umount_wait() - - self.assertEqual(self.get_mdc_stat("strays_created"), 2) - - # FIXME: at this stage we see a purge and the stray count drops to - # zero, but there's actually still a stray, so at the very - # least the StrayManager stats code is slightly off - - self.mount_a.mount() - - # See that the data from the snapshotted revision of the file is still present - # and correct - self.mount_a.validate_test_pattern("snapdir/.snap/snap1/subdir/file_a", size_mb * 1024 * 1024) - - # Remove the snapshot - self.mount_a.run_shell(["rmdir", "snapdir/.snap/snap1"]) - - # Purging file_a doesn't happen until after we've flushed the journal, because - # it is referenced by the snapshotted subdir, and the snapshot isn't really - # gone until the journal references to it are gone - self.fs.mds_asok(["flush", "journal"]) - - # Wait for purging to complete, which requires the OSDMap to propagate to the OSDs. - # See also: http://tracker.ceph.com/issues/20072 - self.wait_until_true( - lambda: self.fs.data_objects_absent(file_a_ino, size_mb * 1024 * 1024), - timeout=60 - ) - - # See that a purge happens now - self._wait_for_counter("mds_cache", "strays_enqueued", 2) - self._wait_for_counter("purge_queue", "pq_executed", 2) - - self.await_data_pool_empty() - - def test_fancy_layout(self): - """ - purge stray file with fancy layout - """ - - file_name = "fancy_layout_file" - self.mount_a.run_shell(["touch", file_name]) - - file_layout = "stripe_unit=1048576 stripe_count=4 object_size=8388608" - self.mount_a.setfattr(file_name, "ceph.file.layout", file_layout) - - # 35MB requires 7 objects - size_mb = 35 - self.mount_a.write_n_mb(file_name, size_mb) - - self.mount_a.run_shell(["rm", "-f", file_name]) - self.fs.mds_asok(["flush", "journal"]) - - # can't use self.fs.data_objects_absent here, it does not support fancy layout - self.await_data_pool_empty() - - def test_dirfrag_limit(self): - """ - That the directory fragment size cannot exceed mds_bal_fragment_size_max (using a limit of 50 in all configurations). - - That fragmentation (forced) will allow more entries to be created. - - That unlinking fails when the stray directory fragment becomes too large and that unlinking may continue once those strays are purged. 
- """ - - self.fs.set_allow_dirfrags(True) - - LOW_LIMIT = 50 - for mds in self.fs.get_daemon_names(): - self.fs.mds_asok(["config", "set", "mds_bal_fragment_size_max", str(LOW_LIMIT)], mds) - - try: - self.mount_a.run_python(dedent(""" - import os - path = os.path.join("{path}", "subdir") - os.mkdir(path) - for n in range(0, {file_count}): - open(os.path.join(path, "%s" % n), 'w').write("%s" % n) - """.format( - path=self.mount_a.mountpoint, - file_count=LOW_LIMIT+1 - ))) - except CommandFailedError: - pass # ENOSPAC - else: - raise RuntimeError("fragment size exceeded") - - # Now test that we can go beyond the limit if we fragment the directory - - self.mount_a.run_python(dedent(""" - import os - path = os.path.join("{path}", "subdir2") - os.mkdir(path) - for n in range(0, {file_count}): - open(os.path.join(path, "%s" % n), 'w').write("%s" % n) - dfd = os.open(path, os.O_DIRECTORY) - os.fsync(dfd) - """.format( - path=self.mount_a.mountpoint, - file_count=LOW_LIMIT - ))) - - # Ensure that subdir2 is fragmented - mds_id = self.fs.get_active_names()[0] - self.fs.mds_asok(["dirfrag", "split", "/subdir2", "0/0", "1"], mds_id) - - # remount+flush (release client caps) - self.mount_a.umount_wait() - self.fs.mds_asok(["flush", "journal"], mds_id) - self.mount_a.mount() - self.mount_a.wait_until_mounted() - - # Create 50% more files than the current fragment limit - self.mount_a.run_python(dedent(""" - import os - path = os.path.join("{path}", "subdir2") - for n in range({file_count}, ({file_count}*3)//2): - open(os.path.join(path, "%s" % n), 'w').write("%s" % n) - """.format( - path=self.mount_a.mountpoint, - file_count=LOW_LIMIT - ))) - - # Now test the stray directory size is limited and recovers - strays_before = self.get_mdc_stat("strays_created") - try: - self.mount_a.run_python(dedent(""" - import os - path = os.path.join("{path}", "subdir3") - os.mkdir(path) - for n in range({file_count}): - fpath = os.path.join(path, "%s" % n) - f = open(fpath, 'w') - f.write("%s" % n) - f.close() - os.unlink(fpath) - """.format( - path=self.mount_a.mountpoint, - file_count=LOW_LIMIT*10 # 10 stray directories, should collide before this count - ))) - except CommandFailedError: - pass # ENOSPAC - else: - raise RuntimeError("fragment size exceeded") - - strays_after = self.get_mdc_stat("strays_created") - self.assertGreaterEqual(strays_after-strays_before, LOW_LIMIT) - - self._wait_for_counter("mds_cache", "strays_enqueued", strays_after) - self._wait_for_counter("purge_queue", "pq_executed", strays_after) - - self.mount_a.run_python(dedent(""" - import os - path = os.path.join("{path}", "subdir4") - os.mkdir(path) - for n in range({file_count}): - fpath = os.path.join(path, "%s" % n) - f = open(fpath, 'w') - f.write("%s" % n) - f.close() - os.unlink(fpath) - """.format( - path=self.mount_a.mountpoint, - file_count=LOW_LIMIT - ))) - - def test_purge_queue_upgrade(self): - """ - That when starting on a system with no purge queue in the metadata - pool, we silently create one. - :return: - """ - - self.mds_cluster.mds_stop() - self.mds_cluster.mds_fail() - self.fs.rados(["rm", "500.00000000"]) - self.mds_cluster.mds_restart() - self.fs.wait_for_daemons() - - def test_purge_queue_op_rate(self): - """ - A busy purge queue is meant to aggregate operations sufficiently - that our RADOS ops to the metadata pool are not O(files). Check - that that is so. 
- :return: - """ - - # For low rates of deletion, the rate of metadata ops actually - # will be o(files), so to see the desired behaviour we have to give - # the system a significant quantity, i.e. an order of magnitude - # more than the number of files it will purge at one time. - - max_purge_files = 2 - - self.set_conf('mds', 'mds_bal_frag', 'false') - self.set_conf('mds', 'mds_max_purge_files', "%d" % max_purge_files) - self.fs.mds_fail_restart() - self.fs.wait_for_daemons() - - phase_1_files = 256 - phase_2_files = 512 - - self.mount_a.run_shell(["mkdir", "phase1"]) - self.mount_a.create_n_files("phase1/file", phase_1_files) - - self.mount_a.run_shell(["mkdir", "phase2"]) - self.mount_a.create_n_files("phase2/file", phase_2_files) - - def unlink_and_count_ops(path, expected_deletions): - initial_ops = self.get_stat("objecter", "op") - initial_pq_executed = self.get_stat("purge_queue", "pq_executed") - - self.mount_a.run_shell(["rm", "-rf", path]) - - self._wait_for_counter( - "purge_queue", "pq_executed", initial_pq_executed + expected_deletions - ) - - final_ops = self.get_stat("objecter", "op") - - # Calculation of the *overhead* operations, i.e. do not include - # the operations where we actually delete files. - return final_ops - initial_ops - expected_deletions - - self.fs.mds_asok(['flush', 'journal']) - phase1_ops = unlink_and_count_ops("phase1/", phase_1_files + 1) - - self.fs.mds_asok(['flush', 'journal']) - phase2_ops = unlink_and_count_ops("phase2/", phase_2_files + 1) - - log.info("Phase 1: {0}".format(phase1_ops)) - log.info("Phase 2: {0}".format(phase2_ops)) - - # The success criterion is that deleting double the number - # of files doesn't generate double the number of overhead ops - # -- this comparison is a rough approximation of that rule. - self.assertTrue(phase2_ops < phase1_ops * 1.25) - - # Finally, check that our activity did include properly quiescing - # the queue (i.e. call to Journaler::write_head in the right place), - # by restarting the MDS and checking that it doesn't try re-executing - # any of the work we did. - self.fs.mds_asok(['flush', 'journal']) # flush to ensure no strays - # hanging around - self.fs.mds_fail_restart() - self.fs.wait_for_daemons() - time.sleep(10) - self.assertEqual(self.get_stat("purge_queue", "pq_executed"), 0) - - def test_replicated_delete_speed(self): - """ - That deletions of replicated metadata are not pathologically slow - """ - rank_0_id, rank_1_id = self._setup_two_ranks() - - self.set_conf("mds.{0}".format(rank_1_id), 'mds_max_purge_files', "0") - self.mds_cluster.mds_fail_restart(rank_1_id) - self.fs.wait_for_daemons() - - file_count = 10 - - self.mount_a.create_n_files("delete_me/file", file_count) - - self._force_migrate(rank_1_id, "delete_me", - self.mount_a.path_to_ino("delete_me/file_0")) - - begin = datetime.datetime.now() - self.mount_a.run_shell(["rm", "-rf", Raw("delete_me/*")]) - end = datetime.datetime.now() - - # What we're really checking here is that we are completing client - # operations immediately rather than delaying until the next tick. 
- tick_period = float(self.fs.get_config("mds_tick_interval", - service_type="mds")) - - duration = (end - begin).total_seconds() - self.assertLess(duration, (file_count * tick_period) * 0.25) - diff --git a/src/ceph/qa/tasks/cephfs/test_volume_client.py b/src/ceph/qa/tasks/cephfs/test_volume_client.py deleted file mode 100644 index 0876af9..0000000 --- a/src/ceph/qa/tasks/cephfs/test_volume_client.py +++ /dev/null @@ -1,1016 +0,0 @@ -import json -import logging -import time -import os -from textwrap import dedent -from tasks.cephfs.cephfs_test_case import CephFSTestCase -from tasks.cephfs.fuse_mount import FuseMount -from teuthology.exceptions import CommandFailedError - -log = logging.getLogger(__name__) - - -class TestVolumeClient(CephFSTestCase): - # One for looking at the global filesystem, one for being - # the VolumeClient, two for mounting the created shares - CLIENTS_REQUIRED = 4 - - def _volume_client_python(self, client, script, vol_prefix=None, ns_prefix=None): - # Can't dedent this *and* the script we pass in, because they might have different - # levels of indentation to begin with, so leave this string zero-indented - if vol_prefix: - vol_prefix = "\"" + vol_prefix + "\"" - if ns_prefix: - ns_prefix = "\"" + ns_prefix + "\"" - return client.run_python(""" -from ceph_volume_client import CephFSVolumeClient, VolumePath -import logging -log = logging.getLogger("ceph_volume_client") -log.addHandler(logging.StreamHandler()) -log.setLevel(logging.DEBUG) -vc = CephFSVolumeClient("manila", "{conf_path}", "ceph", {vol_prefix}, {ns_prefix}) -vc.connect() -{payload} -vc.disconnect() - """.format(payload=script, conf_path=client.config_path, vol_prefix=vol_prefix, ns_prefix=ns_prefix)) - - def _sudo_write_file(self, remote, path, data): - """ - Write data to a remote file as super user - - :param remote: Remote site. - :param path: Path on the remote being written to. - :param data: Data to be written. - """ - remote.run( - args=[ - 'sudo', - 'python', - '-c', - 'import shutil, sys; shutil.copyfileobj(sys.stdin, file(sys.argv[1], "wb"))', - path, - ], - stdin=data, - ) - - def _configure_vc_auth(self, mount, id_name): - """ - Set up auth credentials for the VolumeClient user - """ - out = self.fs.mon_manager.raw_cluster_cmd( - "auth", "get-or-create", "client.{name}".format(name=id_name), - "mds", "allow *", - "osd", "allow rw", - "mon", "allow *" - ) - mount.client_id = id_name - self._sudo_write_file(mount.client_remote, mount.get_keyring_path(), out) - self.set_conf("client.{name}".format(name=id_name), "keyring", mount.get_keyring_path()) - - def _configure_guest_auth(self, volumeclient_mount, guest_mount, - guest_entity, mount_path, - namespace_prefix=None, readonly=False, - tenant_id=None): - """ - Set up auth credentials for the guest client to mount a volume. - - :param volumeclient_mount: mount used as the handle for driving - volumeclient. - :param guest_mount: mount used by the guest client. - :param guest_entity: auth ID used by the guest client. - :param mount_path: path of the volume. - :param namespace_prefix: name prefix of the RADOS namespace, which - is used for the volume's layout. - :param readonly: defaults to False. If set to True, only read-only - mount access is granted to the guest. - :param tenant_id: (OpenStack) tenant ID of the guest client.
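        A typical invocation, as used by the tests in this class:

            self._configure_guest_auth(volumeclient_mount, guest_mount,
                                       "guest", mount_path)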
- """ - - head, volume_id = os.path.split(mount_path) - head, group_id = os.path.split(head) - head, volume_prefix = os.path.split(head) - volume_prefix = "/" + volume_prefix - - # Authorize the guest client's auth ID to mount the volume. - key = self._volume_client_python(volumeclient_mount, dedent(""" - vp = VolumePath("{group_id}", "{volume_id}") - auth_result = vc.authorize(vp, "{guest_entity}", readonly={readonly}, - tenant_id="{tenant_id}") - print auth_result['auth_key'] - """.format( - group_id=group_id, - volume_id=volume_id, - guest_entity=guest_entity, - readonly=readonly, - tenant_id=tenant_id)), volume_prefix, namespace_prefix - ) - - # CephFSVolumeClient's authorize() does not return the secret - # key to a caller who isn't multi-tenant aware. Explicitly - # query the key for such a client. - if not tenant_id: - key = self.fs.mon_manager.raw_cluster_cmd( - "auth", "get-key", "client.{name}".format(name=guest_entity), - ) - - # The guest auth ID should exist. - existing_ids = [a['entity'] for a in self.auth_list()] - self.assertIn("client.{0}".format(guest_entity), existing_ids) - - # Create keyring file for the guest client. - keyring_txt = dedent(""" - [client.{guest_entity}] - key = {key} - - """.format( - guest_entity=guest_entity, - key=key - )) - guest_mount.client_id = guest_entity - self._sudo_write_file(guest_mount.client_remote, - guest_mount.get_keyring_path(), - keyring_txt) - - # Add a guest client section to the ceph config file. - self.set_conf("client.{0}".format(guest_entity), "client quota", "True") - self.set_conf("client.{0}".format(guest_entity), "debug client", "20") - self.set_conf("client.{0}".format(guest_entity), "debug objecter", "20") - self.set_conf("client.{0}".format(guest_entity), - "keyring", guest_mount.get_keyring_path()) - - def test_default_prefix(self): - group_id = "grpid" - volume_id = "volid" - DEFAULT_VOL_PREFIX = "volumes" - DEFAULT_NS_PREFIX = "fsvolumens_" - - self.mount_b.umount_wait() - self._configure_vc_auth(self.mount_b, "manila") - - #create a volume with default prefix - self._volume_client_python(self.mount_b, dedent(""" - vp = VolumePath("{group_id}", "{volume_id}") - vc.create_volume(vp, 10, data_isolated=True) - """.format( - group_id=group_id, - volume_id=volume_id, - ))) - - # The dir should be created - self.mount_a.stat(os.path.join(DEFAULT_VOL_PREFIX, group_id, volume_id)) - - #namespace should be set - ns_in_attr = self.mount_a.getfattr(os.path.join(DEFAULT_VOL_PREFIX, group_id, volume_id), "ceph.dir.layout.pool_namespace") - namespace = "{0}{1}".format(DEFAULT_NS_PREFIX, volume_id) - self.assertEqual(namespace, ns_in_attr) - - - def test_lifecycle(self): - """ - General smoke test for create, extend, destroy - """ - - # I'm going to use mount_c later as a guest for mounting the created - # shares - self.mounts[2].umount_wait() - - # I'm going to leave mount_b unmounted and just use it as a handle for - # driving volumeclient. It's a little hacky but we don't have a more - # general concept for librados/libcephfs clients as opposed to full - # blown mounting clients. 
- self.mount_b.umount_wait() - self._configure_vc_auth(self.mount_b, "manila") - - guest_entity = "guest" - group_id = "grpid" - volume_id = "volid" - - volume_prefix = "/myprefix" - namespace_prefix = "mynsprefix_" - - # Create a 100MB volume - volume_size = 100 - mount_path = self._volume_client_python(self.mount_b, dedent(""" - vp = VolumePath("{group_id}", "{volume_id}") - create_result = vc.create_volume(vp, 1024*1024*{volume_size}) - print create_result['mount_path'] - """.format( - group_id=group_id, - volume_id=volume_id, - volume_size=volume_size - )), volume_prefix, namespace_prefix) - - # The dir should be created - self.mount_a.stat(os.path.join("myprefix", group_id, volume_id)) - - # Authorize and configure credentials for the guest to mount - # the volume. - self._configure_guest_auth(self.mount_b, self.mounts[2], guest_entity, - mount_path, namespace_prefix) - self.mounts[2].mount(mount_path=mount_path) - - # The kernel client doesn't have the quota-based df behaviour, - # or quotas at all, so only exercise the client behaviour when - # running fuse. - if isinstance(self.mounts[2], FuseMount): - # df should see volume size, same as the quota set on volume's dir - self.assertEqual(self.mounts[2].df()['total'], - volume_size * 1024 * 1024) - self.assertEqual( - self.mount_a.getfattr( - os.path.join(volume_prefix.strip("/"), group_id, volume_id), - "ceph.quota.max_bytes"), - "%s" % (volume_size * 1024 * 1024)) - - # df granularity is a 4MB block, so we have to write at least that much - data_bin_mb = 4 - self.mounts[2].write_n_mb("data.bin", data_bin_mb) - - # Write something outside the volume to check this space usage is - # not reported in the volume's DF. - other_bin_mb = 8 - self.mount_a.write_n_mb("other.bin", other_bin_mb) - - # global: df should see all the writes (data + other).
This is a > - # rather than a == because the global space used includes all pools - def check_df(): - used = self.mount_a.df()['used'] - return used >= (other_bin_mb * 1024 * 1024) - - self.wait_until_true(check_df, timeout=30) - - # Hack: do a metadata IO to kick rstats - self.mounts[2].run_shell(["touch", "foo"]) - - # volume: df should see the data_bin_mb consumed from quota, same - # as the rbytes for the volume's dir - self.wait_until_equal( - lambda: self.mounts[2].df()['used'], - data_bin_mb * 1024 * 1024, timeout=60) - self.wait_until_equal( - lambda: self.mount_a.getfattr( - os.path.join(volume_prefix.strip("/"), group_id, volume_id), - "ceph.dir.rbytes"), - "%s" % (data_bin_mb * 1024 * 1024), timeout=60) - - # sync so that file data is persisted to RADOS - self.mounts[2].run_shell(["sync"]) - - # Our data should stay in the particular RADOS namespace - pool_name = self.mount_a.getfattr(os.path.join("myprefix", group_id, volume_id), "ceph.dir.layout.pool") - namespace = "{0}{1}".format(namespace_prefix, volume_id) - ns_in_attr = self.mount_a.getfattr(os.path.join("myprefix", group_id, volume_id), "ceph.dir.layout.pool_namespace") - self.assertEqual(namespace, ns_in_attr) - - objects_in_ns = set(self.fs.rados(["ls"], pool=pool_name, namespace=namespace).split("\n")) - self.assertNotEqual(objects_in_ns, set()) - - # De-authorize the guest - self._volume_client_python(self.mount_b, dedent(""" - vp = VolumePath("{group_id}", "{volume_id}") - vc.deauthorize(vp, "{guest_entity}") - vc.evict("{guest_entity}") - """.format( - group_id=group_id, - volume_id=volume_id, - guest_entity=guest_entity - )), volume_prefix, namespace_prefix) - - # Once deauthorized, the client should be unable to do any more metadata ops - # The way that the client currently behaves here is to block (it acts like - # it has lost network, because there is nothing to tell it that its messages - # are being dropped because its identity is gone) - background = self.mounts[2].write_n_mb("rogue.bin", 1, wait=False) - time.sleep(10) # Approximate check for 'stuck' as 'still running after 10s' - self.assertFalse(background.finished) - - # After deauthorisation, the client ID should be gone (this was the only - # volume it was authorised for) - self.assertNotIn("client.{0}".format(guest_entity), [e['entity'] for e in self.auth_list()]) - - # Clean up the dead mount (ceph-fuse's behaviour here is a bit undefined) - self.mounts[2].kill() - self.mounts[2].kill_cleanup() - try: - background.wait() - except CommandFailedError: - # We killed the mount out from under it - pass - - self._volume_client_python(self.mount_b, dedent(""" - vp = VolumePath("{group_id}", "{volume_id}") - vc.delete_volume(vp) - vc.purge_volume(vp) - """.format( - group_id=group_id, - volume_id=volume_id, - )), volume_prefix, namespace_prefix) - - def test_idempotency(self): - """ - That the volumeclient interface works when calling everything twice - """ - self.mount_b.umount_wait() - self._configure_vc_auth(self.mount_b, "manila") - - guest_entity = "guest" - group_id = "grpid" - volume_id = "volid" - self._volume_client_python(self.mount_b, dedent(""" - vp = VolumePath("{group_id}", "{volume_id}") - vc.create_volume(vp, 10) - vc.create_volume(vp, 10) - vc.authorize(vp, "{guest_entity}") - vc.authorize(vp, "{guest_entity}") - vc.deauthorize(vp, "{guest_entity}") - vc.deauthorize(vp, "{guest_entity}") - vc.delete_volume(vp) - vc.delete_volume(vp) - vc.purge_volume(vp) - vc.purge_volume(vp) - - vc.create_volume(vp, 10, data_isolated=True) -
vc.create_volume(vp, 10, data_isolated=True) - vc.authorize(vp, "{guest_entity}") - vc.authorize(vp, "{guest_entity}") - vc.deauthorize(vp, "{guest_entity}") - vc.deauthorize(vp, "{guest_entity}") - vc.evict("{guest_entity}") - vc.evict("{guest_entity}") - vc.delete_volume(vp, data_isolated=True) - vc.delete_volume(vp, data_isolated=True) - vc.purge_volume(vp, data_isolated=True) - vc.purge_volume(vp, data_isolated=True) - """.format( - group_id=group_id, - volume_id=volume_id, - guest_entity=guest_entity - ))) - - def test_data_isolated(self): - """ - That data isolated shares get their own pool - :return: - """ - - # Because the teuthology config template sets mon_max_pg_per_osd to - # 10000 (i.e. it just tries to ignore health warnings), reset it to something - # sane before using volume_client, to avoid creating pools with absurdly large - # numbers of PGs. - self.set_conf("global", "mon max pg per osd", "300") - for mon_daemon_state in self.ctx.daemons.iter_daemons_of_role('mon'): - mon_daemon_state.restart() - - self.mount_b.umount_wait() - self._configure_vc_auth(self.mount_b, "manila") - - # Calculate how many PGs we'll expect the new volume pool to have - osd_map = json.loads(self.fs.mon_manager.raw_cluster_cmd('osd', 'dump', '--format=json-pretty')) - max_per_osd = int(self.fs.get_config('mon_max_pg_per_osd')) - osd_count = len(osd_map['osds']) - max_overall = osd_count * max_per_osd - - existing_pg_count = 0 - for p in osd_map['pools']: - existing_pg_count += p['pg_num'] - - expected_pg_num = (max_overall - existing_pg_count) / 10 - log.info("max_per_osd {0}".format(max_per_osd)) - log.info("osd_count {0}".format(osd_count)) - log.info("max_overall {0}".format(max_overall)) - log.info("existing_pg_count {0}".format(existing_pg_count)) - log.info("expected_pg_num {0}".format(expected_pg_num)) - - pools_a = json.loads(self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['pools'] - - group_id = "grpid" - volume_id = "volid" - self._volume_client_python(self.mount_b, dedent(""" - vp = VolumePath("{group_id}", "{volume_id}") - vc.create_volume(vp, 10, data_isolated=True) - """.format( - group_id=group_id, - volume_id=volume_id, - ))) - - pools_b = json.loads(self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['pools'] - - # Should have created one new pool - new_pools = set(p['pool_name'] for p in pools_b) - set([p['pool_name'] for p in pools_a]) - self.assertEqual(len(new_pools), 1) - - # It should have followed the heuristic for PG count - # (this is an overly strict test condition, so we may want to remove - # it at some point as/when the logic gets fancier) - created_pg_num = self.fs.mon_manager.get_pool_property(list(new_pools)[0], "pg_num") - self.assertEqual(expected_pg_num, created_pg_num) - - def test_15303(self): - """ - Reproducer for #15303 "Client holds incorrect complete flag on dir - after losing caps" (http://tracker.ceph.com/issues/15303) - """ - for m in self.mounts: - m.umount_wait() - - # Create a dir on mount A - self.mount_a.mount() - self.mount_a.run_shell(["mkdir", "parent1"]) - self.mount_a.run_shell(["mkdir", "parent2"]) - self.mount_a.run_shell(["mkdir", "parent1/mydir"]) - - # Put some files in it from mount B - self.mount_b.mount() - self.mount_b.run_shell(["touch", "parent1/mydir/afile"]) - self.mount_b.umount_wait() - - # List the dir's contents on mount A - self.assertListEqual(self.mount_a.ls("parent1/mydir"), - ["afile"]) - - def test_evict_client(self): - """ - That a volume client can be evicted based 
on its auth ID and the volume - path it has mounted. - """ - - if not isinstance(self.mount_a, FuseMount): - self.skipTest("Requires FUSE client to inject client metadata") - - # mounts[1] would be used as the handle for driving VolumeClient. mounts[2] - # and mounts[3] would be used as guests to mount the volumes/shares. - - for i in range(1, 4): - self.mounts[i].umount_wait() - - volumeclient_mount = self.mounts[1] - self._configure_vc_auth(volumeclient_mount, "manila") - guest_mounts = (self.mounts[2], self.mounts[3]) - - guest_entity = "guest" - group_id = "grpid" - mount_paths = [] - volume_ids = [] - - # Create two volumes. Authorize 'guest' auth ID to mount the two - # volumes. Mount the two volumes. Write data to the volumes. - for i in range(2): - # Create volume. - volume_ids.append("volid_{0}".format(str(i))) - mount_paths.append( - self._volume_client_python(volumeclient_mount, dedent(""" - vp = VolumePath("{group_id}", "{volume_id}") - create_result = vc.create_volume(vp, 10 * 1024 * 1024) - print create_result['mount_path'] - """.format( - group_id=group_id, - volume_id=volume_ids[i] - )))) - - # Authorize 'guest' auth ID to mount the volume. - self._configure_guest_auth(volumeclient_mount, guest_mounts[i], - guest_entity, mount_paths[i]) - - # Mount the volume. - guest_mounts[i].mountpoint_dir_name = 'mnt.{id}.{suffix}'.format( - id=guest_entity, suffix=str(i)) - guest_mounts[i].mount(mount_path=mount_paths[i]) - guest_mounts[i].write_n_mb("data.bin", 1) - - - # Evict the guest client, guest_mounts[0], which is using auth ID 'guest' - # and has mounted one volume. - self._volume_client_python(self.mount_b, dedent(""" - vp = VolumePath("{group_id}", "{volume_id}") - vc.deauthorize(vp, "{guest_entity}") - vc.evict("{guest_entity}", volume_path=vp) - """.format( - group_id=group_id, - volume_id=volume_ids[0], - guest_entity=guest_entity - ))) - - # The evicted guest client, guest_mounts[0], should not be able to do - # any more metadata ops. It should start failing all operations - # when it sees that its own address is in the blacklist. - try: - guest_mounts[0].write_n_mb("rogue.bin", 1) - except CommandFailedError: - pass - else: - raise RuntimeError("post-eviction write should have failed!") - - # The blacklisted guest client should now be unmountable - guest_mounts[0].umount_wait() - - # Guest client, guest_mounts[1], using the same auth ID 'guest', but - # has mounted the other volume, should be able to use its volume - # unaffected. - guest_mounts[1].write_n_mb("data.bin.1", 1) - - # Cleanup. - for i in range(2): - self._volume_client_python(volumeclient_mount, dedent(""" - vp = VolumePath("{group_id}", "{volume_id}") - vc.deauthorize(vp, "{guest_entity}") - vc.delete_volume(vp) - vc.purge_volume(vp) - """.format( - group_id=group_id, - volume_id=volume_ids[i], - guest_entity=guest_entity - ))) - - - def test_purge(self): - """ - Reproducer for #15266, exception trying to purge volumes that - contain non-ascii filenames. - - Additionally test any other purge corner cases here. - """ - # I'm going to leave mount_b unmounted and just use it as a handle for - # driving volumeclient. It's a little hacky but we don't have a more - # general concept for librados/libcephfs clients as opposed to full - # blown mounting clients.
- self.mount_b.umount_wait() - self._configure_vc_auth(self.mount_b, "manila") - - group_id = "grpid" - # Use a unicode volume ID (like Manila), to reproduce #15266 - volume_id = u"volid" - - # Create - mount_path = self._volume_client_python(self.mount_b, dedent(""" - vp = VolumePath("{group_id}", u"{volume_id}") - create_result = vc.create_volume(vp, 10) - print create_result['mount_path'] - """.format( - group_id=group_id, - volume_id=volume_id - ))) - - # Strip leading "/" - mount_path = mount_path[1:] - - # A file with non-ascii characters - self.mount_a.run_shell(["touch", os.path.join(mount_path, u"b\u00F6b")]) - - # A file with no permissions to do anything - self.mount_a.run_shell(["touch", os.path.join(mount_path, "noperms")]) - self.mount_a.run_shell(["chmod", "0000", os.path.join(mount_path, "noperms")]) - - self._volume_client_python(self.mount_b, dedent(""" - vp = VolumePath("{group_id}", u"{volume_id}") - vc.delete_volume(vp) - vc.purge_volume(vp) - """.format( - group_id=group_id, - volume_id=volume_id - ))) - - # Check it's really gone - self.assertEqual(self.mount_a.ls("volumes/_deleting"), []) - self.assertEqual(self.mount_a.ls("volumes/"), ["_deleting", group_id]) - - def test_readonly_authorization(self): - """ - That guest clients can be restricted to read-only mounts of volumes. - """ - - volumeclient_mount = self.mounts[1] - guest_mount = self.mounts[2] - volumeclient_mount.umount_wait() - guest_mount.umount_wait() - - # Configure volumeclient_mount as the handle for driving volumeclient. - self._configure_vc_auth(volumeclient_mount, "manila") - - guest_entity = "guest" - group_id = "grpid" - volume_id = "volid" - - # Create a volume. - mount_path = self._volume_client_python(volumeclient_mount, dedent(""" - vp = VolumePath("{group_id}", "{volume_id}") - create_result = vc.create_volume(vp, 1024*1024*10) - print create_result['mount_path'] - """.format( - group_id=group_id, - volume_id=volume_id, - ))) - - # Authorize and configure credentials for the guest to mount - # the volume with read-write access. - self._configure_guest_auth(volumeclient_mount, guest_mount, guest_entity, - mount_path, readonly=False) - - # Mount the volume, and write to it. - guest_mount.mount(mount_path=mount_path) - guest_mount.write_n_mb("data.bin", 1) - - # Change the guest auth ID's authorization to read-only mount access. - self._volume_client_python(volumeclient_mount, dedent(""" - vp = VolumePath("{group_id}", "{volume_id}") - vc.deauthorize(vp, "{guest_entity}") - """.format( - group_id=group_id, - volume_id=volume_id, - guest_entity=guest_entity - ))) - self._configure_guest_auth(volumeclient_mount, guest_mount, guest_entity, - mount_path, readonly=True) - - # The effect of the change in access level to read-only is not - # immediate. The guest sees the change only after a remount of - # the volume. - guest_mount.umount_wait() - guest_mount.mount(mount_path=mount_path) - - # Read existing content of the volume. - self.assertListEqual(guest_mount.ls(guest_mount.mountpoint), ["data.bin"]) - # Cannot write into read-only volume. - with self.assertRaises(CommandFailedError): - guest_mount.write_n_mb("rogue.bin", 1) - - def test_get_authorized_ids(self): - """ - That for a volume, the authorized IDs and their access levels - can be obtained using CephFSVolumeClient's get_authorized_ids(). - """ - volumeclient_mount = self.mounts[1] - volumeclient_mount.umount_wait() - - # Configure volumeclient_mount as the handle for driving volumeclient.
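# In outline, the expected progression of vc.get_authorized_ids(vp) across
# this test is:
#
#     None  ->  [(u'guest1', u'rw'), (u'guest2', u'r')]  ->  None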
- self._configure_vc_auth(volumeclient_mount, "manila") - - group_id = "grpid" - volume_id = "volid" - guest_entity_1 = "guest1" - guest_entity_2 = "guest2" - - log.info("print group ID: {0}".format(group_id)) - - # Create a volume. - auths = self._volume_client_python(volumeclient_mount, dedent(""" - vp = VolumePath("{group_id}", "{volume_id}") - vc.create_volume(vp, 1024*1024*10) - auths = vc.get_authorized_ids(vp) - print auths - """.format( - group_id=group_id, - volume_id=volume_id, - ))) - # Check the list of authorized IDs for the volume. - expected_result = None - self.assertEqual(str(expected_result), auths) - - # Allow two auth IDs access to the volume. - auths = self._volume_client_python(volumeclient_mount, dedent(""" - vp = VolumePath("{group_id}", "{volume_id}") - vc.authorize(vp, "{guest_entity_1}", readonly=False) - vc.authorize(vp, "{guest_entity_2}", readonly=True) - auths = vc.get_authorized_ids(vp) - print auths - """.format( - group_id=group_id, - volume_id=volume_id, - guest_entity_1=guest_entity_1, - guest_entity_2=guest_entity_2, - ))) - # Check the list of authorized IDs and their access levels. - expected_result = [(u'guest1', u'rw'), (u'guest2', u'r')] - self.assertItemsEqual(str(expected_result), auths) - - # Disallow both the auth IDs' access to the volume. - auths = self._volume_client_python(volumeclient_mount, dedent(""" - vp = VolumePath("{group_id}", "{volume_id}") - vc.deauthorize(vp, "{guest_entity_1}") - vc.deauthorize(vp, "{guest_entity_2}") - auths = vc.get_authorized_ids(vp) - print auths - """.format( - group_id=group_id, - volume_id=volume_id, - guest_entity_1=guest_entity_1, - guest_entity_2=guest_entity_2, - ))) - # Check the list of authorized IDs for the volume. - expected_result = None - self.assertItemsEqual(str(expected_result), auths) - - def test_multitenant_volumes(self): - """ - That volume access can be restricted to a tenant. - - That metadata used to enforce tenant isolation of - volumes is stored as a two-way mapping between auth - IDs and volumes that they're authorized to access. - """ - volumeclient_mount = self.mounts[1] - volumeclient_mount.umount_wait() - - # Configure volumeclient_mount as the handle for driving volumeclient. - self._configure_vc_auth(volumeclient_mount, "manila") - - group_id = "groupid" - volume_id = "volumeid" - - # Guest clients belonging to different tenants, but using the same - # auth ID. - auth_id = "guest" - guestclient_1 = { - "auth_id": auth_id, - "tenant_id": "tenant1", - } - guestclient_2 = { - "auth_id": auth_id, - "tenant_id": "tenant2", - } - - # Create a volume. - self._volume_client_python(volumeclient_mount, dedent(""" - vp = VolumePath("{group_id}", "{volume_id}") - vc.create_volume(vp, 1024*1024*10) - """.format( - group_id=group_id, - volume_id=volume_id, - ))) - - # Check that volume metadata file is created on volume creation. - vol_metadata_filename = "_{0}:{1}.meta".format(group_id, volume_id) - self.assertIn(vol_metadata_filename, self.mounts[0].ls("volumes")) - - # Authorize 'guestclient_1', using auth ID 'guest' and belonging to - # 'tenant1', with 'rw' access to the volume. 
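# The two-way mapping verified below lives in two files at the top of the
# volume prefix, named as this test derives them:
#
#     "${auth_id}.meta"               e.g. "$guest.meta"
#     "_{group_id}:{volume_id}.meta"  e.g. "_groupid:volumeid.meta"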
- self._volume_client_python(volumeclient_mount, dedent(""" - vp = VolumePath("{group_id}", "{volume_id}") - vc.authorize(vp, "{auth_id}", tenant_id="{tenant_id}") - """.format( - group_id=group_id, - volume_id=volume_id, - auth_id=guestclient_1["auth_id"], - tenant_id=guestclient_1["tenant_id"] - ))) - - # Check that the auth metadata file for auth ID 'guest' is - # created on authorizing 'guest' access to the volume. - auth_metadata_filename = "${0}.meta".format(guestclient_1["auth_id"]) - self.assertIn(auth_metadata_filename, self.mounts[0].ls("volumes")) - - # Verify that the auth metadata file stores the tenant ID that the - # auth ID belongs to, the auth ID's authorized access levels - # for different volumes, versioning details, etc. - expected_auth_metadata = { - u"version": 2, - u"compat_version": 1, - u"dirty": False, - u"tenant_id": u"tenant1", - u"volumes": { - u"groupid/volumeid": { - u"dirty": False, - u"access_level": u"rw", - } - } - } - - auth_metadata = self._volume_client_python(volumeclient_mount, dedent(""" - vp = VolumePath("{group_id}", "{volume_id}") - auth_metadata = vc._auth_metadata_get("{auth_id}") - print auth_metadata - """.format( - group_id=group_id, - volume_id=volume_id, - auth_id=guestclient_1["auth_id"], - ))) - - self.assertItemsEqual(str(expected_auth_metadata), auth_metadata) - - # Verify that the volume metadata file stores info about auth IDs - # and their access levels to the volume, versioning details, etc. - expected_vol_metadata = { - u"version": 2, - u"compat_version": 1, - u"auths": { - u"guest": { - u"dirty": False, - u"access_level": u"rw" - } - } - } - - vol_metadata = self._volume_client_python(volumeclient_mount, dedent(""" - vp = VolumePath("{group_id}", "{volume_id}") - volume_metadata = vc._volume_metadata_get(vp) - print volume_metadata - """.format( - group_id=group_id, - volume_id=volume_id, - ))) - self.assertItemsEqual(str(expected_vol_metadata), vol_metadata) - - # Cannot authorize 'guestclient_2' to access the volume. - # It uses auth ID 'guest', which has already been used by a - # 'guestclient_1' belonging to another tenant for accessing - # the volume. - with self.assertRaises(CommandFailedError): - self._volume_client_python(volumeclient_mount, dedent(""" - vp = VolumePath("{group_id}", "{volume_id}") - vc.authorize(vp, "{auth_id}", tenant_id="{tenant_id}") - """.format( - group_id=group_id, - volume_id=volume_id, - auth_id=guestclient_2["auth_id"], - tenant_id=guestclient_2["tenant_id"] - ))) - - # Check that the auth metadata file is cleaned up on removing - # the auth ID's only access to a volume. - self._volume_client_python(volumeclient_mount, dedent(""" - vp = VolumePath("{group_id}", "{volume_id}") - vc.deauthorize(vp, "{guest_entity}") - """.format( - group_id=group_id, - volume_id=volume_id, - guest_entity=guestclient_1["auth_id"] - ))) - - self.assertNotIn(auth_metadata_filename, self.mounts[0].ls("volumes")) - - # Check that the volume metadata file is cleaned up on volume deletion. - self._volume_client_python(volumeclient_mount, dedent(""" - vp = VolumePath("{group_id}", "{volume_id}") - vc.delete_volume(vp) - """.format( - group_id=group_id, - volume_id=volume_id, - ))) - self.assertNotIn(vol_metadata_filename, self.mounts[0].ls("volumes")) - - def test_recover_metadata(self): - """ - That volume client can recover from partial auth updates using - metadata files, which store auth info and its update status info.
- """ - volumeclient_mount = self.mounts[1] - volumeclient_mount.umount_wait() - - # Configure volumeclient_mount as the handle for driving volumeclient. - self._configure_vc_auth(volumeclient_mount, "manila") - - group_id = "groupid" - volume_id = "volumeid" - - guestclient = { - "auth_id": "guest", - "tenant_id": "tenant", - } - - # Create a volume. - self._volume_client_python(volumeclient_mount, dedent(""" - vp = VolumePath("{group_id}", "{volume_id}") - vc.create_volume(vp, 1024*1024*10) - """.format( - group_id=group_id, - volume_id=volume_id, - ))) - - # Authorize 'guestclient' access to the volume. - self._volume_client_python(volumeclient_mount, dedent(""" - vp = VolumePath("{group_id}", "{volume_id}") - vc.authorize(vp, "{auth_id}", tenant_id="{tenant_id}") - """.format( - group_id=group_id, - volume_id=volume_id, - auth_id=guestclient["auth_id"], - tenant_id=guestclient["tenant_id"] - ))) - - # Check that auth metadata file for auth ID 'guest' is created. - auth_metadata_filename = "${0}.meta".format(guestclient["auth_id"]) - self.assertIn(auth_metadata_filename, self.mounts[0].ls("volumes")) - - # Induce partial auth update state by modifying the auth metadata file, - # and then run recovery procedure. - self._volume_client_python(volumeclient_mount, dedent(""" - vp = VolumePath("{group_id}", "{volume_id}") - auth_metadata = vc._auth_metadata_get("{auth_id}") - auth_metadata['dirty'] = True - vc._auth_metadata_set("{auth_id}", auth_metadata) - vc.recover() - """.format( - group_id=group_id, - volume_id=volume_id, - auth_id=guestclient["auth_id"], - ))) - - def test_put_object(self): - vc_mount = self.mounts[1] - vc_mount.umount_wait() - self._configure_vc_auth(vc_mount, "manila") - - obj_data = 'test data' - obj_name = 'test_vc_obj_1' - pool_name = self.fs.get_data_pool_names()[0] - - self._volume_client_python(vc_mount, dedent(""" - vc.put_object("{pool_name}", "{obj_name}", b"{obj_data}") - """.format( - pool_name = pool_name, - obj_name = obj_name, - obj_data = obj_data - ))) - - read_data = self.fs.rados(['get', obj_name, '-'], pool=pool_name) - self.assertEqual(obj_data, read_data) - - def test_get_object(self): - vc_mount = self.mounts[1] - vc_mount.umount_wait() - self._configure_vc_auth(vc_mount, "manila") - - obj_data = 'test_data' - obj_name = 'test_vc_ob_2' - pool_name = self.fs.get_data_pool_names()[0] - - self.fs.rados(['put', obj_name, '-'], pool=pool_name, stdin_data=obj_data) - - self._volume_client_python(vc_mount, dedent(""" - data_read = vc.get_object("{pool_name}", "{obj_name}") - assert data_read == b"{obj_data}" - """.format( - pool_name = pool_name, - obj_name = obj_name, - obj_data = obj_data - ))) - - def test_delete_object(self): - vc_mount = self.mounts[1] - vc_mount.umount_wait() - self._configure_vc_auth(vc_mount, "manila") - - obj_data = 'test data' - obj_name = 'test_vc_obj_3' - pool_name = self.fs.get_data_pool_names()[0] - - self.fs.rados(['put', obj_name, '-'], pool=pool_name, stdin_data=obj_data) - - self._volume_client_python(vc_mount, dedent(""" - data_read = vc.delete_object("{pool_name}", "{obj_name}") - """.format( - pool_name = pool_name, - obj_name = obj_name, - ))) - - with self.assertRaises(CommandFailedError): - self.fs.rados(['stat', obj_name], pool=pool_name) - - # Check idempotency -- no error raised trying to delete non-existent - # object - self._volume_client_python(vc_mount, dedent(""" - data_read = vc.delete_object("{pool_name}", "{obj_name}") - """.format( - pool_name = pool_name, - obj_name = obj_name, - ))) - - def 
test_21501(self): - """ - Reproducer for #21501 "ceph_volume_client: sets invalid caps for - existing IDs with no caps" (http://tracker.ceph.com/issues/21501) - """ - - vc_mount = self.mounts[1] - vc_mount.umount_wait() - - # Configure vc_mount as the handle for driving volumeclient - self._configure_vc_auth(vc_mount, "manila") - - # Create a volume - group_id = "grpid" - volume_id = "volid" - mount_path = self._volume_client_python(vc_mount, dedent(""" - vp = VolumePath("{group_id}", "{volume_id}") - create_result = vc.create_volume(vp, 1024*1024*10) - print create_result['mount_path'] - """.format( - group_id=group_id, - volume_id=volume_id - ))) - - # Create an auth ID with no caps - guest_id = '21501' - self.fs.mon_manager.raw_cluster_cmd_result( - 'auth', 'get-or-create', 'client.{0}'.format(guest_id)) - - guest_mount = self.mounts[2] - guest_mount.umount_wait() - - # Set auth caps for the auth ID using the volumeclient - self._configure_guest_auth(vc_mount, guest_mount, guest_id, mount_path) - - # Mount the volume in the guest using the auth ID to assert that the - # auth caps are valid - guest_mount.mount(mount_path=mount_path) |
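Taken together, the lifecycle these tests drive through _volume_client_python amounts to the following minimal sketch (the conf path, group, volume and auth IDs here are placeholders; the calls are the ones exercised above):

    from ceph_volume_client import CephFSVolumeClient, VolumePath

    vc = CephFSVolumeClient("manila", "/etc/ceph/ceph.conf", "ceph")
    vc.connect()
    vp = VolumePath("grpid", "volid")
    mount_path = vc.create_volume(vp, 10 * 1024 * 1024)['mount_path']
    key = vc.authorize(vp, "guest")['auth_key']  # goes into the guest's keyring
    # ... the guest mounts mount_path as client.guest using key ...
    vc.deauthorize(vp, "guest")
    vc.evict("guest")
    vc.delete_volume(vp)
    vc.purge_volume(vp)
    vc.disconnect()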