diff options
Diffstat (limited to 'src/ceph/qa/tasks/mgr/test_failover.py')
-rw-r--r-- | src/ceph/qa/tasks/mgr/test_failover.py | 144 |
1 files changed, 0 insertions, 144 deletions
import logging
import json

from tasks.mgr.mgr_test_case import MgrTestCase


log = logging.getLogger(__name__)


class TestFailover(MgrTestCase):
    """
    Tests of ceph-mgr active/standby failover, driven through
    ``self.mgr_cluster`` (stopping, failing and restarting mgr daemons,
    then checking which ids are reported as active or standby).
    """

    # NOTE(review): presumably read by MgrTestCase to require at least two
    # mgr daemons (one active, one standby) -- confirm against the base class.
    MGRS_REQUIRED = 2

    def test_timeout(self):
        """
        That when an active mgr stops responding, a standby is promoted
        after mon_mgr_beacon_grace.
        """

        # Query which mgr is active
        original_active = self.mgr_cluster.get_active_id()
        original_standbys = self.mgr_cluster.get_standby_ids()

        # Stop that daemon
        self.mgr_cluster.mgr_stop(original_active)

        # Assert that the other mgr becomes active
        self.wait_until_true(
            lambda: self.mgr_cluster.get_active_id() in original_standbys,
            timeout=60
        )

        # Bring the stopped daemon back; it should rejoin as a standby
        self.mgr_cluster.mgr_restart(original_active)
        self.wait_until_true(
            lambda: original_active in self.mgr_cluster.get_standby_ids(),
            timeout=10
        )

    def test_timeout_nostandby(self):
        """
        That when an active mgr stops responding, and no standby is
        available, the active mgr is removed from the map anyway.
        """
        # Query which mgr is active
        original_active = self.mgr_cluster.get_active_id()
        original_standbys = self.mgr_cluster.get_standby_ids()

        # Take every standby out of the picture first
        for s in original_standbys:
            self.mgr_cluster.mgr_stop(s)
            self.mgr_cluster.mgr_fail(s)

        self.assertListEqual(self.mgr_cluster.get_standby_ids(), [])
        self.assertEqual(self.mgr_cluster.get_active_id(), original_active)

        grace = int(self.mgr_cluster.get_config("mon_mgr_beacon_grace"))
        log.info("Should time out in about %s seconds", grace)

        self.mgr_cluster.mgr_stop(original_active)

        # Now wait for the mon to notice the mgr is gone and remove it
        # from the map.
        self.wait_until_equal(
            lambda: self.mgr_cluster.get_active_id(),
            "",
            timeout=grace * 2
        )

        self.assertListEqual(self.mgr_cluster.get_standby_ids(), [])
        self.assertEqual(self.mgr_cluster.get_active_id(), "")

    def test_explicit_fail(self):
        """
        That when a user explicitly fails a daemon, a standby immediately
        replaces it.
        """
        # Query which mgr is active
        original_active = self.mgr_cluster.get_active_id()
        original_standbys = self.mgr_cluster.get_standby_ids()

        self.mgr_cluster.mgr_fail(original_active)

        # A standby should take over
        self.wait_until_true(
            lambda: self.mgr_cluster.get_active_id() in original_standbys,
            timeout=60
        )

        # The one we failed should come back as a standby (he isn't
        # really dead)
        self.wait_until_true(
            lambda: original_active in self.mgr_cluster.get_standby_ids(),
            timeout=10
        )

        # Both daemons should have fully populated metadata
        # (regression test for http://tracker.ceph.com/issues/21260)
        meta = json.loads(self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "mgr", "metadata"))
        id_to_meta = {entry['id']: entry for entry in meta}
        for i in [original_active] + original_standbys:
            self.assertIn(i, id_to_meta)
            self.assertIn('ceph_version', id_to_meta[i])

        # We should be able to fail back over again: this exercises
        # our re-initialization of the python runtime within
        # a single process lifetime.

        # Get rid of any bystander standbys so that the original_active
        # will be selected as next active.
        new_active = self.mgr_cluster.get_active_id()
        for daemon in original_standbys:
            if daemon != new_active:
                self.mgr_cluster.mgr_stop(daemon)
                self.mgr_cluster.mgr_fail(daemon)

        self.assertListEqual(self.mgr_cluster.get_standby_ids(),
                             [original_active])

        self.mgr_cluster.mgr_stop(new_active)
        self.mgr_cluster.mgr_fail(new_active)

        self.assertEqual(self.mgr_cluster.get_active_id(), original_active)
        self.assertListEqual(self.mgr_cluster.get_standby_ids(), [])

    def test_standby_timeout(self):
        """
        That when a standby daemon stops sending beacons, it is
        removed from the list of standbys.
        """
        original_active = self.mgr_cluster.get_active_id()
        original_standbys = self.mgr_cluster.get_standby_ids()

        # Stop one standby; only the remaining ones should stay listed
        victim = original_standbys[0]
        self.mgr_cluster.mgr_stop(victim)

        expect_standbys = set(original_standbys) - {victim}

        self.wait_until_true(
            lambda: set(self.mgr_cluster.get_standby_ids()) == expect_standbys,
            timeout=60
        )
        # Losing a standby must not disturb the active daemon
        self.assertEqual(self.mgr_cluster.get_active_id(), original_active)