path: root/src/ceph/qa/tasks/mon_seesaw.py
from cStringIO import StringIO

import contextlib
import logging
import random

from teuthology import misc as teuthology
from teuthology.orchestra import run

from ceph_manager import CephManager, write_conf


log = logging.getLogger(__name__)


def _get_mons(ctx):
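    """Return the ids (the names without the 'mon.' prefix) of all monitors."""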
    return [name[len('mon.'):] for name in teuthology.get_mon_names(ctx)]


# teuthology prepares the monitor IPs (and ports) in get_mons(), so we can
# enumerate all monitor ports ([6789..]) and find the next available one.
def _get_next_port(ctx, ip, cluster):
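    """Return the lowest monitor port on `ip` that is not already taken,
    starting from the default port 6789 (e.g. with mons at 6789 and 6790
    on `ip`, this returns 6791)."""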
    # assuming we have only one cluster here.
    used = []
    for name in teuthology.get_mon_names(ctx, cluster):
        addr = ctx.ceph[cluster].conf[name]['mon addr']
        mon_ip, mon_port = addr.split(':')
        if mon_ip != ip:
            continue
        used.append(int(mon_port))
    port = 6789
    used.sort()
    for p in used:
        if p != port:
            break
        port += 1
    return port


def _setup_mon(ctx, manager, remote, mon, name, data_path, conf_path):
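    """Create the data directory and monstore for the new monitor `mon` on
    `remote` from the current monmap, and, if `conf_path` is given, add its
    'mon addr' to ceph.conf so the ceph CLI can reach it."""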
    # co-locate a new monitor on remote where an existing monitor is hosted
    cluster = manager.cluster
    remote.run(args=['sudo', 'mkdir', '-p', data_path])
    keyring_path = '/etc/ceph/{cluster}.keyring'.format(
        cluster=manager.cluster)
    testdir = teuthology.get_testdir(ctx)
    monmap_path = '{tdir}/{cluster}.monmap'.format(tdir=testdir,
                                                   cluster=cluster)
    manager.raw_cluster_cmd('mon', 'getmap', '-o', monmap_path)
    if manager.controller != remote:
        monmap = teuthology.get_file(manager.controller, monmap_path)
        teuthology.write_file(remote, monmap_path, StringIO(monmap))
    remote.run(
        args=[
            'sudo',
            'ceph-mon',
            '--cluster', cluster,
            '--mkfs',
            '-i', mon,
            '--monmap', monmap_path,
            '--keyring', keyring_path])
    if manager.controller != remote:
        teuthology.delete_file(remote, monmap_path)
    # raw_cluster_cmd() runs with sudo, so delete with sudo here as well.
    teuthology.delete_file(manager.controller, monmap_path, sudo=True)
    # update ceph.conf so that the ceph CLI is able to connect to the cluster
    if conf_path:
        ip = remote.ip_address
        port = _get_next_port(ctx, ip, cluster)
        mon_addr = '{ip}:{port}'.format(ip=ip, port=port)
        ctx.ceph[cluster].conf[name] = {'mon addr': mon_addr}
        write_conf(ctx, conf_path, cluster)


def _teardown_mon(ctx, manager, remote, name, data_path, conf_path):
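    """Drop the monitor's section from ceph.conf and remove its data
    directory on `remote`."""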
    cluster = manager.cluster
    del ctx.ceph[cluster].conf[name]
    write_conf(ctx, conf_path, cluster)
    remote.run(args=['sudo', 'rm', '-rf', data_path])


@contextlib.contextmanager
def _prepare_mon(ctx, manager, remote, mon):
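    """Set up a new monitor `mon` on `remote` on entry, and tear it down
    again on exit."""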
    cluster = manager.cluster
    data_path = '/var/lib/ceph/mon/{cluster}-{id}'.format(
        cluster=cluster, id=mon)
    conf_path = '/etc/ceph/{cluster}.conf'.format(cluster=cluster)
    name = 'mon.{0}'.format(mon)
    _setup_mon(ctx, manager, remote, mon, name, data_path, conf_path)
    yield
    _teardown_mon(ctx, manager, remote, name,
                  data_path, conf_path)


# run_daemon() in ceph.py starts a herd of daemons of the same type, but
# _run_daemon() starts only one instance.
@contextlib.contextmanager
def _run_daemon(ctx, remote, cluster, type_, id_):
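    """Start a single ceph-<type_> daemon on `remote` under daemon-helper,
    yield its daemon handle, and stop the daemon on exit."""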
    testdir = teuthology.get_testdir(ctx)
    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)
    daemon_signal = 'kill'
    run_cmd = [
        'sudo',
        'adjust-ulimits',
        'ceph-coverage',
        coverage_dir,
        'daemon-helper',
        daemon_signal,
    ]
    run_cmd_tail = [
        'ceph-%s' % (type_),
        '-f',
        '--cluster', cluster,
        '-i', id_]
    run_cmd.extend(run_cmd_tail)
    ctx.daemons.add_daemon(remote, type_, id_,
                           cluster=cluster,
                           args=run_cmd,
                           logger=log.getChild(type_),
                           stdin=run.PIPE,
                           wait=False)
    daemon = ctx.daemons.get_daemon(type_, id_, cluster)
    yield daemon
    daemon.stop()


@contextlib.contextmanager
def task(ctx, config):
    """
    replace a monitor with a newly added one, and then revert this change

    How it works::

    1. add a mon with the specified id (mon.victim_prime)
    2. wait for quorum
    3. remove the mon with the specified id (mon.victim); mon.victim will
       commit suicide
    4. wait for quorum
    5. <yield>
    6. add mon.victim back, and start it
    7. wait for quorum
    8. remove mon.victim_prime

    Options::

    victim       the id of the mon to be removed (a random mon by default)
    replacer     the id of the new mon ("${victim}_prime" if not specified)
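
    For example, a minimal suite fragment might look like the following (a
    sketch only; the surrounding install/ceph tasks are illustrative)::

        tasks:
        - install:
        - ceph:
        - mon_seesaw:
            victim: a
            replacer: a_prime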
    """
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
    manager = CephManager(mon, ctx=ctx, logger=log.getChild('ceph_manager'))

    if config is None:
        config = {}
    assert isinstance(config, dict), \
        "task mon_seesaw only supports a dictionary for configuration"
    overrides = ctx.config.get('overrides', {})
    teuthology.deep_merge(config, overrides.get('mon_seesaw', {}))
    victim = config.get('victim', random.choice(_get_mons(ctx)))
    replacer = config.get('replacer', '{0}_prime'.format(victim))
    remote = manager.find_remote('mon', victim)
    quorum = manager.get_mon_quorum()
    cluster = manager.cluster
    log.info('replacing {victim} with {replacer}'.format(victim=victim,
                                                         replacer=replacer))
    with _prepare_mon(ctx, manager, remote, replacer):
        with _run_daemon(ctx, remote, cluster, 'mon', replacer):
            # replacer will join the quorum automatically
            manager.wait_for_mon_quorum_size(len(quorum) + 1, 10)
            # if we don't remove the victim from the monmap, there is a chance
            # that we leave the new joiner with a monmap of 2 mons, and it will
            # not be able to reach the other one, so it will keep probing
            # forever.
            log.info('removing {mon}'.format(mon=victim))
            manager.raw_cluster_cmd('mon', 'remove', victim)
            manager.wait_for_mon_quorum_size(len(quorum), 10)
            # the victim will commit suicide after being removed from the
            # monmap, so wait until it stops.
            ctx.daemons.get_daemon('mon', victim, cluster).wait(10)
            try:
                # perform other tasks
                yield
            finally:
                # bring the victim back online
                # nuke the victim's monstore, otherwise it will refuse to boot
                # with the following message:
                #
                # not in monmap and have been in a quorum before; must have
                # been removed
                log.info('re-adding {mon}'.format(mon=victim))
                data_path = '/var/lib/ceph/mon/{cluster}-{id}'.format(
                    cluster=cluster, id=victim)
                remote.run(args=['sudo', 'rm', '-rf', data_path])
                name = 'mon.{0}'.format(victim)
                _setup_mon(ctx, manager, remote, victim, name, data_path, None)
                log.info('reviving {mon}'.format(mon=victim))
                manager.revive_mon(victim)
                manager.wait_for_mon_quorum_size(len(quorum) + 1, 10)
                manager.raw_cluster_cmd('mon', 'remove', replacer)
                manager.wait_for_mon_quorum_size(len(quorum), 10)