path: root/src/ceph/qa/tasks/rbd.py
"""
RBD testing task
"""
import contextlib
import logging
import os
import tempfile

from cStringIO import StringIO
from teuthology.orchestra import run
from teuthology import misc as teuthology
from teuthology import contextutil
from teuthology.parallel import parallel
from teuthology.task.common_fs_utils import generic_mkfs
from teuthology.task.common_fs_utils import generic_mount
from teuthology.task.common_fs_utils import default_image_name

log = logging.getLogger(__name__)

@contextlib.contextmanager
def create_image(ctx, config):
    """
    Create an rbd image.

    For example::

        tasks:
        - ceph:
        - rbd.create_image:
            client.0:
                image_name: testimage
                image_size: 100
                image_format: 1
            client.1:

    Image size is expressed as a number of megabytes; default value
    is 10240.

    Image format value must be either 1 or 2; default value is 1.

    """
    assert isinstance(config, dict) or isinstance(config, list), \
        "task create_image only supports a list or dictionary for configuration"

    if isinstance(config, dict):
        images = config.items()
    else:
        images = [(role, None) for role in config]

    testdir = teuthology.get_testdir(ctx)
    for role, properties in images:
        if properties is None:
            properties = {}
        name = properties.get('image_name', default_image_name(role))
        size = properties.get('image_size', 10240)
        fmt = properties.get('image_format', 1)
        (remote,) = ctx.cluster.only(role).remotes.keys()
        log.info('Creating image {name} with size {size}'.format(name=name,
                                                                 size=size))
        args = [
                'adjust-ulimits',
                'ceph-coverage',
                '{tdir}/archive/coverage'.format(tdir=testdir),
                'rbd',
                '-p', 'rbd',
                'create',
                '--size', str(size),
                name,
            ]
        # omit format option if using the default (format 1)
        # since old versions of rbd don't support it
        if int(fmt) != 1:
            args += ['--image-format', str(fmt)]
        remote.run(args=args)
    try:
        yield
    finally:
        log.info('Deleting rbd images...')
        for role, properties in images:
            if properties is None:
                properties = {}
            name = properties.get('image_name', default_image_name(role))
            (remote,) = ctx.cluster.only(role).remotes.keys()
            remote.run(
                args=[
                    'adjust-ulimits',
                    'ceph-coverage',
                    '{tdir}/archive/coverage'.format(tdir=testdir),
                    'rbd',
                    '-p', 'rbd',
                    'rm',
                    name,
                    ],
                )

@contextlib.contextmanager
def clone_image(ctx, config):
    """
    Clones a parent image.

    For example::

        tasks:
        - ceph:
        - rbd.clone_image:
            client.0:
                parent_name: testimage
                image_name: cloneimage
    """
    assert isinstance(config, dict) or isinstance(config, list), \
        "task clone_image only supports a list or dictionary for configuration"

    if isinstance(config, dict):
        images = config.items()
    else:
        images = [(role, None) for role in config]

    testdir = teuthology.get_testdir(ctx)
    for role, properties in images:
        if properties is None:
            properties = {}

        name = properties.get('image_name', default_image_name(role))
        parent_name = properties.get('parent_name')
        assert parent_name is not None, \
            "parent_name is required"
        parent_spec = '{name}@{snap}'.format(name=parent_name, snap=name)

        (remote,) = ctx.cluster.only(role).remotes.keys()
        log.info('Clone image {parent} to {child}'.format(parent=parent_name,
                                                          child=name))
        for cmd in [('snap', 'create', parent_spec),
                    ('snap', 'protect', parent_spec),
                    ('clone', parent_spec, name)]:
            args = [
                    'adjust-ulimits',
                    'ceph-coverage',
                    '{tdir}/archive/coverage'.format(tdir=testdir),
                    'rbd', '-p', 'rbd'
                    ]
            args.extend(cmd)
            remote.run(args=args)

    try:
        yield
    finally:
        log.info('Deleting rbd clones...')
        for role, properties in images:
            if properties is None:
                properties = {}
            name = properties.get('image_name', default_image_name(role))
            parent_name = properties.get('parent_name')
            parent_spec = '{name}@{snap}'.format(name=parent_name, snap=name)

            (remote,) = ctx.cluster.only(role).remotes.keys()

            for cmd in [('rm', name),
                        ('snap', 'unprotect', parent_spec),
                        ('snap', 'rm', parent_spec)]:
                args = [
                        'adjust-ulimits',
                        'ceph-coverage',
                        '{tdir}/archive/coverage'.format(tdir=testdir),
                        'rbd', '-p', 'rbd'
                        ]
                args.extend(cmd)
                remote.run(args=args)

@contextlib.contextmanager
def modprobe(ctx, config):
    """
    Load the rbd kernel module.

    For example::

        tasks:
        - ceph:
        - rbd.create_image: [client.0]
        - rbd.modprobe: [client.0]
    """
    log.info('Loading rbd kernel module...')
    for role in config:
        (remote,) = ctx.cluster.only(role).remotes.keys()
        remote.run(
            args=[
                'sudo',
                'modprobe',
                'rbd',
                ],
            )
    try:
        yield
    finally:
        log.info('Unloading rbd kernel module...')
        for role in config:
            (remote,) = ctx.cluster.only(role).remotes.keys()
            remote.run(
                args=[
                    'sudo',
                    'modprobe',
                    '-r',
                    'rbd',
                    # ignore errors; if more than one device was
                    # created, the module may not be ready to be
                    # removed the first time through.
                    run.Raw('||'),
                    'true',
                    ],
                )

@contextlib.contextmanager
def dev_create(ctx, config):
    """
    Map block devices to rbd images.

    For example::

        tasks:
        - ceph:
        - rbd.create_image: [client.0]
        - rbd.modprobe: [client.0]
        - rbd.dev_create:
            client.0: testimage.client.0
    """
    assert isinstance(config, dict) or isinstance(config, list), \
        "task dev_create only supports a list or dictionary for configuration"

    if isinstance(config, dict):
        role_images = config.items()
    else:
        role_images = [(role, None) for role in config]

    log.info('Creating rbd block devices...')

    testdir = teuthology.get_testdir(ctx)

    for role, image in role_images:
        if image is None:
            image = default_image_name(role)
        (remote,) = ctx.cluster.only(role).remotes.keys()

        remote.run(
            args=[
                'sudo',
                'adjust-ulimits',
                'ceph-coverage',
                '{tdir}/archive/coverage'.format(tdir=testdir),
                'rbd',
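                # role looks like 'client.0'; the part after the last dot is the user id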
                '--user', role.rsplit('.')[-1],
                '-p', 'rbd',
                'map',
                image,
                run.Raw('&&'),
                # wait for the symlink to be created by udev
                'while', 'test', '!', '-e', '/dev/rbd/rbd/{image}'.format(image=image), run.Raw(';'), 'do',
                'sleep', '1', run.Raw(';'),
                'done',
                ],
            )
    try:
        yield
    finally:
        log.info('Unmapping rbd devices...')
        for role, image in role_images:
            if image is None:
                image = default_image_name(role)
            (remote,) = ctx.cluster.only(role).remotes.keys()
            remote.run(
                args=[
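                    # make libraries installed under the test dir (if any) visible to the rbd binary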
                    'LD_LIBRARY_PATH={tdir}/binary/usr/local/lib'.format(tdir=testdir),
                    'sudo',
                    'adjust-ulimits',
                    'ceph-coverage',
                    '{tdir}/archive/coverage'.format(tdir=testdir),
                    'rbd',
                    '-p', 'rbd',
                    'unmap',
                    '/dev/rbd/rbd/{imgname}'.format(imgname=image),
                    run.Raw('&&'),
                    # wait for the symlink to be deleted by udev
                    'while', 'test', '-e', '/dev/rbd/rbd/{image}'.format(image=image),
                    run.Raw(';'),
                    'do',
                    'sleep', '1', run.Raw(';'),
                    'done',
                    ],
                )


def rbd_devname_rtn(ctx, image):
    """
    Return the udev-created device path for an rbd image.
    """
    return '/dev/rbd/rbd/{image}'.format(image=image)

def canonical_path(ctx, role, path):
    """
    Determine the canonical path for a given path on the host
    representing the given role.  A canonical path contains no
    . or .. components, and includes no symbolic links.
    """
    version_fp = StringIO()
    ctx.cluster.only(role).run(
        args=[ 'readlink', '-f', path ],
        stdout=version_fp,
        )
    canonical_path = version_fp.getvalue().rstrip('\n')
    version_fp.close()
    return canonical_path

@contextlib.contextmanager
def run_xfstests(ctx, config):
    """
    Run xfstests over specified devices.

    Warning: both the test and scratch devices specified will be
    overwritten.  Normally xfstests modifies (but does not destroy)
    the test device, but for now the run script used here re-makes
    both filesystems.

    Note: Only one instance of xfstests can run on a single host at
    a time, although this is not enforced.

    This task in its current form needs some improvement.  For
    example, it assumes all roles provided in the config are
    clients, and that the config provided is a list of key/value
    pairs.  For now please use the xfstests() interface, below.

    For example::

        tasks:
        - ceph:
        - rbd.run_xfstests:
            client.0:
                count: 2
                test_dev: 'test_dev'
                scratch_dev: 'scratch_dev'
                fs_type: 'xfs'
                tests: 'generic/100 xfs/003 xfs/005 xfs/006 generic/015'
                exclude:
                - generic/42
                randomize: true
    """
    with parallel() as p:
        for role, properties in config.items():
            p.spawn(run_xfstests_one_client, ctx, role, properties)
    yield

def run_xfstests_one_client(ctx, role, properties):
    """
    Spawned routine to handle xfs tests for a single client
    """
    testdir = teuthology.get_testdir(ctx)
    try:
        count = properties.get('count')
        test_dev = properties.get('test_dev')
        assert test_dev is not None, \
            "task run_xfstests requires test_dev to be defined"
        test_dev = canonical_path(ctx, role, test_dev)

        scratch_dev = properties.get('scratch_dev')
        assert scratch_dev is not None, \
            "task run_xfstests requires scratch_dev to be defined"
        scratch_dev = canonical_path(ctx, role, scratch_dev)

        fs_type = properties.get('fs_type')
        tests = properties.get('tests')
        exclude_list = properties.get('exclude', [])
        randomize = properties.get('randomize')

        (remote,) = ctx.cluster.only(role).remotes.keys()

        # Fetch the test script
        test_root = teuthology.get_testdir(ctx)
        test_script = 'run_xfstests.sh'
        test_path = os.path.join(test_root, test_script)

        xfstests_url = properties.get('xfstests_url')
        assert xfstests_url is not None, \
            "task run_xfstests requires xfstests_url to be defined"

        xfstests_krbd_url = xfstests_url + '/' + test_script

        log.info('Fetching {script} for {role} from {url}'.format(
            script=test_script,
            role=role,
            url=xfstests_krbd_url))

        args = [ 'wget', '-O', test_path, '--', xfstests_krbd_url ]
        remote.run(args=args)

        log.info('Running xfstests on {role}:'.format(role=role))
        log.info('   iteration count: {count}:'.format(count=count))
        log.info('       test device: {dev}'.format(dev=test_dev))
        log.info('    scratch device: {dev}'.format(dev=scratch_dev))
        log.info('     using fs_type: {fs_type}'.format(fs_type=fs_type))
        log.info('      tests to run: {tests}'.format(tests=tests))
        log.info('      exclude list: {}'.format(' '.join(exclude_list)))
        log.info('         randomize: {randomize}'.format(randomize=randomize))

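        # Push the exclude list to the remote host.  The local temporary
        # file goes away when the 'with' block exits, but the uploaded
        # copy (same path) remains and is passed to the script via '-x'
        # below.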
        if exclude_list:
            with tempfile.NamedTemporaryFile(bufsize=0, prefix='exclude') as exclude_file:
                for test in exclude_list:
                    exclude_file.write("{}\n".format(test))
                remote.put_file(exclude_file.name, exclude_file.name)

        # Note that the device paths are interpreted using
        # readlink -f <path> in order to get their canonical
        # pathname (so it matches what the kernel remembers).
        args = [
            '/usr/bin/sudo',
            'TESTDIR={tdir}'.format(tdir=testdir),
            'adjust-ulimits',
            'ceph-coverage',
            '{tdir}/archive/coverage'.format(tdir=testdir),
            '/bin/bash',
            test_path,
            '-c', str(count),
            '-f', fs_type,
            '-t', test_dev,
            '-s', scratch_dev,
            ]
        if exclude_list:
            args.extend(['-x', exclude_file.name])
        if randomize:
            args.append('-r')
        if tests:
            args.extend(['--', tests])
        remote.run(args=args, logger=log.getChild(role))
    finally:
        log.info('Removing {script} on {role}'.format(script=test_script,
                                                      role=role))
        remote.run(args=['rm', '-f', test_path])

@contextlib.contextmanager
def xfstests(ctx, config):
    """
    Run xfstests over rbd devices.  This interface sets up all
    required configuration automatically if not otherwise specified.
    Note that only one instance of xfstests can run on a single host
    at a time.  By default, the set of tests specified is run once.
    If a (non-zero) count value is supplied, the complete set of
    tests will be run that number of times.

    For example::

        tasks:
        - ceph:
        # Image sizes are in MB
        - rbd.xfstests:
            client.0:
                count: 3
                test_image: 'test_image'
                test_size: 250
                test_format: 2
                scratch_image: 'scratch_image'
                scratch_size: 250
                scratch_format: 1
                fs_type: 'xfs'
                tests: 'generic/100 xfs/003 xfs/005 xfs/006 generic/015'
                exclude:
                - generic/42
                randomize: true
                xfstests_branch: master
                xfstests_url: 'https://raw.github.com/ceph/ceph/master/qa'
    """
    if config is None:
        config = { 'all': None }
    assert isinstance(config, dict) or isinstance(config, list), \
        "task xfstests only supports a list or dictionary for configuration"
    if isinstance(config, dict):
        config = teuthology.replace_all_with_clients(ctx.cluster, config)
        runs = config.items()
    else:
        runs = [(role, None) for role in config]

    running_xfstests = {}
    for role, properties in runs:
        assert role.startswith('client.'), \
            "task xfstests can only run on client nodes"
        for host, roles_for_host in ctx.cluster.remotes.items():
            if role in roles_for_host:
                assert host not in running_xfstests, \
                    "task xfstests allows only one instance at a time per host"
                running_xfstests[host] = True

    images_config = {}
    scratch_config = {}
    modprobe_config = {}
    image_map_config = {}
    scratch_map_config = {}
    xfstests_config = {}
    for role, properties in runs:
        if properties is None:
            properties = {}

        test_image = properties.get('test_image', 'test_image.{role}'.format(role=role))
        test_size = properties.get('test_size', 10000) # 10G
        test_fmt = properties.get('test_format', 1)
        scratch_image = properties.get('scratch_image', 'scratch_image.{role}'.format(role=role))
        scratch_size = properties.get('scratch_size', 10000) # 10G
        scratch_fmt = properties.get('scratch_format', 1)

        images_config[role] = dict(
            image_name=test_image,
            image_size=test_size,
            image_format=test_fmt,
            )

        scratch_config[role] = dict(
            image_name=scratch_image,
            image_size=scratch_size,
            image_format=scratch_fmt,
            )

        xfstests_branch = properties.get('xfstests_branch', 'master')
        xfstests_url = properties.get('xfstests_url', 'https://raw.github.com/ceph/ceph/{branch}/qa'.format(branch=xfstests_branch))

        xfstests_config[role] = dict(
            count=properties.get('count', 1),
            test_dev='/dev/rbd/rbd/{image}'.format(image=test_image),
            scratch_dev='/dev/rbd/rbd/{image}'.format(image=scratch_image),
            fs_type=properties.get('fs_type', 'xfs'),
            randomize=properties.get('randomize', False),
            tests=properties.get('tests'),
            exclude=properties.get('exclude', []),
            xfstests_url=xfstests_url,
            )

        log.info('Setting up xfstests using RBD images:')
        log.info('      test ({size} MB): {image}'.format(size=test_size,
                                                        image=test_image))
        log.info('   scratch ({size} MB): {image}'.format(size=scratch_size,
                                                        image=scratch_image))
        modprobe_config[role] = None
        image_map_config[role] = test_image
        scratch_map_config[role] = scratch_image

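    # Create the images, load the kernel module and map the devices,
    # run the tests, then tear everything down in reverse order.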
    with contextutil.nested(
        lambda: create_image(ctx=ctx, config=images_config),
        lambda: create_image(ctx=ctx, config=scratch_config),
        lambda: modprobe(ctx=ctx, config=modprobe_config),
        lambda: dev_create(ctx=ctx, config=image_map_config),
        lambda: dev_create(ctx=ctx, config=scratch_map_config),
        lambda: run_xfstests(ctx=ctx, config=xfstests_config),
        ):
        yield


@contextlib.contextmanager
def task(ctx, config):
    """
    Create and mount an rbd image.

    For example, you can specify which clients to run on::

        tasks:
        - ceph:
        - rbd: [client.0, client.1]

    There are a few image options::

        tasks:
        - ceph:
        - rbd:
            client.0: # uses defaults
            client.1:
                image_name: foo
                image_size: 2048
                image_format: 2
                fs_type: xfs

    To use default options on all clients::

        tasks:
        - ceph:
        - rbd:
            all:

    To create 20GiB images and format them with xfs on all clients::

        tasks:
        - ceph:
        - rbd:
            all:
              image_size: 20480
              fs_type: xfs
    """
    if config is None:
        config = { 'all': None }
    norm_config = config
    if isinstance(config, dict):
        norm_config = teuthology.replace_all_with_clients(ctx.cluster, config)
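    # Build a role -> image name map for the device map and mount stages
    # below (None means use the default image name for that role).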
    if isinstance(norm_config, dict):
        role_images = {}
        for role, properties in norm_config.iteritems():
            if properties is None:
                properties = {}
            role_images[role] = properties.get('image_name')
    else:
        role_images = norm_config

    log.debug('rbd config is: %s', norm_config)

    with contextutil.nested(
        lambda: create_image(ctx=ctx, config=norm_config),
        lambda: modprobe(ctx=ctx, config=norm_config),
        lambda: dev_create(ctx=ctx, config=role_images),
        lambda: generic_mkfs(ctx=ctx, config=norm_config,
                devname_rtn=rbd_devname_rtn),
        lambda: generic_mount(ctx=ctx, config=role_images,
                devname_rtn=rbd_devname_rtn),
        ):
        yield