summaryrefslogtreecommitdiffstats
path: root/kernel/drivers/md/raid10.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/drivers/md/raid10.c')
-rw-r--r--kernel/drivers/md/raid10.c291
1 files changed, 107 insertions, 184 deletions
diff --git a/kernel/drivers/md/raid10.c b/kernel/drivers/md/raid10.c
index fe0122771..ce959b4ae 100644
--- a/kernel/drivers/md/raid10.c
+++ b/kernel/drivers/md/raid10.c
@@ -39,6 +39,7 @@
* far_copies (stored in second byte of layout)
* far_offset (stored in bit 16 of layout )
* use_far_sets (stored in bit 17 of layout )
+ * use_far_sets_bugfixed (stored in bit 18 of layout )
*
* The data to be stored is divided into chunks using chunksize. Each device
* is divided into far_copies sections. In each section, chunks are laid out
@@ -101,7 +102,7 @@ static int _enough(struct r10conf *conf, int previous, int ignore);
static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
int *skipped);
static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio);
-static void end_reshape_write(struct bio *bio, int error);
+static void end_reshape_write(struct bio *bio);
static void end_reshape(struct r10conf *conf);
static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
@@ -307,9 +308,9 @@ static void raid_end_bio_io(struct r10bio *r10_bio)
} else
done = 1;
if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
- clear_bit(BIO_UPTODATE, &bio->bi_flags);
+ bio->bi_error = -EIO;
if (done) {
- bio_endio(bio, 0);
+ bio_endio(bio);
/*
* Wake up any possible resync thread that waits for the device
* to go idle.
@@ -358,9 +359,9 @@ static int find_bio_disk(struct r10conf *conf, struct r10bio *r10_bio,
return r10_bio->devs[slot].devnum;
}
-static void raid10_end_read_request(struct bio *bio, int error)
+static void raid10_end_read_request(struct bio *bio)
{
- int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+ int uptodate = !bio->bi_error;
struct r10bio *r10_bio = bio->bi_private;
int slot, dev;
struct md_rdev *rdev;
@@ -438,9 +439,8 @@ static void one_write_done(struct r10bio *r10_bio)
}
}
-static void raid10_end_write_request(struct bio *bio, int error)
+static void raid10_end_write_request(struct bio *bio)
{
- int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct r10bio *r10_bio = bio->bi_private;
int dev;
int dec_rdev = 1;
@@ -460,7 +460,7 @@ static void raid10_end_write_request(struct bio *bio, int error)
/*
* this branch is our 'one mirror IO has finished' event handler:
*/
- if (!uptodate) {
+ if (bio->bi_error) {
if (repl)
/* Never record new bad blocks to replacement,
* just fail it.
@@ -672,93 +672,6 @@ static sector_t raid10_find_virt(struct r10conf *conf, sector_t sector, int dev)
return (vchunk << geo->chunk_shift) + offset;
}
-/**
- * raid10_mergeable_bvec -- tell bio layer if a two requests can be merged
- * @mddev: the md device
- * @bvm: properties of new bio
- * @biovec: the request that could be merged to it.
- *
- * Return amount of bytes we can accept at this offset
- * This requires checking for end-of-chunk if near_copies != raid_disks,
- * and for subordinate merge_bvec_fns if merge_check_needed.
- */
-static int raid10_mergeable_bvec(struct mddev *mddev,
- struct bvec_merge_data *bvm,
- struct bio_vec *biovec)
-{
- struct r10conf *conf = mddev->private;
- sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
- int max;
- unsigned int chunk_sectors;
- unsigned int bio_sectors = bvm->bi_size >> 9;
- struct geom *geo = &conf->geo;
-
- chunk_sectors = (conf->geo.chunk_mask & conf->prev.chunk_mask) + 1;
- if (conf->reshape_progress != MaxSector &&
- ((sector >= conf->reshape_progress) !=
- conf->mddev->reshape_backwards))
- geo = &conf->prev;
-
- if (geo->near_copies < geo->raid_disks) {
- max = (chunk_sectors - ((sector & (chunk_sectors - 1))
- + bio_sectors)) << 9;
- if (max < 0)
- /* bio_add cannot handle a negative return */
- max = 0;
- if (max <= biovec->bv_len && bio_sectors == 0)
- return biovec->bv_len;
- } else
- max = biovec->bv_len;
-
- if (mddev->merge_check_needed) {
- struct {
- struct r10bio r10_bio;
- struct r10dev devs[conf->copies];
- } on_stack;
- struct r10bio *r10_bio = &on_stack.r10_bio;
- int s;
- if (conf->reshape_progress != MaxSector) {
- /* Cannot give any guidance during reshape */
- if (max <= biovec->bv_len && bio_sectors == 0)
- return biovec->bv_len;
- return 0;
- }
- r10_bio->sector = sector;
- raid10_find_phys(conf, r10_bio);
- rcu_read_lock();
- for (s = 0; s < conf->copies; s++) {
- int disk = r10_bio->devs[s].devnum;
- struct md_rdev *rdev = rcu_dereference(
- conf->mirrors[disk].rdev);
- if (rdev && !test_bit(Faulty, &rdev->flags)) {
- struct request_queue *q =
- bdev_get_queue(rdev->bdev);
- if (q->merge_bvec_fn) {
- bvm->bi_sector = r10_bio->devs[s].addr
- + rdev->data_offset;
- bvm->bi_bdev = rdev->bdev;
- max = min(max, q->merge_bvec_fn(
- q, bvm, biovec));
- }
- }
- rdev = rcu_dereference(conf->mirrors[disk].replacement);
- if (rdev && !test_bit(Faulty, &rdev->flags)) {
- struct request_queue *q =
- bdev_get_queue(rdev->bdev);
- if (q->merge_bvec_fn) {
- bvm->bi_sector = r10_bio->devs[s].addr
- + rdev->data_offset;
- bvm->bi_bdev = rdev->bdev;
- max = min(max, q->merge_bvec_fn(
- q, bvm, biovec));
- }
- }
- }
- rcu_read_unlock();
- }
- return max;
-}
-
/*
* This routine returns the disk from which the requested read should
* be done. There is a per-array 'next expected sequential IO' sector
@@ -821,12 +734,10 @@ retry:
disk = r10_bio->devs[slot].devnum;
rdev = rcu_dereference(conf->mirrors[disk].replacement);
if (rdev == NULL || test_bit(Faulty, &rdev->flags) ||
- test_bit(Unmerged, &rdev->flags) ||
r10_bio->devs[slot].addr + sectors > rdev->recovery_offset)
rdev = rcu_dereference(conf->mirrors[disk].rdev);
if (rdev == NULL ||
- test_bit(Faulty, &rdev->flags) ||
- test_bit(Unmerged, &rdev->flags))
+ test_bit(Faulty, &rdev->flags))
continue;
if (!test_bit(In_sync, &rdev->flags) &&
r10_bio->devs[slot].addr + sectors > rdev->recovery_offset)
@@ -914,7 +825,7 @@ static int raid10_congested(struct mddev *mddev, int bits)
struct r10conf *conf = mddev->private;
int i, ret = 0;
- if ((bits & (1 << BDI_async_congested)) &&
+ if ((bits & (1 << WB_async_congested)) &&
conf->pending_count >= max_queued_requests)
return 1;
@@ -957,7 +868,7 @@ static void flush_pending_writes(struct r10conf *conf)
if (unlikely((bio->bi_rw & REQ_DISCARD) &&
!blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
/* Just ignore it */
- bio_endio(bio, 0);
+ bio_endio(bio);
else
generic_make_request(bio);
bio = next;
@@ -1133,7 +1044,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
if (unlikely((bio->bi_rw & REQ_DISCARD) &&
!blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
/* Just ignore it */
- bio_endio(bio, 0);
+ bio_endio(bio);
else
generic_make_request(bio);
bio = next;
@@ -1217,7 +1128,7 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
* non-zero, then it is the number of not-completed requests.
*/
bio->bi_phys_segments = 0;
- clear_bit(BIO_SEG_VALID, &bio->bi_flags);
+ bio_clear_flag(bio, BIO_SEG_VALID);
if (rw == READ) {
/*
@@ -1326,11 +1237,9 @@ retry_write:
blocked_rdev = rrdev;
break;
}
- if (rdev && (test_bit(Faulty, &rdev->flags)
- || test_bit(Unmerged, &rdev->flags)))
+ if (rdev && (test_bit(Faulty, &rdev->flags)))
rdev = NULL;
- if (rrdev && (test_bit(Faulty, &rrdev->flags)
- || test_bit(Unmerged, &rrdev->flags)))
+ if (rrdev && (test_bit(Faulty, &rrdev->flags)))
rrdev = NULL;
r10_bio->devs[i].bio = NULL;
@@ -1589,6 +1498,8 @@ static void status(struct seq_file *seq, struct mddev *mddev)
seq_printf(seq, " %d offset-copies", conf->geo.far_copies);
else
seq_printf(seq, " %d far-copies", conf->geo.far_copies);
+ if (conf->geo.far_set_size != conf->geo.raid_disks)
+ seq_printf(seq, " %d devices per set", conf->geo.far_set_size);
}
seq_printf(seq, " [%d/%d] [", conf->geo.raid_disks,
conf->geo.raid_disks - mddev->degraded);
@@ -1681,6 +1592,7 @@ static void error(struct mddev *mddev, struct md_rdev *rdev)
set_bit(Blocked, &rdev->flags);
set_bit(Faulty, &rdev->flags);
set_bit(MD_CHANGE_DEVS, &mddev->flags);
+ set_bit(MD_CHANGE_PENDING, &mddev->flags);
spin_unlock_irqrestore(&conf->device_lock, flags);
printk(KERN_ALERT
"md/raid10:%s: Disk failure on %s, disabling device.\n"
@@ -1777,7 +1689,6 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
int mirror;
int first = 0;
int last = conf->geo.raid_disks - 1;
- struct request_queue *q = bdev_get_queue(rdev->bdev);
if (mddev->recovery_cp < MaxSector)
/* only hot-add to in-sync arrays, as recovery is
@@ -1787,14 +1698,12 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
if (rdev->saved_raid_disk < 0 && !_enough(conf, 1, -1))
return -EINVAL;
+ if (md_integrity_add_rdev(rdev, mddev))
+ return -ENXIO;
+
if (rdev->raid_disk >= 0)
first = last = rdev->raid_disk;
- if (q->merge_bvec_fn) {
- set_bit(Unmerged, &rdev->flags);
- mddev->merge_check_needed = 1;
- }
-
if (rdev->saved_raid_disk >= first &&
conf->mirrors[rdev->saved_raid_disk].rdev == NULL)
mirror = rdev->saved_raid_disk;
@@ -1833,20 +1742,6 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
rcu_assign_pointer(p->rdev, rdev);
break;
}
- if (err == 0 && test_bit(Unmerged, &rdev->flags)) {
- /* Some requests might not have seen this new
- * merge_bvec_fn. We must wait for them to complete
- * before merging the device fully.
- * First we make sure any code which has tested
- * our function has submitted the request, then
- * we wait for all outstanding requests to complete.
- */
- synchronize_sched();
- freeze_array(conf, 0);
- unfreeze_array(conf);
- clear_bit(Unmerged, &rdev->flags);
- }
- md_integrity_add_rdev(rdev, mddev);
if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
@@ -1916,7 +1811,7 @@ abort:
return err;
}
-static void end_sync_read(struct bio *bio, int error)
+static void end_sync_read(struct bio *bio)
{
struct r10bio *r10_bio = bio->bi_private;
struct r10conf *conf = r10_bio->mddev->private;
@@ -1928,7 +1823,7 @@ static void end_sync_read(struct bio *bio, int error)
} else
d = find_bio_disk(conf, r10_bio, bio, NULL, NULL);
- if (test_bit(BIO_UPTODATE, &bio->bi_flags))
+ if (!bio->bi_error)
set_bit(R10BIO_Uptodate, &r10_bio->state);
else
/* The write handler will notice the lack of
@@ -1977,9 +1872,8 @@ static void end_sync_request(struct r10bio *r10_bio)
}
}
-static void end_sync_write(struct bio *bio, int error)
+static void end_sync_write(struct bio *bio)
{
- int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct r10bio *r10_bio = bio->bi_private;
struct mddev *mddev = r10_bio->mddev;
struct r10conf *conf = mddev->private;
@@ -1996,7 +1890,7 @@ static void end_sync_write(struct bio *bio, int error)
else
rdev = conf->mirrors[d].rdev;
- if (!uptodate) {
+ if (bio->bi_error) {
if (repl)
md_error(mddev, rdev);
else {
@@ -2044,7 +1938,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
/* find the first device with a block */
for (i=0; i<conf->copies; i++)
- if (test_bit(BIO_UPTODATE, &r10_bio->devs[i].bio->bi_flags))
+ if (!r10_bio->devs[i].bio->bi_error)
break;
if (i == conf->copies)
@@ -2052,6 +1946,8 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
first = i;
fbio = r10_bio->devs[i].bio;
+ fbio->bi_iter.bi_size = r10_bio->sectors << 9;
+ fbio->bi_iter.bi_idx = 0;
vcnt = (r10_bio->sectors + (PAGE_SIZE >> 9) - 1) >> (PAGE_SHIFT - 9);
/* now find blocks with errors */
@@ -2064,7 +1960,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
continue;
if (i == first)
continue;
- if (test_bit(BIO_UPTODATE, &r10_bio->devs[i].bio->bi_flags)) {
+ if (!r10_bio->devs[i].bio->bi_error) {
/* We know that the bi_io_vec layout is the same for
* both 'first' and 'i', so we just compare them.
* All vec entries are PAGE_SIZE;
@@ -2095,21 +1991,14 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
bio_reset(tbio);
tbio->bi_vcnt = vcnt;
- tbio->bi_iter.bi_size = r10_bio->sectors << 9;
+ tbio->bi_iter.bi_size = fbio->bi_iter.bi_size;
tbio->bi_rw = WRITE;
tbio->bi_private = r10_bio;
tbio->bi_iter.bi_sector = r10_bio->devs[i].addr;
-
- for (j=0; j < vcnt ; j++) {
- tbio->bi_io_vec[j].bv_offset = 0;
- tbio->bi_io_vec[j].bv_len = PAGE_SIZE;
-
- memcpy(page_address(tbio->bi_io_vec[j].bv_page),
- page_address(fbio->bi_io_vec[j].bv_page),
- PAGE_SIZE);
- }
tbio->bi_end_io = end_sync_write;
+ bio_copy_data(tbio, fbio);
+
d = r10_bio->devs[i].devnum;
atomic_inc(&conf->mirrors[d].rdev->nr_pending);
atomic_inc(&r10_bio->remaining);
@@ -2124,17 +2013,14 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
* that are active
*/
for (i = 0; i < conf->copies; i++) {
- int j, d;
+ int d;
tbio = r10_bio->devs[i].repl_bio;
if (!tbio || !tbio->bi_end_io)
continue;
if (r10_bio->devs[i].bio->bi_end_io != end_sync_write
&& r10_bio->devs[i].bio != fbio)
- for (j = 0; j < vcnt; j++)
- memcpy(page_address(tbio->bi_io_vec[j].bv_page),
- page_address(fbio->bi_io_vec[j].bv_page),
- PAGE_SIZE);
+ bio_copy_data(tbio, fbio);
d = r10_bio->devs[i].devnum;
atomic_inc(&r10_bio->remaining);
md_sync_acct(conf->mirrors[d].replacement->bdev,
@@ -2404,7 +2290,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
d = r10_bio->devs[sl].devnum;
rdev = rcu_dereference(conf->mirrors[d].rdev);
if (rdev &&
- !test_bit(Unmerged, &rdev->flags) &&
test_bit(In_sync, &rdev->flags) &&
is_badblock(rdev, r10_bio->devs[sl].addr + sect, s,
&first_bad, &bad_sectors) == 0) {
@@ -2458,7 +2343,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
d = r10_bio->devs[sl].devnum;
rdev = rcu_dereference(conf->mirrors[d].rdev);
if (!rdev ||
- test_bit(Unmerged, &rdev->flags) ||
!test_bit(In_sync, &rdev->flags))
continue;
@@ -2590,7 +2474,7 @@ static int narrow_write_error(struct r10bio *r10_bio, int i)
choose_data_offset(r10_bio, rdev) +
(sector - r10_bio->sector));
wbio->bi_bdev = rdev->bdev;
- if (submit_bio_wait(WRITE, wbio) == 0)
+ if (submit_bio_wait(WRITE, wbio) < 0)
/* Failure! */
ok = rdev_set_badblocks(rdev, sector,
sectors, 0)
@@ -2716,8 +2600,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
rdev = conf->mirrors[dev].rdev;
if (r10_bio->devs[m].bio == NULL)
continue;
- if (test_bit(BIO_UPTODATE,
- &r10_bio->devs[m].bio->bi_flags)) {
+ if (!r10_bio->devs[m].bio->bi_error) {
rdev_clear_badblocks(
rdev,
r10_bio->devs[m].addr,
@@ -2732,8 +2615,8 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
rdev = conf->mirrors[dev].replacement;
if (r10_bio->devs[m].repl_bio == NULL)
continue;
- if (test_bit(BIO_UPTODATE,
- &r10_bio->devs[m].repl_bio->bi_flags)) {
+
+ if (!r10_bio->devs[m].repl_bio->bi_error) {
rdev_clear_badblocks(
rdev,
r10_bio->devs[m].addr,
@@ -2748,6 +2631,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
}
put_buf(r10_bio);
} else {
+ bool fail = false;
for (m = 0; m < conf->copies; m++) {
int dev = r10_bio->devs[m].devnum;
struct bio *bio = r10_bio->devs[m].bio;
@@ -2758,8 +2642,8 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
r10_bio->devs[m].addr,
r10_bio->sectors, 0);
rdev_dec_pending(rdev, conf->mddev);
- } else if (bio != NULL &&
- !test_bit(BIO_UPTODATE, &bio->bi_flags)) {
+ } else if (bio != NULL && bio->bi_error) {
+ fail = true;
if (!narrow_write_error(r10_bio, m)) {
md_error(conf->mddev, rdev);
set_bit(R10BIO_Degraded,
@@ -2777,10 +2661,17 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
rdev_dec_pending(rdev, conf->mddev);
}
}
- if (test_bit(R10BIO_WriteError,
- &r10_bio->state))
- close_write(r10_bio);
- raid_end_bio_io(r10_bio);
+ if (fail) {
+ spin_lock_irq(&conf->device_lock);
+ list_add(&r10_bio->retry_list, &conf->bio_end_io_list);
+ spin_unlock_irq(&conf->device_lock);
+ md_wakeup_thread(conf->mddev->thread);
+ } else {
+ if (test_bit(R10BIO_WriteError,
+ &r10_bio->state))
+ close_write(r10_bio);
+ raid_end_bio_io(r10_bio);
+ }
}
}
@@ -2795,6 +2686,29 @@ static void raid10d(struct md_thread *thread)
md_check_recovery(mddev);
+ if (!list_empty_careful(&conf->bio_end_io_list) &&
+ !test_bit(MD_CHANGE_PENDING, &mddev->flags)) {
+ LIST_HEAD(tmp);
+ spin_lock_irqsave(&conf->device_lock, flags);
+ if (!test_bit(MD_CHANGE_PENDING, &mddev->flags)) {
+ list_add(&tmp, &conf->bio_end_io_list);
+ list_del_init(&conf->bio_end_io_list);
+ }
+ spin_unlock_irqrestore(&conf->device_lock, flags);
+ while (!list_empty(&tmp)) {
+ r10_bio = list_first_entry(&tmp, struct r10bio,
+ retry_list);
+ list_del(&r10_bio->retry_list);
+ if (mddev->degraded)
+ set_bit(R10BIO_Degraded, &r10_bio->state);
+
+ if (test_bit(R10BIO_WriteError,
+ &r10_bio->state))
+ close_write(r10_bio);
+ raid_end_bio_io(r10_bio);
+ }
+ }
+
blk_start_plug(&plug);
for (;;) {
@@ -3237,7 +3151,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
/* resync. Schedule a read for every block at this virt offset */
int count = 0;
- bitmap_cond_end_sync(mddev->bitmap, sector_nr);
+ bitmap_cond_end_sync(mddev->bitmap, sector_nr, 0);
if (!bitmap_start_sync(mddev->bitmap, sector_nr,
&sync_blocks, mddev->degraded) &&
@@ -3273,7 +3187,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
bio = r10_bio->devs[i].bio;
bio_reset(bio);
- clear_bit(BIO_UPTODATE, &bio->bi_flags);
+ bio->bi_error = -EIO;
if (conf->mirrors[d].rdev == NULL ||
test_bit(Faulty, &conf->mirrors[d].rdev->flags))
continue;
@@ -3310,7 +3224,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
/* Need to set up for writing to the replacement */
bio = r10_bio->devs[i].repl_bio;
bio_reset(bio);
- clear_bit(BIO_UPTODATE, &bio->bi_flags);
+ bio->bi_error = -EIO;
sector = r10_bio->devs[i].addr;
atomic_inc(&conf->mirrors[d].rdev->nr_pending);
@@ -3367,7 +3281,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
/* remove last page from this bio */
bio2->bi_vcnt--;
bio2->bi_iter.bi_size -= len;
- __clear_bit(BIO_SEG_VALID, &bio2->bi_flags);
+ bio_clear_flag(bio2, BIO_SEG_VALID);
}
goto bio_full;
}
@@ -3387,7 +3301,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
if (bio->bi_end_io == end_sync_read) {
md_sync_acct(bio->bi_bdev, nr_sectors);
- set_bit(BIO_UPTODATE, &bio->bi_flags);
+ bio->bi_error = 0;
generic_make_request(bio);
}
}
@@ -3487,7 +3401,7 @@ static int setup_geo(struct geom *geo, struct mddev *mddev, enum geo_type new)
disks = mddev->raid_disks + mddev->delta_disks;
break;
}
- if (layout >> 18)
+ if (layout >> 19)
return -1;
if (chunk < (PAGE_SIZE >> 9) ||
!is_power_of_2(chunk))
@@ -3499,7 +3413,22 @@ static int setup_geo(struct geom *geo, struct mddev *mddev, enum geo_type new)
geo->near_copies = nc;
geo->far_copies = fc;
geo->far_offset = fo;
- geo->far_set_size = (layout & (1<<17)) ? disks / fc : disks;
+ switch (layout >> 17) {
+ case 0: /* original layout. simple but not always optimal */
+ geo->far_set_size = disks;
+ break;
+ case 1: /* "improved" layout which was buggy. Hopefully no-one is
+ * actually using this, but leave code here just in case.*/
+ geo->far_set_size = disks/fc;
+ WARN(geo->far_set_size < fc,
+ "This RAID10 layout does not provide data safety - please backup and create new array\n");
+ break;
+ case 2: /* "improved" layout fixed to match documentation */
+ geo->far_set_size = fc * nc;
+ break;
+ default: /* Not a valid layout */
+ return -1;
+ }
geo->chunk_mask = chunk - 1;
geo->chunk_shift = ffz(~chunk);
return nc*fc;
@@ -3569,6 +3498,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
conf->reshape_safe = conf->reshape_progress;
spin_lock_init(&conf->device_lock);
INIT_LIST_HEAD(&conf->retry_list);
+ INIT_LIST_HEAD(&conf->bio_end_io_list);
spin_lock_init(&conf->resync_lock);
init_waitqueue_head(&conf->wait_barrier);
@@ -3585,8 +3515,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
printk(KERN_ERR "md/raid10:%s: couldn't allocate memory.\n",
mdname(mddev));
if (conf) {
- if (conf->r10bio_pool)
- mempool_destroy(conf->r10bio_pool);
+ mempool_destroy(conf->r10bio_pool);
kfree(conf->mirrors);
safe_put_page(conf->tmppage);
kfree(conf);
@@ -3653,8 +3582,6 @@ static int run(struct mddev *mddev)
disk->rdev = rdev;
}
q = bdev_get_queue(rdev->bdev);
- if (q->merge_bvec_fn)
- mddev->merge_check_needed = 1;
diff = (rdev->new_data_offset - rdev->data_offset);
if (!mddev->reshape_backwards)
diff = -diff;
@@ -3783,8 +3710,7 @@ static int run(struct mddev *mddev)
out_free_conf:
md_unregister_thread(&mddev->thread);
- if (conf->r10bio_pool)
- mempool_destroy(conf->r10bio_pool);
+ mempool_destroy(conf->r10bio_pool);
safe_put_page(conf->tmppage);
kfree(conf->mirrors);
kfree(conf);
@@ -3797,8 +3723,7 @@ static void raid10_free(struct mddev *mddev, void *priv)
{
struct r10conf *conf = priv;
- if (conf->r10bio_pool)
- mempool_destroy(conf->r10bio_pool);
+ mempool_destroy(conf->r10bio_pool);
safe_put_page(conf->tmppage);
kfree(conf->mirrors);
kfree(conf->mirrors_old);
@@ -4225,7 +4150,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
* at a time, possibly less if that exceeds RESYNC_PAGES,
* or we hit a bad block or something.
* This might mean we pause for normal IO in the middle of
- * a chunk, but that is not a problem was mddev->reshape_position
+ * a chunk, but that is not a problem as mddev->reshape_position
* can record any location.
*
* If we will want to write to a location that isn't
@@ -4249,7 +4174,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
*
* In all this the minimum difference in data offsets
* (conf->offset_diff - always positive) allows a bit of slack,
- * so next can be after 'safe', but not by more than offset_disk
+ * so next can be after 'safe', but not by more than offset_diff
*
* We need to prepare all the bios here before we start any IO
* to ensure the size we choose is acceptable to all devices.
@@ -4392,7 +4317,7 @@ read_more:
read_bio->bi_end_io = end_sync_read;
read_bio->bi_rw = READ;
read_bio->bi_flags &= (~0UL << BIO_RESET_BITS);
- __set_bit(BIO_UPTODATE, &read_bio->bi_flags);
+ read_bio->bi_error = 0;
read_bio->bi_vcnt = 0;
read_bio->bi_iter.bi_size = 0;
r10_bio->master_bio = read_bio;
@@ -4449,7 +4374,7 @@ read_more:
/* Remove last page from this bio */
bio2->bi_vcnt--;
bio2->bi_iter.bi_size -= len;
- __clear_bit(BIO_SEG_VALID, &bio2->bi_flags);
+ bio_clear_flag(bio2, BIO_SEG_VALID);
}
goto bio_full;
}
@@ -4614,9 +4539,8 @@ static int handle_reshape_read_error(struct mddev *mddev,
return 0;
}
-static void end_reshape_write(struct bio *bio, int error)
+static void end_reshape_write(struct bio *bio)
{
- int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct r10bio *r10_bio = bio->bi_private;
struct mddev *mddev = r10_bio->mddev;
struct r10conf *conf = mddev->private;
@@ -4633,7 +4557,7 @@ static void end_reshape_write(struct bio *bio, int error)
rdev = conf->mirrors[d].rdev;
}
- if (!uptodate) {
+ if (bio->bi_error) {
/* FIXME should record badblock */
md_error(mddev, rdev);
}
@@ -4710,7 +4634,6 @@ static struct md_personality raid10_personality =
.start_reshape = raid10_start_reshape,
.finish_reshape = raid10_finish_reshape,
.congested = raid10_congested,
- .mergeable_bvec = raid10_mergeable_bvec,
};
static int __init raid_init(void)