diff options
Diffstat (limited to 'kernel/drivers/md')
-rw-r--r-- | kernel/drivers/md/bcache/super.c | 48 | ||||
-rw-r--r-- | kernel/drivers/md/dm-cache-metadata.c | 122 | ||||
-rw-r--r-- | kernel/drivers/md/dm-cache-metadata.h | 4 | ||||
-rw-r--r-- | kernel/drivers/md/dm-cache-target.c | 12 | ||||
-rw-r--r-- | kernel/drivers/md/dm-crypt.c | 38 | ||||
-rw-r--r-- | kernel/drivers/md/dm-flakey.c | 34 | ||||
-rw-r--r-- | kernel/drivers/md/dm-log-writes.c | 10 | ||||
-rw-r--r-- | kernel/drivers/md/dm-mpath.c | 6 | ||||
-rw-r--r-- | kernel/drivers/md/dm-raid1.c | 1 | ||||
-rw-r--r-- | kernel/drivers/md/dm-snap.c | 9 | ||||
-rw-r--r-- | kernel/drivers/md/dm-table.c | 36 | ||||
-rw-r--r-- | kernel/drivers/md/dm-thin-metadata.c | 5 | ||||
-rw-r--r-- | kernel/drivers/md/dm.c | 43 | ||||
-rw-r--r-- | kernel/drivers/md/md.c | 16 | ||||
-rw-r--r-- | kernel/drivers/md/multipath.c | 4 | ||||
-rw-r--r-- | kernel/drivers/md/persistent-data/dm-space-map-metadata.c | 14 | ||||
-rw-r--r-- | kernel/drivers/md/raid1.c | 7 | ||||
-rw-r--r-- | kernel/drivers/md/raid10.c | 7 | ||||
-rw-r--r-- | kernel/drivers/md/raid5.c | 60 | ||||
-rw-r--r-- | kernel/drivers/md/raid5.h | 4 |
20 files changed, 307 insertions, 173 deletions
diff --git a/kernel/drivers/md/bcache/super.c b/kernel/drivers/md/bcache/super.c index 8d0ead98e..3d5c0ba13 100644 --- a/kernel/drivers/md/bcache/super.c +++ b/kernel/drivers/md/bcache/super.c @@ -1015,8 +1015,12 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c) */ atomic_set(&dc->count, 1); - if (bch_cached_dev_writeback_start(dc)) + /* Block writeback thread, but spawn it */ + down_write(&dc->writeback_lock); + if (bch_cached_dev_writeback_start(dc)) { + up_write(&dc->writeback_lock); return -ENOMEM; + } if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) { bch_sectors_dirty_init(dc); @@ -1028,6 +1032,9 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c) bch_cached_dev_run(dc); bcache_device_link(&dc->disk, c, "bdev"); + /* Allow the writeback thread to proceed */ + up_write(&dc->writeback_lock); + pr_info("Caching %s as %s on set %pU", bdevname(dc->bdev, buf), dc->disk.disk->disk_name, dc->disk.c->sb.set_uuid); @@ -1366,6 +1373,9 @@ static void cache_set_flush(struct closure *cl) struct btree *b; unsigned i; + if (!c) + closure_return(cl); + bch_cache_accounting_destroy(&c->accounting); kobject_put(&c->internal); @@ -1808,7 +1818,7 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca) free = roundup_pow_of_two(ca->sb.nbuckets) >> 10; if (!init_fifo(&ca->free[RESERVE_BTREE], 8, GFP_KERNEL) || - !init_fifo(&ca->free[RESERVE_PRIO], prio_buckets(ca), GFP_KERNEL) || + !init_fifo_exact(&ca->free[RESERVE_PRIO], prio_buckets(ca), GFP_KERNEL) || !init_fifo(&ca->free[RESERVE_MOVINGGC], free, GFP_KERNEL) || !init_fifo(&ca->free[RESERVE_NONE], free, GFP_KERNEL) || !init_fifo(&ca->free_inc, free << 2, GFP_KERNEL) || @@ -1828,11 +1838,12 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca) return 0; } -static void register_cache(struct cache_sb *sb, struct page *sb_page, +static int register_cache(struct cache_sb *sb, struct page *sb_page, struct block_device *bdev, struct cache *ca) { char name[BDEVNAME_SIZE]; - const char *err = "cannot allocate memory"; + const char *err = NULL; + int ret = 0; memcpy(&ca->sb, sb, sizeof(struct cache_sb)); ca->bdev = bdev; @@ -1847,27 +1858,35 @@ static void register_cache(struct cache_sb *sb, struct page *sb_page, if (blk_queue_discard(bdev_get_queue(ca->bdev))) ca->discard = CACHE_DISCARD(&ca->sb); - if (cache_alloc(sb, ca) != 0) + ret = cache_alloc(sb, ca); + if (ret != 0) goto err; - err = "error creating kobject"; - if (kobject_add(&ca->kobj, &part_to_dev(bdev->bd_part)->kobj, "bcache")) - goto err; + if (kobject_add(&ca->kobj, &part_to_dev(bdev->bd_part)->kobj, "bcache")) { + err = "error calling kobject_add"; + ret = -ENOMEM; + goto out; + } mutex_lock(&bch_register_lock); err = register_cache_set(ca); mutex_unlock(&bch_register_lock); - if (err) - goto err; + if (err) { + ret = -ENODEV; + goto out; + } pr_info("registered cache device %s", bdevname(bdev, name)); + out: kobject_put(&ca->kobj); - return; + err: - pr_notice("error opening %s: %s", bdevname(bdev, name), err); - goto out; + if (err) + pr_notice("error opening %s: %s", bdevname(bdev, name), err); + + return ret; } /* Global interfaces/init */ @@ -1965,7 +1984,8 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, if (!ca) goto err_close; - register_cache(sb, sb_page, bdev, ca); + if (register_cache(sb, sb_page, bdev, ca) != 0) + goto err_close; } out: if (sb_page) diff --git a/kernel/drivers/md/dm-cache-metadata.c b/kernel/drivers/md/dm-cache-metadata.c index f6543f3a9..3970cda10 100644 --- a/kernel/drivers/md/dm-cache-metadata.c +++ b/kernel/drivers/md/dm-cache-metadata.c @@ -867,18 +867,55 @@ static int blocks_are_unmapped_or_clean(struct dm_cache_metadata *cmd, return 0; } -#define WRITE_LOCK(cmd) \ - if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) \ - return -EINVAL; \ - down_write(&cmd->root_lock) +static bool cmd_write_lock(struct dm_cache_metadata *cmd) +{ + down_write(&cmd->root_lock); + if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) { + up_write(&cmd->root_lock); + return false; + } + return true; +} + +#define WRITE_LOCK(cmd) \ + do { \ + if (!cmd_write_lock((cmd))) \ + return -EINVAL; \ + } while(0) -#define WRITE_LOCK_VOID(cmd) \ - if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) \ - return; \ - down_write(&cmd->root_lock) +#define WRITE_LOCK_VOID(cmd) \ + do { \ + if (!cmd_write_lock((cmd))) \ + return; \ + } while(0) #define WRITE_UNLOCK(cmd) \ - up_write(&cmd->root_lock) + up_write(&(cmd)->root_lock) + +static bool cmd_read_lock(struct dm_cache_metadata *cmd) +{ + down_read(&cmd->root_lock); + if (cmd->fail_io) { + up_read(&cmd->root_lock); + return false; + } + return true; +} + +#define READ_LOCK(cmd) \ + do { \ + if (!cmd_read_lock((cmd))) \ + return -EINVAL; \ + } while(0) + +#define READ_LOCK_VOID(cmd) \ + do { \ + if (!cmd_read_lock((cmd))) \ + return; \ + } while(0) + +#define READ_UNLOCK(cmd) \ + up_read(&(cmd)->root_lock) int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size) { @@ -1015,22 +1052,20 @@ int dm_cache_load_discards(struct dm_cache_metadata *cmd, { int r; - down_read(&cmd->root_lock); + READ_LOCK(cmd); r = __load_discards(cmd, fn, context); - up_read(&cmd->root_lock); + READ_UNLOCK(cmd); return r; } -dm_cblock_t dm_cache_size(struct dm_cache_metadata *cmd) +int dm_cache_size(struct dm_cache_metadata *cmd, dm_cblock_t *result) { - dm_cblock_t r; - - down_read(&cmd->root_lock); - r = cmd->cache_blocks; - up_read(&cmd->root_lock); + READ_LOCK(cmd); + *result = cmd->cache_blocks; + READ_UNLOCK(cmd); - return r; + return 0; } static int __remove(struct dm_cache_metadata *cmd, dm_cblock_t cblock) @@ -1188,9 +1223,9 @@ int dm_cache_load_mappings(struct dm_cache_metadata *cmd, { int r; - down_read(&cmd->root_lock); + READ_LOCK(cmd); r = __load_mappings(cmd, policy, fn, context); - up_read(&cmd->root_lock); + READ_UNLOCK(cmd); return r; } @@ -1215,18 +1250,18 @@ static int __dump_mappings(struct dm_cache_metadata *cmd) void dm_cache_dump(struct dm_cache_metadata *cmd) { - down_read(&cmd->root_lock); + READ_LOCK_VOID(cmd); __dump_mappings(cmd); - up_read(&cmd->root_lock); + READ_UNLOCK(cmd); } int dm_cache_changed_this_transaction(struct dm_cache_metadata *cmd) { int r; - down_read(&cmd->root_lock); + READ_LOCK(cmd); r = cmd->changed; - up_read(&cmd->root_lock); + READ_UNLOCK(cmd); return r; } @@ -1276,9 +1311,9 @@ int dm_cache_set_dirty(struct dm_cache_metadata *cmd, void dm_cache_metadata_get_stats(struct dm_cache_metadata *cmd, struct dm_cache_statistics *stats) { - down_read(&cmd->root_lock); + READ_LOCK_VOID(cmd); *stats = cmd->stats; - up_read(&cmd->root_lock); + READ_UNLOCK(cmd); } void dm_cache_metadata_set_stats(struct dm_cache_metadata *cmd, @@ -1312,9 +1347,9 @@ int dm_cache_get_free_metadata_block_count(struct dm_cache_metadata *cmd, { int r = -EINVAL; - down_read(&cmd->root_lock); + READ_LOCK(cmd); r = dm_sm_get_nr_free(cmd->metadata_sm, result); - up_read(&cmd->root_lock); + READ_UNLOCK(cmd); return r; } @@ -1324,9 +1359,9 @@ int dm_cache_get_metadata_dev_size(struct dm_cache_metadata *cmd, { int r = -EINVAL; - down_read(&cmd->root_lock); + READ_LOCK(cmd); r = dm_sm_get_nr_blocks(cmd->metadata_sm, result); - up_read(&cmd->root_lock); + READ_UNLOCK(cmd); return r; } @@ -1417,7 +1452,13 @@ int dm_cache_write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy * int dm_cache_metadata_all_clean(struct dm_cache_metadata *cmd, bool *result) { - return blocks_are_unmapped_or_clean(cmd, 0, cmd->cache_blocks, result); + int r; + + READ_LOCK(cmd); + r = blocks_are_unmapped_or_clean(cmd, 0, cmd->cache_blocks, result); + READ_UNLOCK(cmd); + + return r; } void dm_cache_metadata_set_read_only(struct dm_cache_metadata *cmd) @@ -1440,10 +1481,7 @@ int dm_cache_metadata_set_needs_check(struct dm_cache_metadata *cmd) struct dm_block *sblock; struct cache_disk_superblock *disk_super; - /* - * We ignore fail_io for this function. - */ - down_write(&cmd->root_lock); + WRITE_LOCK(cmd); set_bit(NEEDS_CHECK, &cmd->flags); r = superblock_lock(cmd, &sblock); @@ -1458,19 +1496,17 @@ int dm_cache_metadata_set_needs_check(struct dm_cache_metadata *cmd) dm_bm_unlock(sblock); out: - up_write(&cmd->root_lock); + WRITE_UNLOCK(cmd); return r; } -bool dm_cache_metadata_needs_check(struct dm_cache_metadata *cmd) +int dm_cache_metadata_needs_check(struct dm_cache_metadata *cmd, bool *result) { - bool needs_check; + READ_LOCK(cmd); + *result = !!test_bit(NEEDS_CHECK, &cmd->flags); + READ_UNLOCK(cmd); - down_read(&cmd->root_lock); - needs_check = !!test_bit(NEEDS_CHECK, &cmd->flags); - up_read(&cmd->root_lock); - - return needs_check; + return 0; } int dm_cache_metadata_abort(struct dm_cache_metadata *cmd) diff --git a/kernel/drivers/md/dm-cache-metadata.h b/kernel/drivers/md/dm-cache-metadata.h index 2ffee21f3..852874419 100644 --- a/kernel/drivers/md/dm-cache-metadata.h +++ b/kernel/drivers/md/dm-cache-metadata.h @@ -66,7 +66,7 @@ void dm_cache_metadata_close(struct dm_cache_metadata *cmd); * origin blocks to map to. */ int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size); -dm_cblock_t dm_cache_size(struct dm_cache_metadata *cmd); +int dm_cache_size(struct dm_cache_metadata *cmd, dm_cblock_t *result); int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd, sector_t discard_block_size, @@ -137,7 +137,7 @@ int dm_cache_write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy * */ int dm_cache_metadata_all_clean(struct dm_cache_metadata *cmd, bool *result); -bool dm_cache_metadata_needs_check(struct dm_cache_metadata *cmd); +int dm_cache_metadata_needs_check(struct dm_cache_metadata *cmd, bool *result); int dm_cache_metadata_set_needs_check(struct dm_cache_metadata *cmd); void dm_cache_metadata_set_read_only(struct dm_cache_metadata *cmd); void dm_cache_metadata_set_read_write(struct dm_cache_metadata *cmd); diff --git a/kernel/drivers/md/dm-cache-target.c b/kernel/drivers/md/dm-cache-target.c index 2fd4c8296..515f83e7d 100644 --- a/kernel/drivers/md/dm-cache-target.c +++ b/kernel/drivers/md/dm-cache-target.c @@ -987,9 +987,14 @@ static void notify_mode_switch(struct cache *cache, enum cache_metadata_mode mod static void set_cache_mode(struct cache *cache, enum cache_metadata_mode new_mode) { - bool needs_check = dm_cache_metadata_needs_check(cache->cmd); + bool needs_check; enum cache_metadata_mode old_mode = get_cache_mode(cache); + if (dm_cache_metadata_needs_check(cache->cmd, &needs_check)) { + DMERR("unable to read needs_check flag, setting failure mode"); + new_mode = CM_FAIL; + } + if (new_mode == CM_WRITE && needs_check) { DMERR("%s: unable to switch cache to write mode until repaired.", cache_device_name(cache)); @@ -3513,6 +3518,7 @@ static void cache_status(struct dm_target *ti, status_type_t type, char buf[BDEVNAME_SIZE]; struct cache *cache = ti->private; dm_cblock_t residency; + bool needs_check; switch (type) { case STATUSTYPE_INFO: @@ -3586,7 +3592,9 @@ static void cache_status(struct dm_target *ti, status_type_t type, else DMEMIT("rw "); - if (dm_cache_metadata_needs_check(cache->cmd)) + r = dm_cache_metadata_needs_check(cache->cmd, &needs_check); + + if (r || needs_check) DMEMIT("needs_check "); else DMEMIT("- "); diff --git a/kernel/drivers/md/dm-crypt.c b/kernel/drivers/md/dm-crypt.c index 3147c8d09..de628883e 100644 --- a/kernel/drivers/md/dm-crypt.c +++ b/kernel/drivers/md/dm-crypt.c @@ -112,8 +112,7 @@ struct iv_tcw_private { * and encrypts / decrypts at the same time. */ enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID, - DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD, - DM_CRYPT_EXIT_THREAD}; + DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD }; /* * The fields in here must be read only after initialization. @@ -1204,18 +1203,20 @@ continue_locked: if (!RB_EMPTY_ROOT(&cc->write_tree)) goto pop_from_list; - if (unlikely(test_bit(DM_CRYPT_EXIT_THREAD, &cc->flags))) { - spin_unlock_irq(&cc->write_thread_wait.lock); - break; - } - - __set_current_state(TASK_INTERRUPTIBLE); + set_current_state(TASK_INTERRUPTIBLE); __add_wait_queue(&cc->write_thread_wait, &wait); spin_unlock_irq(&cc->write_thread_wait.lock); + if (unlikely(kthread_should_stop())) { + set_task_state(current, TASK_RUNNING); + remove_wait_queue(&cc->write_thread_wait, &wait); + break; + } + schedule(); + set_task_state(current, TASK_RUNNING); spin_lock_irq(&cc->write_thread_wait.lock); __remove_wait_queue(&cc->write_thread_wait, &wait); goto continue_locked; @@ -1499,12 +1500,15 @@ static int crypt_set_key(struct crypt_config *cc, char *key) if (!cc->key_size && strcmp(key, "-")) goto out; + /* clear the flag since following operations may invalidate previously valid key */ + clear_bit(DM_CRYPT_KEY_VALID, &cc->flags); + if (cc->key_size && crypt_decode_key(cc->key, key, cc->key_size) < 0) goto out; - set_bit(DM_CRYPT_KEY_VALID, &cc->flags); - r = crypt_setkey_allcpus(cc); + if (!r) + set_bit(DM_CRYPT_KEY_VALID, &cc->flags); out: /* Hex key string not needed after here, so wipe it. */ @@ -1530,13 +1534,8 @@ static void crypt_dtr(struct dm_target *ti) if (!cc) return; - if (cc->write_thread) { - spin_lock_irq(&cc->write_thread_wait.lock); - set_bit(DM_CRYPT_EXIT_THREAD, &cc->flags); - wake_up_locked(&cc->write_thread_wait); - spin_unlock_irq(&cc->write_thread_wait.lock); + if (cc->write_thread) kthread_stop(cc->write_thread); - } if (cc->io_queue) destroy_workqueue(cc->io_queue); @@ -1920,6 +1919,13 @@ static int crypt_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_REMAPPED; } + /* + * Check if bio is too large, split as needed. + */ + if (unlikely(bio->bi_iter.bi_size > (BIO_MAX_PAGES << PAGE_SHIFT)) && + bio_data_dir(bio) == WRITE) + dm_accept_partial_bio(bio, ((BIO_MAX_PAGES << PAGE_SHIFT) >> SECTOR_SHIFT)); + io = dm_per_bio_data(bio, cc->per_bio_data_size); crypt_io_init(io, cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector)); io->ctx.req = (struct ablkcipher_request *)(io + 1); diff --git a/kernel/drivers/md/dm-flakey.c b/kernel/drivers/md/dm-flakey.c index 09e2afcaf..78f403b45 100644 --- a/kernel/drivers/md/dm-flakey.c +++ b/kernel/drivers/md/dm-flakey.c @@ -200,11 +200,13 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (!(fc->up_interval + fc->down_interval)) { ti->error = "Total (up + down) interval is zero"; + r = -EINVAL; goto bad; } if (fc->up_interval + fc->down_interval < fc->up_interval) { ti->error = "Interval overflow"; + r = -EINVAL; goto bad; } @@ -289,10 +291,14 @@ static int flakey_map(struct dm_target *ti, struct bio *bio) pb->bio_submitted = true; /* - * Map reads as normal. + * Error reads if neither corrupt_bio_byte or drop_writes are set. + * Otherwise, flakey_end_io() will decide if the reads should be modified. */ - if (bio_data_dir(bio) == READ) + if (bio_data_dir(bio) == READ) { + if (!fc->corrupt_bio_byte && !test_bit(DROP_WRITES, &fc->flags)) + return -EIO; goto map_bio; + } /* * Drop writes? @@ -328,14 +334,22 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error) struct flakey_c *fc = ti->private; struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data)); - /* - * Corrupt successful READs while in down state. - * If flags were specified, only corrupt those that match. - */ - if (fc->corrupt_bio_byte && !error && pb->bio_submitted && - (bio_data_dir(bio) == READ) && (fc->corrupt_bio_rw == READ) && - all_corrupt_bio_flags_match(bio, fc)) - corrupt_bio_data(bio, fc); + if (!error && pb->bio_submitted && (bio_data_dir(bio) == READ)) { + if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == READ) && + all_corrupt_bio_flags_match(bio, fc)) { + /* + * Corrupt successful matching READs while in down state. + */ + corrupt_bio_data(bio, fc); + + } else if (!test_bit(DROP_WRITES, &fc->flags)) { + /* + * Error read during the down_interval if drop_writes + * wasn't configured. + */ + return -EIO; + } + } return error; } diff --git a/kernel/drivers/md/dm-log-writes.c b/kernel/drivers/md/dm-log-writes.c index 624589d51..c8b513ee1 100644 --- a/kernel/drivers/md/dm-log-writes.c +++ b/kernel/drivers/md/dm-log-writes.c @@ -258,12 +258,12 @@ static int log_one_block(struct log_writes_c *lc, goto out; sector++; - bio = bio_alloc(GFP_KERNEL, block->vec_cnt); + atomic_inc(&lc->io_blocks); + bio = bio_alloc(GFP_KERNEL, min(block->vec_cnt, BIO_MAX_PAGES)); if (!bio) { DMERR("Couldn't alloc log bio"); goto error; } - atomic_inc(&lc->io_blocks); bio->bi_iter.bi_size = 0; bio->bi_iter.bi_sector = sector; bio->bi_bdev = lc->logdev->bdev; @@ -280,7 +280,7 @@ static int log_one_block(struct log_writes_c *lc, if (ret != block->vecs[i].bv_len) { atomic_inc(&lc->io_blocks); submit_bio(WRITE, bio); - bio = bio_alloc(GFP_KERNEL, block->vec_cnt - i); + bio = bio_alloc(GFP_KERNEL, min(block->vec_cnt - i, BIO_MAX_PAGES)); if (!bio) { DMERR("Couldn't alloc log bio"); goto error; @@ -456,9 +456,9 @@ static int log_writes_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } - ret = -EINVAL; lc->log_kthread = kthread_run(log_writes_kthread, lc, "log-write"); - if (!lc->log_kthread) { + if (IS_ERR(lc->log_kthread)) { + ret = PTR_ERR(lc->log_kthread); ti->error = "Couldn't alloc kthread"; dm_put_device(ti, lc->dev); dm_put_device(ti, lc->logdev); diff --git a/kernel/drivers/md/dm-mpath.c b/kernel/drivers/md/dm-mpath.c index cfa29f574..5b2ef9660 100644 --- a/kernel/drivers/md/dm-mpath.c +++ b/kernel/drivers/md/dm-mpath.c @@ -1220,10 +1220,10 @@ static void activate_path(struct work_struct *work) { struct pgpath *pgpath = container_of(work, struct pgpath, activate_path.work); + struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev); - if (pgpath->is_active) - scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev), - pg_init_done, pgpath); + if (pgpath->is_active && !blk_queue_dying(q)) + scsi_dh_activate(q, pg_init_done, pgpath); else pg_init_done(pgpath, SCSI_DH_DEV_OFFLINED); } diff --git a/kernel/drivers/md/dm-raid1.c b/kernel/drivers/md/dm-raid1.c index f2a363a89..115bd3846 100644 --- a/kernel/drivers/md/dm-raid1.c +++ b/kernel/drivers/md/dm-raid1.c @@ -1288,6 +1288,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error) dm_bio_restore(bd, bio); bio_record->details.bi_bdev = NULL; + bio->bi_error = 0; queue_bio(ms, bio, rw); return DM_ENDIO_INCOMPLETE; diff --git a/kernel/drivers/md/dm-snap.c b/kernel/drivers/md/dm-snap.c index 61f184ad0..e108deebb 100644 --- a/kernel/drivers/md/dm-snap.c +++ b/kernel/drivers/md/dm-snap.c @@ -1106,6 +1106,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) int i; int r = -EINVAL; char *origin_path, *cow_path; + dev_t origin_dev, cow_dev; unsigned args_used, num_flush_bios = 1; fmode_t origin_mode = FMODE_READ; @@ -1136,11 +1137,19 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->error = "Cannot get origin device"; goto bad_origin; } + origin_dev = s->origin->bdev->bd_dev; cow_path = argv[0]; argv++; argc--; + cow_dev = dm_get_dev_t(cow_path); + if (cow_dev && cow_dev == origin_dev) { + ti->error = "COW device cannot be the same as origin device"; + r = -EINVAL; + goto bad_cow; + } + r = dm_get_device(ti, cow_path, dm_table_get_mode(ti->table), &s->cow); if (r) { ti->error = "Cannot get COW device"; diff --git a/kernel/drivers/md/dm-table.c b/kernel/drivers/md/dm-table.c index 061152a43..cb5d0daf5 100644 --- a/kernel/drivers/md/dm-table.c +++ b/kernel/drivers/md/dm-table.c @@ -365,6 +365,26 @@ static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode, } /* + * Convert the path to a device + */ +dev_t dm_get_dev_t(const char *path) +{ + dev_t uninitialized_var(dev); + struct block_device *bdev; + + bdev = lookup_bdev(path); + if (IS_ERR(bdev)) + dev = name_to_dev_t(path); + else { + dev = bdev->bd_dev; + bdput(bdev); + } + + return dev; +} +EXPORT_SYMBOL_GPL(dm_get_dev_t); + +/* * Add a device to the list, or just increment the usage count if * it's already present. */ @@ -372,23 +392,15 @@ int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode, struct dm_dev **result) { int r; - dev_t uninitialized_var(dev); + dev_t dev; struct dm_dev_internal *dd; struct dm_table *t = ti->table; - struct block_device *bdev; BUG_ON(!t); - /* convert the path to a device */ - bdev = lookup_bdev(path); - if (IS_ERR(bdev)) { - dev = name_to_dev_t(path); - if (!dev) - return -ENODEV; - } else { - dev = bdev->bd_dev; - bdput(bdev); - } + dev = dm_get_dev_t(path); + if (!dev) + return -ENODEV; dd = find_device(&t->devices, dev); if (!dd) { diff --git a/kernel/drivers/md/dm-thin-metadata.c b/kernel/drivers/md/dm-thin-metadata.c index c219a053c..911ada643 100644 --- a/kernel/drivers/md/dm-thin-metadata.c +++ b/kernel/drivers/md/dm-thin-metadata.c @@ -1943,5 +1943,8 @@ bool dm_pool_metadata_needs_check(struct dm_pool_metadata *pmd) void dm_pool_issue_prefetches(struct dm_pool_metadata *pmd) { - dm_tm_issue_prefetches(pmd->tm); + down_read(&pmd->root_lock); + if (!pmd->fail_io) + dm_tm_issue_prefetches(pmd->tm); + up_read(&pmd->root_lock); } diff --git a/kernel/drivers/md/dm.c b/kernel/drivers/md/dm.c index 4745d2f13..170a3c2d8 100644 --- a/kernel/drivers/md/dm.c +++ b/kernel/drivers/md/dm.c @@ -1109,12 +1109,8 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue) * back into ->request_fn() could deadlock attempting to grab the * queue lock again. */ - if (run_queue) { - if (md->queue->mq_ops) - blk_mq_run_hw_queues(md->queue, true); - else - blk_run_queue_async(md->queue); - } + if (!md->queue->mq_ops && run_queue) + blk_run_queue_async(md->queue); /* * dm_put() must be at the end of this function. See the comment above @@ -1214,9 +1210,9 @@ static void dm_requeue_original_request(struct mapped_device *md, { int rw = rq_data_dir(rq); + rq_end_stats(md, rq); dm_unprep_request(rq); - rq_end_stats(md, rq); if (!rq->q->mq_ops) old_requeue_request(rq); else { @@ -1336,7 +1332,10 @@ static void dm_complete_request(struct request *rq, int error) struct dm_rq_target_io *tio = tio_from_request(rq); tio->error = error; - blk_complete_request(rq); + if (!rq->q->mq_ops) + blk_complete_request(rq); + else + blk_mq_complete_request(rq, error); } /* @@ -2261,8 +2260,6 @@ static void cleanup_mapped_device(struct mapped_device *md) if (md->bs) bioset_free(md->bs); - cleanup_srcu_struct(&md->io_barrier); - if (md->disk) { spin_lock(&_minor_lock); md->disk->private_data = NULL; @@ -2274,6 +2271,8 @@ static void cleanup_mapped_device(struct mapped_device *md) if (md->queue) blk_cleanup_queue(md->queue); + cleanup_srcu_struct(&md->io_barrier); + if (md->bdev) { bdput(md->bdev); md->bdev = NULL; @@ -2870,6 +2869,7 @@ EXPORT_SYMBOL_GPL(dm_device_name); static void __dm_destroy(struct mapped_device *md, bool wait) { + struct request_queue *q = dm_get_md_queue(md); struct dm_table *map; int srcu_idx; @@ -2880,6 +2880,10 @@ static void __dm_destroy(struct mapped_device *md, bool wait) set_bit(DMF_FREEING, &md->flags); spin_unlock(&_minor_lock); + spin_lock_irq(q->queue_lock); + queue_flag_set(QUEUE_FLAG_DYING, q); + spin_unlock_irq(q->queue_lock); + if (dm_request_based(md) && md->kworker_task) flush_kthread_worker(&md->kworker); @@ -3079,7 +3083,8 @@ static void unlock_fs(struct mapped_device *md) * Caller must hold md->suspend_lock */ static int __dm_suspend(struct mapped_device *md, struct dm_table *map, - unsigned suspend_flags, int interruptible) + unsigned suspend_flags, int interruptible, + int dmf_suspended_flag) { bool do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG; bool noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG; @@ -3146,6 +3151,8 @@ static int __dm_suspend(struct mapped_device *md, struct dm_table *map, * to finish. */ r = dm_wait_for_completion(md, interruptible); + if (!r) + set_bit(dmf_suspended_flag, &md->flags); if (noflush) clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); @@ -3207,12 +3214,10 @@ retry: map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock)); - r = __dm_suspend(md, map, suspend_flags, TASK_INTERRUPTIBLE); + r = __dm_suspend(md, map, suspend_flags, TASK_INTERRUPTIBLE, DMF_SUSPENDED); if (r) goto out_unlock; - set_bit(DMF_SUSPENDED, &md->flags); - dm_table_postsuspend_targets(map); out_unlock: @@ -3245,10 +3250,11 @@ static int __dm_resume(struct mapped_device *md, struct dm_table *map) int dm_resume(struct mapped_device *md) { - int r = -EINVAL; + int r; struct dm_table *map = NULL; retry: + r = -EINVAL; mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING); if (!dm_suspended_md(md)) @@ -3272,8 +3278,6 @@ retry: goto out; clear_bit(DMF_SUSPENDED, &md->flags); - - r = 0; out: mutex_unlock(&md->suspend_lock); @@ -3306,9 +3310,8 @@ static void __dm_internal_suspend(struct mapped_device *md, unsigned suspend_fla * would require changing .presuspend to return an error -- avoid this * until there is a need for more elaborate variants of internal suspend. */ - (void) __dm_suspend(md, map, suspend_flags, TASK_UNINTERRUPTIBLE); - - set_bit(DMF_SUSPENDED_INTERNALLY, &md->flags); + (void) __dm_suspend(md, map, suspend_flags, TASK_UNINTERRUPTIBLE, + DMF_SUSPENDED_INTERNALLY); dm_table_postsuspend_targets(map); } diff --git a/kernel/drivers/md/md.c b/kernel/drivers/md/md.c index b1e1f6b95..eff554a12 100644 --- a/kernel/drivers/md/md.c +++ b/kernel/drivers/md/md.c @@ -293,6 +293,8 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio) * go away inside make_request */ sectors = bio_sectors(bio); + /* bio could be mergeable after passing to underlayer */ + bio->bi_rw &= ~REQ_NOMERGE; mddev->pers->make_request(mddev, bio); cpu = part_stat_lock(); @@ -6769,7 +6771,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, /* need to ensure recovery thread has run */ wait_event_interruptible_timeout(mddev->sb_wait, !test_bit(MD_RECOVERY_NEEDED, - &mddev->flags), + &mddev->recovery), msecs_to_jiffies(5000)); if (cmd == STOP_ARRAY || cmd == STOP_ARRAY_RO) { /* Need to flush page cache, and ensure no-one else opens @@ -7570,16 +7572,12 @@ EXPORT_SYMBOL(unregister_md_cluster_operations); int md_setup_cluster(struct mddev *mddev, int nodes) { - int err; - - err = request_module("md-cluster"); - if (err) { - pr_err("md-cluster module not found.\n"); - return -ENOENT; - } - + if (!md_cluster_ops) + request_module("md-cluster"); spin_lock(&pers_lock); + /* ensure module won't be unloaded */ if (!md_cluster_ops || !try_module_get(md_cluster_mod)) { + pr_err("can't find md-cluster module or get it's reference.\n"); spin_unlock(&pers_lock); return -ENOENT; } diff --git a/kernel/drivers/md/multipath.c b/kernel/drivers/md/multipath.c index 0a72ab6e6..dd483bb2e 100644 --- a/kernel/drivers/md/multipath.c +++ b/kernel/drivers/md/multipath.c @@ -129,7 +129,9 @@ static void multipath_make_request(struct mddev *mddev, struct bio * bio) } multipath = conf->multipaths + mp_bh->path; - mp_bh->bio = *bio; + bio_init(&mp_bh->bio); + __bio_clone_fast(&mp_bh->bio, bio); + mp_bh->bio.bi_iter.bi_sector += multipath->rdev->data_offset; mp_bh->bio.bi_bdev = multipath->rdev->bdev; mp_bh->bio.bi_rw |= REQ_FAILFAST_TRANSPORT; diff --git a/kernel/drivers/md/persistent-data/dm-space-map-metadata.c b/kernel/drivers/md/persistent-data/dm-space-map-metadata.c index 7e4400559..20557e2c6 100644 --- a/kernel/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/kernel/drivers/md/persistent-data/dm-space-map-metadata.c @@ -775,17 +775,15 @@ int dm_sm_metadata_create(struct dm_space_map *sm, memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm)); r = sm_ll_new_metadata(&smm->ll, tm); + if (!r) { + if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS) + nr_blocks = DM_SM_METADATA_MAX_BLOCKS; + r = sm_ll_extend(&smm->ll, nr_blocks); + } + memcpy(&smm->sm, &ops, sizeof(smm->sm)); if (r) return r; - if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS) - nr_blocks = DM_SM_METADATA_MAX_BLOCKS; - r = sm_ll_extend(&smm->ll, nr_blocks); - if (r) - return r; - - memcpy(&smm->sm, &ops, sizeof(smm->sm)); - /* * Now we need to update the newly created data structures with the * allocated blocks that they were built from. diff --git a/kernel/drivers/md/raid1.c b/kernel/drivers/md/raid1.c index c4b913409..515554c73 100644 --- a/kernel/drivers/md/raid1.c +++ b/kernel/drivers/md/raid1.c @@ -2274,6 +2274,7 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio) if (fail) { spin_lock_irq(&conf->device_lock); list_add(&r1_bio->retry_list, &conf->bio_end_io_list); + conf->nr_queued++; spin_unlock_irq(&conf->device_lock); md_wakeup_thread(conf->mddev->thread); } else { @@ -2391,8 +2392,10 @@ static void raid1d(struct md_thread *thread) LIST_HEAD(tmp); spin_lock_irqsave(&conf->device_lock, flags); if (!test_bit(MD_CHANGE_PENDING, &mddev->flags)) { - list_add(&tmp, &conf->bio_end_io_list); - list_del_init(&conf->bio_end_io_list); + while (!list_empty(&conf->bio_end_io_list)) { + list_move(conf->bio_end_io_list.prev, &tmp); + conf->nr_queued--; + } } spin_unlock_irqrestore(&conf->device_lock, flags); while (!list_empty(&tmp)) { diff --git a/kernel/drivers/md/raid10.c b/kernel/drivers/md/raid10.c index ce959b4ae..ebb0dd612 100644 --- a/kernel/drivers/md/raid10.c +++ b/kernel/drivers/md/raid10.c @@ -2664,6 +2664,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) if (fail) { spin_lock_irq(&conf->device_lock); list_add(&r10_bio->retry_list, &conf->bio_end_io_list); + conf->nr_queued++; spin_unlock_irq(&conf->device_lock); md_wakeup_thread(conf->mddev->thread); } else { @@ -2691,8 +2692,10 @@ static void raid10d(struct md_thread *thread) LIST_HEAD(tmp); spin_lock_irqsave(&conf->device_lock, flags); if (!test_bit(MD_CHANGE_PENDING, &mddev->flags)) { - list_add(&tmp, &conf->bio_end_io_list); - list_del_init(&conf->bio_end_io_list); + while (!list_empty(&conf->bio_end_io_list)) { + list_move(conf->bio_end_io_list.prev, &tmp); + conf->nr_queued--; + } } spin_unlock_irqrestore(&conf->device_lock, flags); while (!list_empty(&tmp)) { diff --git a/kernel/drivers/md/raid5.c b/kernel/drivers/md/raid5.c index adf72a9b6..16cf94bbb 100644 --- a/kernel/drivers/md/raid5.c +++ b/kernel/drivers/md/raid5.c @@ -340,8 +340,7 @@ static void release_inactive_stripe_list(struct r5conf *conf, int hash) { int size; - unsigned long do_wakeup = 0; - int i = 0; + bool do_wakeup = false; unsigned long flags; if (hash == NR_STRIPE_HASH_LOCKS) { @@ -362,19 +361,15 @@ static void release_inactive_stripe_list(struct r5conf *conf, !list_empty(list)) atomic_dec(&conf->empty_inactive_list_nr); list_splice_tail_init(list, conf->inactive_list + hash); - do_wakeup |= 1 << hash; + do_wakeup = true; spin_unlock_irqrestore(conf->hash_locks + hash, flags); } size--; hash--; } - for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) { - if (do_wakeup & (1 << i)) - wake_up(&conf->wait_for_stripe[i]); - } - if (do_wakeup) { + wake_up(&conf->wait_for_stripe); if (atomic_read(&conf->active_stripes) == 0) wake_up(&conf->wait_for_quiescent); if (conf->retry_read_aligned) @@ -687,15 +682,14 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector, if (!sh) { set_bit(R5_INACTIVE_BLOCKED, &conf->cache_state); - wait_event_exclusive_cmd( - conf->wait_for_stripe[hash], + wait_event_lock_irq( + conf->wait_for_stripe, !list_empty(conf->inactive_list + hash) && (atomic_read(&conf->active_stripes) < (conf->max_nr_stripes * 3 / 4) || !test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state)), - spin_unlock_irq(conf->hash_locks + hash), - spin_lock_irq(conf->hash_locks + hash)); + *(conf->hash_locks + hash)); clear_bit(R5_INACTIVE_BLOCKED, &conf->cache_state); } else { @@ -720,9 +714,6 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector, } } while (sh == NULL); - if (!list_empty(conf->inactive_list + hash)) - wake_up(&conf->wait_for_stripe[hash]); - spin_unlock_irq(conf->hash_locks + hash); return sh; } @@ -2093,6 +2084,14 @@ static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors) unsigned long cpu; int err = 0; + /* + * Never shrink. And mddev_suspend() could deadlock if this is called + * from raid5d. In that case, scribble_disks and scribble_sectors + * should equal to new_disks and new_sectors + */ + if (conf->scribble_disks >= new_disks && + conf->scribble_sectors >= new_sectors) + return 0; mddev_suspend(conf->mddev); get_online_cpus(); for_each_present_cpu(cpu) { @@ -2114,6 +2113,10 @@ static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors) } put_online_cpus(); mddev_resume(conf->mddev); + if (!err) { + conf->scribble_disks = new_disks; + conf->scribble_sectors = new_sectors; + } return err; } @@ -2194,7 +2197,7 @@ static int resize_stripes(struct r5conf *conf, int newsize) cnt = 0; list_for_each_entry(nsh, &newstripes, lru) { lock_device_hash_lock(conf, hash); - wait_event_exclusive_cmd(conf->wait_for_stripe[hash], + wait_event_cmd(conf->wait_for_stripe, !list_empty(conf->inactive_list + hash), unlock_device_hash_lock(conf, hash), lock_device_hash_lock(conf, hash)); @@ -4240,7 +4243,6 @@ static void break_stripe_batch_list(struct stripe_head *head_sh, WARN_ON_ONCE(sh->state & ((1 << STRIPE_ACTIVE) | (1 << STRIPE_SYNCING) | (1 << STRIPE_REPLACED) | - (1 << STRIPE_PREREAD_ACTIVE) | (1 << STRIPE_DELAYED) | (1 << STRIPE_BIT_DELAY) | (1 << STRIPE_FULL_WRITE) | @@ -4255,6 +4257,7 @@ static void break_stripe_batch_list(struct stripe_head *head_sh, (1 << STRIPE_REPLACED))); set_mask_bits(&sh->state, ~(STRIPE_EXPAND_SYNC_FLAGS | + (1 << STRIPE_PREREAD_ACTIVE) | (1 << STRIPE_DEGRADED)), head_sh->state & (1 << STRIPE_INSYNC)); @@ -6417,6 +6420,12 @@ static int raid5_alloc_percpu(struct r5conf *conf) } put_online_cpus(); + if (!err) { + conf->scribble_disks = max(conf->raid_disks, + conf->previous_raid_disks); + conf->scribble_sectors = max(conf->chunk_sectors, + conf->prev_chunk_sectors); + } return err; } @@ -6507,9 +6516,7 @@ static struct r5conf *setup_conf(struct mddev *mddev) seqcount_init(&conf->gen_lock); mutex_init(&conf->cache_size_mutex); init_waitqueue_head(&conf->wait_for_quiescent); - for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) { - init_waitqueue_head(&conf->wait_for_stripe[i]); - } + init_waitqueue_head(&conf->wait_for_stripe); init_waitqueue_head(&conf->wait_for_overlap); INIT_LIST_HEAD(&conf->handle_list); INIT_LIST_HEAD(&conf->hold_list); @@ -6976,6 +6983,15 @@ static int run(struct mddev *mddev) stripe = (stripe | (stripe-1)) + 1; mddev->queue->limits.discard_alignment = stripe; mddev->queue->limits.discard_granularity = stripe; + + /* + * We use 16-bit counter of active stripes in bi_phys_segments + * (minus one for over-loaded initialization) + */ + blk_queue_max_hw_sectors(mddev->queue, 0xfffe * STRIPE_SECTORS); + blk_queue_max_discard_sectors(mddev->queue, + 0xfffe * STRIPE_SECTORS); + /* * unaligned part of discard request will be ignored, so can't * guarantee discard_zeroes_data @@ -7018,8 +7034,8 @@ static int run(struct mddev *mddev) } if (discard_supported && - mddev->queue->limits.max_discard_sectors >= stripe && - mddev->queue->limits.discard_granularity >= stripe) + mddev->queue->limits.max_discard_sectors >= (stripe >> 9) && + mddev->queue->limits.discard_granularity >= stripe) queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); else diff --git a/kernel/drivers/md/raid5.h b/kernel/drivers/md/raid5.h index 3f0c2e2a3..efe91887e 100644 --- a/kernel/drivers/md/raid5.h +++ b/kernel/drivers/md/raid5.h @@ -511,6 +511,8 @@ struct r5conf { * conversions */ } __percpu *percpu; + int scribble_disks; + int scribble_sectors; #ifdef CONFIG_HOTPLUG_CPU struct notifier_block cpu_notify; #endif @@ -523,7 +525,7 @@ struct r5conf { atomic_t empty_inactive_list_nr; struct llist_head released_stripes; wait_queue_head_t wait_for_quiescent; - wait_queue_head_t wait_for_stripe[NR_STRIPE_HASH_LOCKS]; + wait_queue_head_t wait_for_stripe; wait_queue_head_t wait_for_overlap; unsigned long cache_state; #define R5_INACTIVE_BLOCKED 1 /* release of inactive stripes blocked, |