diff options
Diffstat (limited to 'kernel/drivers/md')
-rw-r--r-- | kernel/drivers/md/bitmap.c | 15 | ||||
-rw-r--r-- | kernel/drivers/md/dm-cache-policy-cleaner.c | 3 | ||||
-rw-r--r-- | kernel/drivers/md/dm-cache-policy-internal.h | 5 | ||||
-rw-r--r-- | kernel/drivers/md/dm-cache-policy-mq.c | 41 | ||||
-rw-r--r-- | kernel/drivers/md/dm-cache-policy.h | 15 | ||||
-rw-r--r-- | kernel/drivers/md/dm-cache-target.c | 58 | ||||
-rw-r--r-- | kernel/drivers/md/dm-stats.c | 2 | ||||
-rw-r--r-- | kernel/drivers/md/dm-thin-metadata.c | 4 | ||||
-rw-r--r-- | kernel/drivers/md/dm-thin.c | 13 | ||||
-rw-r--r-- | kernel/drivers/md/dm.c | 35 | ||||
-rw-r--r-- | kernel/drivers/md/md.c | 11 | ||||
-rw-r--r-- | kernel/drivers/md/persistent-data/dm-btree-remove.c | 6 | ||||
-rw-r--r-- | kernel/drivers/md/persistent-data/dm-btree.c | 2 | ||||
-rw-r--r-- | kernel/drivers/md/persistent-data/dm-space-map-metadata.c | 50 | ||||
-rw-r--r-- | kernel/drivers/md/raid1.c | 12 |
15 files changed, 179 insertions, 93 deletions
diff --git a/kernel/drivers/md/bitmap.c b/kernel/drivers/md/bitmap.c index 135a0907e..c90118e90 100644 --- a/kernel/drivers/md/bitmap.c +++ b/kernel/drivers/md/bitmap.c @@ -494,7 +494,7 @@ static int bitmap_new_disk_sb(struct bitmap *bitmap) bitmap_super_t *sb; unsigned long chunksize, daemon_sleep, write_behind; - bitmap->storage.sb_page = alloc_page(GFP_KERNEL); + bitmap->storage.sb_page = alloc_page(GFP_KERNEL | __GFP_ZERO); if (bitmap->storage.sb_page == NULL) return -ENOMEM; bitmap->storage.sb_page->index = 0; @@ -541,6 +541,7 @@ static int bitmap_new_disk_sb(struct bitmap *bitmap) sb->state = cpu_to_le32(bitmap->flags); bitmap->events_cleared = bitmap->mddev->events; sb->events_cleared = cpu_to_le64(bitmap->mddev->events); + bitmap->mddev->bitmap_info.nodes = 0; kunmap_atomic(sb); @@ -611,8 +612,16 @@ re_read: daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ; write_behind = le32_to_cpu(sb->write_behind); sectors_reserved = le32_to_cpu(sb->sectors_reserved); - nodes = le32_to_cpu(sb->nodes); - strlcpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64); + /* XXX: This is a hack to ensure that we don't use clustering + * in case: + * - dm-raid is in use and + * - the nodes written in bitmap_sb is erroneous. + */ + if (!bitmap->mddev->sync_super) { + nodes = le32_to_cpu(sb->nodes); + strlcpy(bitmap->mddev->bitmap_info.cluster_name, + sb->cluster_name, 64); + } /* verify that the bitmap-specific fields are valid */ if (sb->magic != cpu_to_le32(BITMAP_MAGIC)) diff --git a/kernel/drivers/md/dm-cache-policy-cleaner.c b/kernel/drivers/md/dm-cache-policy-cleaner.c index b04d1f904..004e463c9 100644 --- a/kernel/drivers/md/dm-cache-policy-cleaner.c +++ b/kernel/drivers/md/dm-cache-policy-cleaner.c @@ -171,7 +171,8 @@ static void remove_cache_hash_entry(struct wb_cache_entry *e) /* Public interface (see dm-cache-policy.h */ static int wb_map(struct dm_cache_policy *pe, dm_oblock_t oblock, bool can_block, bool can_migrate, bool discarded_oblock, - struct bio *bio, struct policy_result *result) + struct bio *bio, struct policy_locker *locker, + struct policy_result *result) { struct policy *p = to_policy(pe); struct wb_cache_entry *e; diff --git a/kernel/drivers/md/dm-cache-policy-internal.h b/kernel/drivers/md/dm-cache-policy-internal.h index 2256a1f24..c198e6def 100644 --- a/kernel/drivers/md/dm-cache-policy-internal.h +++ b/kernel/drivers/md/dm-cache-policy-internal.h @@ -16,9 +16,10 @@ */ static inline int policy_map(struct dm_cache_policy *p, dm_oblock_t oblock, bool can_block, bool can_migrate, bool discarded_oblock, - struct bio *bio, struct policy_result *result) + struct bio *bio, struct policy_locker *locker, + struct policy_result *result) { - return p->map(p, oblock, can_block, can_migrate, discarded_oblock, bio, result); + return p->map(p, oblock, can_block, can_migrate, discarded_oblock, bio, locker, result); } static inline int policy_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock) diff --git a/kernel/drivers/md/dm-cache-policy-mq.c b/kernel/drivers/md/dm-cache-policy-mq.c index 3ddd11623..515d44bf2 100644 --- a/kernel/drivers/md/dm-cache-policy-mq.c +++ b/kernel/drivers/md/dm-cache-policy-mq.c @@ -693,9 +693,10 @@ static void requeue(struct mq_policy *mq, struct entry *e) * - set the hit count to a hard coded value other than 1, eg, is it better * if it goes in at level 2? */ -static int demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock) +static int demote_cblock(struct mq_policy *mq, + struct policy_locker *locker, dm_oblock_t *oblock) { - struct entry *demoted = pop(mq, &mq->cache_clean); + struct entry *demoted = peek(&mq->cache_clean); if (!demoted) /* @@ -707,6 +708,13 @@ static int demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock) */ return -ENOSPC; + if (locker->fn(locker, demoted->oblock)) + /* + * We couldn't lock the demoted block. + */ + return -EBUSY; + + del(mq, demoted); *oblock = demoted->oblock; free_entry(&mq->cache_pool, demoted); @@ -795,6 +803,7 @@ static int cache_entry_found(struct mq_policy *mq, * finding which cache block to use. */ static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e, + struct policy_locker *locker, struct policy_result *result) { int r; @@ -803,11 +812,12 @@ static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e, /* Ensure there's a free cblock in the cache */ if (epool_empty(&mq->cache_pool)) { result->op = POLICY_REPLACE; - r = demote_cblock(mq, &result->old_oblock); + r = demote_cblock(mq, locker, &result->old_oblock); if (r) { result->op = POLICY_MISS; return 0; } + } else result->op = POLICY_NEW; @@ -829,7 +839,8 @@ static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e, static int pre_cache_entry_found(struct mq_policy *mq, struct entry *e, bool can_migrate, bool discarded_oblock, - int data_dir, struct policy_result *result) + int data_dir, struct policy_locker *locker, + struct policy_result *result) { int r = 0; @@ -842,7 +853,7 @@ static int pre_cache_entry_found(struct mq_policy *mq, struct entry *e, else { requeue(mq, e); - r = pre_cache_to_cache(mq, e, result); + r = pre_cache_to_cache(mq, e, locker, result); } return r; @@ -872,6 +883,7 @@ static void insert_in_pre_cache(struct mq_policy *mq, } static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock, + struct policy_locker *locker, struct policy_result *result) { int r; @@ -879,7 +891,7 @@ static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock, if (epool_empty(&mq->cache_pool)) { result->op = POLICY_REPLACE; - r = demote_cblock(mq, &result->old_oblock); + r = demote_cblock(mq, locker, &result->old_oblock); if (unlikely(r)) { result->op = POLICY_MISS; insert_in_pre_cache(mq, oblock); @@ -907,11 +919,12 @@ static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock, static int no_entry_found(struct mq_policy *mq, dm_oblock_t oblock, bool can_migrate, bool discarded_oblock, - int data_dir, struct policy_result *result) + int data_dir, struct policy_locker *locker, + struct policy_result *result) { if (adjusted_promote_threshold(mq, discarded_oblock, data_dir) <= 1) { if (can_migrate) - insert_in_cache(mq, oblock, result); + insert_in_cache(mq, oblock, locker, result); else return -EWOULDBLOCK; } else { @@ -928,7 +941,8 @@ static int no_entry_found(struct mq_policy *mq, dm_oblock_t oblock, */ static int map(struct mq_policy *mq, dm_oblock_t oblock, bool can_migrate, bool discarded_oblock, - int data_dir, struct policy_result *result) + int data_dir, struct policy_locker *locker, + struct policy_result *result) { int r = 0; struct entry *e = hash_lookup(mq, oblock); @@ -942,11 +956,11 @@ static int map(struct mq_policy *mq, dm_oblock_t oblock, else if (e) r = pre_cache_entry_found(mq, e, can_migrate, discarded_oblock, - data_dir, result); + data_dir, locker, result); else r = no_entry_found(mq, oblock, can_migrate, discarded_oblock, - data_dir, result); + data_dir, locker, result); if (r == -EWOULDBLOCK) result->op = POLICY_MISS; @@ -1012,7 +1026,8 @@ static void copy_tick(struct mq_policy *mq) static int mq_map(struct dm_cache_policy *p, dm_oblock_t oblock, bool can_block, bool can_migrate, bool discarded_oblock, - struct bio *bio, struct policy_result *result) + struct bio *bio, struct policy_locker *locker, + struct policy_result *result) { int r; struct mq_policy *mq = to_mq_policy(p); @@ -1028,7 +1043,7 @@ static int mq_map(struct dm_cache_policy *p, dm_oblock_t oblock, iot_examine_bio(&mq->tracker, bio); r = map(mq, oblock, can_migrate, discarded_oblock, - bio_data_dir(bio), result); + bio_data_dir(bio), locker, result); mutex_unlock(&mq->lock); diff --git a/kernel/drivers/md/dm-cache-policy.h b/kernel/drivers/md/dm-cache-policy.h index f50fe360c..5524e21e4 100644 --- a/kernel/drivers/md/dm-cache-policy.h +++ b/kernel/drivers/md/dm-cache-policy.h @@ -70,6 +70,18 @@ enum policy_operation { }; /* + * When issuing a POLICY_REPLACE the policy needs to make a callback to + * lock the block being demoted. This doesn't need to occur during a + * writeback operation since the block remains in the cache. + */ +struct policy_locker; +typedef int (*policy_lock_fn)(struct policy_locker *l, dm_oblock_t oblock); + +struct policy_locker { + policy_lock_fn fn; +}; + +/* * This is the instruction passed back to the core target. */ struct policy_result { @@ -122,7 +134,8 @@ struct dm_cache_policy { */ int (*map)(struct dm_cache_policy *p, dm_oblock_t oblock, bool can_block, bool can_migrate, bool discarded_oblock, - struct bio *bio, struct policy_result *result); + struct bio *bio, struct policy_locker *locker, + struct policy_result *result); /* * Sometimes we want to see if a block is in the cache, without diff --git a/kernel/drivers/md/dm-cache-target.c b/kernel/drivers/md/dm-cache-target.c index 7755af351..e049becaa 100644 --- a/kernel/drivers/md/dm-cache-target.c +++ b/kernel/drivers/md/dm-cache-target.c @@ -1445,16 +1445,43 @@ static void inc_miss_counter(struct cache *cache, struct bio *bio) &cache->stats.read_miss : &cache->stats.write_miss); } +/*----------------------------------------------------------------*/ + +struct old_oblock_lock { + struct policy_locker locker; + struct cache *cache; + struct prealloc *structs; + struct dm_bio_prison_cell *cell; +}; + +static int null_locker(struct policy_locker *locker, dm_oblock_t b) +{ + /* This should never be called */ + BUG(); + return 0; +} + +static int cell_locker(struct policy_locker *locker, dm_oblock_t b) +{ + struct old_oblock_lock *l = container_of(locker, struct old_oblock_lock, locker); + struct dm_bio_prison_cell *cell_prealloc = prealloc_get_cell(l->structs); + + return bio_detain(l->cache, b, NULL, cell_prealloc, + (cell_free_fn) prealloc_put_cell, + l->structs, &l->cell); +} + static void process_bio(struct cache *cache, struct prealloc *structs, struct bio *bio) { int r; bool release_cell = true; dm_oblock_t block = get_bio_block(cache, bio); - struct dm_bio_prison_cell *cell_prealloc, *old_ocell, *new_ocell; + struct dm_bio_prison_cell *cell_prealloc, *new_ocell; struct policy_result lookup_result; bool passthrough = passthrough_mode(&cache->features); bool discarded_block, can_migrate; + struct old_oblock_lock ool; /* * Check to see if that block is currently migrating. @@ -1469,8 +1496,12 @@ static void process_bio(struct cache *cache, struct prealloc *structs, discarded_block = is_discarded_oblock(cache, block); can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache)); + ool.locker.fn = cell_locker; + ool.cache = cache; + ool.structs = structs; + ool.cell = NULL; r = policy_map(cache->policy, block, true, can_migrate, discarded_block, - bio, &lookup_result); + bio, &ool.locker, &lookup_result); if (r == -EWOULDBLOCK) /* migration has been denied */ @@ -1527,27 +1558,11 @@ static void process_bio(struct cache *cache, struct prealloc *structs, break; case POLICY_REPLACE: - cell_prealloc = prealloc_get_cell(structs); - r = bio_detain(cache, lookup_result.old_oblock, bio, cell_prealloc, - (cell_free_fn) prealloc_put_cell, - structs, &old_ocell); - if (r > 0) { - /* - * We have to be careful to avoid lock inversion of - * the cells. So we back off, and wait for the - * old_ocell to become free. - */ - policy_force_mapping(cache->policy, block, - lookup_result.old_oblock); - atomic_inc(&cache->stats.cache_cell_clash); - break; - } atomic_inc(&cache->stats.demotion); atomic_inc(&cache->stats.promotion); - demote_then_promote(cache, structs, lookup_result.old_oblock, block, lookup_result.cblock, - old_ocell, new_ocell); + ool.cell, new_ocell); release_cell = false; break; @@ -2595,6 +2610,9 @@ static int __cache_map(struct cache *cache, struct bio *bio, struct dm_bio_priso bool discarded_block; struct policy_result lookup_result; struct per_bio_data *pb = init_per_bio_data(bio, pb_data_size); + struct old_oblock_lock ool; + + ool.locker.fn = null_locker; if (unlikely(from_oblock(block) >= from_oblock(cache->origin_blocks))) { /* @@ -2633,7 +2651,7 @@ static int __cache_map(struct cache *cache, struct bio *bio, struct dm_bio_priso discarded_block = is_discarded_oblock(cache, block); r = policy_map(cache->policy, block, false, can_migrate, discarded_block, - bio, &lookup_result); + bio, &ool.locker, &lookup_result); if (r == -EWOULDBLOCK) { cell_defer(cache, *cell, true); return DM_MAPIO_SUBMITTED; diff --git a/kernel/drivers/md/dm-stats.c b/kernel/drivers/md/dm-stats.c index f478a4c96..419bdd4fc 100644 --- a/kernel/drivers/md/dm-stats.c +++ b/kernel/drivers/md/dm-stats.c @@ -795,6 +795,8 @@ static int message_stats_create(struct mapped_device *md, return -EINVAL; if (sscanf(argv[2], "/%u%c", &divisor, &dummy) == 1) { + if (!divisor) + return -EINVAL; step = end - start; if (do_div(step, divisor)) step++; diff --git a/kernel/drivers/md/dm-thin-metadata.c b/kernel/drivers/md/dm-thin-metadata.c index 79f694120..cde1d6749 100644 --- a/kernel/drivers/md/dm-thin-metadata.c +++ b/kernel/drivers/md/dm-thin-metadata.c @@ -1295,8 +1295,8 @@ static int __release_metadata_snap(struct dm_pool_metadata *pmd) return r; disk_super = dm_block_data(copy); - dm_sm_dec_block(pmd->metadata_sm, le64_to_cpu(disk_super->data_mapping_root)); - dm_sm_dec_block(pmd->metadata_sm, le64_to_cpu(disk_super->device_details_root)); + dm_btree_del(&pmd->info, le64_to_cpu(disk_super->data_mapping_root)); + dm_btree_del(&pmd->details_info, le64_to_cpu(disk_super->device_details_root)); dm_sm_dec_block(pmd->metadata_sm, held_root); return dm_tm_unlock(pmd->tm, copy); diff --git a/kernel/drivers/md/dm-thin.c b/kernel/drivers/md/dm-thin.c index 921aafd12..e22e6c892 100644 --- a/kernel/drivers/md/dm-thin.c +++ b/kernel/drivers/md/dm-thin.c @@ -18,6 +18,7 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/slab.h> +#include <linux/vmalloc.h> #include <linux/sort.h> #include <linux/rbtree.h> @@ -260,7 +261,7 @@ struct pool { process_mapping_fn process_prepared_mapping; process_mapping_fn process_prepared_discard; - struct dm_bio_prison_cell *cell_sort_array[CELL_SORT_ARRAY_SIZE]; + struct dm_bio_prison_cell **cell_sort_array; }; static enum pool_mode get_pool_mode(struct pool *pool); @@ -2499,6 +2500,7 @@ static void __pool_destroy(struct pool *pool) { __pool_table_remove(pool); + vfree(pool->cell_sort_array); if (dm_pool_metadata_close(pool->pmd) < 0) DMWARN("%s: dm_pool_metadata_close() failed.", __func__); @@ -2611,6 +2613,13 @@ static struct pool *pool_create(struct mapped_device *pool_md, goto bad_mapping_pool; } + pool->cell_sort_array = vmalloc(sizeof(*pool->cell_sort_array) * CELL_SORT_ARRAY_SIZE); + if (!pool->cell_sort_array) { + *error = "Error allocating cell sort array"; + err_p = ERR_PTR(-ENOMEM); + goto bad_sort_array; + } + pool->ref_count = 1; pool->last_commit_jiffies = jiffies; pool->pool_md = pool_md; @@ -2619,6 +2628,8 @@ static struct pool *pool_create(struct mapped_device *pool_md, return pool; +bad_sort_array: + mempool_destroy(pool->mapping_pool); bad_mapping_pool: dm_deferred_set_destroy(pool->all_io_ds); bad_all_io_ds: diff --git a/kernel/drivers/md/dm.c b/kernel/drivers/md/dm.c index 927523984..98347e2cf 100644 --- a/kernel/drivers/md/dm.c +++ b/kernel/drivers/md/dm.c @@ -1053,13 +1053,10 @@ static struct dm_rq_target_io *tio_from_request(struct request *rq) */ static void rq_completed(struct mapped_device *md, int rw, bool run_queue) { - int nr_requests_pending; - atomic_dec(&md->pending[rw]); /* nudge anyone waiting on suspend queue */ - nr_requests_pending = md_in_flight(md); - if (!nr_requests_pending) + if (!md_in_flight(md)) wake_up(&md->wait); /* @@ -1071,8 +1068,7 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue) if (run_queue) { if (md->queue->mq_ops) blk_mq_run_hw_queues(md->queue, true); - else if (!nr_requests_pending || - (nr_requests_pending >= md->queue->nr_congestion_on)) + else blk_run_queue_async(md->queue); } @@ -1723,7 +1719,8 @@ static int dm_merge_bvec(struct request_queue *q, struct mapped_device *md = q->queuedata; struct dm_table *map = dm_get_live_table_fast(md); struct dm_target *ti; - sector_t max_sectors, max_size = 0; + sector_t max_sectors; + int max_size = 0; if (unlikely(!map)) goto out; @@ -1736,18 +1733,10 @@ static int dm_merge_bvec(struct request_queue *q, * Find maximum amount of I/O that won't need splitting */ max_sectors = min(max_io_len(bvm->bi_sector, ti), - (sector_t) queue_max_sectors(q)); + (sector_t) BIO_MAX_SECTORS); max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size; - - /* - * FIXME: this stop-gap fix _must_ be cleaned up (by passing a sector_t - * to the targets' merge function since it holds sectors not bytes). - * Just doing this as an interim fix for stable@ because the more - * comprehensive cleanup of switching to sector_t will impact every - * DM target that implements a ->merge hook. - */ - if (max_size > INT_MAX) - max_size = INT_MAX; + if (max_size < 0) + max_size = 0; /* * merge_bvec_fn() returns number of bytes @@ -1755,13 +1744,13 @@ static int dm_merge_bvec(struct request_queue *q, * max is precomputed maximal io size */ if (max_size && ti->type->merge) - max_size = ti->type->merge(ti, bvm, biovec, (int) max_size); + max_size = ti->type->merge(ti, bvm, biovec, max_size); /* * If the target doesn't support merge method and some of the devices - * provided their merge_bvec method (we know this by looking for the - * max_hw_sectors that dm_set_device_limits may set), then we can't - * allow bios with multiple vector entries. So always set max_size - * to 0, and the code below allows just one page. + * provided their merge_bvec method (we know this by looking at + * queue_max_hw_sectors), then we can't allow bios with multiple vector + * entries. So always set max_size to 0, and the code below allows + * just one page. */ else if (queue_max_hw_sectors(q) <= PAGE_SIZE >> 9) max_size = 0; diff --git a/kernel/drivers/md/md.c b/kernel/drivers/md/md.c index 4dbed4a67..e4621511d 100644 --- a/kernel/drivers/md/md.c +++ b/kernel/drivers/md/md.c @@ -4005,8 +4005,10 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len) else rdev = md_import_device(dev, -1, -1); - if (IS_ERR(rdev)) + if (IS_ERR(rdev)) { + mddev_unlock(mddev); return PTR_ERR(rdev); + } err = bind_rdev_to_array(rdev, mddev); out: if (err) @@ -5159,6 +5161,7 @@ int md_run(struct mddev *mddev) mddev_detach(mddev); if (mddev->private) pers->free(mddev, mddev->private); + mddev->private = NULL; module_put(pers->owner); bitmap_destroy(mddev); return err; @@ -5294,6 +5297,7 @@ static void md_clean(struct mddev *mddev) mddev->changed = 0; mddev->degraded = 0; mddev->safemode = 0; + mddev->private = NULL; mddev->merge_check_needed = 0; mddev->bitmap_info.offset = 0; mddev->bitmap_info.default_offset = 0; @@ -5366,6 +5370,7 @@ static void __md_stop(struct mddev *mddev) mddev->pers = NULL; spin_unlock(&mddev->lock); pers->free(mddev, mddev->private); + mddev->private = NULL; if (pers->sync_request && mddev->to_remove == NULL) mddev->to_remove = &md_redundancy_group; module_put(pers->owner); @@ -5735,7 +5740,7 @@ static int get_bitmap_file(struct mddev *mddev, void __user * arg) char *ptr; int err; - file = kmalloc(sizeof(*file), GFP_NOIO); + file = kzalloc(sizeof(*file), GFP_NOIO); if (!file) return -ENOMEM; @@ -6375,7 +6380,7 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info) mddev->ctime != info->ctime || mddev->level != info->level || /* mddev->layout != info->layout || */ - !mddev->persistent != info->not_persistent|| + mddev->persistent != !info->not_persistent || mddev->chunk_sectors != info->chunk_size >> 9 || /* ignore bottom 8 bits of state, and allow SB_BITMAP_PRESENT to change */ ((state^info->state) & 0xfffffe00) diff --git a/kernel/drivers/md/persistent-data/dm-btree-remove.c b/kernel/drivers/md/persistent-data/dm-btree-remove.c index b88757cd0..a03178e91 100644 --- a/kernel/drivers/md/persistent-data/dm-btree-remove.c +++ b/kernel/drivers/md/persistent-data/dm-btree-remove.c @@ -309,8 +309,8 @@ static void redistribute3(struct dm_btree_info *info, struct btree_node *parent, if (s < 0 && nr_center < -s) { /* not enough in central node */ - shift(left, center, nr_center); - s = nr_center - target; + shift(left, center, -nr_center); + s += nr_center; shift(left, right, s); nr_right += s; } else @@ -323,7 +323,7 @@ static void redistribute3(struct dm_btree_info *info, struct btree_node *parent, if (s > 0 && nr_center < s) { /* not enough in central node */ shift(center, right, nr_center); - s = target - nr_center; + s -= nr_center; shift(left, right, s); nr_left -= s; } else diff --git a/kernel/drivers/md/persistent-data/dm-btree.c b/kernel/drivers/md/persistent-data/dm-btree.c index 200ac12a1..fdd3793e2 100644 --- a/kernel/drivers/md/persistent-data/dm-btree.c +++ b/kernel/drivers/md/persistent-data/dm-btree.c @@ -255,7 +255,7 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root) int r; struct del_stack *s; - s = kmalloc(sizeof(*s), GFP_KERNEL); + s = kmalloc(sizeof(*s), GFP_NOIO); if (!s) return -ENOMEM; s->info = info; diff --git a/kernel/drivers/md/persistent-data/dm-space-map-metadata.c b/kernel/drivers/md/persistent-data/dm-space-map-metadata.c index e8a904298..53091295f 100644 --- a/kernel/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/kernel/drivers/md/persistent-data/dm-space-map-metadata.c @@ -204,6 +204,27 @@ static void in(struct sm_metadata *smm) smm->recursion_count++; } +static int apply_bops(struct sm_metadata *smm) +{ + int r = 0; + + while (!brb_empty(&smm->uncommitted)) { + struct block_op bop; + + r = brb_pop(&smm->uncommitted, &bop); + if (r) { + DMERR("bug in bop ring buffer"); + break; + } + + r = commit_bop(smm, &bop); + if (r) + break; + } + + return r; +} + static int out(struct sm_metadata *smm) { int r = 0; @@ -216,21 +237,8 @@ static int out(struct sm_metadata *smm) return -ENOMEM; } - if (smm->recursion_count == 1) { - while (!brb_empty(&smm->uncommitted)) { - struct block_op bop; - - r = brb_pop(&smm->uncommitted, &bop); - if (r) { - DMERR("bug in bop ring buffer"); - break; - } - - r = commit_bop(smm, &bop); - if (r) - break; - } - } + if (smm->recursion_count == 1) + apply_bops(smm); smm->recursion_count--; @@ -704,6 +712,12 @@ static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks) } old_len = smm->begin; + r = apply_bops(smm); + if (r) { + DMERR("%s: apply_bops failed", __func__); + goto out; + } + r = sm_ll_commit(&smm->ll); if (r) goto out; @@ -773,6 +787,12 @@ int dm_sm_metadata_create(struct dm_space_map *sm, if (r) return r; + r = apply_bops(smm); + if (r) { + DMERR("%s: apply_bops failed", __func__); + return r; + } + return sm_metadata_commit(sm); } diff --git a/kernel/drivers/md/raid1.c b/kernel/drivers/md/raid1.c index 9157a29c8..5ce3cd5c4 100644 --- a/kernel/drivers/md/raid1.c +++ b/kernel/drivers/md/raid1.c @@ -336,7 +336,7 @@ static void raid1_end_read_request(struct bio *bio, int error) spin_lock_irqsave(&conf->device_lock, flags); if (r1_bio->mddev->degraded == conf->raid_disks || (r1_bio->mddev->degraded == conf->raid_disks-1 && - !test_bit(Faulty, &conf->mirrors[mirror].rdev->flags))) + test_bit(In_sync, &conf->mirrors[mirror].rdev->flags))) uptodate = 1; spin_unlock_irqrestore(&conf->device_lock, flags); } @@ -1475,6 +1475,7 @@ static void error(struct mddev *mddev, struct md_rdev *rdev) { char b[BDEVNAME_SIZE]; struct r1conf *conf = mddev->private; + unsigned long flags; /* * If it is not operational, then we have already marked it as dead @@ -1494,14 +1495,13 @@ static void error(struct mddev *mddev, struct md_rdev *rdev) return; } set_bit(Blocked, &rdev->flags); + spin_lock_irqsave(&conf->device_lock, flags); if (test_and_clear_bit(In_sync, &rdev->flags)) { - unsigned long flags; - spin_lock_irqsave(&conf->device_lock, flags); mddev->degraded++; set_bit(Faulty, &rdev->flags); - spin_unlock_irqrestore(&conf->device_lock, flags); } else set_bit(Faulty, &rdev->flags); + spin_unlock_irqrestore(&conf->device_lock, flags); /* * if recovery is running, make sure it aborts. */ @@ -1567,7 +1567,10 @@ static int raid1_spare_active(struct mddev *mddev) * Find all failed disks within the RAID1 configuration * and mark them readable. * Called under mddev lock, so rcu protection not needed. + * device_lock used to avoid races with raid1_end_read_request + * which expects 'In_sync' flags and ->degraded to be consistent. */ + spin_lock_irqsave(&conf->device_lock, flags); for (i = 0; i < conf->raid_disks; i++) { struct md_rdev *rdev = conf->mirrors[i].rdev; struct md_rdev *repl = conf->mirrors[conf->raid_disks + i].rdev; @@ -1598,7 +1601,6 @@ static int raid1_spare_active(struct mddev *mddev) sysfs_notify_dirent_safe(rdev->sysfs_state); } } - spin_lock_irqsave(&conf->device_lock, flags); mddev->degraded -= count; spin_unlock_irqrestore(&conf->device_lock, flags); |