summaryrefslogtreecommitdiffstats
path: root/kernel/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/drivers/md')
-rw-r--r--kernel/drivers/md/bitmap.c15
-rw-r--r--kernel/drivers/md/dm-cache-policy-cleaner.c3
-rw-r--r--kernel/drivers/md/dm-cache-policy-internal.h5
-rw-r--r--kernel/drivers/md/dm-cache-policy-mq.c41
-rw-r--r--kernel/drivers/md/dm-cache-policy.h15
-rw-r--r--kernel/drivers/md/dm-cache-target.c58
-rw-r--r--kernel/drivers/md/dm-stats.c2
-rw-r--r--kernel/drivers/md/dm-thin-metadata.c4
-rw-r--r--kernel/drivers/md/dm-thin.c13
-rw-r--r--kernel/drivers/md/dm.c35
-rw-r--r--kernel/drivers/md/md.c11
-rw-r--r--kernel/drivers/md/persistent-data/dm-btree-remove.c6
-rw-r--r--kernel/drivers/md/persistent-data/dm-btree.c2
-rw-r--r--kernel/drivers/md/persistent-data/dm-space-map-metadata.c50
-rw-r--r--kernel/drivers/md/raid1.c12
15 files changed, 179 insertions, 93 deletions
diff --git a/kernel/drivers/md/bitmap.c b/kernel/drivers/md/bitmap.c
index 135a0907e..c90118e90 100644
--- a/kernel/drivers/md/bitmap.c
+++ b/kernel/drivers/md/bitmap.c
@@ -494,7 +494,7 @@ static int bitmap_new_disk_sb(struct bitmap *bitmap)
bitmap_super_t *sb;
unsigned long chunksize, daemon_sleep, write_behind;
- bitmap->storage.sb_page = alloc_page(GFP_KERNEL);
+ bitmap->storage.sb_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (bitmap->storage.sb_page == NULL)
return -ENOMEM;
bitmap->storage.sb_page->index = 0;
@@ -541,6 +541,7 @@ static int bitmap_new_disk_sb(struct bitmap *bitmap)
sb->state = cpu_to_le32(bitmap->flags);
bitmap->events_cleared = bitmap->mddev->events;
sb->events_cleared = cpu_to_le64(bitmap->mddev->events);
+ bitmap->mddev->bitmap_info.nodes = 0;
kunmap_atomic(sb);
@@ -611,8 +612,16 @@ re_read:
daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
write_behind = le32_to_cpu(sb->write_behind);
sectors_reserved = le32_to_cpu(sb->sectors_reserved);
- nodes = le32_to_cpu(sb->nodes);
- strlcpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64);
+ /* XXX: This is a hack to ensure that we don't use clustering
+ * in case:
+ * - dm-raid is in use and
+ * - the nodes written in bitmap_sb is erroneous.
+ */
+ if (!bitmap->mddev->sync_super) {
+ nodes = le32_to_cpu(sb->nodes);
+ strlcpy(bitmap->mddev->bitmap_info.cluster_name,
+ sb->cluster_name, 64);
+ }
/* verify that the bitmap-specific fields are valid */
if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
diff --git a/kernel/drivers/md/dm-cache-policy-cleaner.c b/kernel/drivers/md/dm-cache-policy-cleaner.c
index b04d1f904..004e463c9 100644
--- a/kernel/drivers/md/dm-cache-policy-cleaner.c
+++ b/kernel/drivers/md/dm-cache-policy-cleaner.c
@@ -171,7 +171,8 @@ static void remove_cache_hash_entry(struct wb_cache_entry *e)
/* Public interface (see dm-cache-policy.h */
static int wb_map(struct dm_cache_policy *pe, dm_oblock_t oblock,
bool can_block, bool can_migrate, bool discarded_oblock,
- struct bio *bio, struct policy_result *result)
+ struct bio *bio, struct policy_locker *locker,
+ struct policy_result *result)
{
struct policy *p = to_policy(pe);
struct wb_cache_entry *e;
diff --git a/kernel/drivers/md/dm-cache-policy-internal.h b/kernel/drivers/md/dm-cache-policy-internal.h
index 2256a1f24..c198e6def 100644
--- a/kernel/drivers/md/dm-cache-policy-internal.h
+++ b/kernel/drivers/md/dm-cache-policy-internal.h
@@ -16,9 +16,10 @@
*/
static inline int policy_map(struct dm_cache_policy *p, dm_oblock_t oblock,
bool can_block, bool can_migrate, bool discarded_oblock,
- struct bio *bio, struct policy_result *result)
+ struct bio *bio, struct policy_locker *locker,
+ struct policy_result *result)
{
- return p->map(p, oblock, can_block, can_migrate, discarded_oblock, bio, result);
+ return p->map(p, oblock, can_block, can_migrate, discarded_oblock, bio, locker, result);
}
static inline int policy_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock)
diff --git a/kernel/drivers/md/dm-cache-policy-mq.c b/kernel/drivers/md/dm-cache-policy-mq.c
index 3ddd11623..515d44bf2 100644
--- a/kernel/drivers/md/dm-cache-policy-mq.c
+++ b/kernel/drivers/md/dm-cache-policy-mq.c
@@ -693,9 +693,10 @@ static void requeue(struct mq_policy *mq, struct entry *e)
* - set the hit count to a hard coded value other than 1, eg, is it better
* if it goes in at level 2?
*/
-static int demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock)
+static int demote_cblock(struct mq_policy *mq,
+ struct policy_locker *locker, dm_oblock_t *oblock)
{
- struct entry *demoted = pop(mq, &mq->cache_clean);
+ struct entry *demoted = peek(&mq->cache_clean);
if (!demoted)
/*
@@ -707,6 +708,13 @@ static int demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock)
*/
return -ENOSPC;
+ if (locker->fn(locker, demoted->oblock))
+ /*
+ * We couldn't lock the demoted block.
+ */
+ return -EBUSY;
+
+ del(mq, demoted);
*oblock = demoted->oblock;
free_entry(&mq->cache_pool, demoted);
@@ -795,6 +803,7 @@ static int cache_entry_found(struct mq_policy *mq,
* finding which cache block to use.
*/
static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e,
+ struct policy_locker *locker,
struct policy_result *result)
{
int r;
@@ -803,11 +812,12 @@ static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e,
/* Ensure there's a free cblock in the cache */
if (epool_empty(&mq->cache_pool)) {
result->op = POLICY_REPLACE;
- r = demote_cblock(mq, &result->old_oblock);
+ r = demote_cblock(mq, locker, &result->old_oblock);
if (r) {
result->op = POLICY_MISS;
return 0;
}
+
} else
result->op = POLICY_NEW;
@@ -829,7 +839,8 @@ static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e,
static int pre_cache_entry_found(struct mq_policy *mq, struct entry *e,
bool can_migrate, bool discarded_oblock,
- int data_dir, struct policy_result *result)
+ int data_dir, struct policy_locker *locker,
+ struct policy_result *result)
{
int r = 0;
@@ -842,7 +853,7 @@ static int pre_cache_entry_found(struct mq_policy *mq, struct entry *e,
else {
requeue(mq, e);
- r = pre_cache_to_cache(mq, e, result);
+ r = pre_cache_to_cache(mq, e, locker, result);
}
return r;
@@ -872,6 +883,7 @@ static void insert_in_pre_cache(struct mq_policy *mq,
}
static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock,
+ struct policy_locker *locker,
struct policy_result *result)
{
int r;
@@ -879,7 +891,7 @@ static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock,
if (epool_empty(&mq->cache_pool)) {
result->op = POLICY_REPLACE;
- r = demote_cblock(mq, &result->old_oblock);
+ r = demote_cblock(mq, locker, &result->old_oblock);
if (unlikely(r)) {
result->op = POLICY_MISS;
insert_in_pre_cache(mq, oblock);
@@ -907,11 +919,12 @@ static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock,
static int no_entry_found(struct mq_policy *mq, dm_oblock_t oblock,
bool can_migrate, bool discarded_oblock,
- int data_dir, struct policy_result *result)
+ int data_dir, struct policy_locker *locker,
+ struct policy_result *result)
{
if (adjusted_promote_threshold(mq, discarded_oblock, data_dir) <= 1) {
if (can_migrate)
- insert_in_cache(mq, oblock, result);
+ insert_in_cache(mq, oblock, locker, result);
else
return -EWOULDBLOCK;
} else {
@@ -928,7 +941,8 @@ static int no_entry_found(struct mq_policy *mq, dm_oblock_t oblock,
*/
static int map(struct mq_policy *mq, dm_oblock_t oblock,
bool can_migrate, bool discarded_oblock,
- int data_dir, struct policy_result *result)
+ int data_dir, struct policy_locker *locker,
+ struct policy_result *result)
{
int r = 0;
struct entry *e = hash_lookup(mq, oblock);
@@ -942,11 +956,11 @@ static int map(struct mq_policy *mq, dm_oblock_t oblock,
else if (e)
r = pre_cache_entry_found(mq, e, can_migrate, discarded_oblock,
- data_dir, result);
+ data_dir, locker, result);
else
r = no_entry_found(mq, oblock, can_migrate, discarded_oblock,
- data_dir, result);
+ data_dir, locker, result);
if (r == -EWOULDBLOCK)
result->op = POLICY_MISS;
@@ -1012,7 +1026,8 @@ static void copy_tick(struct mq_policy *mq)
static int mq_map(struct dm_cache_policy *p, dm_oblock_t oblock,
bool can_block, bool can_migrate, bool discarded_oblock,
- struct bio *bio, struct policy_result *result)
+ struct bio *bio, struct policy_locker *locker,
+ struct policy_result *result)
{
int r;
struct mq_policy *mq = to_mq_policy(p);
@@ -1028,7 +1043,7 @@ static int mq_map(struct dm_cache_policy *p, dm_oblock_t oblock,
iot_examine_bio(&mq->tracker, bio);
r = map(mq, oblock, can_migrate, discarded_oblock,
- bio_data_dir(bio), result);
+ bio_data_dir(bio), locker, result);
mutex_unlock(&mq->lock);
diff --git a/kernel/drivers/md/dm-cache-policy.h b/kernel/drivers/md/dm-cache-policy.h
index f50fe360c..5524e21e4 100644
--- a/kernel/drivers/md/dm-cache-policy.h
+++ b/kernel/drivers/md/dm-cache-policy.h
@@ -70,6 +70,18 @@ enum policy_operation {
};
/*
+ * When issuing a POLICY_REPLACE the policy needs to make a callback to
+ * lock the block being demoted. This doesn't need to occur during a
+ * writeback operation since the block remains in the cache.
+ */
+struct policy_locker;
+typedef int (*policy_lock_fn)(struct policy_locker *l, dm_oblock_t oblock);
+
+struct policy_locker {
+ policy_lock_fn fn;
+};
+
+/*
* This is the instruction passed back to the core target.
*/
struct policy_result {
@@ -122,7 +134,8 @@ struct dm_cache_policy {
*/
int (*map)(struct dm_cache_policy *p, dm_oblock_t oblock,
bool can_block, bool can_migrate, bool discarded_oblock,
- struct bio *bio, struct policy_result *result);
+ struct bio *bio, struct policy_locker *locker,
+ struct policy_result *result);
/*
* Sometimes we want to see if a block is in the cache, without
diff --git a/kernel/drivers/md/dm-cache-target.c b/kernel/drivers/md/dm-cache-target.c
index 7755af351..e049becaa 100644
--- a/kernel/drivers/md/dm-cache-target.c
+++ b/kernel/drivers/md/dm-cache-target.c
@@ -1445,16 +1445,43 @@ static void inc_miss_counter(struct cache *cache, struct bio *bio)
&cache->stats.read_miss : &cache->stats.write_miss);
}
+/*----------------------------------------------------------------*/
+
+struct old_oblock_lock {
+ struct policy_locker locker;
+ struct cache *cache;
+ struct prealloc *structs;
+ struct dm_bio_prison_cell *cell;
+};
+
+static int null_locker(struct policy_locker *locker, dm_oblock_t b)
+{
+ /* This should never be called */
+ BUG();
+ return 0;
+}
+
+static int cell_locker(struct policy_locker *locker, dm_oblock_t b)
+{
+ struct old_oblock_lock *l = container_of(locker, struct old_oblock_lock, locker);
+ struct dm_bio_prison_cell *cell_prealloc = prealloc_get_cell(l->structs);
+
+ return bio_detain(l->cache, b, NULL, cell_prealloc,
+ (cell_free_fn) prealloc_put_cell,
+ l->structs, &l->cell);
+}
+
static void process_bio(struct cache *cache, struct prealloc *structs,
struct bio *bio)
{
int r;
bool release_cell = true;
dm_oblock_t block = get_bio_block(cache, bio);
- struct dm_bio_prison_cell *cell_prealloc, *old_ocell, *new_ocell;
+ struct dm_bio_prison_cell *cell_prealloc, *new_ocell;
struct policy_result lookup_result;
bool passthrough = passthrough_mode(&cache->features);
bool discarded_block, can_migrate;
+ struct old_oblock_lock ool;
/*
* Check to see if that block is currently migrating.
@@ -1469,8 +1496,12 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
discarded_block = is_discarded_oblock(cache, block);
can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache));
+ ool.locker.fn = cell_locker;
+ ool.cache = cache;
+ ool.structs = structs;
+ ool.cell = NULL;
r = policy_map(cache->policy, block, true, can_migrate, discarded_block,
- bio, &lookup_result);
+ bio, &ool.locker, &lookup_result);
if (r == -EWOULDBLOCK)
/* migration has been denied */
@@ -1527,27 +1558,11 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
break;
case POLICY_REPLACE:
- cell_prealloc = prealloc_get_cell(structs);
- r = bio_detain(cache, lookup_result.old_oblock, bio, cell_prealloc,
- (cell_free_fn) prealloc_put_cell,
- structs, &old_ocell);
- if (r > 0) {
- /*
- * We have to be careful to avoid lock inversion of
- * the cells. So we back off, and wait for the
- * old_ocell to become free.
- */
- policy_force_mapping(cache->policy, block,
- lookup_result.old_oblock);
- atomic_inc(&cache->stats.cache_cell_clash);
- break;
- }
atomic_inc(&cache->stats.demotion);
atomic_inc(&cache->stats.promotion);
-
demote_then_promote(cache, structs, lookup_result.old_oblock,
block, lookup_result.cblock,
- old_ocell, new_ocell);
+ ool.cell, new_ocell);
release_cell = false;
break;
@@ -2595,6 +2610,9 @@ static int __cache_map(struct cache *cache, struct bio *bio, struct dm_bio_priso
bool discarded_block;
struct policy_result lookup_result;
struct per_bio_data *pb = init_per_bio_data(bio, pb_data_size);
+ struct old_oblock_lock ool;
+
+ ool.locker.fn = null_locker;
if (unlikely(from_oblock(block) >= from_oblock(cache->origin_blocks))) {
/*
@@ -2633,7 +2651,7 @@ static int __cache_map(struct cache *cache, struct bio *bio, struct dm_bio_priso
discarded_block = is_discarded_oblock(cache, block);
r = policy_map(cache->policy, block, false, can_migrate, discarded_block,
- bio, &lookup_result);
+ bio, &ool.locker, &lookup_result);
if (r == -EWOULDBLOCK) {
cell_defer(cache, *cell, true);
return DM_MAPIO_SUBMITTED;
diff --git a/kernel/drivers/md/dm-stats.c b/kernel/drivers/md/dm-stats.c
index f478a4c96..419bdd4fc 100644
--- a/kernel/drivers/md/dm-stats.c
+++ b/kernel/drivers/md/dm-stats.c
@@ -795,6 +795,8 @@ static int message_stats_create(struct mapped_device *md,
return -EINVAL;
if (sscanf(argv[2], "/%u%c", &divisor, &dummy) == 1) {
+ if (!divisor)
+ return -EINVAL;
step = end - start;
if (do_div(step, divisor))
step++;
diff --git a/kernel/drivers/md/dm-thin-metadata.c b/kernel/drivers/md/dm-thin-metadata.c
index 79f694120..cde1d6749 100644
--- a/kernel/drivers/md/dm-thin-metadata.c
+++ b/kernel/drivers/md/dm-thin-metadata.c
@@ -1295,8 +1295,8 @@ static int __release_metadata_snap(struct dm_pool_metadata *pmd)
return r;
disk_super = dm_block_data(copy);
- dm_sm_dec_block(pmd->metadata_sm, le64_to_cpu(disk_super->data_mapping_root));
- dm_sm_dec_block(pmd->metadata_sm, le64_to_cpu(disk_super->device_details_root));
+ dm_btree_del(&pmd->info, le64_to_cpu(disk_super->data_mapping_root));
+ dm_btree_del(&pmd->details_info, le64_to_cpu(disk_super->device_details_root));
dm_sm_dec_block(pmd->metadata_sm, held_root);
return dm_tm_unlock(pmd->tm, copy);
diff --git a/kernel/drivers/md/dm-thin.c b/kernel/drivers/md/dm-thin.c
index 921aafd12..e22e6c892 100644
--- a/kernel/drivers/md/dm-thin.c
+++ b/kernel/drivers/md/dm-thin.c
@@ -18,6 +18,7 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
+#include <linux/vmalloc.h>
#include <linux/sort.h>
#include <linux/rbtree.h>
@@ -260,7 +261,7 @@ struct pool {
process_mapping_fn process_prepared_mapping;
process_mapping_fn process_prepared_discard;
- struct dm_bio_prison_cell *cell_sort_array[CELL_SORT_ARRAY_SIZE];
+ struct dm_bio_prison_cell **cell_sort_array;
};
static enum pool_mode get_pool_mode(struct pool *pool);
@@ -2499,6 +2500,7 @@ static void __pool_destroy(struct pool *pool)
{
__pool_table_remove(pool);
+ vfree(pool->cell_sort_array);
if (dm_pool_metadata_close(pool->pmd) < 0)
DMWARN("%s: dm_pool_metadata_close() failed.", __func__);
@@ -2611,6 +2613,13 @@ static struct pool *pool_create(struct mapped_device *pool_md,
goto bad_mapping_pool;
}
+ pool->cell_sort_array = vmalloc(sizeof(*pool->cell_sort_array) * CELL_SORT_ARRAY_SIZE);
+ if (!pool->cell_sort_array) {
+ *error = "Error allocating cell sort array";
+ err_p = ERR_PTR(-ENOMEM);
+ goto bad_sort_array;
+ }
+
pool->ref_count = 1;
pool->last_commit_jiffies = jiffies;
pool->pool_md = pool_md;
@@ -2619,6 +2628,8 @@ static struct pool *pool_create(struct mapped_device *pool_md,
return pool;
+bad_sort_array:
+ mempool_destroy(pool->mapping_pool);
bad_mapping_pool:
dm_deferred_set_destroy(pool->all_io_ds);
bad_all_io_ds:
diff --git a/kernel/drivers/md/dm.c b/kernel/drivers/md/dm.c
index 927523984..98347e2cf 100644
--- a/kernel/drivers/md/dm.c
+++ b/kernel/drivers/md/dm.c
@@ -1053,13 +1053,10 @@ static struct dm_rq_target_io *tio_from_request(struct request *rq)
*/
static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
{
- int nr_requests_pending;
-
atomic_dec(&md->pending[rw]);
/* nudge anyone waiting on suspend queue */
- nr_requests_pending = md_in_flight(md);
- if (!nr_requests_pending)
+ if (!md_in_flight(md))
wake_up(&md->wait);
/*
@@ -1071,8 +1068,7 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
if (run_queue) {
if (md->queue->mq_ops)
blk_mq_run_hw_queues(md->queue, true);
- else if (!nr_requests_pending ||
- (nr_requests_pending >= md->queue->nr_congestion_on))
+ else
blk_run_queue_async(md->queue);
}
@@ -1723,7 +1719,8 @@ static int dm_merge_bvec(struct request_queue *q,
struct mapped_device *md = q->queuedata;
struct dm_table *map = dm_get_live_table_fast(md);
struct dm_target *ti;
- sector_t max_sectors, max_size = 0;
+ sector_t max_sectors;
+ int max_size = 0;
if (unlikely(!map))
goto out;
@@ -1736,18 +1733,10 @@ static int dm_merge_bvec(struct request_queue *q,
* Find maximum amount of I/O that won't need splitting
*/
max_sectors = min(max_io_len(bvm->bi_sector, ti),
- (sector_t) queue_max_sectors(q));
+ (sector_t) BIO_MAX_SECTORS);
max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size;
-
- /*
- * FIXME: this stop-gap fix _must_ be cleaned up (by passing a sector_t
- * to the targets' merge function since it holds sectors not bytes).
- * Just doing this as an interim fix for stable@ because the more
- * comprehensive cleanup of switching to sector_t will impact every
- * DM target that implements a ->merge hook.
- */
- if (max_size > INT_MAX)
- max_size = INT_MAX;
+ if (max_size < 0)
+ max_size = 0;
/*
* merge_bvec_fn() returns number of bytes
@@ -1755,13 +1744,13 @@ static int dm_merge_bvec(struct request_queue *q,
* max is precomputed maximal io size
*/
if (max_size && ti->type->merge)
- max_size = ti->type->merge(ti, bvm, biovec, (int) max_size);
+ max_size = ti->type->merge(ti, bvm, biovec, max_size);
/*
* If the target doesn't support merge method and some of the devices
- * provided their merge_bvec method (we know this by looking for the
- * max_hw_sectors that dm_set_device_limits may set), then we can't
- * allow bios with multiple vector entries. So always set max_size
- * to 0, and the code below allows just one page.
+ * provided their merge_bvec method (we know this by looking at
+ * queue_max_hw_sectors), then we can't allow bios with multiple vector
+ * entries. So always set max_size to 0, and the code below allows
+ * just one page.
*/
else if (queue_max_hw_sectors(q) <= PAGE_SIZE >> 9)
max_size = 0;
diff --git a/kernel/drivers/md/md.c b/kernel/drivers/md/md.c
index 4dbed4a67..e4621511d 100644
--- a/kernel/drivers/md/md.c
+++ b/kernel/drivers/md/md.c
@@ -4005,8 +4005,10 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len)
else
rdev = md_import_device(dev, -1, -1);
- if (IS_ERR(rdev))
+ if (IS_ERR(rdev)) {
+ mddev_unlock(mddev);
return PTR_ERR(rdev);
+ }
err = bind_rdev_to_array(rdev, mddev);
out:
if (err)
@@ -5159,6 +5161,7 @@ int md_run(struct mddev *mddev)
mddev_detach(mddev);
if (mddev->private)
pers->free(mddev, mddev->private);
+ mddev->private = NULL;
module_put(pers->owner);
bitmap_destroy(mddev);
return err;
@@ -5294,6 +5297,7 @@ static void md_clean(struct mddev *mddev)
mddev->changed = 0;
mddev->degraded = 0;
mddev->safemode = 0;
+ mddev->private = NULL;
mddev->merge_check_needed = 0;
mddev->bitmap_info.offset = 0;
mddev->bitmap_info.default_offset = 0;
@@ -5366,6 +5370,7 @@ static void __md_stop(struct mddev *mddev)
mddev->pers = NULL;
spin_unlock(&mddev->lock);
pers->free(mddev, mddev->private);
+ mddev->private = NULL;
if (pers->sync_request && mddev->to_remove == NULL)
mddev->to_remove = &md_redundancy_group;
module_put(pers->owner);
@@ -5735,7 +5740,7 @@ static int get_bitmap_file(struct mddev *mddev, void __user * arg)
char *ptr;
int err;
- file = kmalloc(sizeof(*file), GFP_NOIO);
+ file = kzalloc(sizeof(*file), GFP_NOIO);
if (!file)
return -ENOMEM;
@@ -6375,7 +6380,7 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
mddev->ctime != info->ctime ||
mddev->level != info->level ||
/* mddev->layout != info->layout || */
- !mddev->persistent != info->not_persistent||
+ mddev->persistent != !info->not_persistent ||
mddev->chunk_sectors != info->chunk_size >> 9 ||
/* ignore bottom 8 bits of state, and allow SB_BITMAP_PRESENT to change */
((state^info->state) & 0xfffffe00)
diff --git a/kernel/drivers/md/persistent-data/dm-btree-remove.c b/kernel/drivers/md/persistent-data/dm-btree-remove.c
index b88757cd0..a03178e91 100644
--- a/kernel/drivers/md/persistent-data/dm-btree-remove.c
+++ b/kernel/drivers/md/persistent-data/dm-btree-remove.c
@@ -309,8 +309,8 @@ static void redistribute3(struct dm_btree_info *info, struct btree_node *parent,
if (s < 0 && nr_center < -s) {
/* not enough in central node */
- shift(left, center, nr_center);
- s = nr_center - target;
+ shift(left, center, -nr_center);
+ s += nr_center;
shift(left, right, s);
nr_right += s;
} else
@@ -323,7 +323,7 @@ static void redistribute3(struct dm_btree_info *info, struct btree_node *parent,
if (s > 0 && nr_center < s) {
/* not enough in central node */
shift(center, right, nr_center);
- s = target - nr_center;
+ s -= nr_center;
shift(left, right, s);
nr_left -= s;
} else
diff --git a/kernel/drivers/md/persistent-data/dm-btree.c b/kernel/drivers/md/persistent-data/dm-btree.c
index 200ac12a1..fdd3793e2 100644
--- a/kernel/drivers/md/persistent-data/dm-btree.c
+++ b/kernel/drivers/md/persistent-data/dm-btree.c
@@ -255,7 +255,7 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root)
int r;
struct del_stack *s;
- s = kmalloc(sizeof(*s), GFP_KERNEL);
+ s = kmalloc(sizeof(*s), GFP_NOIO);
if (!s)
return -ENOMEM;
s->info = info;
diff --git a/kernel/drivers/md/persistent-data/dm-space-map-metadata.c b/kernel/drivers/md/persistent-data/dm-space-map-metadata.c
index e8a904298..53091295f 100644
--- a/kernel/drivers/md/persistent-data/dm-space-map-metadata.c
+++ b/kernel/drivers/md/persistent-data/dm-space-map-metadata.c
@@ -204,6 +204,27 @@ static void in(struct sm_metadata *smm)
smm->recursion_count++;
}
+static int apply_bops(struct sm_metadata *smm)
+{
+ int r = 0;
+
+ while (!brb_empty(&smm->uncommitted)) {
+ struct block_op bop;
+
+ r = brb_pop(&smm->uncommitted, &bop);
+ if (r) {
+ DMERR("bug in bop ring buffer");
+ break;
+ }
+
+ r = commit_bop(smm, &bop);
+ if (r)
+ break;
+ }
+
+ return r;
+}
+
static int out(struct sm_metadata *smm)
{
int r = 0;
@@ -216,21 +237,8 @@ static int out(struct sm_metadata *smm)
return -ENOMEM;
}
- if (smm->recursion_count == 1) {
- while (!brb_empty(&smm->uncommitted)) {
- struct block_op bop;
-
- r = brb_pop(&smm->uncommitted, &bop);
- if (r) {
- DMERR("bug in bop ring buffer");
- break;
- }
-
- r = commit_bop(smm, &bop);
- if (r)
- break;
- }
- }
+ if (smm->recursion_count == 1)
+ apply_bops(smm);
smm->recursion_count--;
@@ -704,6 +712,12 @@ static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
}
old_len = smm->begin;
+ r = apply_bops(smm);
+ if (r) {
+ DMERR("%s: apply_bops failed", __func__);
+ goto out;
+ }
+
r = sm_ll_commit(&smm->ll);
if (r)
goto out;
@@ -773,6 +787,12 @@ int dm_sm_metadata_create(struct dm_space_map *sm,
if (r)
return r;
+ r = apply_bops(smm);
+ if (r) {
+ DMERR("%s: apply_bops failed", __func__);
+ return r;
+ }
+
return sm_metadata_commit(sm);
}
diff --git a/kernel/drivers/md/raid1.c b/kernel/drivers/md/raid1.c
index 9157a29c8..5ce3cd5c4 100644
--- a/kernel/drivers/md/raid1.c
+++ b/kernel/drivers/md/raid1.c
@@ -336,7 +336,7 @@ static void raid1_end_read_request(struct bio *bio, int error)
spin_lock_irqsave(&conf->device_lock, flags);
if (r1_bio->mddev->degraded == conf->raid_disks ||
(r1_bio->mddev->degraded == conf->raid_disks-1 &&
- !test_bit(Faulty, &conf->mirrors[mirror].rdev->flags)))
+ test_bit(In_sync, &conf->mirrors[mirror].rdev->flags)))
uptodate = 1;
spin_unlock_irqrestore(&conf->device_lock, flags);
}
@@ -1475,6 +1475,7 @@ static void error(struct mddev *mddev, struct md_rdev *rdev)
{
char b[BDEVNAME_SIZE];
struct r1conf *conf = mddev->private;
+ unsigned long flags;
/*
* If it is not operational, then we have already marked it as dead
@@ -1494,14 +1495,13 @@ static void error(struct mddev *mddev, struct md_rdev *rdev)
return;
}
set_bit(Blocked, &rdev->flags);
+ spin_lock_irqsave(&conf->device_lock, flags);
if (test_and_clear_bit(In_sync, &rdev->flags)) {
- unsigned long flags;
- spin_lock_irqsave(&conf->device_lock, flags);
mddev->degraded++;
set_bit(Faulty, &rdev->flags);
- spin_unlock_irqrestore(&conf->device_lock, flags);
} else
set_bit(Faulty, &rdev->flags);
+ spin_unlock_irqrestore(&conf->device_lock, flags);
/*
* if recovery is running, make sure it aborts.
*/
@@ -1567,7 +1567,10 @@ static int raid1_spare_active(struct mddev *mddev)
* Find all failed disks within the RAID1 configuration
* and mark them readable.
* Called under mddev lock, so rcu protection not needed.
+ * device_lock used to avoid races with raid1_end_read_request
+ * which expects 'In_sync' flags and ->degraded to be consistent.
*/
+ spin_lock_irqsave(&conf->device_lock, flags);
for (i = 0; i < conf->raid_disks; i++) {
struct md_rdev *rdev = conf->mirrors[i].rdev;
struct md_rdev *repl = conf->mirrors[conf->raid_disks + i].rdev;
@@ -1598,7 +1601,6 @@ static int raid1_spare_active(struct mddev *mddev)
sysfs_notify_dirent_safe(rdev->sysfs_state);
}
}
- spin_lock_irqsave(&conf->device_lock, flags);
mddev->degraded -= count;
spin_unlock_irqrestore(&conf->device_lock, flags);