From e09b41010ba33a20a87472ee821fa407a5b8da36 Mon Sep 17 00:00:00 2001 From: José Pekkarinen Date: Mon, 11 Apr 2016 10:41:07 +0300 Subject: These changes are the raw update to linux-4.4.6-rt14. Kernel sources are taken from kernel.org, and rt patch from the rt wiki download page. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During the rebasing, the following patch collided: Force tick interrupt and get rid of softirq magic(I70131fb85). Collisions have been removed because its logic was found on the source already. Change-Id: I7f57a4081d9deaa0d9ccfc41a6c8daccdee3b769 Signed-off-by: José Pekkarinen --- kernel/fs/btrfs/volumes.c | 892 ++++++++++++++++++++++++++++++---------------- 1 file changed, 578 insertions(+), 314 deletions(-) (limited to 'kernel/fs/btrfs/volumes.c') diff --git a/kernel/fs/btrfs/volumes.c b/kernel/fs/btrfs/volumes.c index 174f5e1e0..9c62a6f97 100644 --- a/kernel/fs/btrfs/volumes.c +++ b/kernel/fs/btrfs/volumes.c @@ -42,6 +42,82 @@ #include "dev-replace.h" #include "sysfs.h" +const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = { + [BTRFS_RAID_RAID10] = { + .sub_stripes = 2, + .dev_stripes = 1, + .devs_max = 0, /* 0 == as many as possible */ + .devs_min = 4, + .tolerated_failures = 1, + .devs_increment = 2, + .ncopies = 2, + }, + [BTRFS_RAID_RAID1] = { + .sub_stripes = 1, + .dev_stripes = 1, + .devs_max = 2, + .devs_min = 2, + .tolerated_failures = 1, + .devs_increment = 2, + .ncopies = 2, + }, + [BTRFS_RAID_DUP] = { + .sub_stripes = 1, + .dev_stripes = 2, + .devs_max = 1, + .devs_min = 1, + .tolerated_failures = 0, + .devs_increment = 1, + .ncopies = 2, + }, + [BTRFS_RAID_RAID0] = { + .sub_stripes = 1, + .dev_stripes = 1, + .devs_max = 0, + .devs_min = 2, + .tolerated_failures = 0, + .devs_increment = 1, + .ncopies = 1, + }, + [BTRFS_RAID_SINGLE] = { + .sub_stripes = 1, + .dev_stripes = 1, + .devs_max = 1, + .devs_min = 1, + .tolerated_failures = 0, + .devs_increment = 1, + .ncopies = 1, + }, + [BTRFS_RAID_RAID5] = { + .sub_stripes = 1, + .dev_stripes = 1, + .devs_max = 0, + .devs_min = 2, + .tolerated_failures = 1, + .devs_increment = 1, + .ncopies = 2, + }, + [BTRFS_RAID_RAID6] = { + .sub_stripes = 1, + .dev_stripes = 1, + .devs_max = 0, + .devs_min = 3, + .tolerated_failures = 2, + .devs_increment = 1, + .ncopies = 3, + }, +}; + +const u64 const btrfs_raid_group[BTRFS_NR_RAID_TYPES] = { + [BTRFS_RAID_RAID10] = BTRFS_BLOCK_GROUP_RAID10, + [BTRFS_RAID_RAID1] = BTRFS_BLOCK_GROUP_RAID1, + [BTRFS_RAID_DUP] = BTRFS_BLOCK_GROUP_DUP, + [BTRFS_RAID_RAID0] = BTRFS_BLOCK_GROUP_RAID0, + [BTRFS_RAID_SINGLE] = 0, + [BTRFS_RAID_RAID5] = BTRFS_BLOCK_GROUP_RAID5, + [BTRFS_RAID_RAID6] = BTRFS_BLOCK_GROUP_RAID6, +}; + static int init_first_rw_device(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_device *device); @@ -52,6 +128,10 @@ static void btrfs_dev_stat_print_on_load(struct btrfs_device *device); DEFINE_MUTEX(uuid_mutex); static LIST_HEAD(fs_uuids); +struct list_head *btrfs_get_fs_uuids(void) +{ + return &fs_uuids; +} static struct btrfs_fs_devices *__alloc_fs_devices(void) { @@ -152,8 +232,9 @@ static struct btrfs_device *__alloc_device(void) spin_lock_init(&dev->reada_lock); atomic_set(&dev->reada_in_flight, 0); atomic_set(&dev->dev_stats_ccnt, 0); - INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_WAIT); - INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_WAIT); + btrfs_device_data_ordered_init(dev); + INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); + INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); return dev; } @@ -194,7 +275,6 @@ btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder, if (IS_ERR(*bdev)) { ret = PTR_ERR(*bdev); - printk(KERN_INFO "BTRFS: open %s failed\n", device_path); goto error; } @@ -207,8 +287,8 @@ btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder, } invalidate_bdev(*bdev); *bh = btrfs_read_dev_super(*bdev); - if (!*bh) { - ret = -EINVAL; + if (IS_ERR(*bh)) { + ret = PTR_ERR(*bh); blkdev_put(*bdev, flags); goto error; } @@ -341,11 +421,14 @@ loop_lock: pending = pending->bi_next; cur->bi_next = NULL; + /* + * atomic_dec_return implies a barrier for waitqueue_active + */ if (atomic_dec_return(&fs_info->nr_async_bios) < limit && waitqueue_active(&fs_info->async_submit_wait)) wake_up(&fs_info->async_submit_wait); - BUG_ON(atomic_read(&cur->bi_cnt) == 0); + BUG_ON(atomic_read(&cur->__bi_cnt) == 0); /* * if we're doing the sync list, record that our @@ -441,6 +524,61 @@ static void pending_bios_fn(struct btrfs_work *work) run_scheduled_bios(device); } + +void btrfs_free_stale_device(struct btrfs_device *cur_dev) +{ + struct btrfs_fs_devices *fs_devs; + struct btrfs_device *dev; + + if (!cur_dev->name) + return; + + list_for_each_entry(fs_devs, &fs_uuids, list) { + int del = 1; + + if (fs_devs->opened) + continue; + if (fs_devs->seeding) + continue; + + list_for_each_entry(dev, &fs_devs->devices, dev_list) { + + if (dev == cur_dev) + continue; + if (!dev->name) + continue; + + /* + * Todo: This won't be enough. What if the same device + * comes back (with new uuid and) with its mapper path? + * But for now, this does help as mostly an admin will + * either use mapper or non mapper path throughout. + */ + rcu_read_lock(); + del = strcmp(rcu_str_deref(dev->name), + rcu_str_deref(cur_dev->name)); + rcu_read_unlock(); + if (!del) + break; + } + + if (!del) { + /* delete the stale device */ + if (fs_devs->num_devices == 1) { + btrfs_sysfs_remove_fsid(fs_devs); + list_del(&fs_devs->list); + free_fs_devices(fs_devs); + } else { + fs_devs->num_devices--; + list_del(&dev->dev_list); + rcu_string_free(dev->name); + kfree(dev); + } + break; + } + } +} + /* * Add new device to list of registered devices * @@ -556,6 +694,12 @@ static noinline int device_list_add(const char *path, if (!fs_devices->opened) device->generation = found_transid; + /* + * if there is new btrfs on an already registered device, + * then remove the stale device entry. + */ + btrfs_free_stale_device(device); + *fs_devices_ret = fs_devices; return ret; @@ -693,43 +837,14 @@ static void free_device(struct rcu_head *head) static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) { - struct btrfs_device *device; + struct btrfs_device *device, *tmp; if (--fs_devices->opened > 0) return 0; mutex_lock(&fs_devices->device_list_mutex); - list_for_each_entry(device, &fs_devices->devices, dev_list) { - struct btrfs_device *new_device; - struct rcu_string *name; - - if (device->bdev) - fs_devices->open_devices--; - - if (device->writeable && - device->devid != BTRFS_DEV_REPLACE_DEVID) { - list_del_init(&device->dev_alloc_list); - fs_devices->rw_devices--; - } - - if (device->missing) - fs_devices->missing_devices--; - - new_device = btrfs_alloc_device(NULL, &device->devid, - device->uuid); - BUG_ON(IS_ERR(new_device)); /* -ENOMEM */ - - /* Safe because we are under uuid_mutex */ - if (device->name) { - name = rcu_string_strdup(device->name->str, GFP_NOFS); - BUG_ON(!name); /* -ENOMEM */ - rcu_assign_pointer(new_device->name, name); - } - - list_replace_rcu(&device->dev_list, &new_device->dev_list); - new_device->fs_devices = device->fs_devices; - - call_rcu(&device->rcu, free_device); + list_for_each_entry_safe(device, tmp, &fs_devices->devices, dev_list) { + btrfs_close_one_device(device); } mutex_unlock(&fs_devices->device_list_mutex); @@ -1051,15 +1166,18 @@ out: return ret; } -static int contains_pending_extent(struct btrfs_trans_handle *trans, +static int contains_pending_extent(struct btrfs_transaction *transaction, struct btrfs_device *device, u64 *start, u64 len) { + struct btrfs_fs_info *fs_info = device->dev_root->fs_info; struct extent_map *em; - struct list_head *search_list = &trans->transaction->pending_chunks; + struct list_head *search_list = &fs_info->pinned_chunks; int ret = 0; u64 physical_start = *start; + if (transaction) + search_list = &transaction->pending_chunks; again: list_for_each_entry(em, search_list, list) { struct map_lookup *map; @@ -1067,19 +1185,35 @@ again: map = (struct map_lookup *)em->bdev; for (i = 0; i < map->num_stripes; i++) { + u64 end; + if (map->stripes[i].dev != device) continue; if (map->stripes[i].physical >= physical_start + len || map->stripes[i].physical + em->orig_block_len <= physical_start) continue; - *start = map->stripes[i].physical + - em->orig_block_len; - ret = 1; + /* + * Make sure that while processing the pinned list we do + * not override our *start with a lower value, because + * we can have pinned chunks that fall within this + * device hole and that have lower physical addresses + * than the pending chunks we processed before. If we + * do not take this special care we can end up getting + * 2 pending chunks that start at the same physical + * device offsets because the end offset of a pinned + * chunk can be equal to the start offset of some + * pending chunk. + */ + end = map->stripes[i].physical + em->orig_block_len; + if (end > *start) { + *start = end; + ret = 1; + } } } - if (search_list == &trans->transaction->pending_chunks) { - search_list = &trans->root->fs_info->pinned_chunks; + if (search_list != &fs_info->pinned_chunks) { + search_list = &fs_info->pinned_chunks; goto again; } @@ -1088,12 +1222,13 @@ again: /* - * find_free_dev_extent - find free space in the specified device - * @device: the device which we search the free space in - * @num_bytes: the size of the free space that we need - * @start: store the start of the free space. - * @len: the size of the free space. that we find, or the size of the max - * free space if we don't find suitable free space + * find_free_dev_extent_start - find free space in the specified device + * @device: the device which we search the free space in + * @num_bytes: the size of the free space that we need + * @search_start: the position from which to begin the search + * @start: store the start of the free space. + * @len: the size of the free space. that we find, or the size + * of the max free space if we don't find suitable free space * * this uses a pretty simple search, the expectation is that it is * called very infrequently and that a given device has a small number @@ -1107,9 +1242,9 @@ again: * But if we don't find suitable free space, it is used to store the size of * the max free space. */ -int find_free_dev_extent(struct btrfs_trans_handle *trans, - struct btrfs_device *device, u64 num_bytes, - u64 *start, u64 *len) +int find_free_dev_extent_start(struct btrfs_transaction *transaction, + struct btrfs_device *device, u64 num_bytes, + u64 search_start, u64 *start, u64 *len) { struct btrfs_key key; struct btrfs_root *root = device->dev_root; @@ -1119,18 +1254,19 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans, u64 max_hole_start; u64 max_hole_size; u64 extent_end; - u64 search_start; u64 search_end = device->total_bytes; int ret; int slot; struct extent_buffer *l; + u64 min_search_start; - /* FIXME use last free of some kind */ - - /* we don't want to overwrite the superblock on the drive, - * so we make sure to start at an offset of at least 1MB + /* + * We don't want to overwrite the superblock on the drive nor any area + * used by the boot loader (grub for example), so we make sure to start + * at an offset of at least 1MB. */ - search_start = max(root->fs_info->alloc_start, 1024ull * 1024); + min_search_start = max(root->fs_info->alloc_start, 1024ull * 1024); + search_start = max(search_start, min_search_start); path = btrfs_alloc_path(); if (!path) @@ -1192,7 +1328,7 @@ again: * Have to check before we set max_hole_start, otherwise * we could end up sending back this offset anyway. */ - if (contains_pending_extent(trans, device, + if (contains_pending_extent(transaction, device, &search_start, hole_size)) { if (key.offset >= search_start) { @@ -1241,7 +1377,7 @@ next: if (search_end > search_start) { hole_size = search_end - search_start; - if (contains_pending_extent(trans, device, &search_start, + if (contains_pending_extent(transaction, device, &search_start, hole_size)) { btrfs_release_path(path); goto again; @@ -1267,6 +1403,15 @@ out: return ret; } +int find_free_dev_extent(struct btrfs_trans_handle *trans, + struct btrfs_device *device, u64 num_bytes, + u64 *start, u64 *len) +{ + /* FIXME use last free of some kind */ + return find_free_dev_extent_start(trans->transaction, device, + num_bytes, 0, start, len); +} + static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_device *device, u64 start, u64 *dev_extent_len) @@ -1307,7 +1452,7 @@ again: extent = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_extent); } else { - btrfs_error(root->fs_info, ret, "Slot search failed"); + btrfs_std_error(root->fs_info, ret, "Slot search failed"); goto out; } @@ -1315,10 +1460,10 @@ again: ret = btrfs_del_item(trans, root, path); if (ret) { - btrfs_error(root->fs_info, ret, + btrfs_std_error(root->fs_info, ret, "Failed to remove dev extent item"); } else { - trans->transaction->have_free_bgs = 1; + set_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags); } out: btrfs_free_path(path); @@ -1706,7 +1851,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) if (device->bdev) { device->fs_devices->open_devices--; /* remove sysfs entry */ - btrfs_kobj_rm_device(root->fs_info, device); + btrfs_sysfs_rm_device_link(root->fs_info->fs_devices, device); } call_rcu(&device->rcu, free_device); @@ -1829,7 +1974,7 @@ void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info, if (srcdev->writeable) { fs_devices->rw_devices--; /* zero out the old super if it is writable */ - btrfs_scratch_superblock(srcdev); + btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str); } if (srcdev->bdev) @@ -1875,8 +2020,11 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, mutex_lock(&uuid_mutex); WARN_ON(!tgtdev); mutex_lock(&fs_info->fs_devices->device_list_mutex); + + btrfs_sysfs_rm_device_link(fs_info->fs_devices, tgtdev); + if (tgtdev->bdev) { - btrfs_scratch_superblock(tgtdev); + btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str); fs_info->fs_devices->open_devices--; } fs_info->fs_devices->num_devices--; @@ -1943,10 +2091,8 @@ int btrfs_find_device_missing_or_by_path(struct btrfs_root *root, } } - if (!*device) { - btrfs_err(root->fs_info, "no missing device found"); - return -ENOENT; - } + if (!*device) + return BTRFS_ERROR_DEV_MISSING_NOT_FOUND; return 0; } else { @@ -2211,7 +2357,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) tmp + 1); /* add sysfs device entry */ - btrfs_kobj_add_device(root->fs_info, device); + btrfs_sysfs_add_device_link(root->fs_info->fs_devices, device); /* * we've got more storage, clear any full flags on the space @@ -2252,8 +2398,10 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) */ snprintf(fsid_buf, BTRFS_UUID_UNPARSED_SIZE, "%pU", root->fs_info->fsid); - if (kobject_rename(&root->fs_info->super_kobj, fsid_buf)) - goto error_trans; + if (kobject_rename(&root->fs_info->fs_devices->fsid_kobj, + fsid_buf)) + btrfs_warn(root->fs_info, + "sysfs: failed to create fsid for sprout"); } root->fs_info->num_tolerated_disk_barrier_failures = @@ -2269,7 +2417,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) ret = btrfs_relocate_sys_chunks(root); if (ret < 0) - btrfs_error(root->fs_info, ret, + btrfs_std_error(root->fs_info, ret, "Failed to relocate sys chunks after " "device initialization. This can be fixed " "using the \"btrfs balance\" command."); @@ -2289,7 +2437,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) error_trans: btrfs_end_transaction(trans, root); rcu_string_free(device->name); - btrfs_kobj_rm_device(root->fs_info, device); + btrfs_sysfs_rm_device_link(root->fs_info->fs_devices, device); kfree(device); error: blkdev_put(bdev, FMODE_EXCL); @@ -2514,7 +2662,7 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans, if (ret < 0) goto out; else if (ret > 0) { /* Logic error or corruption */ - btrfs_error(root->fs_info, -ENOENT, + btrfs_std_error(root->fs_info, -ENOENT, "Failed lookup while freeing chunk."); ret = -ENOENT; goto out; @@ -2522,7 +2670,7 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans, ret = btrfs_del_item(trans, root, path); if (ret < 0) - btrfs_error(root->fs_info, ret, + btrfs_std_error(root->fs_info, ret, "Failed to delete chunk item."); out: btrfs_free_path(path); @@ -2609,6 +2757,9 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, return -EINVAL; } map = (struct map_lookup *)em->bdev; + lock_chunks(root->fs_info->chunk_root); + check_system_chunk(trans, extent_root, map->type); + unlock_chunks(root->fs_info->chunk_root); for (i = 0; i < map->num_stripes; i++) { struct btrfs_device *device = map->stripes[i].dev; @@ -2667,9 +2818,7 @@ out: return ret; } -static int btrfs_relocate_chunk(struct btrfs_root *root, - u64 chunk_objectid, - u64 chunk_offset) +static int btrfs_relocate_chunk(struct btrfs_root *root, u64 chunk_offset) { struct btrfs_root *extent_root; struct btrfs_trans_handle *trans; @@ -2678,19 +2827,36 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, root = root->fs_info->chunk_root; extent_root = root->fs_info->extent_root; + /* + * Prevent races with automatic removal of unused block groups. + * After we relocate and before we remove the chunk with offset + * chunk_offset, automatic removal of the block group can kick in, + * resulting in a failure when calling btrfs_remove_chunk() below. + * + * Make sure to acquire this mutex before doing a tree search (dev + * or chunk trees) to find chunks. Otherwise the cleaner kthread might + * call btrfs_remove_chunk() (through btrfs_delete_unused_bgs()) after + * we release the path used to search the chunk/dev tree and before + * the current task acquires this mutex and calls us. + */ + ASSERT(mutex_is_locked(&root->fs_info->delete_unused_bgs_mutex)); + ret = btrfs_can_relocate(extent_root, chunk_offset); if (ret) return -ENOSPC; /* step one, relocate all the extents inside this chunk */ + btrfs_scrub_pause(root); ret = btrfs_relocate_block_group(extent_root, chunk_offset); + btrfs_scrub_continue(root); if (ret) return ret; - trans = btrfs_start_transaction(root, 0); + trans = btrfs_start_trans_remove_block_group(root->fs_info, + chunk_offset); if (IS_ERR(trans)) { ret = PTR_ERR(trans); - btrfs_std_error(root->fs_info, ret); + btrfs_std_error(root->fs_info, ret, NULL); return ret; } @@ -2726,13 +2892,18 @@ again: key.type = BTRFS_CHUNK_ITEM_KEY; while (1) { + mutex_lock(&root->fs_info->delete_unused_bgs_mutex); ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0); - if (ret < 0) + if (ret < 0) { + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); goto error; + } BUG_ON(ret == 0); /* Corruption */ ret = btrfs_previous_item(chunk_root, path, key.objectid, key.type); + if (ret) + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); if (ret < 0) goto error; if (ret > 0) @@ -2748,13 +2919,13 @@ again: if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) { ret = btrfs_relocate_chunk(chunk_root, - found_key.objectid, found_key.offset); if (ret == -ENOSPC) failed++; else BUG_ON(ret); } + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); if (found_key.offset == 0) break; @@ -2888,16 +3059,19 @@ static void update_balance_args(struct btrfs_balance_control *bctl) * (albeit full) chunks. */ if (!(bctl->data.flags & BTRFS_BALANCE_ARGS_USAGE) && + !(bctl->data.flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) && !(bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT)) { bctl->data.flags |= BTRFS_BALANCE_ARGS_USAGE; bctl->data.usage = 90; } if (!(bctl->sys.flags & BTRFS_BALANCE_ARGS_USAGE) && + !(bctl->sys.flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) && !(bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT)) { bctl->sys.flags |= BTRFS_BALANCE_ARGS_USAGE; bctl->sys.usage = 90; } if (!(bctl->meta.flags & BTRFS_BALANCE_ARGS_USAGE) && + !(bctl->meta.flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) && !(bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT)) { bctl->meta.flags |= BTRFS_BALANCE_ARGS_USAGE; bctl->meta.usage = 90; @@ -2949,8 +3123,41 @@ static int chunk_profiles_filter(u64 chunk_type, return 1; } -static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset, +static int chunk_usage_range_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset, struct btrfs_balance_args *bargs) +{ + struct btrfs_block_group_cache *cache; + u64 chunk_used; + u64 user_thresh_min; + u64 user_thresh_max; + int ret = 1; + + cache = btrfs_lookup_block_group(fs_info, chunk_offset); + chunk_used = btrfs_block_group_used(&cache->item); + + if (bargs->usage_min == 0) + user_thresh_min = 0; + else + user_thresh_min = div_factor_fine(cache->key.offset, + bargs->usage_min); + + if (bargs->usage_max == 0) + user_thresh_max = 1; + else if (bargs->usage_max > 100) + user_thresh_max = cache->key.offset; + else + user_thresh_max = div_factor_fine(cache->key.offset, + bargs->usage_max); + + if (user_thresh_min <= chunk_used && chunk_used < user_thresh_max) + ret = 0; + + btrfs_put_block_group(cache); + return ret; +} + +static int chunk_usage_filter(struct btrfs_fs_info *fs_info, + u64 chunk_offset, struct btrfs_balance_args *bargs) { struct btrfs_block_group_cache *cache; u64 chunk_used, user_thresh; @@ -2959,7 +3166,7 @@ static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset, cache = btrfs_lookup_block_group(fs_info, chunk_offset); chunk_used = btrfs_block_group_used(&cache->item); - if (bargs->usage == 0) + if (bargs->usage_min == 0) user_thresh = 1; else if (bargs->usage > 100) user_thresh = cache->key.offset; @@ -3049,6 +3256,19 @@ static int chunk_vrange_filter(struct extent_buffer *leaf, return 1; } +static int chunk_stripes_range_filter(struct extent_buffer *leaf, + struct btrfs_chunk *chunk, + struct btrfs_balance_args *bargs) +{ + int num_stripes = btrfs_chunk_num_stripes(leaf, chunk); + + if (bargs->stripes_min <= num_stripes + && num_stripes <= bargs->stripes_max) + return 0; + + return 1; +} + static int chunk_soft_convert_filter(u64 chunk_type, struct btrfs_balance_args *bargs) { @@ -3095,6 +3315,9 @@ static int should_balance_chunk(struct btrfs_root *root, if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE) && chunk_usage_filter(bctl->fs_info, chunk_offset, bargs)) { return 0; + } else if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) && + chunk_usage_range_filter(bctl->fs_info, chunk_offset, bargs)) { + return 0; } /* devid filter */ @@ -3115,6 +3338,12 @@ static int should_balance_chunk(struct btrfs_root *root, return 0; } + /* stripes filter */ + if ((bargs->flags & BTRFS_BALANCE_ARGS_STRIPES_RANGE) && + chunk_stripes_range_filter(leaf, chunk, bargs)) { + return 0; + } + /* soft profile changing mode */ if ((bargs->flags & BTRFS_BALANCE_ARGS_SOFT) && chunk_soft_convert_filter(chunk_type, bargs)) { @@ -3129,6 +3358,16 @@ static int should_balance_chunk(struct btrfs_root *root, return 0; else bargs->limit--; + } else if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMIT_RANGE)) { + /* + * Same logic as the 'limit' filter; the minimum cannot be + * determined here because we do not have the global informatoin + * about the count of all chunks that satisfy the filters. + */ + if (bargs->limit_max == 0) + return 0; + else + bargs->limit_max--; } return 1; @@ -3143,6 +3382,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) struct btrfs_device *device; u64 old_size; u64 size_to_free; + u64 chunk_type; struct btrfs_chunk *chunk; struct btrfs_path *path; struct btrfs_key key; @@ -3153,9 +3393,14 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) int ret; int enospc_errors = 0; bool counting = true; + /* The single value limit and min/max limits use the same bytes in the */ u64 limit_data = bctl->data.limit; u64 limit_meta = bctl->meta.limit; u64 limit_sys = bctl->sys.limit; + u32 count_data = 0; + u32 count_meta = 0; + u32 count_sys = 0; + int chunk_reserved = 0; /* step one make some room on all the devices */ devices = &fs_info->fs_devices->devices; @@ -3196,6 +3441,10 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) spin_unlock(&fs_info->balance_lock); again: if (!counting) { + /* + * The single value limit and min/max limits use the same bytes + * in the + */ bctl->data.limit = limit_data; bctl->meta.limit = limit_meta; bctl->sys.limit = limit_sys; @@ -3211,9 +3460,12 @@ again: goto error; } + mutex_lock(&fs_info->delete_unused_bgs_mutex); ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0); - if (ret < 0) + if (ret < 0) { + mutex_unlock(&fs_info->delete_unused_bgs_mutex); goto error; + } /* * this shouldn't happen, it means the last relocate @@ -3225,6 +3477,7 @@ again: ret = btrfs_previous_item(chunk_root, path, 0, BTRFS_CHUNK_ITEM_KEY); if (ret) { + mutex_unlock(&fs_info->delete_unused_bgs_mutex); ret = 0; break; } @@ -3233,10 +3486,13 @@ again: slot = path->slots[0]; btrfs_item_key_to_cpu(leaf, &found_key, slot); - if (found_key.objectid != key.objectid) + if (found_key.objectid != key.objectid) { + mutex_unlock(&fs_info->delete_unused_bgs_mutex); break; + } chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); + chunk_type = btrfs_chunk_type(leaf, chunk); if (!counting) { spin_lock(&fs_info->balance_lock); @@ -3246,20 +3502,64 @@ again: ret = should_balance_chunk(chunk_root, leaf, chunk, found_key.offset); + btrfs_release_path(path); - if (!ret) + if (!ret) { + mutex_unlock(&fs_info->delete_unused_bgs_mutex); goto loop; + } if (counting) { + mutex_unlock(&fs_info->delete_unused_bgs_mutex); spin_lock(&fs_info->balance_lock); bctl->stat.expected++; spin_unlock(&fs_info->balance_lock); + + if (chunk_type & BTRFS_BLOCK_GROUP_DATA) + count_data++; + else if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) + count_sys++; + else if (chunk_type & BTRFS_BLOCK_GROUP_METADATA) + count_meta++; + + goto loop; + } + + /* + * Apply limit_min filter, no need to check if the LIMITS + * filter is used, limit_min is 0 by default + */ + if (((chunk_type & BTRFS_BLOCK_GROUP_DATA) && + count_data < bctl->data.limit_min) + || ((chunk_type & BTRFS_BLOCK_GROUP_METADATA) && + count_meta < bctl->meta.limit_min) + || ((chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) && + count_sys < bctl->sys.limit_min)) { + mutex_unlock(&fs_info->delete_unused_bgs_mutex); goto loop; } + if ((chunk_type & BTRFS_BLOCK_GROUP_DATA) && !chunk_reserved) { + trans = btrfs_start_transaction(chunk_root, 0); + if (IS_ERR(trans)) { + mutex_unlock(&fs_info->delete_unused_bgs_mutex); + ret = PTR_ERR(trans); + goto error; + } + + ret = btrfs_force_chunk_alloc(trans, chunk_root, + BTRFS_BLOCK_GROUP_DATA); + btrfs_end_transaction(trans, chunk_root); + if (ret < 0) { + mutex_unlock(&fs_info->delete_unused_bgs_mutex); + goto error; + } + chunk_reserved = 1; + } + ret = btrfs_relocate_chunk(chunk_root, - found_key.objectid, found_key.offset); + mutex_unlock(&fs_info->delete_unused_bgs_mutex); if (ret && ret != -ENOSPC) goto error; if (ret == -ENOSPC) { @@ -3331,11 +3631,20 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info) unset_balance_control(fs_info); ret = del_balance_item(fs_info->tree_root); if (ret) - btrfs_std_error(fs_info, ret); + btrfs_std_error(fs_info, ret, NULL); atomic_set(&fs_info->mutually_exclusive_operation_running, 0); } +/* Non-zero return value signifies invalidity */ +static inline int validate_convert_profile(struct btrfs_balance_args *bctl_arg, + u64 allowed) +{ + return ((bctl_arg->flags & BTRFS_BALANCE_ARGS_CONVERT) && + (!alloc_profile_is_valid(bctl_arg->target, 1) || + (bctl_arg->target & ~allowed))); +} + /* * Should be called with both balance and volume mutexes held */ @@ -3393,27 +3702,21 @@ int btrfs_balance(struct btrfs_balance_control *bctl, if (num_devices > 3) allowed |= (BTRFS_BLOCK_GROUP_RAID10 | BTRFS_BLOCK_GROUP_RAID6); - if ((bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) && - (!alloc_profile_is_valid(bctl->data.target, 1) || - (bctl->data.target & ~allowed))) { + if (validate_convert_profile(&bctl->data, allowed)) { btrfs_err(fs_info, "unable to start balance with target " "data profile %llu", bctl->data.target); ret = -EINVAL; goto out; } - if ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) && - (!alloc_profile_is_valid(bctl->meta.target, 1) || - (bctl->meta.target & ~allowed))) { + if (validate_convert_profile(&bctl->meta, allowed)) { btrfs_err(fs_info, "unable to start balance with target metadata profile %llu", bctl->meta.target); ret = -EINVAL; goto out; } - if ((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) && - (!alloc_profile_is_valid(bctl->sys.target, 1) || - (bctl->sys.target & ~allowed))) { + if (validate_convert_profile(&bctl->sys, allowed)) { btrfs_err(fs_info, "unable to start balance with target system profile %llu", bctl->sys.target); @@ -3455,23 +3758,10 @@ int btrfs_balance(struct btrfs_balance_control *bctl, } while (read_seqretry(&fs_info->profiles_lock, seq)); if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) { - int num_tolerated_disk_barrier_failures; - u64 target = bctl->sys.target; - - num_tolerated_disk_barrier_failures = - btrfs_calc_num_tolerated_disk_barrier_failures(fs_info); - if (num_tolerated_disk_barrier_failures > 0 && - (target & - (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID0 | - BTRFS_AVAIL_ALLOC_BIT_SINGLE))) - num_tolerated_disk_barrier_failures = 0; - else if (num_tolerated_disk_barrier_failures > 1 && - (target & - (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10))) - num_tolerated_disk_barrier_failures = 1; - - fs_info->num_tolerated_disk_barrier_failures = - num_tolerated_disk_barrier_failures; + fs_info->num_tolerated_disk_barrier_failures = min( + btrfs_calc_num_tolerated_disk_barrier_failures(fs_info), + btrfs_get_num_tolerated_disk_barrier_failures( + bctl->sys.target)); } ret = insert_balance_item(fs_info->tree_root, bctl); @@ -3908,9 +4198,9 @@ int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info) uuid_root = btrfs_create_tree(trans, fs_info, BTRFS_UUID_TREE_OBJECTID); if (IS_ERR(uuid_root)) { - btrfs_abort_transaction(trans, tree_root, - PTR_ERR(uuid_root)); - return PTR_ERR(uuid_root); + ret = PTR_ERR(uuid_root); + btrfs_abort_transaction(trans, tree_root, ret); + return ret; } fs_info->uuid_root = uuid_root; @@ -3959,12 +4249,12 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) struct btrfs_dev_extent *dev_extent = NULL; struct btrfs_path *path; u64 length; - u64 chunk_objectid; u64 chunk_offset; int ret; int slot; int failed = 0; bool retried = false; + bool checked_pending_chunks = false; struct extent_buffer *l; struct btrfs_key key; struct btrfs_super_block *super_copy = root->fs_info->super_copy; @@ -3998,11 +4288,16 @@ again: key.type = BTRFS_DEV_EXTENT_KEY; do { + mutex_lock(&root->fs_info->delete_unused_bgs_mutex); ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) + if (ret < 0) { + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); goto done; + } ret = btrfs_previous_item(root, path, 0, key.type); + if (ret) + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); if (ret < 0) goto done; if (ret) { @@ -4016,6 +4311,7 @@ again: btrfs_item_key_to_cpu(l, &key, path->slots[0]); if (key.objectid != device->devid) { + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); btrfs_release_path(path); break; } @@ -4024,15 +4320,16 @@ again: length = btrfs_dev_extent_length(l, dev_extent); if (key.offset + length <= new_size) { + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); btrfs_release_path(path); break; } - chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent); btrfs_release_path(path); - ret = btrfs_relocate_chunk(root, chunk_objectid, chunk_offset); + ret = btrfs_relocate_chunk(root, chunk_offset); + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); if (ret && ret != -ENOSPC) goto done; if (ret == -ENOSPC) @@ -4045,15 +4342,6 @@ again: goto again; } else if (failed && retried) { ret = -ENOSPC; - lock_chunks(root); - - btrfs_device_set_total_bytes(device, old_size); - if (device->writeable) - device->fs_devices->total_rw_bytes += diff; - spin_lock(&root->fs_info->free_chunk_lock); - root->fs_info->free_chunk_space += diff; - spin_unlock(&root->fs_info->free_chunk_lock); - unlock_chunks(root); goto done; } @@ -4065,6 +4353,36 @@ again: } lock_chunks(root); + + /* + * We checked in the above loop all device extents that were already in + * the device tree. However before we have updated the device's + * total_bytes to the new size, we might have had chunk allocations that + * have not complete yet (new block groups attached to transaction + * handles), and therefore their device extents were not yet in the + * device tree and we missed them in the loop above. So if we have any + * pending chunk using a device extent that overlaps the device range + * that we can not use anymore, commit the current transaction and + * repeat the search on the device tree - this way we guarantee we will + * not have chunks using device extents that end beyond 'new_size'. + */ + if (!checked_pending_chunks) { + u64 start = new_size; + u64 len = old_size - new_size; + + if (contains_pending_extent(trans->transaction, device, + &start, len)) { + unlock_chunks(root); + checked_pending_chunks = true; + failed = 0; + retried = false; + ret = btrfs_commit_transaction(trans, root); + if (ret) + goto done; + goto again; + } + } + btrfs_device_set_disk_total_bytes(device, new_size); if (list_empty(&device->resized_list)) list_add_tail(&device->resized_list, @@ -4079,6 +4397,16 @@ again: btrfs_end_transaction(trans, root); done: btrfs_free_path(path); + if (ret) { + lock_chunks(root); + btrfs_device_set_total_bytes(device, old_size); + if (device->writeable) + device->fs_devices->total_rw_bytes += diff; + spin_lock(&root->fs_info->free_chunk_lock); + root->fs_info->free_chunk_space += diff; + spin_unlock(&root->fs_info->free_chunk_lock); + unlock_chunks(root); + } return ret; } @@ -4130,65 +4458,6 @@ static int btrfs_cmp_device_info(const void *a, const void *b) return 0; } -static const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = { - [BTRFS_RAID_RAID10] = { - .sub_stripes = 2, - .dev_stripes = 1, - .devs_max = 0, /* 0 == as many as possible */ - .devs_min = 4, - .devs_increment = 2, - .ncopies = 2, - }, - [BTRFS_RAID_RAID1] = { - .sub_stripes = 1, - .dev_stripes = 1, - .devs_max = 2, - .devs_min = 2, - .devs_increment = 2, - .ncopies = 2, - }, - [BTRFS_RAID_DUP] = { - .sub_stripes = 1, - .dev_stripes = 2, - .devs_max = 1, - .devs_min = 1, - .devs_increment = 1, - .ncopies = 2, - }, - [BTRFS_RAID_RAID0] = { - .sub_stripes = 1, - .dev_stripes = 1, - .devs_max = 0, - .devs_min = 2, - .devs_increment = 1, - .ncopies = 1, - }, - [BTRFS_RAID_SINGLE] = { - .sub_stripes = 1, - .dev_stripes = 1, - .devs_max = 1, - .devs_min = 1, - .devs_increment = 1, - .ncopies = 1, - }, - [BTRFS_RAID_RAID5] = { - .sub_stripes = 1, - .dev_stripes = 1, - .devs_max = 0, - .devs_min = 2, - .devs_increment = 1, - .ncopies = 2, - }, - [BTRFS_RAID_RAID6] = { - .sub_stripes = 1, - .dev_stripes = 1, - .devs_max = 0, - .devs_min = 3, - .devs_increment = 1, - .ncopies = 3, - }, -}; - static u32 find_raid56_stripe_len(u32 data_devices, u32 dev_stripe_target) { /* TODO allow them to set a preferred stripe size */ @@ -4914,9 +5183,7 @@ static struct btrfs_bio *alloc_btrfs_bio(int total_stripes, int real_stripes) * and the stripes */ sizeof(u64) * (total_stripes), - GFP_NOFS); - if (!bbio) - return NULL; + GFP_NOFS|__GFP_NOFAIL); atomic_set(&bbio->error, 0); atomic_set(&bbio->refs, 1); @@ -5584,26 +5851,26 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, return 0; } -static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio, int err) +static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio) { - if (likely(bbio->flags & BTRFS_BIO_ORIG_BIO_SUBMITTED)) - bio_endio_nodec(bio, err); - else - bio_endio(bio, err); + bio->bi_private = bbio->private; + bio->bi_end_io = bbio->end_io; + bio_endio(bio); + btrfs_put_bbio(bbio); } -static void btrfs_end_bio(struct bio *bio, int err) +static void btrfs_end_bio(struct bio *bio) { struct btrfs_bio *bbio = bio->bi_private; - struct btrfs_device *dev = bbio->stripes[0].dev; int is_orig_bio = 0; - if (err) { + if (bio->bi_error) { atomic_inc(&bbio->error); - if (err == -EIO || err == -EREMOTEIO) { + if (bio->bi_error == -EIO || bio->bi_error == -EREMOTEIO) { unsigned int stripe_index = btrfs_io_bio(bio)->stripe_index; + struct btrfs_device *dev; BUG_ON(stripe_index >= bbio->num_stripes); dev = bbio->stripes[stripe_index].dev; @@ -5633,24 +5900,21 @@ static void btrfs_end_bio(struct bio *bio, int err) bio = bbio->orig_bio; } - bio->bi_private = bbio->private; - bio->bi_end_io = bbio->end_io; btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; /* only send an error to the higher layers if it is * beyond the tolerance of the btrfs bio */ if (atomic_read(&bbio->error) > bbio->max_errors) { - err = -EIO; + bio->bi_error = -EIO; } else { /* * this bio is actually up to date, we didn't * go over the max number of errors */ - set_bit(BIO_UPTODATE, &bio->bi_flags); - err = 0; + bio->bi_error = 0; } - btrfs_end_bbio(bbio, bio, err); + btrfs_end_bbio(bbio, bio); } else if (!is_orig_bio) { bio_put(bio); } @@ -5671,7 +5935,7 @@ static noinline void btrfs_schedule_bio(struct btrfs_root *root, struct btrfs_pending_bios *pending_bios; if (device->missing || !device->bdev) { - bio_endio(bio, -EIO); + bio_io_error(bio); return; } @@ -5716,34 +5980,6 @@ static noinline void btrfs_schedule_bio(struct btrfs_root *root, &device->work); } -static int bio_size_ok(struct block_device *bdev, struct bio *bio, - sector_t sector) -{ - struct bio_vec *prev; - struct request_queue *q = bdev_get_queue(bdev); - unsigned int max_sectors = queue_max_sectors(q); - struct bvec_merge_data bvm = { - .bi_bdev = bdev, - .bi_sector = sector, - .bi_rw = bio->bi_rw, - }; - - if (WARN_ON(bio->bi_vcnt == 0)) - return 1; - - prev = &bio->bi_io_vec[bio->bi_vcnt - 1]; - if (bio_sectors(bio) > max_sectors) - return 0; - - if (!q->merge_bvec_fn) - return 1; - - bvm.bi_size = bio->bi_iter.bi_size - prev->bv_len; - if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len) - return 0; - return 1; -} - static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio, struct bio *bio, u64 physical, int dev_nr, int rw, int async) @@ -5777,38 +6013,6 @@ static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio, btrfsic_submit_bio(rw, bio); } -static int breakup_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio, - struct bio *first_bio, struct btrfs_device *dev, - int dev_nr, int rw, int async) -{ - struct bio_vec *bvec = first_bio->bi_io_vec; - struct bio *bio; - int nr_vecs = bio_get_nr_vecs(dev->bdev); - u64 physical = bbio->stripes[dev_nr].physical; - -again: - bio = btrfs_bio_alloc(dev->bdev, physical >> 9, nr_vecs, GFP_NOFS); - if (!bio) - return -ENOMEM; - - while (bvec <= (first_bio->bi_io_vec + first_bio->bi_vcnt - 1)) { - if (bio_add_page(bio, bvec->bv_page, bvec->bv_len, - bvec->bv_offset) < bvec->bv_len) { - u64 len = bio->bi_iter.bi_size; - - atomic_inc(&bbio->stripes_pending); - submit_stripe_bio(root, bbio, bio, physical, dev_nr, - rw, async); - physical += len; - goto again; - } - bvec++; - } - - submit_stripe_bio(root, bbio, bio, physical, dev_nr, rw, async); - return 0; -} - static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical) { atomic_inc(&bbio->error); @@ -5816,12 +6020,10 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical) /* Shoud be the original bio. */ WARN_ON(bio != bbio->orig_bio); - bio->bi_private = bbio->private; - bio->bi_end_io = bbio->end_io; btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; bio->bi_iter.bi_sector = logical >> 9; - - btrfs_end_bbio(bbio, bio, -EIO); + bio->bi_error = -EIO; + btrfs_end_bbio(bbio, bio); } } @@ -5883,25 +6085,11 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, continue; } - /* - * Check and see if we're ok with this bio based on it's size - * and offset with the given device. - */ - if (!bio_size_ok(dev->bdev, first_bio, - bbio->stripes[dev_nr].physical >> 9)) { - ret = breakup_stripe_bio(root, bbio, first_bio, dev, - dev_nr, rw, async_submit); - BUG_ON(ret); - continue; - } - if (dev_nr < total_devs - 1) { bio = btrfs_bio_clone(first_bio, GFP_NOFS); BUG_ON(!bio); /* -ENOMEM */ - } else { + } else bio = first_bio; - bbio->flags |= BTRFS_BIO_ORIG_BIO_SUBMITTED; - } submit_stripe_bio(root, bbio, bio, bbio->stripes[dev_nr].physical, dev_nr, rw, @@ -6078,6 +6266,8 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, free_extent_map(em); return -EIO; } + btrfs_warn(root->fs_info, "devid %llu uuid %pU is missing", + devid, uuid); } map->stripes[i].dev->in_fs_metadata = 1; } @@ -6197,10 +6387,11 @@ static int read_one_dev(struct btrfs_root *root, if (!btrfs_test_opt(root, DEGRADED)) return -EIO; - btrfs_warn(root->fs_info, "devid %llu missing", devid); device = add_missing_dev(root, fs_devices, devid, dev_uuid); if (!device) return -ENOMEM; + btrfs_warn(root->fs_info, "devid %llu uuid %pU missing", + devid, dev_uuid); } else { if (!device->bdev && !btrfs_test_opt(root, DEGRADED)) return -EIO; @@ -6322,6 +6513,14 @@ int btrfs_read_sys_array(struct btrfs_root *root) goto out_short_read; num_stripes = btrfs_chunk_num_stripes(sb, chunk); + if (!num_stripes) { + printk(KERN_ERR + "BTRFS: invalid number of stripes %u in sys_array at offset %u\n", + num_stripes, cur_offset); + ret = -EIO; + break; + } + len = btrfs_chunk_item_size(num_stripes); if (cur_offset + len > array_size) goto out_short_read; @@ -6517,8 +6716,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans, BUG_ON(!path); ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1); if (ret < 0) { - printk_in_rcu(KERN_WARNING "BTRFS: " - "error %d while searching for dev_stats item for device %s!\n", + btrfs_warn_in_rcu(dev_root->fs_info, + "error %d while searching for dev_stats item for device %s", ret, rcu_str_deref(device->name)); goto out; } @@ -6528,8 +6727,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans, /* need to delete old one and insert a new one */ ret = btrfs_del_item(trans, dev_root, path); if (ret != 0) { - printk_in_rcu(KERN_WARNING "BTRFS: " - "delete too small dev_stats item for device %s failed %d!\n", + btrfs_warn_in_rcu(dev_root->fs_info, + "delete too small dev_stats item for device %s failed %d", rcu_str_deref(device->name), ret); goto out; } @@ -6542,9 +6741,9 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans, ret = btrfs_insert_empty_item(trans, dev_root, path, &key, sizeof(*ptr)); if (ret < 0) { - printk_in_rcu(KERN_WARNING "BTRFS: " - "insert dev_stats item for device %s failed %d!\n", - rcu_str_deref(device->name), ret); + btrfs_warn_in_rcu(dev_root->fs_info, + "insert dev_stats item for device %s failed %d", + rcu_str_deref(device->name), ret); goto out; } } @@ -6598,8 +6797,8 @@ static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev) { if (!dev->dev_stats_valid) return; - printk_ratelimited_in_rcu(KERN_ERR "BTRFS: " - "bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", + btrfs_err_rl_in_rcu(dev->dev_root->fs_info, + "bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u", rcu_str_deref(dev->name), btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS), @@ -6618,8 +6817,8 @@ static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev) if (i == BTRFS_DEV_STAT_VALUES_MAX) return; /* all values == 0, suppress message */ - printk_in_rcu(KERN_INFO "BTRFS: " - "bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", + btrfs_info_in_rcu(dev->dev_root->fs_info, + "bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u", rcu_str_deref(dev->name), btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS), @@ -6663,22 +6862,34 @@ int btrfs_get_dev_stats(struct btrfs_root *root, return 0; } -int btrfs_scratch_superblock(struct btrfs_device *device) +void btrfs_scratch_superblocks(struct block_device *bdev, char *device_path) { struct buffer_head *bh; struct btrfs_super_block *disk_super; + int copy_num; - bh = btrfs_read_dev_super(device->bdev); - if (!bh) - return -EINVAL; - disk_super = (struct btrfs_super_block *)bh->b_data; + if (!bdev) + return; - memset(&disk_super->magic, 0, sizeof(disk_super->magic)); - set_buffer_dirty(bh); - sync_dirty_buffer(bh); - brelse(bh); + for (copy_num = 0; copy_num < BTRFS_SUPER_MIRROR_MAX; + copy_num++) { - return 0; + if (btrfs_read_dev_one_super(bdev, copy_num, &bh)) + continue; + + disk_super = (struct btrfs_super_block *)bh->b_data; + + memset(&disk_super->magic, 0, sizeof(disk_super->magic)); + set_buffer_dirty(bh); + sync_dirty_buffer(bh); + brelse(bh); + } + + /* Notify udev that device has changed */ + btrfs_kobject_uevent(bdev, KOBJ_CHANGE); + + /* Update ctime/mtime for device path for libblkid */ + update_dev_time(device_path); } /* @@ -6728,3 +6939,56 @@ void btrfs_update_commit_device_bytes_used(struct btrfs_root *root, } unlock_chunks(root); } + +void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info) +{ + struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; + while (fs_devices) { + fs_devices->fs_info = fs_info; + fs_devices = fs_devices->seed; + } +} + +void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info) +{ + struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; + while (fs_devices) { + fs_devices->fs_info = NULL; + fs_devices = fs_devices->seed; + } +} + +void btrfs_close_one_device(struct btrfs_device *device) +{ + struct btrfs_fs_devices *fs_devices = device->fs_devices; + struct btrfs_device *new_device; + struct rcu_string *name; + + if (device->bdev) + fs_devices->open_devices--; + + if (device->writeable && + device->devid != BTRFS_DEV_REPLACE_DEVID) { + list_del_init(&device->dev_alloc_list); + fs_devices->rw_devices--; + } + + if (device->missing) + fs_devices->missing_devices--; + + new_device = btrfs_alloc_device(NULL, &device->devid, + device->uuid); + BUG_ON(IS_ERR(new_device)); /* -ENOMEM */ + + /* Safe because we are under uuid_mutex */ + if (device->name) { + name = rcu_string_strdup(device->name->str, GFP_NOFS); + BUG_ON(!name); /* -ENOMEM */ + rcu_assign_pointer(new_device->name, name); + } + + list_replace_rcu(&device->dev_list, &new_device->dev_list); + new_device->fs_devices = device->fs_devices; + + call_rcu(&device->rcu, free_device); +} -- cgit 1.2.3-korg