summaryrefslogtreecommitdiffstats
path: root/kernel/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/fs/btrfs')
-rw-r--r--kernel/fs/btrfs/acl.c6
-rw-r--r--kernel/fs/btrfs/async-thread.c14
-rw-r--r--kernel/fs/btrfs/async-thread.h1
-rw-r--r--kernel/fs/btrfs/compression.c4
-rw-r--r--kernel/fs/btrfs/ctree.c5
-rw-r--r--kernel/fs/btrfs/ctree.h5
-rw-r--r--kernel/fs/btrfs/delayed-inode.c6
-rw-r--r--kernel/fs/btrfs/disk-io.c3
-rw-r--r--kernel/fs/btrfs/extent-tree.c58
-rw-r--r--kernel/fs/btrfs/extent_io.c15
-rw-r--r--kernel/fs/btrfs/file.c39
-rw-r--r--kernel/fs/btrfs/inode.c9
-rw-r--r--kernel/fs/btrfs/ioctl.c21
-rw-r--r--kernel/fs/btrfs/qgroup.c18
-rw-r--r--kernel/fs/btrfs/qgroup.h3
-rw-r--r--kernel/fs/btrfs/relocation.c27
-rw-r--r--kernel/fs/btrfs/super.c2
-rw-r--r--kernel/fs/btrfs/transaction.h2
-rw-r--r--kernel/fs/btrfs/tree-log.c161
19 files changed, 317 insertions, 82 deletions
diff --git a/kernel/fs/btrfs/acl.c b/kernel/fs/btrfs/acl.c
index 9a0124a95..fb3e64d37 100644
--- a/kernel/fs/btrfs/acl.c
+++ b/kernel/fs/btrfs/acl.c
@@ -83,11 +83,9 @@ static int __btrfs_set_acl(struct btrfs_trans_handle *trans,
case ACL_TYPE_ACCESS:
name = POSIX_ACL_XATTR_ACCESS;
if (acl) {
- ret = posix_acl_equiv_mode(acl, &inode->i_mode);
- if (ret < 0)
+ ret = posix_acl_update_mode(inode, &inode->i_mode, &acl);
+ if (ret)
return ret;
- if (ret == 0)
- acl = NULL;
}
ret = 0;
break;
diff --git a/kernel/fs/btrfs/async-thread.c b/kernel/fs/btrfs/async-thread.c
index 9aba42b78..a09264d8b 100644
--- a/kernel/fs/btrfs/async-thread.c
+++ b/kernel/fs/btrfs/async-thread.c
@@ -70,6 +70,20 @@ void btrfs_##name(struct work_struct *arg) \
normal_work_helper(work); \
}
+bool btrfs_workqueue_normal_congested(struct btrfs_workqueue *wq)
+{
+ /*
+ * We could compare wq->normal->pending with num_online_cpus()
+ * to support "thresh == NO_THRESHOLD" case, but it requires
+ * moving up atomic_inc/dec in thresh_queue/exec_hook. Let's
+ * postpone it until someone needs the support of that case.
+ */
+ if (wq->normal->thresh == NO_THRESHOLD)
+ return false;
+
+ return atomic_read(&wq->normal->pending) > wq->normal->thresh * 2;
+}
+
BTRFS_WORK_HELPER(worker_helper);
BTRFS_WORK_HELPER(delalloc_helper);
BTRFS_WORK_HELPER(flush_delalloc_helper);
diff --git a/kernel/fs/btrfs/async-thread.h b/kernel/fs/btrfs/async-thread.h
index ad4d0647d..8e1d6576d 100644
--- a/kernel/fs/btrfs/async-thread.h
+++ b/kernel/fs/btrfs/async-thread.h
@@ -80,4 +80,5 @@ void btrfs_queue_work(struct btrfs_workqueue *wq,
void btrfs_destroy_workqueue(struct btrfs_workqueue *wq);
void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max);
void btrfs_set_work_high_priority(struct btrfs_work *work);
+bool btrfs_workqueue_normal_congested(struct btrfs_workqueue *wq);
#endif
diff --git a/kernel/fs/btrfs/compression.c b/kernel/fs/btrfs/compression.c
index c473c42d7..bae05c5c7 100644
--- a/kernel/fs/btrfs/compression.c
+++ b/kernel/fs/btrfs/compression.c
@@ -694,7 +694,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
ret = btrfs_map_bio(root, READ, comp_bio,
mirror_num, 0);
if (ret) {
- bio->bi_error = ret;
+ comp_bio->bi_error = ret;
bio_endio(comp_bio);
}
@@ -723,7 +723,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0);
if (ret) {
- bio->bi_error = ret;
+ comp_bio->bi_error = ret;
bio_endio(comp_bio);
}
diff --git a/kernel/fs/btrfs/ctree.c b/kernel/fs/btrfs/ctree.c
index 5b8e235c4..0f2b7c622 100644
--- a/kernel/fs/btrfs/ctree.c
+++ b/kernel/fs/btrfs/ctree.c
@@ -1551,6 +1551,7 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
trans->transid, root->fs_info->generation);
if (!should_cow_block(trans, root, buf)) {
+ trans->dirty = true;
*cow_ret = buf;
return 0;
}
@@ -2773,8 +2774,10 @@ again:
* then we don't want to set the path blocking,
* so we test it here
*/
- if (!should_cow_block(trans, root, b))
+ if (!should_cow_block(trans, root, b)) {
+ trans->dirty = true;
goto cow_done;
+ }
/*
* must have write locks on this node and the
diff --git a/kernel/fs/btrfs/ctree.h b/kernel/fs/btrfs/ctree.h
index 385b449fd..e847573c6 100644
--- a/kernel/fs/btrfs/ctree.h
+++ b/kernel/fs/btrfs/ctree.h
@@ -1770,6 +1770,7 @@ struct btrfs_fs_info {
struct btrfs_workqueue *qgroup_rescan_workers;
struct completion qgroup_rescan_completion;
struct btrfs_work qgroup_rescan_work;
+ bool qgroup_rescan_running; /* protected by qgroup_rescan_lock */
/* filesystem state */
unsigned long fs_state;
@@ -3069,6 +3070,8 @@ btrfs_disk_balance_args_to_cpu(struct btrfs_balance_args *cpu,
cpu->target = le64_to_cpu(disk->target);
cpu->flags = le64_to_cpu(disk->flags);
cpu->limit = le64_to_cpu(disk->limit);
+ cpu->stripes_min = le32_to_cpu(disk->stripes_min);
+ cpu->stripes_max = le32_to_cpu(disk->stripes_max);
}
static inline void
@@ -3087,6 +3090,8 @@ btrfs_cpu_balance_args_to_disk(struct btrfs_disk_balance_args *disk,
disk->target = cpu_to_le64(cpu->target);
disk->flags = cpu_to_le64(cpu->flags);
disk->limit = cpu_to_le64(cpu->limit);
+ disk->stripes_min = cpu_to_le32(cpu->stripes_min);
+ disk->stripes_max = cpu_to_le32(cpu->stripes_max);
}
/* struct btrfs_super_block */
diff --git a/kernel/fs/btrfs/delayed-inode.c b/kernel/fs/btrfs/delayed-inode.c
index 02b934d0e..09fa5af97 100644
--- a/kernel/fs/btrfs/delayed-inode.c
+++ b/kernel/fs/btrfs/delayed-inode.c
@@ -1375,7 +1375,8 @@ release_path:
total_done++;
btrfs_release_prepared_delayed_node(delayed_node);
- if (async_work->nr == 0 || total_done < async_work->nr)
+ if ((async_work->nr == 0 && total_done < BTRFS_DELAYED_WRITEBACK) ||
+ total_done < async_work->nr)
goto again;
free_path:
@@ -1391,7 +1392,8 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
{
struct btrfs_async_delayed_work *async_work;
- if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
+ if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND ||
+ btrfs_workqueue_normal_congested(fs_info->delayed_workers))
return 0;
async_work = kmalloc(sizeof(*async_work), GFP_NOFS);
diff --git a/kernel/fs/btrfs/disk-io.c b/kernel/fs/btrfs/disk-io.c
index 41fb43183..85b207d19 100644
--- a/kernel/fs/btrfs/disk-io.c
+++ b/kernel/fs/btrfs/disk-io.c
@@ -2276,6 +2276,7 @@ static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
fs_info->quota_enabled = 0;
fs_info->pending_quota_state = 0;
fs_info->qgroup_ulist = NULL;
+ fs_info->qgroup_rescan_running = false;
mutex_init(&fs_info->qgroup_rescan_lock);
}
@@ -3811,7 +3812,7 @@ void close_ctree(struct btrfs_root *root)
smp_mb();
/* wait for the qgroup rescan worker to stop */
- btrfs_qgroup_wait_for_completion(fs_info);
+ btrfs_qgroup_wait_for_completion(fs_info, false);
/* wait for the uuid_scan task to finish */
down(&fs_info->uuid_tree_rescan_sem);
diff --git a/kernel/fs/btrfs/extent-tree.c b/kernel/fs/btrfs/extent-tree.c
index 2368cac11..2a2e37039 100644
--- a/kernel/fs/btrfs/extent-tree.c
+++ b/kernel/fs/btrfs/extent-tree.c
@@ -2520,11 +2520,11 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
if (ref && ref->seq &&
btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) {
spin_unlock(&locked_ref->lock);
- btrfs_delayed_ref_unlock(locked_ref);
spin_lock(&delayed_refs->lock);
locked_ref->processing = 0;
delayed_refs->num_heads_ready++;
spin_unlock(&delayed_refs->lock);
+ btrfs_delayed_ref_unlock(locked_ref);
locked_ref = NULL;
cond_resched();
count++;
@@ -2570,7 +2570,10 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
*/
if (must_insert_reserved)
locked_ref->must_insert_reserved = 1;
+ spin_lock(&delayed_refs->lock);
locked_ref->processing = 0;
+ delayed_refs->num_heads_ready++;
+ spin_unlock(&delayed_refs->lock);
btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
btrfs_delayed_ref_unlock(locked_ref);
return ret;
@@ -7856,7 +7859,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
buf->start + buf->len - 1, GFP_NOFS);
}
- trans->blocks_used++;
+ trans->dirty = true;
/* this returns a buffer locked for blocking */
return buf;
}
@@ -8486,14 +8489,13 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
ret = btrfs_lookup_extent_info(trans, root, bytenr, level - 1, 1,
&wc->refs[level - 1],
&wc->flags[level - 1]);
- if (ret < 0) {
- btrfs_tree_unlock(next);
- return ret;
- }
+ if (ret < 0)
+ goto out_unlock;
if (unlikely(wc->refs[level - 1] == 0)) {
btrfs_err(root->fs_info, "Missing references.");
- BUG();
+ ret = -EIO;
+ goto out_unlock;
}
*lookup_info = 0;
@@ -8545,7 +8547,12 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
}
level--;
- BUG_ON(level != btrfs_header_level(next));
+ ASSERT(level == btrfs_header_level(next));
+ if (level != btrfs_header_level(next)) {
+ btrfs_err(root->fs_info, "mismatched level");
+ ret = -EIO;
+ goto out_unlock;
+ }
path->nodes[level] = next;
path->slots[level] = 0;
path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
@@ -8560,8 +8567,15 @@ skip:
if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
parent = path->nodes[level]->start;
} else {
- BUG_ON(root->root_key.objectid !=
+ ASSERT(root->root_key.objectid ==
btrfs_header_owner(path->nodes[level]));
+ if (root->root_key.objectid !=
+ btrfs_header_owner(path->nodes[level])) {
+ btrfs_err(root->fs_info,
+ "mismatched block owner");
+ ret = -EIO;
+ goto out_unlock;
+ }
parent = 0;
}
@@ -8578,12 +8592,18 @@ skip:
}
ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
root->root_key.objectid, level - 1, 0);
- BUG_ON(ret); /* -ENOMEM */
+ if (ret)
+ goto out_unlock;
}
+
+ *lookup_info = 1;
+ ret = 1;
+
+out_unlock:
btrfs_tree_unlock(next);
free_extent_buffer(next);
- *lookup_info = 1;
- return 1;
+
+ return ret;
}
/*
@@ -9686,6 +9706,11 @@ int btrfs_read_block_groups(struct btrfs_root *root)
struct extent_buffer *leaf;
int need_clear = 0;
u64 cache_gen;
+ u64 feature;
+ int mixed;
+
+ feature = btrfs_super_incompat_flags(info->super_copy);
+ mixed = !!(feature & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS);
root = info->extent_root;
key.objectid = 0;
@@ -9739,6 +9764,15 @@ int btrfs_read_block_groups(struct btrfs_root *root)
btrfs_item_ptr_offset(leaf, path->slots[0]),
sizeof(cache->item));
cache->flags = btrfs_block_group_flags(&cache->item);
+ if (!mixed &&
+ ((cache->flags & BTRFS_BLOCK_GROUP_METADATA) &&
+ (cache->flags & BTRFS_BLOCK_GROUP_DATA))) {
+ btrfs_err(info,
+"bg %llu is a mixed block group but filesystem hasn't enabled mixed block groups",
+ cache->key.objectid);
+ ret = -EINVAL;
+ goto error;
+ }
key.objectid = found_key.objectid + found_key.offset;
btrfs_release_path(path);
diff --git a/kernel/fs/btrfs/extent_io.c b/kernel/fs/btrfs/extent_io.c
index 9abe18763..e767f347f 100644
--- a/kernel/fs/btrfs/extent_io.c
+++ b/kernel/fs/btrfs/extent_io.c
@@ -2786,12 +2786,6 @@ struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask)
btrfs_bio->csum = NULL;
btrfs_bio->csum_allocated = NULL;
btrfs_bio->end_io = NULL;
-
-#ifdef CONFIG_BLK_CGROUP
- /* FIXME, put this into bio_clone_bioset */
- if (bio->bi_css)
- bio_associate_blkcg(new, bio->bi_css);
-#endif
}
return new;
}
@@ -5300,11 +5294,20 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
lock_page(page);
}
locked_pages++;
+ }
+ /*
+ * We need to firstly lock all pages to make sure that
+ * the uptodate bit of our pages won't be affected by
+ * clear_extent_buffer_uptodate().
+ */
+ for (i = start_i; i < num_pages; i++) {
+ page = eb->pages[i];
if (!PageUptodate(page)) {
num_reads++;
all_uptodate = 0;
}
}
+
if (all_uptodate) {
if (start_i == 0)
set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
diff --git a/kernel/fs/btrfs/file.c b/kernel/fs/btrfs/file.c
index 0f09526aa..353f4bae6 100644
--- a/kernel/fs/btrfs/file.c
+++ b/kernel/fs/btrfs/file.c
@@ -1526,27 +1526,24 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
- if (BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
- BTRFS_INODE_PREALLOC)) {
- ret = check_can_nocow(inode, pos, &write_bytes);
- if (ret < 0)
- break;
- if (ret > 0) {
- /*
- * For nodata cow case, no need to reserve
- * data space.
- */
- only_release_metadata = true;
- /*
- * our prealloc extent may be smaller than
- * write_bytes, so scale down.
- */
- num_pages = DIV_ROUND_UP(write_bytes + offset,
- PAGE_CACHE_SIZE);
- reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
- goto reserve_metadata;
- }
+ if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
+ BTRFS_INODE_PREALLOC)) &&
+ check_can_nocow(inode, pos, &write_bytes) > 0) {
+ /*
+ * For nodata cow case, no need to reserve
+ * data space.
+ */
+ only_release_metadata = true;
+ /*
+ * our prealloc extent may be smaller than
+ * write_bytes, so scale down.
+ */
+ num_pages = DIV_ROUND_UP(write_bytes + offset,
+ PAGE_CACHE_SIZE);
+ reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
+ goto reserve_metadata;
}
+
ret = btrfs_check_data_free_space(inode, pos, write_bytes);
if (ret < 0)
break;
@@ -1885,7 +1882,7 @@ static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end)
*/
int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
- struct dentry *dentry = file->f_path.dentry;
+ struct dentry *dentry = file_dentry(file);
struct inode *inode = d_inode(dentry);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
diff --git a/kernel/fs/btrfs/inode.c b/kernel/fs/btrfs/inode.c
index 4bc9dbf29..3cff6523f 100644
--- a/kernel/fs/btrfs/inode.c
+++ b/kernel/fs/btrfs/inode.c
@@ -8691,9 +8691,14 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
* So even we call qgroup_free_data(), it won't decrease reserved
* space.
* 2) Not written to disk
- * This means the reserved space should be freed here.
+ * This means the reserved space should be freed here. However,
+ * if a truncate invalidates the page (by clearing PageDirty)
+ * and the page is accounted for while allocating extent
+ * in btrfs_check_data_free_space() we let delayed_ref to
+ * free the entire extent.
*/
- btrfs_qgroup_free_data(inode, page_start, PAGE_CACHE_SIZE);
+ if (PageDirty(page))
+ btrfs_qgroup_free_data(inode, page_start, PAGE_SIZE);
if (!inode_evicting) {
clear_extent_bit(tree, page_start, page_end,
EXTENT_LOCKED | EXTENT_DIRTY |
diff --git a/kernel/fs/btrfs/ioctl.c b/kernel/fs/btrfs/ioctl.c
index f07d01bc4..317b99acd 100644
--- a/kernel/fs/btrfs/ioctl.c
+++ b/kernel/fs/btrfs/ioctl.c
@@ -1619,6 +1619,9 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
int namelen;
int ret = 0;
+ if (!S_ISDIR(file_inode(file)->i_mode))
+ return -ENOTDIR;
+
ret = mnt_want_write_file(file);
if (ret)
goto out;
@@ -1648,7 +1651,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
src_inode = file_inode(src.file);
if (src_inode->i_sb != file_inode(file)->i_sb) {
- btrfs_info(BTRFS_I(src_inode)->root->fs_info,
+ btrfs_info(BTRFS_I(file_inode(file))->root->fs_info,
"Snapshot src from another FS");
ret = -EXDEV;
} else if (!inode_owner_or_capable(src_inode)) {
@@ -1676,6 +1679,9 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
struct btrfs_ioctl_vol_args *vol_args;
int ret;
+ if (!S_ISDIR(file_inode(file)->i_mode))
+ return -ENOTDIR;
+
vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args))
return PTR_ERR(vol_args);
@@ -1699,6 +1705,9 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
bool readonly = false;
struct btrfs_qgroup_inherit *inherit = NULL;
+ if (!S_ISDIR(file_inode(file)->i_mode))
+ return -ENOTDIR;
+
vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args))
return PTR_ERR(vol_args);
@@ -2345,6 +2354,9 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
int ret;
int err = 0;
+ if (!S_ISDIR(dir->i_mode))
+ return -ENOTDIR;
+
vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args))
return PTR_ERR(vol_args);
@@ -3813,6 +3825,11 @@ process_slot:
}
btrfs_release_path(path);
key.offset = next_key_min_offset;
+
+ if (fatal_signal_pending(current)) {
+ ret = -EINTR;
+ goto out;
+ }
}
ret = 0;
@@ -5121,7 +5138,7 @@ static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- return btrfs_qgroup_wait_for_completion(root->fs_info);
+ return btrfs_qgroup_wait_for_completion(root->fs_info, true);
}
static long _btrfs_ioctl_set_received_subvol(struct file *file,
diff --git a/kernel/fs/btrfs/qgroup.c b/kernel/fs/btrfs/qgroup.c
index 5279fdae7..88d9b66e2 100644
--- a/kernel/fs/btrfs/qgroup.c
+++ b/kernel/fs/btrfs/qgroup.c
@@ -995,7 +995,7 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
goto out;
fs_info->quota_enabled = 0;
fs_info->pending_quota_state = 0;
- btrfs_qgroup_wait_for_completion(fs_info);
+ btrfs_qgroup_wait_for_completion(fs_info, false);
spin_lock(&fs_info->qgroup_lock);
quota_root = fs_info->quota_root;
fs_info->quota_root = NULL;
@@ -2349,6 +2349,9 @@ out:
}
done:
+ mutex_lock(&fs_info->qgroup_rescan_lock);
+ fs_info->qgroup_rescan_running = false;
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
complete_all(&fs_info->qgroup_rescan_completion);
}
@@ -2390,6 +2393,7 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
sizeof(fs_info->qgroup_rescan_progress));
fs_info->qgroup_rescan_progress.objectid = progress_objectid;
init_completion(&fs_info->qgroup_rescan_completion);
+ fs_info->qgroup_rescan_running = true;
spin_unlock(&fs_info->qgroup_lock);
mutex_unlock(&fs_info->qgroup_rescan_lock);
@@ -2467,20 +2471,26 @@ btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
return 0;
}
-int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info)
+int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
+ bool interruptible)
{
int running;
int ret = 0;
mutex_lock(&fs_info->qgroup_rescan_lock);
spin_lock(&fs_info->qgroup_lock);
- running = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN;
+ running = fs_info->qgroup_rescan_running;
spin_unlock(&fs_info->qgroup_lock);
mutex_unlock(&fs_info->qgroup_rescan_lock);
- if (running)
+ if (!running)
+ return 0;
+
+ if (interruptible)
ret = wait_for_completion_interruptible(
&fs_info->qgroup_rescan_completion);
+ else
+ wait_for_completion(&fs_info->qgroup_rescan_completion);
return ret;
}
diff --git a/kernel/fs/btrfs/qgroup.h b/kernel/fs/btrfs/qgroup.h
index ecb2c143e..3d73e4c9c 100644
--- a/kernel/fs/btrfs/qgroup.h
+++ b/kernel/fs/btrfs/qgroup.h
@@ -46,7 +46,8 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
-int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info);
+int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
+ bool interruptible);
int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 src, u64 dst);
int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
diff --git a/kernel/fs/btrfs/relocation.c b/kernel/fs/btrfs/relocation.c
index b4ca5454e..8ca9aa929 100644
--- a/kernel/fs/btrfs/relocation.c
+++ b/kernel/fs/btrfs/relocation.c
@@ -921,9 +921,16 @@ again:
path2->slots[level]--;
eb = path2->nodes[level];
- WARN_ON(btrfs_node_blockptr(eb, path2->slots[level]) !=
- cur->bytenr);
-
+ if (btrfs_node_blockptr(eb, path2->slots[level]) !=
+ cur->bytenr) {
+ btrfs_err(root->fs_info,
+ "couldn't find block (%llu) (level %d) in tree (%llu) with key (%llu %u %llu)",
+ cur->bytenr, level - 1, root->objectid,
+ node_key->objectid, node_key->type,
+ node_key->offset);
+ err = -ENOENT;
+ goto out;
+ }
lower = cur;
need_check = true;
for (; level < BTRFS_MAX_LEVEL; level++) {
@@ -2343,6 +2350,10 @@ void free_reloc_roots(struct list_head *list)
while (!list_empty(list)) {
reloc_root = list_entry(list->next, struct btrfs_root,
root_list);
+ free_extent_buffer(reloc_root->node);
+ free_extent_buffer(reloc_root->commit_root);
+ reloc_root->node = NULL;
+ reloc_root->commit_root = NULL;
__del_reloc_root(reloc_root);
}
}
@@ -2676,11 +2687,15 @@ static int do_relocation(struct btrfs_trans_handle *trans,
if (!upper->eb) {
ret = btrfs_search_slot(trans, root, key, path, 0, 1);
- if (ret < 0) {
- err = ret;
+ if (ret) {
+ if (ret < 0)
+ err = ret;
+ else
+ err = -ENOENT;
+
+ btrfs_release_path(path);
break;
}
- BUG_ON(ret > 0);
if (!upper->eb) {
upper->eb = path->nodes[upper->level];
diff --git a/kernel/fs/btrfs/super.c b/kernel/fs/btrfs/super.c
index fe609b81d..5d34a062c 100644
--- a/kernel/fs/btrfs/super.c
+++ b/kernel/fs/btrfs/super.c
@@ -239,7 +239,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
trans->aborted = errno;
/* Nothing used. The other threads that have joined this
* transaction may be able to continue. */
- if (!trans->blocks_used && list_empty(&trans->new_bgs)) {
+ if (!trans->dirty && list_empty(&trans->new_bgs)) {
const char *errstr;
errstr = btrfs_decode_error(errno);
diff --git a/kernel/fs/btrfs/transaction.h b/kernel/fs/btrfs/transaction.h
index 64c8221b6..1e872923e 100644
--- a/kernel/fs/btrfs/transaction.h
+++ b/kernel/fs/btrfs/transaction.h
@@ -110,7 +110,6 @@ struct btrfs_trans_handle {
u64 chunk_bytes_reserved;
unsigned long use_count;
unsigned long blocks_reserved;
- unsigned long blocks_used;
unsigned long delayed_ref_updates;
struct btrfs_transaction *transaction;
struct btrfs_block_rsv *block_rsv;
@@ -121,6 +120,7 @@ struct btrfs_trans_handle {
bool can_flush_pending_bgs;
bool reloc_reserved;
bool sync;
+ bool dirty;
unsigned int type;
/*
* this root is only needed to validate that the root passed to
diff --git a/kernel/fs/btrfs/tree-log.c b/kernel/fs/btrfs/tree-log.c
index 323e12cc9..ee7832e2d 100644
--- a/kernel/fs/btrfs/tree-log.c
+++ b/kernel/fs/btrfs/tree-log.c
@@ -1923,12 +1923,11 @@ static noinline int find_dir_range(struct btrfs_root *root,
next:
/* check the next slot in the tree to see if it is a valid item */
nritems = btrfs_header_nritems(path->nodes[0]);
+ path->slots[0]++;
if (path->slots[0] >= nritems) {
ret = btrfs_next_leaf(root, path);
if (ret)
goto out;
- } else {
- path->slots[0]++;
}
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
@@ -2696,14 +2695,12 @@ static inline void btrfs_remove_all_log_ctxs(struct btrfs_root *root,
int index, int error)
{
struct btrfs_log_ctx *ctx;
+ struct btrfs_log_ctx *safe;
- if (!error) {
- INIT_LIST_HEAD(&root->log_ctxs[index]);
- return;
- }
-
- list_for_each_entry(ctx, &root->log_ctxs[index], list)
+ list_for_each_entry_safe(ctx, safe, &root->log_ctxs[index], list) {
+ list_del_init(&ctx->list);
ctx->log_ret = error;
+ }
INIT_LIST_HEAD(&root->log_ctxs[index]);
}
@@ -2850,6 +2847,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
if (log_root_tree->log_transid_committed >= root_log_ctx.log_transid) {
blk_finish_plug(&plug);
+ list_del_init(&root_log_ctx.list);
mutex_unlock(&log_root_tree->log_mutex);
ret = root_log_ctx.log_ret;
goto out;
@@ -2943,13 +2941,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
mutex_unlock(&root->log_mutex);
out_wake_log_root:
- /*
- * We needn't get log_mutex here because we are sure all
- * the other tasks are blocked.
- */
+ mutex_lock(&log_root_tree->log_mutex);
btrfs_remove_all_log_ctxs(log_root_tree, index2, ret);
- mutex_lock(&log_root_tree->log_mutex);
log_root_tree->log_transid_committed++;
atomic_set(&log_root_tree->log_commit[index2], 0);
mutex_unlock(&log_root_tree->log_mutex);
@@ -2960,10 +2954,8 @@ out_wake_log_root:
if (waitqueue_active(&log_root_tree->log_commit_wait[index2]))
wake_up(&log_root_tree->log_commit_wait[index2]);
out:
- /* See above. */
- btrfs_remove_all_log_ctxs(root, index1, ret);
-
mutex_lock(&root->log_mutex);
+ btrfs_remove_all_log_ctxs(root, index1, ret);
root->log_transid_committed++;
atomic_set(&root->log_commit[index1], 0);
mutex_unlock(&root->log_mutex);
@@ -4406,6 +4398,127 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
return ret;
}
+/*
+ * When we are logging a new inode X, check if it doesn't have a reference that
+ * matches the reference from some other inode Y created in a past transaction
+ * and that was renamed in the current transaction. If we don't do this, then at
+ * log replay time we can lose inode Y (and all its files if it's a directory):
+ *
+ * mkdir /mnt/x
+ * echo "hello world" > /mnt/x/foobar
+ * sync
+ * mv /mnt/x /mnt/y
+ * mkdir /mnt/x # or touch /mnt/x
+ * xfs_io -c fsync /mnt/x
+ * <power fail>
+ * mount fs, trigger log replay
+ *
+ * After the log replay procedure, we would lose the first directory and all its
+ * files (file foobar).
+ * For the case where inode Y is not a directory we simply end up losing it:
+ *
+ * echo "123" > /mnt/foo
+ * sync
+ * mv /mnt/foo /mnt/bar
+ * echo "abc" > /mnt/foo
+ * xfs_io -c fsync /mnt/foo
+ * <power fail>
+ *
+ * We also need this for cases where a snapshot entry is replaced by some other
+ * entry (file or directory) otherwise we end up with an unreplayable log due to
+ * attempts to delete the snapshot entry (entry of type BTRFS_ROOT_ITEM_KEY) as
+ * if it were a regular entry:
+ *
+ * mkdir /mnt/x
+ * btrfs subvolume snapshot /mnt /mnt/x/snap
+ * btrfs subvolume delete /mnt/x/snap
+ * rmdir /mnt/x
+ * mkdir /mnt/x
+ * fsync /mnt/x or fsync some new file inside it
+ * <power fail>
+ *
+ * The snapshot delete, rmdir of x, mkdir of a new x and the fsync all happen in
+ * the same transaction.
+ */
+static int btrfs_check_ref_name_override(struct extent_buffer *eb,
+ const int slot,
+ const struct btrfs_key *key,
+ struct inode *inode)
+{
+ int ret;
+ struct btrfs_path *search_path;
+ char *name = NULL;
+ u32 name_len = 0;
+ u32 item_size = btrfs_item_size_nr(eb, slot);
+ u32 cur_offset = 0;
+ unsigned long ptr = btrfs_item_ptr_offset(eb, slot);
+
+ search_path = btrfs_alloc_path();
+ if (!search_path)
+ return -ENOMEM;
+ search_path->search_commit_root = 1;
+ search_path->skip_locking = 1;
+
+ while (cur_offset < item_size) {
+ u64 parent;
+ u32 this_name_len;
+ u32 this_len;
+ unsigned long name_ptr;
+ struct btrfs_dir_item *di;
+
+ if (key->type == BTRFS_INODE_REF_KEY) {
+ struct btrfs_inode_ref *iref;
+
+ iref = (struct btrfs_inode_ref *)(ptr + cur_offset);
+ parent = key->offset;
+ this_name_len = btrfs_inode_ref_name_len(eb, iref);
+ name_ptr = (unsigned long)(iref + 1);
+ this_len = sizeof(*iref) + this_name_len;
+ } else {
+ struct btrfs_inode_extref *extref;
+
+ extref = (struct btrfs_inode_extref *)(ptr +
+ cur_offset);
+ parent = btrfs_inode_extref_parent(eb, extref);
+ this_name_len = btrfs_inode_extref_name_len(eb, extref);
+ name_ptr = (unsigned long)&extref->name;
+ this_len = sizeof(*extref) + this_name_len;
+ }
+
+ if (this_name_len > name_len) {
+ char *new_name;
+
+ new_name = krealloc(name, this_name_len, GFP_NOFS);
+ if (!new_name) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ name_len = this_name_len;
+ name = new_name;
+ }
+
+ read_extent_buffer(eb, name, name_ptr, this_name_len);
+ di = btrfs_lookup_dir_item(NULL, BTRFS_I(inode)->root,
+ search_path, parent,
+ name, this_name_len, 0);
+ if (di && !IS_ERR(di)) {
+ ret = 1;
+ goto out;
+ } else if (IS_ERR(di)) {
+ ret = PTR_ERR(di);
+ goto out;
+ }
+ btrfs_release_path(search_path);
+
+ cur_offset += this_len;
+ }
+ ret = 0;
+out:
+ btrfs_free_path(search_path);
+ kfree(name);
+ return ret;
+}
+
/* log a single inode in the tree log.
* At least one parent directory for this inode must exist in the tree
* or be logged already.
@@ -4578,6 +4691,22 @@ again:
if (min_key.type == BTRFS_INODE_ITEM_KEY)
need_log_inode_item = false;
+ if ((min_key.type == BTRFS_INODE_REF_KEY ||
+ min_key.type == BTRFS_INODE_EXTREF_KEY) &&
+ BTRFS_I(inode)->generation == trans->transid) {
+ ret = btrfs_check_ref_name_override(path->nodes[0],
+ path->slots[0],
+ &min_key, inode);
+ if (ret < 0) {
+ err = ret;
+ goto out_unlock;
+ } else if (ret > 0) {
+ err = 1;
+ btrfs_set_log_full_commit(root->fs_info, trans);
+ goto out_unlock;
+ }
+ }
+
/* Skip xattrs, we log them later with btrfs_log_all_xattrs() */
if (min_key.type == BTRFS_XATTR_ITEM_KEY) {
if (ins_nr == 0)