summaryrefslogtreecommitdiffstats
path: root/kernel/fs/btrfs/transaction.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/fs/btrfs/transaction.c')
-rw-r--r--kernel/fs/btrfs/transaction.c283
1 files changed, 167 insertions, 116 deletions
diff --git a/kernel/fs/btrfs/transaction.c b/kernel/fs/btrfs/transaction.c
index 00d18c2bd..be8eae80f 100644
--- a/kernel/fs/btrfs/transaction.c
+++ b/kernel/fs/btrfs/transaction.c
@@ -82,6 +82,12 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
static void clear_btree_io_tree(struct extent_io_tree *tree)
{
spin_lock(&tree->lock);
+ /*
+ * Do a single barrier for the waitqueue_active check here, the state
+ * of the waitqueue should not change once clear_btree_io_tree is
+ * called.
+ */
+ smp_mb();
while (!RB_EMPTY_ROOT(&tree->state)) {
struct rb_node *node;
struct extent_state *state;
@@ -117,6 +123,18 @@ static noinline void switch_commit_roots(struct btrfs_transaction *trans,
btrfs_unpin_free_ino(root);
clear_btree_io_tree(&root->dirty_log_pages);
}
+
+ /* We can free old roots now. */
+ spin_lock(&trans->dropped_roots_lock);
+ while (!list_empty(&trans->dropped_roots)) {
+ root = list_first_entry(&trans->dropped_roots,
+ struct btrfs_root, root_list);
+ list_del_init(&root->root_list);
+ spin_unlock(&trans->dropped_roots_lock);
+ btrfs_drop_and_free_fs_root(fs_info, root);
+ spin_lock(&trans->dropped_roots_lock);
+ }
+ spin_unlock(&trans->dropped_roots_lock);
up_write(&fs_info->commit_root_sem);
}
@@ -214,23 +232,22 @@ loop:
extwriter_counter_init(cur_trans, type);
init_waitqueue_head(&cur_trans->writer_wait);
init_waitqueue_head(&cur_trans->commit_wait);
+ init_waitqueue_head(&cur_trans->pending_wait);
cur_trans->state = TRANS_STATE_RUNNING;
/*
* One for this trans handle, one so it will live on until we
* commit the transaction.
*/
atomic_set(&cur_trans->use_count, 2);
- cur_trans->have_free_bgs = 0;
+ atomic_set(&cur_trans->pending_ordered, 0);
+ cur_trans->flags = 0;
cur_trans->start_time = get_seconds();
- cur_trans->dirty_bg_run = 0;
+
+ memset(&cur_trans->delayed_refs, 0, sizeof(cur_trans->delayed_refs));
cur_trans->delayed_refs.href_root = RB_ROOT;
+ cur_trans->delayed_refs.dirty_extent_root = RB_ROOT;
atomic_set(&cur_trans->delayed_refs.num_entries, 0);
- cur_trans->delayed_refs.num_heads_ready = 0;
- cur_trans->delayed_refs.pending_csums = 0;
- cur_trans->delayed_refs.num_heads = 0;
- cur_trans->delayed_refs.flushing = 0;
- cur_trans->delayed_refs.run_delayed_start = 0;
/*
* although the tree mod log is per file system and not per transaction,
@@ -250,12 +267,14 @@ loop:
INIT_LIST_HEAD(&cur_trans->pending_snapshots);
INIT_LIST_HEAD(&cur_trans->pending_chunks);
INIT_LIST_HEAD(&cur_trans->switch_commits);
- INIT_LIST_HEAD(&cur_trans->pending_ordered);
INIT_LIST_HEAD(&cur_trans->dirty_bgs);
INIT_LIST_HEAD(&cur_trans->io_bgs);
+ INIT_LIST_HEAD(&cur_trans->dropped_roots);
mutex_init(&cur_trans->cache_write_mutex);
cur_trans->num_dirty_bgs = 0;
spin_lock_init(&cur_trans->dirty_bgs_lock);
+ INIT_LIST_HEAD(&cur_trans->deleted_bgs);
+ spin_lock_init(&cur_trans->dropped_roots_lock);
list_add_tail(&cur_trans->list, &fs_info->trans_list);
extent_io_tree_init(&cur_trans->dirty_pages,
fs_info->btree_inode->i_mapping);
@@ -332,6 +351,24 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans,
}
+void btrfs_add_dropped_root(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root)
+{
+ struct btrfs_transaction *cur_trans = trans->transaction;
+
+ /* Add ourselves to the transaction dropped list */
+ spin_lock(&cur_trans->dropped_roots_lock);
+ list_add_tail(&root->root_list, &cur_trans->dropped_roots);
+ spin_unlock(&cur_trans->dropped_roots_lock);
+
+ /* Make sure we don't try to update the root at commit time */
+ spin_lock(&root->fs_info->fs_roots_radix_lock);
+ radix_tree_tag_clear(&root->fs_info->fs_roots_radix,
+ (unsigned long)root->root_key.objectid,
+ BTRFS_ROOT_TRANS_TAG);
+ spin_unlock(&root->fs_info->fs_roots_radix_lock);
+}
+
int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
@@ -411,8 +448,8 @@ static inline bool need_reserve_reloc_root(struct btrfs_root *root)
}
static struct btrfs_trans_handle *
-start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type,
- enum btrfs_reserve_flush_enum flush)
+start_transaction(struct btrfs_root *root, unsigned int num_items,
+ unsigned int type, enum btrfs_reserve_flush_enum flush)
{
struct btrfs_trans_handle *h;
struct btrfs_transaction *cur_trans;
@@ -442,13 +479,10 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type,
* the appropriate flushing if need be.
*/
if (num_items > 0 && root != root->fs_info->chunk_root) {
- if (root->fs_info->quota_enabled &&
- is_fstree(root->root_key.objectid)) {
- qgroup_reserved = num_items * root->nodesize;
- ret = btrfs_qgroup_reserve(root, qgroup_reserved);
- if (ret)
- return ERR_PTR(ret);
- }
+ qgroup_reserved = num_items * root->nodesize;
+ ret = btrfs_qgroup_reserve_meta(root, qgroup_reserved);
+ if (ret)
+ return ERR_PTR(ret);
num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
/*
@@ -466,7 +500,7 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type,
goto reserve_fail;
}
again:
- h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
+ h = kmem_cache_zalloc(btrfs_trans_handle_cachep, GFP_NOFS);
if (!h) {
ret = -ENOMEM;
goto alloc_fail;
@@ -507,24 +541,13 @@ again:
h->transid = cur_trans->transid;
h->transaction = cur_trans;
- h->blocks_used = 0;
- h->bytes_reserved = 0;
h->root = root;
- h->delayed_ref_updates = 0;
h->use_count = 1;
- h->adding_csums = 0;
- h->block_rsv = NULL;
- h->orig_rsv = NULL;
- h->aborted = 0;
- h->qgroup_reserved = 0;
- h->delayed_ref_elem.seq = 0;
+
h->type = type;
- h->allocating_chunk = false;
- h->reloc_reserved = false;
- h->sync = false;
+ h->can_flush_pending_bgs = true;
INIT_LIST_HEAD(&h->qgroup_ref_list);
INIT_LIST_HEAD(&h->new_bgs);
- INIT_LIST_HEAD(&h->ordered);
smp_mb();
if (cur_trans->state >= TRANS_STATE_BLOCKED &&
@@ -541,7 +564,6 @@ again:
h->bytes_reserved = num_bytes;
h->reloc_reserved = reloc_reserved;
}
- h->qgroup_reserved = qgroup_reserved;
got_it:
btrfs_record_root_in_trans(h, root);
@@ -559,20 +581,52 @@ alloc_fail:
btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv,
num_bytes);
reserve_fail:
- if (qgroup_reserved)
- btrfs_qgroup_free(root, qgroup_reserved);
+ btrfs_qgroup_free_meta(root, qgroup_reserved);
return ERR_PTR(ret);
}
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
- int num_items)
+ unsigned int num_items)
{
return start_transaction(root, num_items, TRANS_START,
BTRFS_RESERVE_FLUSH_ALL);
}
+struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
+ struct btrfs_root *root,
+ unsigned int num_items,
+ int min_factor)
+{
+ struct btrfs_trans_handle *trans;
+ u64 num_bytes;
+ int ret;
+
+ trans = btrfs_start_transaction(root, num_items);
+ if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
+ return trans;
+
+ trans = btrfs_start_transaction(root, 0);
+ if (IS_ERR(trans))
+ return trans;
+
+ num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
+ ret = btrfs_cond_migrate_bytes(root->fs_info,
+ &root->fs_info->trans_block_rsv,
+ num_bytes,
+ min_factor);
+ if (ret) {
+ btrfs_end_transaction(trans, root);
+ return ERR_PTR(ret);
+ }
+
+ trans->block_rsv = &root->fs_info->trans_block_rsv;
+ trans->bytes_reserved = num_bytes;
+
+ return trans;
+}
struct btrfs_trans_handle *btrfs_start_transaction_lflush(
- struct btrfs_root *root, int num_items)
+ struct btrfs_root *root,
+ unsigned int num_items)
{
return start_transaction(root, num_items, TRANS_START,
BTRFS_RESERVE_FLUSH_LIMIT);
@@ -756,12 +810,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
if (!list_empty(&trans->new_bgs))
btrfs_create_pending_block_groups(trans, root);
- if (!list_empty(&trans->ordered)) {
- spin_lock(&info->trans_lock);
- list_splice_init(&trans->ordered, &cur_trans->pending_ordered);
- spin_unlock(&info->trans_lock);
- }
-
trans->delayed_ref_updates = 0;
if (!trans->sync) {
must_run_delayed_refs =
@@ -777,21 +825,14 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
must_run_delayed_refs = 2;
}
- if (trans->qgroup_reserved) {
- /*
- * the same root has to be passed here between start_transaction
- * and end_transaction. Subvolume quota depends on this.
- */
- btrfs_qgroup_free(trans->root, trans->qgroup_reserved);
- trans->qgroup_reserved = 0;
- }
-
btrfs_trans_release_metadata(trans, root);
trans->block_rsv = NULL;
if (!list_empty(&trans->new_bgs))
btrfs_create_pending_block_groups(trans, root);
+ btrfs_trans_release_chunk_metadata(trans);
+
if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) &&
should_end_transaction(trans, root) &&
ACCESS_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) {
@@ -816,6 +857,9 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
atomic_dec(&cur_trans->num_writers);
extwriter_counter_dec(cur_trans, trans->type);
+ /*
+ * Make sure counter is updated before we wake up waiters.
+ */
smp_mb();
if (waitqueue_active(&cur_trans->writer_wait))
wake_up(&cur_trans->writer_wait);
@@ -1198,6 +1242,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
spin_lock(&fs_info->fs_roots_radix_lock);
if (err)
break;
+ btrfs_qgroup_free_meta_all(root);
}
}
spin_unlock(&fs_info->fs_roots_radix_lock);
@@ -1290,7 +1335,13 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
if (pending->error)
goto no_free_objectid;
- btrfs_reloc_pre_snapshot(trans, pending, &to_reserve);
+ /*
+ * Make qgroup to skip current new snapshot's qgroupid, as it is
+ * accounted by later btrfs_qgroup_inherit().
+ */
+ btrfs_set_skip_qgroup(trans, objectid);
+
+ btrfs_reloc_pre_snapshot(pending, &to_reserve);
if (to_reserve > 0) {
pending->error = btrfs_block_rsv_add(root,
@@ -1298,7 +1349,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
to_reserve,
BTRFS_RESERVE_NO_FLUSH);
if (pending->error)
- goto no_free_objectid;
+ goto clear_skip_qgroup;
}
key.objectid = objectid;
@@ -1396,25 +1447,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
btrfs_abort_transaction(trans, root, ret);
goto fail;
}
-
- /*
- * We need to flush delayed refs in order to make sure all of our quota
- * operations have been done before we call btrfs_qgroup_inherit.
- */
- ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
- if (ret) {
- btrfs_abort_transaction(trans, root, ret);
- goto fail;
- }
-
- ret = btrfs_qgroup_inherit(trans, fs_info,
- root->root_key.objectid,
- objectid, pending->inherit);
- if (ret) {
- btrfs_abort_transaction(trans, root, ret);
- goto fail;
- }
-
/* see comments in should_cow_block() */
set_bit(BTRFS_ROOT_FORCE_COW, &root->state);
smp_wmb();
@@ -1497,11 +1529,37 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
goto fail;
}
}
+
+ ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto fail;
+ }
+
+ /*
+ * account qgroup counters before qgroup_inherit()
+ */
+ ret = btrfs_qgroup_prepare_account_extents(trans, fs_info);
+ if (ret)
+ goto fail;
+ ret = btrfs_qgroup_account_extents(trans, fs_info);
+ if (ret)
+ goto fail;
+ ret = btrfs_qgroup_inherit(trans, fs_info,
+ root->root_key.objectid,
+ objectid, pending->inherit);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto fail;
+ }
+
fail:
pending->error = ret;
dir_item_existed:
trans->block_rsv = rsv;
trans->bytes_reserved = 0;
+clear_skip_qgroup:
+ btrfs_clear_skip_qgroup(trans);
no_free_objectid:
kfree(new_root_item);
root_item_alloc_fail:
@@ -1620,9 +1678,7 @@ static void do_async_commit(struct work_struct *work)
* Tell lockdep about it.
*/
if (ac->newtrans->type & __TRANS_FREEZABLE)
- rwsem_acquire_read(
- &ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
- 0, 1, _THIS_IP_);
+ __sb_writers_acquired(ac->root->fs_info->sb, SB_FREEZE_FS);
current->journal_info = ac->newtrans;
@@ -1661,9 +1717,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
* async commit thread will be the one to unlock it.
*/
if (ac->newtrans->type & __TRANS_FREEZABLE)
- rwsem_release(
- &root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
- 1, _THIS_IP_);
+ __sb_writers_release(root->fs_info->sb, SB_FREEZE_FS);
schedule_work(&ac->work);
@@ -1746,25 +1800,10 @@ static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
}
static inline void
-btrfs_wait_pending_ordered(struct btrfs_transaction *cur_trans,
- struct btrfs_fs_info *fs_info)
+btrfs_wait_pending_ordered(struct btrfs_transaction *cur_trans)
{
- struct btrfs_ordered_extent *ordered;
-
- spin_lock(&fs_info->trans_lock);
- while (!list_empty(&cur_trans->pending_ordered)) {
- ordered = list_first_entry(&cur_trans->pending_ordered,
- struct btrfs_ordered_extent,
- trans_list);
- list_del_init(&ordered->trans_list);
- spin_unlock(&fs_info->trans_lock);
-
- wait_event(ordered->wait, test_bit(BTRFS_ORDERED_COMPLETE,
- &ordered->flags));
- btrfs_put_ordered_extent(ordered);
- spin_lock(&fs_info->trans_lock);
- }
- spin_unlock(&fs_info->trans_lock);
+ wait_event(cur_trans->pending_wait,
+ atomic_read(&cur_trans->pending_ordered) == 0);
}
int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
@@ -1793,10 +1832,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
btrfs_trans_release_metadata(trans, root);
trans->block_rsv = NULL;
- if (trans->qgroup_reserved) {
- btrfs_qgroup_free(root, trans->qgroup_reserved);
- trans->qgroup_reserved = 0;
- }
cur_trans = trans->transaction;
@@ -1816,7 +1851,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
return ret;
}
- if (!cur_trans->dirty_bg_run) {
+ if (!test_bit(BTRFS_TRANS_DIRTY_BG_RUN, &cur_trans->flags)) {
int run_it = 0;
/* this mutex is also taken before trying to set
@@ -1825,18 +1860,17 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
* after a extents from that block group have been
* allocated for cache files. btrfs_set_block_group_ro
* will wait for the transaction to commit if it
- * finds dirty_bg_run = 1
+ * finds BTRFS_TRANS_DIRTY_BG_RUN set.
*
- * The dirty_bg_run flag is also used to make sure only
- * one process starts all the block group IO. It wouldn't
+ * The BTRFS_TRANS_DIRTY_BG_RUN flag is also used to make sure
+ * only one process starts all the block group IO. It wouldn't
* hurt to have more than one go through, but there's no
* real advantage to it either.
*/
mutex_lock(&root->fs_info->ro_block_group_mutex);
- if (!cur_trans->dirty_bg_run) {
+ if (!test_and_set_bit(BTRFS_TRANS_DIRTY_BG_RUN,
+ &cur_trans->flags))
run_it = 1;
- cur_trans->dirty_bg_run = 1;
- }
mutex_unlock(&root->fs_info->ro_block_group_mutex);
if (run_it)
@@ -1848,7 +1882,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
}
spin_lock(&root->fs_info->trans_lock);
- list_splice_init(&trans->ordered, &cur_trans->pending_ordered);
if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
spin_unlock(&root->fs_info->trans_lock);
atomic_inc(&cur_trans->use_count);
@@ -1907,7 +1940,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
btrfs_wait_delalloc_flush(root->fs_info);
- btrfs_wait_pending_ordered(cur_trans, root->fs_info);
+ btrfs_wait_pending_ordered(cur_trans);
btrfs_scrub_pause(root);
/*
@@ -1966,6 +1999,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
goto scrub_continue;
}
+ /* Reocrd old roots for later qgroup accounting */
+ ret = btrfs_qgroup_prepare_account_extents(trans, root->fs_info);
+ if (ret) {
+ mutex_unlock(&root->fs_info->reloc_mutex);
+ goto scrub_continue;
+ }
+
/*
* make sure none of the code above managed to slip in a
* delayed item
@@ -2007,6 +2047,17 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
*/
btrfs_free_log_root_tree(trans, root->fs_info);
+ /*
+ * Since fs roots are all committed, we can get a quite accurate
+ * new_roots. So let's do quota accounting.
+ */
+ ret = btrfs_qgroup_account_extents(trans, root->fs_info);
+ if (ret < 0) {
+ mutex_unlock(&root->fs_info->tree_log_mutex);
+ mutex_unlock(&root->fs_info->reloc_mutex);
+ goto scrub_continue;
+ }
+
ret = commit_cowonly_roots(trans, root);
if (ret) {
mutex_unlock(&root->fs_info->tree_log_mutex);
@@ -2057,6 +2108,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
clear_bit(BTRFS_INODE_BTREE_LOG1_ERR, &btree_ino->runtime_flags);
clear_bit(BTRFS_INODE_BTREE_LOG2_ERR, &btree_ino->runtime_flags);
+ btrfs_trans_release_chunk_metadata(trans);
+
spin_lock(&root->fs_info->trans_lock);
cur_trans->state = TRANS_STATE_UNBLOCKED;
root->fs_info->running_transaction = NULL;
@@ -2067,7 +2120,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
ret = btrfs_write_and_wait_transaction(trans, root);
if (ret) {
- btrfs_error(root->fs_info, ret,
+ btrfs_std_error(root->fs_info, ret,
"Error while writing out transaction");
mutex_unlock(&root->fs_info->tree_log_mutex);
goto scrub_continue;
@@ -2087,7 +2140,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
btrfs_finish_extent_commit(trans, root);
- if (cur_trans->have_free_bgs)
+ if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &cur_trans->flags))
btrfs_clear_space_info_full(root->fs_info);
root->fs_info->last_trans_committed = cur_trans->transid;
@@ -2117,7 +2170,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
kmem_cache_free(btrfs_trans_handle_cachep, trans);
- if (current != root->fs_info->transaction_kthread)
+ if (current != root->fs_info->transaction_kthread &&
+ current != root->fs_info->cleaner_kthread)
btrfs_run_delayed_iputs(root);
return ret;
@@ -2126,11 +2180,8 @@ scrub_continue:
btrfs_scrub_continue(root);
cleanup_transaction:
btrfs_trans_release_metadata(trans, root);
+ btrfs_trans_release_chunk_metadata(trans);
trans->block_rsv = NULL;
- if (trans->qgroup_reserved) {
- btrfs_qgroup_free(root, trans->qgroup_reserved);
- trans->qgroup_reserved = 0;
- }
btrfs_warn(root->fs_info, "Skipping commit of aborted transaction.");
if (current->journal_info == trans)
current->journal_info = NULL;