diff options
Diffstat (limited to 'kernel/fs/btrfs/transaction.c')
-rw-r--r-- | kernel/fs/btrfs/transaction.c | 283 |
1 files changed, 167 insertions, 116 deletions
diff --git a/kernel/fs/btrfs/transaction.c b/kernel/fs/btrfs/transaction.c index 00d18c2bd..be8eae80f 100644 --- a/kernel/fs/btrfs/transaction.c +++ b/kernel/fs/btrfs/transaction.c @@ -82,6 +82,12 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction) static void clear_btree_io_tree(struct extent_io_tree *tree) { spin_lock(&tree->lock); + /* + * Do a single barrier for the waitqueue_active check here, the state + * of the waitqueue should not change once clear_btree_io_tree is + * called. + */ + smp_mb(); while (!RB_EMPTY_ROOT(&tree->state)) { struct rb_node *node; struct extent_state *state; @@ -117,6 +123,18 @@ static noinline void switch_commit_roots(struct btrfs_transaction *trans, btrfs_unpin_free_ino(root); clear_btree_io_tree(&root->dirty_log_pages); } + + /* We can free old roots now. */ + spin_lock(&trans->dropped_roots_lock); + while (!list_empty(&trans->dropped_roots)) { + root = list_first_entry(&trans->dropped_roots, + struct btrfs_root, root_list); + list_del_init(&root->root_list); + spin_unlock(&trans->dropped_roots_lock); + btrfs_drop_and_free_fs_root(fs_info, root); + spin_lock(&trans->dropped_roots_lock); + } + spin_unlock(&trans->dropped_roots_lock); up_write(&fs_info->commit_root_sem); } @@ -214,23 +232,22 @@ loop: extwriter_counter_init(cur_trans, type); init_waitqueue_head(&cur_trans->writer_wait); init_waitqueue_head(&cur_trans->commit_wait); + init_waitqueue_head(&cur_trans->pending_wait); cur_trans->state = TRANS_STATE_RUNNING; /* * One for this trans handle, one so it will live on until we * commit the transaction. */ atomic_set(&cur_trans->use_count, 2); - cur_trans->have_free_bgs = 0; + atomic_set(&cur_trans->pending_ordered, 0); + cur_trans->flags = 0; cur_trans->start_time = get_seconds(); - cur_trans->dirty_bg_run = 0; + + memset(&cur_trans->delayed_refs, 0, sizeof(cur_trans->delayed_refs)); cur_trans->delayed_refs.href_root = RB_ROOT; + cur_trans->delayed_refs.dirty_extent_root = RB_ROOT; atomic_set(&cur_trans->delayed_refs.num_entries, 0); - cur_trans->delayed_refs.num_heads_ready = 0; - cur_trans->delayed_refs.pending_csums = 0; - cur_trans->delayed_refs.num_heads = 0; - cur_trans->delayed_refs.flushing = 0; - cur_trans->delayed_refs.run_delayed_start = 0; /* * although the tree mod log is per file system and not per transaction, @@ -250,12 +267,14 @@ loop: INIT_LIST_HEAD(&cur_trans->pending_snapshots); INIT_LIST_HEAD(&cur_trans->pending_chunks); INIT_LIST_HEAD(&cur_trans->switch_commits); - INIT_LIST_HEAD(&cur_trans->pending_ordered); INIT_LIST_HEAD(&cur_trans->dirty_bgs); INIT_LIST_HEAD(&cur_trans->io_bgs); + INIT_LIST_HEAD(&cur_trans->dropped_roots); mutex_init(&cur_trans->cache_write_mutex); cur_trans->num_dirty_bgs = 0; spin_lock_init(&cur_trans->dirty_bgs_lock); + INIT_LIST_HEAD(&cur_trans->deleted_bgs); + spin_lock_init(&cur_trans->dropped_roots_lock); list_add_tail(&cur_trans->list, &fs_info->trans_list); extent_io_tree_init(&cur_trans->dirty_pages, fs_info->btree_inode->i_mapping); @@ -332,6 +351,24 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans, } +void btrfs_add_dropped_root(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct btrfs_transaction *cur_trans = trans->transaction; + + /* Add ourselves to the transaction dropped list */ + spin_lock(&cur_trans->dropped_roots_lock); + list_add_tail(&root->root_list, &cur_trans->dropped_roots); + spin_unlock(&cur_trans->dropped_roots_lock); + + /* Make sure we don't try to update the root at commit time */ + spin_lock(&root->fs_info->fs_roots_radix_lock); + radix_tree_tag_clear(&root->fs_info->fs_roots_radix, + (unsigned long)root->root_key.objectid, + BTRFS_ROOT_TRANS_TAG); + spin_unlock(&root->fs_info->fs_roots_radix_lock); +} + int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, struct btrfs_root *root) { @@ -411,8 +448,8 @@ static inline bool need_reserve_reloc_root(struct btrfs_root *root) } static struct btrfs_trans_handle * -start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type, - enum btrfs_reserve_flush_enum flush) +start_transaction(struct btrfs_root *root, unsigned int num_items, + unsigned int type, enum btrfs_reserve_flush_enum flush) { struct btrfs_trans_handle *h; struct btrfs_transaction *cur_trans; @@ -442,13 +479,10 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type, * the appropriate flushing if need be. */ if (num_items > 0 && root != root->fs_info->chunk_root) { - if (root->fs_info->quota_enabled && - is_fstree(root->root_key.objectid)) { - qgroup_reserved = num_items * root->nodesize; - ret = btrfs_qgroup_reserve(root, qgroup_reserved); - if (ret) - return ERR_PTR(ret); - } + qgroup_reserved = num_items * root->nodesize; + ret = btrfs_qgroup_reserve_meta(root, qgroup_reserved); + if (ret) + return ERR_PTR(ret); num_bytes = btrfs_calc_trans_metadata_size(root, num_items); /* @@ -466,7 +500,7 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type, goto reserve_fail; } again: - h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); + h = kmem_cache_zalloc(btrfs_trans_handle_cachep, GFP_NOFS); if (!h) { ret = -ENOMEM; goto alloc_fail; @@ -507,24 +541,13 @@ again: h->transid = cur_trans->transid; h->transaction = cur_trans; - h->blocks_used = 0; - h->bytes_reserved = 0; h->root = root; - h->delayed_ref_updates = 0; h->use_count = 1; - h->adding_csums = 0; - h->block_rsv = NULL; - h->orig_rsv = NULL; - h->aborted = 0; - h->qgroup_reserved = 0; - h->delayed_ref_elem.seq = 0; + h->type = type; - h->allocating_chunk = false; - h->reloc_reserved = false; - h->sync = false; + h->can_flush_pending_bgs = true; INIT_LIST_HEAD(&h->qgroup_ref_list); INIT_LIST_HEAD(&h->new_bgs); - INIT_LIST_HEAD(&h->ordered); smp_mb(); if (cur_trans->state >= TRANS_STATE_BLOCKED && @@ -541,7 +564,6 @@ again: h->bytes_reserved = num_bytes; h->reloc_reserved = reloc_reserved; } - h->qgroup_reserved = qgroup_reserved; got_it: btrfs_record_root_in_trans(h, root); @@ -559,20 +581,52 @@ alloc_fail: btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv, num_bytes); reserve_fail: - if (qgroup_reserved) - btrfs_qgroup_free(root, qgroup_reserved); + btrfs_qgroup_free_meta(root, qgroup_reserved); return ERR_PTR(ret); } struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, - int num_items) + unsigned int num_items) { return start_transaction(root, num_items, TRANS_START, BTRFS_RESERVE_FLUSH_ALL); } +struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv( + struct btrfs_root *root, + unsigned int num_items, + int min_factor) +{ + struct btrfs_trans_handle *trans; + u64 num_bytes; + int ret; + + trans = btrfs_start_transaction(root, num_items); + if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) + return trans; + + trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) + return trans; + + num_bytes = btrfs_calc_trans_metadata_size(root, num_items); + ret = btrfs_cond_migrate_bytes(root->fs_info, + &root->fs_info->trans_block_rsv, + num_bytes, + min_factor); + if (ret) { + btrfs_end_transaction(trans, root); + return ERR_PTR(ret); + } + + trans->block_rsv = &root->fs_info->trans_block_rsv; + trans->bytes_reserved = num_bytes; + + return trans; +} struct btrfs_trans_handle *btrfs_start_transaction_lflush( - struct btrfs_root *root, int num_items) + struct btrfs_root *root, + unsigned int num_items) { return start_transaction(root, num_items, TRANS_START, BTRFS_RESERVE_FLUSH_LIMIT); @@ -756,12 +810,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, if (!list_empty(&trans->new_bgs)) btrfs_create_pending_block_groups(trans, root); - if (!list_empty(&trans->ordered)) { - spin_lock(&info->trans_lock); - list_splice_init(&trans->ordered, &cur_trans->pending_ordered); - spin_unlock(&info->trans_lock); - } - trans->delayed_ref_updates = 0; if (!trans->sync) { must_run_delayed_refs = @@ -777,21 +825,14 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, must_run_delayed_refs = 2; } - if (trans->qgroup_reserved) { - /* - * the same root has to be passed here between start_transaction - * and end_transaction. Subvolume quota depends on this. - */ - btrfs_qgroup_free(trans->root, trans->qgroup_reserved); - trans->qgroup_reserved = 0; - } - btrfs_trans_release_metadata(trans, root); trans->block_rsv = NULL; if (!list_empty(&trans->new_bgs)) btrfs_create_pending_block_groups(trans, root); + btrfs_trans_release_chunk_metadata(trans); + if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) && should_end_transaction(trans, root) && ACCESS_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) { @@ -816,6 +857,9 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, atomic_dec(&cur_trans->num_writers); extwriter_counter_dec(cur_trans, trans->type); + /* + * Make sure counter is updated before we wake up waiters. + */ smp_mb(); if (waitqueue_active(&cur_trans->writer_wait)) wake_up(&cur_trans->writer_wait); @@ -1198,6 +1242,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, spin_lock(&fs_info->fs_roots_radix_lock); if (err) break; + btrfs_qgroup_free_meta_all(root); } } spin_unlock(&fs_info->fs_roots_radix_lock); @@ -1290,7 +1335,13 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, if (pending->error) goto no_free_objectid; - btrfs_reloc_pre_snapshot(trans, pending, &to_reserve); + /* + * Make qgroup to skip current new snapshot's qgroupid, as it is + * accounted by later btrfs_qgroup_inherit(). + */ + btrfs_set_skip_qgroup(trans, objectid); + + btrfs_reloc_pre_snapshot(pending, &to_reserve); if (to_reserve > 0) { pending->error = btrfs_block_rsv_add(root, @@ -1298,7 +1349,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, to_reserve, BTRFS_RESERVE_NO_FLUSH); if (pending->error) - goto no_free_objectid; + goto clear_skip_qgroup; } key.objectid = objectid; @@ -1396,25 +1447,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, btrfs_abort_transaction(trans, root, ret); goto fail; } - - /* - * We need to flush delayed refs in order to make sure all of our quota - * operations have been done before we call btrfs_qgroup_inherit. - */ - ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); - if (ret) { - btrfs_abort_transaction(trans, root, ret); - goto fail; - } - - ret = btrfs_qgroup_inherit(trans, fs_info, - root->root_key.objectid, - objectid, pending->inherit); - if (ret) { - btrfs_abort_transaction(trans, root, ret); - goto fail; - } - /* see comments in should_cow_block() */ set_bit(BTRFS_ROOT_FORCE_COW, &root->state); smp_wmb(); @@ -1497,11 +1529,37 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, goto fail; } } + + ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); + if (ret) { + btrfs_abort_transaction(trans, root, ret); + goto fail; + } + + /* + * account qgroup counters before qgroup_inherit() + */ + ret = btrfs_qgroup_prepare_account_extents(trans, fs_info); + if (ret) + goto fail; + ret = btrfs_qgroup_account_extents(trans, fs_info); + if (ret) + goto fail; + ret = btrfs_qgroup_inherit(trans, fs_info, + root->root_key.objectid, + objectid, pending->inherit); + if (ret) { + btrfs_abort_transaction(trans, root, ret); + goto fail; + } + fail: pending->error = ret; dir_item_existed: trans->block_rsv = rsv; trans->bytes_reserved = 0; +clear_skip_qgroup: + btrfs_clear_skip_qgroup(trans); no_free_objectid: kfree(new_root_item); root_item_alloc_fail: @@ -1620,9 +1678,7 @@ static void do_async_commit(struct work_struct *work) * Tell lockdep about it. */ if (ac->newtrans->type & __TRANS_FREEZABLE) - rwsem_acquire_read( - &ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], - 0, 1, _THIS_IP_); + __sb_writers_acquired(ac->root->fs_info->sb, SB_FREEZE_FS); current->journal_info = ac->newtrans; @@ -1661,9 +1717,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, * async commit thread will be the one to unlock it. */ if (ac->newtrans->type & __TRANS_FREEZABLE) - rwsem_release( - &root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], - 1, _THIS_IP_); + __sb_writers_release(root->fs_info->sb, SB_FREEZE_FS); schedule_work(&ac->work); @@ -1746,25 +1800,10 @@ static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info) } static inline void -btrfs_wait_pending_ordered(struct btrfs_transaction *cur_trans, - struct btrfs_fs_info *fs_info) +btrfs_wait_pending_ordered(struct btrfs_transaction *cur_trans) { - struct btrfs_ordered_extent *ordered; - - spin_lock(&fs_info->trans_lock); - while (!list_empty(&cur_trans->pending_ordered)) { - ordered = list_first_entry(&cur_trans->pending_ordered, - struct btrfs_ordered_extent, - trans_list); - list_del_init(&ordered->trans_list); - spin_unlock(&fs_info->trans_lock); - - wait_event(ordered->wait, test_bit(BTRFS_ORDERED_COMPLETE, - &ordered->flags)); - btrfs_put_ordered_extent(ordered); - spin_lock(&fs_info->trans_lock); - } - spin_unlock(&fs_info->trans_lock); + wait_event(cur_trans->pending_wait, + atomic_read(&cur_trans->pending_ordered) == 0); } int btrfs_commit_transaction(struct btrfs_trans_handle *trans, @@ -1793,10 +1832,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_trans_release_metadata(trans, root); trans->block_rsv = NULL; - if (trans->qgroup_reserved) { - btrfs_qgroup_free(root, trans->qgroup_reserved); - trans->qgroup_reserved = 0; - } cur_trans = trans->transaction; @@ -1816,7 +1851,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, return ret; } - if (!cur_trans->dirty_bg_run) { + if (!test_bit(BTRFS_TRANS_DIRTY_BG_RUN, &cur_trans->flags)) { int run_it = 0; /* this mutex is also taken before trying to set @@ -1825,18 +1860,17 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, * after a extents from that block group have been * allocated for cache files. btrfs_set_block_group_ro * will wait for the transaction to commit if it - * finds dirty_bg_run = 1 + * finds BTRFS_TRANS_DIRTY_BG_RUN set. * - * The dirty_bg_run flag is also used to make sure only - * one process starts all the block group IO. It wouldn't + * The BTRFS_TRANS_DIRTY_BG_RUN flag is also used to make sure + * only one process starts all the block group IO. It wouldn't * hurt to have more than one go through, but there's no * real advantage to it either. */ mutex_lock(&root->fs_info->ro_block_group_mutex); - if (!cur_trans->dirty_bg_run) { + if (!test_and_set_bit(BTRFS_TRANS_DIRTY_BG_RUN, + &cur_trans->flags)) run_it = 1; - cur_trans->dirty_bg_run = 1; - } mutex_unlock(&root->fs_info->ro_block_group_mutex); if (run_it) @@ -1848,7 +1882,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, } spin_lock(&root->fs_info->trans_lock); - list_splice_init(&trans->ordered, &cur_trans->pending_ordered); if (cur_trans->state >= TRANS_STATE_COMMIT_START) { spin_unlock(&root->fs_info->trans_lock); atomic_inc(&cur_trans->use_count); @@ -1907,7 +1940,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_wait_delalloc_flush(root->fs_info); - btrfs_wait_pending_ordered(cur_trans, root->fs_info); + btrfs_wait_pending_ordered(cur_trans); btrfs_scrub_pause(root); /* @@ -1966,6 +1999,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, goto scrub_continue; } + /* Reocrd old roots for later qgroup accounting */ + ret = btrfs_qgroup_prepare_account_extents(trans, root->fs_info); + if (ret) { + mutex_unlock(&root->fs_info->reloc_mutex); + goto scrub_continue; + } + /* * make sure none of the code above managed to slip in a * delayed item @@ -2007,6 +2047,17 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, */ btrfs_free_log_root_tree(trans, root->fs_info); + /* + * Since fs roots are all committed, we can get a quite accurate + * new_roots. So let's do quota accounting. + */ + ret = btrfs_qgroup_account_extents(trans, root->fs_info); + if (ret < 0) { + mutex_unlock(&root->fs_info->tree_log_mutex); + mutex_unlock(&root->fs_info->reloc_mutex); + goto scrub_continue; + } + ret = commit_cowonly_roots(trans, root); if (ret) { mutex_unlock(&root->fs_info->tree_log_mutex); @@ -2057,6 +2108,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, clear_bit(BTRFS_INODE_BTREE_LOG1_ERR, &btree_ino->runtime_flags); clear_bit(BTRFS_INODE_BTREE_LOG2_ERR, &btree_ino->runtime_flags); + btrfs_trans_release_chunk_metadata(trans); + spin_lock(&root->fs_info->trans_lock); cur_trans->state = TRANS_STATE_UNBLOCKED; root->fs_info->running_transaction = NULL; @@ -2067,7 +2120,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, ret = btrfs_write_and_wait_transaction(trans, root); if (ret) { - btrfs_error(root->fs_info, ret, + btrfs_std_error(root->fs_info, ret, "Error while writing out transaction"); mutex_unlock(&root->fs_info->tree_log_mutex); goto scrub_continue; @@ -2087,7 +2140,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_finish_extent_commit(trans, root); - if (cur_trans->have_free_bgs) + if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &cur_trans->flags)) btrfs_clear_space_info_full(root->fs_info); root->fs_info->last_trans_committed = cur_trans->transid; @@ -2117,7 +2170,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, kmem_cache_free(btrfs_trans_handle_cachep, trans); - if (current != root->fs_info->transaction_kthread) + if (current != root->fs_info->transaction_kthread && + current != root->fs_info->cleaner_kthread) btrfs_run_delayed_iputs(root); return ret; @@ -2126,11 +2180,8 @@ scrub_continue: btrfs_scrub_continue(root); cleanup_transaction: btrfs_trans_release_metadata(trans, root); + btrfs_trans_release_chunk_metadata(trans); trans->block_rsv = NULL; - if (trans->qgroup_reserved) { - btrfs_qgroup_free(root, trans->qgroup_reserved); - trans->qgroup_reserved = 0; - } btrfs_warn(root->fs_info, "Skipping commit of aborted transaction."); if (current->journal_info == trans) current->journal_info = NULL; |