diff options
Diffstat (limited to 'fs/btrfs/transaction.c')
-rw-r--r-- | fs/btrfs/transaction.c | 231 |
1 files changed, 113 insertions, 118 deletions
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 04f07144b45c..5c4cf0f9146b 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -37,22 +37,16 @@ static const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = { [TRANS_STATE_RUNNING] = 0U, - [TRANS_STATE_BLOCKED] = (__TRANS_USERSPACE | - __TRANS_START), - [TRANS_STATE_COMMIT_START] = (__TRANS_USERSPACE | - __TRANS_START | - __TRANS_ATTACH), - [TRANS_STATE_COMMIT_DOING] = (__TRANS_USERSPACE | - __TRANS_START | + [TRANS_STATE_BLOCKED] = __TRANS_START, + [TRANS_STATE_COMMIT_START] = (__TRANS_START | __TRANS_ATTACH), + [TRANS_STATE_COMMIT_DOING] = (__TRANS_START | __TRANS_ATTACH | __TRANS_JOIN), - [TRANS_STATE_UNBLOCKED] = (__TRANS_USERSPACE | - __TRANS_START | + [TRANS_STATE_UNBLOCKED] = (__TRANS_START | __TRANS_ATTACH | __TRANS_JOIN | __TRANS_JOIN_NOLOCK), - [TRANS_STATE_COMPLETED] = (__TRANS_USERSPACE | - __TRANS_START | + [TRANS_STATE_COMPLETED] = (__TRANS_START | __TRANS_ATTACH | __TRANS_JOIN | __TRANS_JOIN_NOLOCK), @@ -126,9 +120,9 @@ static void clear_btree_io_tree(struct extent_io_tree *tree) spin_unlock(&tree->lock); } -static noinline void switch_commit_roots(struct btrfs_transaction *trans, - struct btrfs_fs_info *fs_info) +static noinline void switch_commit_roots(struct btrfs_transaction *trans) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *root, *tmp; down_write(&fs_info->commit_root_sem); @@ -319,7 +313,7 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans, if ((test_bit(BTRFS_ROOT_REF_COWS, &root->state) && root->last_trans < trans->transid) || force) { WARN_ON(root == fs_info->extent_root); - WARN_ON(root->commit_root != root->node); + WARN_ON(!force && root->commit_root != root->node); /* * see below for IN_TRANS_SETUP usage rules @@ -449,11 +443,7 @@ static int may_wait_transaction(struct btrfs_fs_info *fs_info, int type) if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) return 0; - if (type == TRANS_USERSPACE) - return 1; - - if (type == TRANS_START && - !atomic_read(&fs_info->open_ioctl_trans)) + if (type == TRANS_START) return 1; return 0; @@ -508,8 +498,8 @@ start_transaction(struct btrfs_root *root, unsigned int num_items, */ if (num_items && root != fs_info->chunk_root) { qgroup_reserved = num_items * fs_info->nodesize; - ret = btrfs_qgroup_reserve_meta(root, qgroup_reserved, - enforce_qgroups); + ret = btrfs_qgroup_reserve_meta_pertrans(root, qgroup_reserved, + enforce_qgroups); if (ret) return ERR_PTR(ret); @@ -593,7 +583,7 @@ again: got_it: btrfs_record_root_in_trans(h, root); - if (!current->journal_info && type != TRANS_USERSPACE) + if (!current->journal_info) current->journal_info = h; return h; @@ -606,7 +596,7 @@ alloc_fail: btrfs_block_rsv_release(fs_info, &fs_info->trans_block_rsv, num_bytes); reserve_fail: - btrfs_qgroup_free_meta(root, qgroup_reserved); + btrfs_qgroup_free_meta_pertrans(root, qgroup_reserved); return ERR_PTR(ret); } @@ -658,14 +648,6 @@ struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv( return trans; } -struct btrfs_trans_handle *btrfs_start_transaction_lflush( - struct btrfs_root *root, - unsigned int num_items) -{ - return start_transaction(root, num_items, TRANS_START, - BTRFS_RESERVE_FLUSH_LIMIT, true); -} - struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root) { return start_transaction(root, 0, TRANS_JOIN, BTRFS_RESERVE_NO_FLUSH, @@ -678,12 +660,6 @@ struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root BTRFS_RESERVE_NO_FLUSH, true); } -struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root) -{ - return start_transaction(root, 0, TRANS_USERSPACE, - BTRFS_RESERVE_NO_FLUSH, true); -} - /* * btrfs_attach_transaction() - catch the running transaction * @@ -789,8 +765,7 @@ out: void btrfs_throttle(struct btrfs_fs_info *fs_info) { - if (!atomic_read(&fs_info->open_ioctl_trans)) - wait_current_trans(fs_info); + wait_current_trans(fs_info); } static int should_end_transaction(struct btrfs_trans_handle *trans) @@ -806,7 +781,6 @@ static int should_end_transaction(struct btrfs_trans_handle *trans) int btrfs_should_end_transaction(struct btrfs_trans_handle *trans) { struct btrfs_transaction *cur_trans = trans->transaction; - struct btrfs_fs_info *fs_info = trans->fs_info; int updates; int err; @@ -818,7 +792,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans) updates = trans->delayed_ref_updates; trans->delayed_ref_updates = 0; if (updates) { - err = btrfs_run_delayed_refs(trans, fs_info, updates * 2); + err = btrfs_run_delayed_refs(trans, updates * 2); if (err) /* Error code will also eval true */ return err; } @@ -826,6 +800,27 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans) return should_end_transaction(trans); } +static void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans) + +{ + struct btrfs_fs_info *fs_info = trans->fs_info; + + if (!trans->block_rsv) { + ASSERT(!trans->bytes_reserved); + return; + } + + if (!trans->bytes_reserved) + return; + + ASSERT(trans->block_rsv == &fs_info->trans_block_rsv); + trace_btrfs_space_reservation(fs_info, "transaction", + trans->transid, trans->bytes_reserved, 0); + btrfs_block_rsv_release(fs_info, trans->block_rsv, + trans->bytes_reserved); + trans->bytes_reserved = 0; +} + static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, int throttle) { @@ -843,11 +838,11 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, return 0; } - btrfs_trans_release_metadata(trans, info); + btrfs_trans_release_metadata(trans); trans->block_rsv = NULL; if (!list_empty(&trans->new_bgs)) - btrfs_create_pending_block_groups(trans, info); + btrfs_create_pending_block_groups(trans); trans->delayed_ref_updates = 0; if (!trans->sync) { @@ -864,16 +859,15 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, must_run_delayed_refs = 2; } - btrfs_trans_release_metadata(trans, info); + btrfs_trans_release_metadata(trans); trans->block_rsv = NULL; if (!list_empty(&trans->new_bgs)) - btrfs_create_pending_block_groups(trans, info); + btrfs_create_pending_block_groups(trans); btrfs_trans_release_chunk_metadata(trans); - if (lock && !atomic_read(&info->open_ioctl_trans) && - should_end_transaction(trans) && + if (lock && should_end_transaction(trans) && READ_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) { spin_lock(&info->trans_lock); if (cur_trans->state == TRANS_STATE_RUNNING) @@ -1072,40 +1066,33 @@ int btrfs_wait_tree_log_extents(struct btrfs_root *log_root, int mark) } /* - * when btree blocks are allocated, they have some corresponding bits set for - * them in one of two extent_io trees. This is used to make sure all of - * those extents are on disk for transaction or log commit + * When btree blocks are allocated the corresponding extents are marked dirty. + * This function ensures such extents are persisted on disk for transaction or + * log commit. + * + * @trans: transaction whose dirty pages we'd like to write */ -static int btrfs_write_and_wait_marked_extents(struct btrfs_fs_info *fs_info, - struct extent_io_tree *dirty_pages, int mark) +static int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans) { int ret; int ret2; + struct extent_io_tree *dirty_pages = &trans->transaction->dirty_pages; + struct btrfs_fs_info *fs_info = trans->fs_info; struct blk_plug plug; blk_start_plug(&plug); - ret = btrfs_write_marked_extents(fs_info, dirty_pages, mark); + ret = btrfs_write_marked_extents(fs_info, dirty_pages, EXTENT_DIRTY); blk_finish_plug(&plug); ret2 = btrfs_wait_extents(fs_info, dirty_pages); + clear_btree_io_tree(&trans->transaction->dirty_pages); + if (ret) return ret; - if (ret2) + else if (ret2) return ret2; - return 0; -} - -static int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) -{ - int ret; - - ret = btrfs_write_and_wait_marked_extents(fs_info, - &trans->transaction->dirty_pages, - EXTENT_DIRTY); - clear_btree_io_tree(&trans->transaction->dirty_pages); - - return ret; + else + return 0; } /* @@ -1155,9 +1142,9 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, * failures will cause the file system to go offline. We still need * to clean up the delayed refs. */ -static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) +static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct list_head *dirty_bgs = &trans->transaction->dirty_bgs; struct list_head *io_bgs = &trans->transaction->io_bgs; struct list_head *next; @@ -1173,7 +1160,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, if (ret) return ret; - ret = btrfs_run_delayed_refs(trans, fs_info, (unsigned long)-1); + ret = btrfs_run_delayed_refs(trans, (unsigned long)-1); if (ret) return ret; @@ -1192,7 +1179,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, return ret; /* run_qgroups might have added some more refs */ - ret = btrfs_run_delayed_refs(trans, fs_info, (unsigned long)-1); + ret = btrfs_run_delayed_refs(trans, (unsigned long)-1); if (ret) return ret; again: @@ -1209,7 +1196,7 @@ again: ret = update_cowonly_root(trans, root); if (ret) return ret; - ret = btrfs_run_delayed_refs(trans, fs_info, (unsigned long)-1); + ret = btrfs_run_delayed_refs(trans, (unsigned long)-1); if (ret) return ret; } @@ -1218,7 +1205,7 @@ again: ret = btrfs_write_dirty_block_groups(trans, fs_info); if (ret) return ret; - ret = btrfs_run_delayed_refs(trans, fs_info, (unsigned long)-1); + ret = btrfs_run_delayed_refs(trans, (unsigned long)-1); if (ret) return ret; } @@ -1251,9 +1238,9 @@ void btrfs_add_dead_root(struct btrfs_root *root) /* * update all the cowonly tree roots on disk */ -static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) +static noinline int commit_fs_roots(struct btrfs_trans_handle *trans) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *gang[8]; int i; int ret; @@ -1297,7 +1284,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, spin_lock(&fs_info->fs_roots_radix_lock); if (err) break; - btrfs_qgroup_free_meta_all(root); + btrfs_qgroup_free_meta_all_pertrans(root); } } spin_unlock(&fs_info->fs_roots_radix_lock); @@ -1366,15 +1353,23 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans, return 0; /* + * Ensure dirty @src will be commited. Or, after comming + * commit_fs_roots() and switch_commit_roots(), any dirty but not + * recorded root will never be updated again, causing an outdated root + * item. + */ + record_root_in_trans(trans, src, 1); + + /* * We are going to commit transaction, see btrfs_commit_transaction() * comment for reason locking tree_log_mutex */ mutex_lock(&fs_info->tree_log_mutex); - ret = commit_fs_roots(trans, fs_info); + ret = commit_fs_roots(trans); if (ret) goto out; - ret = btrfs_qgroup_account_extents(trans, fs_info); + ret = btrfs_qgroup_account_extents(trans); if (ret < 0) goto out; @@ -1397,11 +1392,11 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans, * like chunk and root tree, as they won't affect qgroup. * And we don't write super to avoid half committed status. */ - ret = commit_cowonly_roots(trans, fs_info); + ret = commit_cowonly_roots(trans); if (ret) goto out; - switch_commit_roots(trans->transaction, fs_info); - ret = btrfs_write_and_wait_transaction(trans, fs_info); + switch_commit_roots(trans->transaction); + ret = btrfs_write_and_wait_transaction(trans); if (ret) btrfs_handle_fs_error(fs_info, ret, "Error while writing out transaction for qgroup"); @@ -1430,9 +1425,10 @@ out: * the creation of the pending snapshots, just return 0. */ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, struct btrfs_pending_snapshot *pending) { + + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_key key; struct btrfs_root_item *new_root_item; struct btrfs_root *tree_root = fs_info->tree_root; @@ -1524,7 +1520,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, * otherwise we corrupt the FS during * snapshot */ - ret = btrfs_run_delayed_items(trans, fs_info); + ret = btrfs_run_delayed_items(trans); if (ret) { /* Transaction aborted */ btrfs_abort_transaction(trans, ret); goto fail; @@ -1620,7 +1616,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, goto fail; } - ret = btrfs_run_delayed_refs(trans, fs_info, (unsigned long)-1); + ret = btrfs_run_delayed_refs(trans, (unsigned long)-1); if (ret) { btrfs_abort_transaction(trans, ret); goto fail; @@ -1674,7 +1670,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, } } - ret = btrfs_run_delayed_refs(trans, fs_info, (unsigned long)-1); + ret = btrfs_run_delayed_refs(trans, (unsigned long)-1); if (ret) { btrfs_abort_transaction(trans, ret); goto fail; @@ -1699,8 +1695,7 @@ no_free_objectid: /* * create all the snapshots we've scheduled for creation */ -static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) +static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans) { struct btrfs_pending_snapshot *pending, *next; struct list_head *head = &trans->transaction->pending_snapshots; @@ -1708,7 +1703,7 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans, list_for_each_entry_safe(pending, next, head, list) { list_del(&pending->list); - ret = create_pending_snapshot(trans, fs_info, pending); + ret = create_pending_snapshot(trans, pending); if (ret) break; } @@ -1861,10 +1856,9 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, } -static void cleanup_transaction(struct btrfs_trans_handle *trans, - struct btrfs_root *root, int err) +static void cleanup_transaction(struct btrfs_trans_handle *trans, int err) { - struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_transaction *cur_trans = trans->transaction; DEFINE_WAIT(wait); @@ -1904,7 +1898,7 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, btrfs_put_transaction(cur_trans); btrfs_put_transaction(cur_trans); - trace_btrfs_transaction_commit(root); + trace_btrfs_transaction_commit(trans->root); if (current->journal_info == trans) current->journal_info = NULL; @@ -1959,13 +1953,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) /* make a pass through all the delayed refs we have so far * any runnings procs may add more while we are here */ - ret = btrfs_run_delayed_refs(trans, fs_info, 0); + ret = btrfs_run_delayed_refs(trans, 0); if (ret) { btrfs_end_transaction(trans); return ret; } - btrfs_trans_release_metadata(trans, fs_info); + btrfs_trans_release_metadata(trans); trans->block_rsv = NULL; cur_trans = trans->transaction; @@ -1978,9 +1972,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) smp_wmb(); if (!list_empty(&trans->new_bgs)) - btrfs_create_pending_block_groups(trans, fs_info); + btrfs_create_pending_block_groups(trans); - ret = btrfs_run_delayed_refs(trans, fs_info, 0); + ret = btrfs_run_delayed_refs(trans, 0); if (ret) { btrfs_end_transaction(trans); return ret; @@ -2008,12 +2002,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) run_it = 1; mutex_unlock(&fs_info->ro_block_group_mutex); - if (run_it) - ret = btrfs_start_dirty_block_groups(trans, fs_info); - } - if (ret) { - btrfs_end_transaction(trans); - return ret; + if (run_it) { + ret = btrfs_start_dirty_block_groups(trans); + if (ret) { + btrfs_end_transaction(trans); + return ret; + } + } } spin_lock(&fs_info->trans_lock); @@ -2061,7 +2056,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) if (ret) goto cleanup_transaction; - ret = btrfs_run_delayed_items(trans, fs_info); + ret = btrfs_run_delayed_items(trans); if (ret) goto cleanup_transaction; @@ -2069,7 +2064,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) extwriter_counter_read(cur_trans) == 0); /* some pending stuffs might be added after the previous flush. */ - ret = btrfs_run_delayed_items(trans, fs_info); + ret = btrfs_run_delayed_items(trans); if (ret) goto cleanup_transaction; @@ -2106,7 +2101,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) * deal with them in create_pending_snapshot(), which is the * core function of the snapshot creation. */ - ret = create_pending_snapshots(trans, fs_info); + ret = create_pending_snapshots(trans); if (ret) { mutex_unlock(&fs_info->reloc_mutex); goto scrub_continue; @@ -2122,13 +2117,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) * because all the tree which are snapshoted will be forced to COW * the nodes and leaves. */ - ret = btrfs_run_delayed_items(trans, fs_info); + ret = btrfs_run_delayed_items(trans); if (ret) { mutex_unlock(&fs_info->reloc_mutex); goto scrub_continue; } - ret = btrfs_run_delayed_refs(trans, fs_info, (unsigned long)-1); + ret = btrfs_run_delayed_refs(trans, (unsigned long)-1); if (ret) { mutex_unlock(&fs_info->reloc_mutex); goto scrub_continue; @@ -2157,7 +2152,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) */ mutex_lock(&fs_info->tree_log_mutex); - ret = commit_fs_roots(trans, fs_info); + ret = commit_fs_roots(trans); if (ret) { mutex_unlock(&fs_info->tree_log_mutex); mutex_unlock(&fs_info->reloc_mutex); @@ -2179,7 +2174,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) * commit_fs_roots() can call btrfs_save_ino_cache(), which generates * new delayed refs. Must handle them or qgroup can be wrong. */ - ret = btrfs_run_delayed_refs(trans, fs_info, (unsigned long)-1); + ret = btrfs_run_delayed_refs(trans, (unsigned long)-1); if (ret) { mutex_unlock(&fs_info->tree_log_mutex); mutex_unlock(&fs_info->reloc_mutex); @@ -2190,14 +2185,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) * Since fs roots are all committed, we can get a quite accurate * new_roots. So let's do quota accounting. */ - ret = btrfs_qgroup_account_extents(trans, fs_info); + ret = btrfs_qgroup_account_extents(trans); if (ret < 0) { mutex_unlock(&fs_info->tree_log_mutex); mutex_unlock(&fs_info->reloc_mutex); goto scrub_continue; } - ret = commit_cowonly_roots(trans, fs_info); + ret = commit_cowonly_roots(trans); if (ret) { mutex_unlock(&fs_info->tree_log_mutex); mutex_unlock(&fs_info->reloc_mutex); @@ -2229,7 +2224,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) list_add_tail(&fs_info->chunk_root->dirty_list, &cur_trans->switch_commits); - switch_commit_roots(cur_trans, fs_info); + switch_commit_roots(cur_trans); ASSERT(list_empty(&cur_trans->dirty_bgs)); ASSERT(list_empty(&cur_trans->io_bgs)); @@ -2241,7 +2236,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) sizeof(*fs_info->super_copy)); btrfs_update_commit_device_size(fs_info); - btrfs_update_commit_device_bytes_used(fs_info, cur_trans); + btrfs_update_commit_device_bytes_used(cur_trans); clear_bit(BTRFS_FS_LOG1_ERR, &fs_info->flags); clear_bit(BTRFS_FS_LOG2_ERR, &fs_info->flags); @@ -2256,7 +2251,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) wake_up(&fs_info->transaction_wait); - ret = btrfs_write_and_wait_transaction(trans, fs_info); + ret = btrfs_write_and_wait_transaction(trans); if (ret) { btrfs_handle_fs_error(fs_info, ret, "Error while writing out transaction"); @@ -2273,7 +2268,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) if (ret) goto scrub_continue; - btrfs_finish_extent_commit(trans, fs_info); + btrfs_finish_extent_commit(trans); if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &cur_trans->flags)) btrfs_clear_space_info_full(fs_info); @@ -2319,13 +2314,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) scrub_continue: btrfs_scrub_continue(fs_info); cleanup_transaction: - btrfs_trans_release_metadata(trans, fs_info); + btrfs_trans_release_metadata(trans); btrfs_trans_release_chunk_metadata(trans); trans->block_rsv = NULL; btrfs_warn(fs_info, "Skipping commit of aborted transaction."); if (current->journal_info == trans) current->journal_info = NULL; - cleanup_transaction(trans, trans->root, ret); + cleanup_transaction(trans, ret); return ret; } |