From c871b0f2fd27e7f9097d507f47de5270f88003b9 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Mon, 6 Jun 2016 12:01:23 -0700 Subject: Btrfs: check if extent buffer is aligned to sectorsize Thanks to fuzz testing, we can pass an invalid bytenr to extent buffer via alloc_extent_buffer(). An unaligned eb can have more pages than it should have, which ends up extent buffer's leak or some corrupted content in extent buffer. This adds a warning to let us quickly know what was happening. Now that alloc_extent_buffer() no more returns NULL, this changes its caller and callers of its caller to match with the new error handling. Signed-off-by: Liu Bo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 689d25ac6a68..5439e85c4813 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -8016,8 +8016,9 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf; buf = btrfs_find_create_tree_block(root, bytenr); - if (!buf) - return ERR_PTR(-ENOMEM); + if (IS_ERR(buf)) + return buf; + btrfs_set_header_generation(buf, trans->transid); btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level); btrfs_tree_lock(buf); @@ -8659,8 +8660,9 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, next = btrfs_find_tree_block(root->fs_info, bytenr); if (!next) { next = btrfs_find_create_tree_block(root, bytenr); - if (!next) - return -ENOMEM; + if (IS_ERR(next)) + return PTR_ERR(next); + btrfs_set_buffer_lockdep_class(root->root_key.objectid, next, level - 1); reada = 1; -- cgit v1.2.3-70-g09d2 From 64c12921e11b3a0c10d088606e328c58e29274d8 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Wed, 8 Jun 2016 00:36:38 -0400 Subject: btrfs: account for non-CoW'd blocks in btrfs_abort_transaction The test for !trans->blocks_used in btrfs_abort_transaction is insufficient to determine whether it's safe to drop the transaction handle on the floor. btrfs_cow_block, informed by should_cow_block, can return blocks that have already been CoW'd in the current transaction. trans->blocks_used is only incremented for new block allocations. If an operation overlaps the blocks in the current transaction entirely and must abort the transaction, we'll happily let it clean up the trans handle even though it may have modified the blocks and will commit an incomplete operation. In the long-term, I'd like to do closer tracking of when the fs is actually modified so we can still recover as gracefully as possible, but that approach will need some discussion. In the short term, since this is the only code using trans->blocks_used, let's just switch it to a bool indicating whether any blocks were used and set it when should_cow_block returns false. Cc: stable@vger.kernel.org # 3.4+ Signed-off-by: Jeff Mahoney Reviewed-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 5 ++++- fs/btrfs/extent-tree.c | 2 +- fs/btrfs/super.c | 2 +- fs/btrfs/transaction.h | 2 +- 4 files changed, 7 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 827c949fa4bc..6276add8538a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1554,6 +1554,7 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, trans->transid, root->fs_info->generation); if (!should_cow_block(trans, root, buf)) { + trans->dirty = true; *cow_ret = buf; return 0; } @@ -2777,8 +2778,10 @@ again: * then we don't want to set the path blocking, * so we test it here */ - if (!should_cow_block(trans, root, b)) + if (!should_cow_block(trans, root, b)) { + trans->dirty = true; goto cow_done; + } /* * must have write locks on this node and the diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5439e85c4813..29e5d000bbee 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -8045,7 +8045,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, set_extent_dirty(&trans->transaction->dirty_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); } - trans->blocks_used++; + trans->dirty = true; /* this returns a buffer locked for blocking */ return buf; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 4339b6613f19..bf70d33b5791 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -235,7 +235,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, trans->aborted = errno; /* Nothing used. The other threads that have joined this * transaction may be able to continue. */ - if (!trans->blocks_used && list_empty(&trans->new_bgs)) { + if (!trans->dirty && list_empty(&trans->new_bgs)) { const char *errstr; errstr = btrfs_decode_error(errno); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 9fe0ec2bf0fe..c5abee4f01ad 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -110,7 +110,6 @@ struct btrfs_trans_handle { u64 chunk_bytes_reserved; unsigned long use_count; unsigned long blocks_reserved; - unsigned long blocks_used; unsigned long delayed_ref_updates; struct btrfs_transaction *transaction; struct btrfs_block_rsv *block_rsv; @@ -121,6 +120,7 @@ struct btrfs_trans_handle { bool can_flush_pending_bgs; bool reloc_reserved; bool sync; + bool dirty; unsigned int type; /* * this root is only needed to validate that the root passed to -- cgit v1.2.3-70-g09d2 From 31b9655f439a26856edca0f3f8daa368a61f16d5 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 11 Apr 2016 17:37:40 -0400 Subject: Btrfs: track transid for delayed ref flushing Using the offwakecputime bpf script I noticed most of our time was spent waiting on the delayed ref throttling. This is what is supposed to happen, but sometimes the transaction can commit and then we're waiting for throttling that doesn't matter anymore. So change this stuff to be a little smarter by tracking the transid we were in when we initiated the throttling. If the transaction we get is different then we can just bail out. This resulted in a 50% speedup in my fs_mark test, and reduced the amount of time spent throttling by 60 seconds over the entire run (which is about 30 minutes). Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 +- fs/btrfs/extent-tree.c | 15 ++++++++++++--- fs/btrfs/inode.c | 1 + fs/btrfs/transaction.c | 3 ++- 4 files changed, 16 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 101c3cfd3f7c..4274a7bfdaed 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2518,7 +2518,7 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache); int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, struct btrfs_root *root, unsigned long count); int btrfs_async_run_delayed_refs(struct btrfs_root *root, - unsigned long count, int wait); + unsigned long count, u64 transid, int wait); int btrfs_lookup_data_extent(struct btrfs_root *root, u64 start, u64 len); int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 29e5d000bbee..ecfa52002363 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2835,6 +2835,7 @@ int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans, struct async_delayed_refs { struct btrfs_root *root; + u64 transid; int count; int error; int sync; @@ -2850,9 +2851,16 @@ static void delayed_ref_async_start(struct btrfs_work *work) async = container_of(work, struct async_delayed_refs, work); - trans = btrfs_join_transaction(async->root); + trans = btrfs_attach_transaction(async->root); if (IS_ERR(trans)) { - async->error = PTR_ERR(trans); + if (PTR_ERR(trans) != -ENOENT) + async->error = PTR_ERR(trans); + goto done; + } + + /* Don't bother flushing if we got into a different transaction */ + if (trans->transid != async->transid) { + btrfs_end_transaction(trans, async->root); goto done; } @@ -2876,7 +2884,7 @@ done: } int btrfs_async_run_delayed_refs(struct btrfs_root *root, - unsigned long count, int wait) + unsigned long count, u64 transid, int wait) { struct async_delayed_refs *async; int ret; @@ -2888,6 +2896,7 @@ int btrfs_async_run_delayed_refs(struct btrfs_root *root, async->root = root->fs_info->tree_root; async->count = count; async->error = 0; + async->transid = transid; if (wait) async->sync = 1; else diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index bb62418b8023..78582e339f33 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4558,6 +4558,7 @@ delete: BUG_ON(ret); if (btrfs_should_throttle_delayed_refs(trans, root)) btrfs_async_run_delayed_refs(root, + trans->transid, trans->delayed_ref_updates * 2, 0); if (be_nice) { if (truncate_space_check(trans, root, diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 765845742fde..948aa186b353 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -818,6 +818,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, { struct btrfs_transaction *cur_trans = trans->transaction; struct btrfs_fs_info *info = root->fs_info; + u64 transid = trans->transid; unsigned long cur = trans->delayed_ref_updates; int lock = (trans->type != TRANS_JOIN_NOLOCK); int err = 0; @@ -905,7 +906,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, kmem_cache_free(btrfs_trans_handle_cachep, trans); if (must_run_delayed_refs) { - btrfs_async_run_delayed_refs(root, cur, + btrfs_async_run_delayed_refs(root, cur, transid, must_run_delayed_refs == 1); } return err; -- cgit v1.2.3-70-g09d2 From 0f873eca82a0bee45f38862e0ea2ac7b1c2a31bd Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 27 Apr 2016 09:59:38 -0400 Subject: btrfs: fix deadlock in delayed_ref_async_start "Btrfs: track transid for delayed ref flushing" was deadlocking on btrfs_attach_transaction because its not safe to call from the async delayed ref start code. This commit brings back btrfs_join_transaction instead and checks for a blocked commit. Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index ecfa52002363..82b912a293ab 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2851,16 +2851,13 @@ static void delayed_ref_async_start(struct btrfs_work *work) async = container_of(work, struct async_delayed_refs, work); - trans = btrfs_attach_transaction(async->root); - if (IS_ERR(trans)) { - if (PTR_ERR(trans) != -ENOENT) - async->error = PTR_ERR(trans); + /* if the commit is already started, we don't need to wait here */ + if (btrfs_transaction_blocked(async->root->fs_info)) goto done; - } - /* Don't bother flushing if we got into a different transaction */ - if (trans->transid != async->transid) { - btrfs_end_transaction(trans, async->root); + trans = btrfs_join_transaction(async->root); + if (IS_ERR(trans)) { + async->error = PTR_ERR(trans); goto done; } @@ -2869,10 +2866,15 @@ static void delayed_ref_async_start(struct btrfs_work *work) * wait on delayed refs */ trans->sync = true; + + /* Don't bother flushing if we got into a different transaction */ + if (trans->transid > async->transid) + goto end; + ret = btrfs_run_delayed_refs(trans, async->root, async->count); if (ret) async->error = ret; - +end: ret = btrfs_end_transaction(trans, async->root); if (ret && !async->error) async->error = ret; -- cgit v1.2.3-70-g09d2