From 2f3014fc2ab1e25c36531e19164c48182c168995 Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Thu, 25 Mar 2010 17:22:45 +0000 Subject: Btrfs: remove duplicate include in ioctl.c fs/btrfs/ioctl.c: ctree.h is included more than once. Signed-off-by: Andrea Gelmini Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 2845c6ceecd2..5c9f8b30608c 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -48,7 +48,6 @@ #include "print-tree.h" #include "volumes.h" #include "locking.h" -#include "ctree.h" /* Mask out flags that are inappropriate for the given type of inode. */ static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) -- cgit v1.2.3-70-g09d2 From 90d2c51dbb4db05c040bc7db264bb7ab35e35455 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 25 Mar 2010 12:37:12 +0000 Subject: Btrfs: add NULL check for do_walk_down() btrfs_find_create_tree_block() may return NULL, so we must check the returned value, or we will access a NULL pointer. Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1727b26fb194..503a18eaef52 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5205,6 +5205,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, next = btrfs_find_tree_block(root, bytenr, blocksize); if (!next) { next = btrfs_find_create_tree_block(root, bytenr, blocksize); + if (!next) + return -ENOMEM; reada = 1; } btrfs_tree_lock(next); @@ -5417,7 +5419,8 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, if (ret > 0) { path->slots[level]++; continue; - } + } else if (ret < 0) + return ret; level = wc->level; } return 0; -- cgit v1.2.3-70-g09d2 From 471fa17dff556ad38caf26de097c0630530d8cbe Mon Sep 17 00:00:00 2001 From: Zhao Lei Date: Thu, 25 Mar 2010 12:35:14 +0000 Subject: Btrfs: Remove unnecessary finish_wait() in wait_current_trans() We only need to call finish_wait() after wait loop. By the way, this patch makes code of waiting loop similar to example in wait.h(no functional change) Signed-off-by: Zhao Lei Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/transaction.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 2d654c1c794d..43054285f638 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -147,18 +147,13 @@ static void wait_current_trans(struct btrfs_root *root) while (1) { prepare_to_wait(&root->fs_info->transaction_wait, &wait, TASK_UNINTERRUPTIBLE); - if (cur_trans->blocked) { - mutex_unlock(&root->fs_info->trans_mutex); - schedule(); - mutex_lock(&root->fs_info->trans_mutex); - finish_wait(&root->fs_info->transaction_wait, - &wait); - } else { - finish_wait(&root->fs_info->transaction_wait, - &wait); + if (!cur_trans->blocked) break; - } + mutex_unlock(&root->fs_info->trans_mutex); + schedule(); + mutex_lock(&root->fs_info->trans_mutex); } + finish_wait(&root->fs_info->transaction_wait, &wait); put_transaction(cur_trans); } } -- cgit v1.2.3-70-g09d2 From ab59381ea43f81c977cbd09add26950aaf6cb9fe Mon Sep 17 00:00:00 2001 From: Zhao Lei Date: Thu, 25 Mar 2010 12:34:49 +0000 Subject: Btrfs: Add error handle for btrfs_search_slot() in btrfs_read_chunk_tree() We need to check return value of btrfs_search_slot() in btrfs_read_chunk_tree() and do corresponding error handing. Signed-off-by: Zhao Lei Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/btrfs') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 9df8e3f1ccab..c6c80093eac0 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3389,6 +3389,8 @@ int btrfs_read_chunk_tree(struct btrfs_root *root) key.type = 0; again: ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto error; while (1) { leaf = path->nodes[0]; slot = path->slots[0]; -- cgit v1.2.3-70-g09d2 From f3eae7e8a5ed124bbc781e18ea10c21856017322 Mon Sep 17 00:00:00 2001 From: Zhao Lei Date: Thu, 25 Mar 2010 12:32:59 +0000 Subject: Btrfs: Simplify num_stripes's calculation logical for __btrfs_alloc_chunk() We can use this simple method to make source more readable. Signed-off-by: Zhao Lei Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c6c80093eac0..bf3bec3e4130 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2198,9 +2198,9 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, min_stripes = 2; } if (type & (BTRFS_BLOCK_GROUP_RAID1)) { - num_stripes = min_t(u64, 2, fs_devices->rw_devices); - if (num_stripes < 2) + if (fs_devices->rw_devices < 2) return -ENOSPC; + num_stripes = 2; min_stripes = 2; } if (type & (BTRFS_BLOCK_GROUP_RAID10)) { -- cgit v1.2.3-70-g09d2 From 683be16eb6e19a35aca2473668652259ed074094 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 20 Mar 2010 11:24:48 +0000 Subject: Btrfs: dereferencing freed memory The original code dereferenced range on the next line. Signed-off-by: Dan Carpenter Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 5c9f8b30608c..874d36e5f167 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1374,6 +1374,7 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) sizeof(*range))) { ret = -EFAULT; kfree(range); + goto out; } /* compression requires us to start the IO */ if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { -- cgit v1.2.3-70-g09d2 From c2b96929e2ca6914cf4a66cd8fe2a34c4a98277f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 20 Mar 2010 11:24:15 +0000 Subject: Btrfs: handle kmalloc() failure in inode lookup ioctl Return -ENOMEM if kmalloc() fails. Signed-off-by: Dan Carpenter Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/btrfs') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 874d36e5f167..74d89133f768 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1211,6 +1211,9 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file, return -EPERM; args = kmalloc(sizeof(*args), GFP_KERNEL); + if (!args) + return -ENOMEM; + if (copy_from_user(args, argp, sizeof(*args))) { kfree(args); return -EFAULT; -- cgit v1.2.3-70-g09d2 From 6cf8bfbf5e88edfb09a2bf0631a067060f534592 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 20 Mar 2010 11:22:10 +0000 Subject: Btrfs: check btrfs_get_extent return for IS_ERR() btrfs_get_extent() never returns NULL, only a valid pointer or ERR_PTR() Signed-off-by: Dan Carpenter Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 74d89133f768..2b7dd88fc54f 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -510,7 +510,7 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len, em = btrfs_get_extent(inode, NULL, 0, start, len, 0); unlock_extent(io_tree, start, start + len - 1, GFP_NOFS); - if (!em) + if (IS_ERR(em)) return 0; } -- cgit v1.2.3-70-g09d2 From 1b1d1f6625e517a08640ddb4b8f8a0e025243fe3 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 19 Mar 2010 20:49:55 +0000 Subject: Btrfs: fail to mount if we have problems reading the block groups We don't actually check the return value of btrfs_read_block_groups, so we can possibly succeed to mount, but then fail to say read the superblock xattr for selinux which will cause the vfs code to deactivate the super. This is a problem because in find_free_extent we just assume that we will find the right space_info for the allocation we want. But if we failed to read the block groups, we won't have setup any space_info's, and we'll hit a NULL pointer deref in find_free_extent. This patch fixes that problem by checking the return value of btrfs_read_block_groups, and failing out properly. I've also added a check in find_free_extent so if for some reason we don't find an appropriate space_info, we just return -ENOSPC. Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 11 ++++++++--- fs/btrfs/extent-tree.c | 5 ++++- 2 files changed, 12 insertions(+), 4 deletions(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 11d0ad30e203..5ae1c0fcfce0 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1922,7 +1922,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, csum_root->track_dirty = 1; - btrfs_read_block_groups(extent_root); + ret = btrfs_read_block_groups(extent_root); + if (ret) { + printk(KERN_ERR "Failed to read block groups: %d\n", ret); + goto fail_block_groups; + } fs_info->generation = generation; fs_info->last_trans_committed = generation; @@ -1932,7 +1936,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, "btrfs-cleaner"); if (IS_ERR(fs_info->cleaner_kthread)) - goto fail_csum_root; + goto fail_block_groups; fs_info->transaction_kthread = kthread_run(transaction_kthread, tree_root, @@ -2020,7 +2024,8 @@ fail_cleaner: filemap_write_and_wait(fs_info->btree_inode->i_mapping); invalidate_inode_pages2(fs_info->btree_inode->i_mapping); -fail_csum_root: +fail_block_groups: + btrfs_free_block_groups(fs_info); free_extent_buffer(csum_root->node); free_extent_buffer(csum_root->commit_root); fail_dev_root: diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 503a18eaef52..4c910359c807 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4170,6 +4170,10 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, ins->offset = 0; space_info = __find_space_info(root->fs_info, data); + if (!space_info) { + printk(KERN_ERR "No space info for %d\n", data); + return -ENOSPC; + } if (orig_root->ref_cows || empty_size) allowed_chunk_alloc = 1; @@ -7372,7 +7376,6 @@ static int find_first_block_group(struct btrfs_root *root, } path->slots[0]++; } - ret = -ENOENT; out: return ret; } -- cgit v1.2.3-70-g09d2 From 287a0ab91d25ca982f895a76402e5893b47ed7a6 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 19 Mar 2010 18:07:23 +0000 Subject: Btrfs: kill max_extent mount option As Yan pointed out, theres not much reason for all this complicated math to account for file extents being split up into max_extent chunks, since they are likely to all end up in the same leaf anyway. Since there isn't much reason to use max_extent, just remove the option altogether so we have one less thing we need to test. Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 - fs/btrfs/disk-io.c | 1 - fs/btrfs/extent-tree.c | 4 ++-- fs/btrfs/inode.c | 59 ++++--------------------------------------------- fs/btrfs/ordered-data.c | 6 +++-- fs/btrfs/super.c | 23 +++---------------- 6 files changed, 13 insertions(+), 81 deletions(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 11115847d875..ae8c40922c54 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -834,7 +834,6 @@ struct btrfs_fs_info { u64 last_trans_log_full_commit; u64 open_ioctl_trans; unsigned long mount_opt; - u64 max_extent; u64 max_inline; u64 alloc_start; struct btrfs_transaction *running_transaction; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5ae1c0fcfce0..6632e5c4c8bb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1634,7 +1634,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, atomic_set(&fs_info->async_submit_draining, 0); atomic_set(&fs_info->nr_async_bios, 0); fs_info->sb = sb; - fs_info->max_extent = (u64)-1; fs_info->max_inline = 8192 * 1024; fs_info->metadata_ratio = 0; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4c910359c807..5f6bc283c0c8 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2846,7 +2846,7 @@ int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root, } spin_unlock(&BTRFS_I(inode)->accounting_lock); - BTRFS_I(inode)->reserved_extents--; + BTRFS_I(inode)->reserved_extents -= num_items; BUG_ON(BTRFS_I(inode)->reserved_extents < 0); if (meta_sinfo->bytes_delalloc < num_bytes) { @@ -3111,7 +3111,7 @@ again: return -ENOSPC; } - BTRFS_I(inode)->reserved_extents++; + BTRFS_I(inode)->reserved_extents += num_items; check_force_delalloc(meta_sinfo); spin_unlock(&meta_sinfo->lock); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2a337a09c650..a85b90c86cb0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -796,7 +796,7 @@ static noinline int cow_file_range(struct inode *inode, while (disk_num_bytes > 0) { unsigned long op; - cur_alloc_size = min(disk_num_bytes, root->fs_info->max_extent); + cur_alloc_size = disk_num_bytes; ret = btrfs_reserve_extent(trans, root, cur_alloc_size, root->sectorsize, 0, alloc_hint, (u64)-1, &ins, 1); @@ -1227,30 +1227,9 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, static int btrfs_split_extent_hook(struct inode *inode, struct extent_state *orig, u64 split) { - struct btrfs_root *root = BTRFS_I(inode)->root; - u64 size; - if (!(orig->state & EXTENT_DELALLOC)) return 0; - size = orig->end - orig->start + 1; - if (size > root->fs_info->max_extent) { - u64 num_extents; - u64 new_size; - - new_size = orig->end - split + 1; - num_extents = div64_u64(size + root->fs_info->max_extent - 1, - root->fs_info->max_extent); - - /* - * if we break a large extent up then leave oustanding_extents - * be, since we've already accounted for the large extent. - */ - if (div64_u64(new_size + root->fs_info->max_extent - 1, - root->fs_info->max_extent) < num_extents) - return 0; - } - spin_lock(&BTRFS_I(inode)->accounting_lock); BTRFS_I(inode)->outstanding_extents++; spin_unlock(&BTRFS_I(inode)->accounting_lock); @@ -1268,38 +1247,10 @@ static int btrfs_merge_extent_hook(struct inode *inode, struct extent_state *new, struct extent_state *other) { - struct btrfs_root *root = BTRFS_I(inode)->root; - u64 new_size, old_size; - u64 num_extents; - /* not delalloc, ignore it */ if (!(other->state & EXTENT_DELALLOC)) return 0; - old_size = other->end - other->start + 1; - if (new->start < other->start) - new_size = other->end - new->start + 1; - else - new_size = new->end - other->start + 1; - - /* we're not bigger than the max, unreserve the space and go */ - if (new_size <= root->fs_info->max_extent) { - spin_lock(&BTRFS_I(inode)->accounting_lock); - BTRFS_I(inode)->outstanding_extents--; - spin_unlock(&BTRFS_I(inode)->accounting_lock); - return 0; - } - - /* - * If we grew by another max_extent, just return, we want to keep that - * reserved amount. - */ - num_extents = div64_u64(old_size + root->fs_info->max_extent - 1, - root->fs_info->max_extent); - if (div64_u64(new_size + root->fs_info->max_extent - 1, - root->fs_info->max_extent) > num_extents) - return 0; - spin_lock(&BTRFS_I(inode)->accounting_lock); BTRFS_I(inode)->outstanding_extents--; spin_unlock(&BTRFS_I(inode)->accounting_lock); @@ -1328,6 +1279,7 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, BTRFS_I(inode)->outstanding_extents++; spin_unlock(&BTRFS_I(inode)->accounting_lock); btrfs_delalloc_reserve_space(root, inode, end - start + 1); + spin_lock(&root->fs_info->delalloc_lock); BTRFS_I(inode)->delalloc_bytes += end - start + 1; root->fs_info->delalloc_bytes += end - start + 1; @@ -1356,6 +1308,7 @@ static int btrfs_clear_bit_hook(struct inode *inode, if (bits & EXTENT_DO_ACCOUNTING) { spin_lock(&BTRFS_I(inode)->accounting_lock); + WARN_ON(!BTRFS_I(inode)->outstanding_extents); BTRFS_I(inode)->outstanding_extents--; spin_unlock(&BTRFS_I(inode)->accounting_lock); btrfs_unreserve_metadata_for_delalloc(root, inode, 1); @@ -5384,7 +5337,6 @@ free: void btrfs_drop_inode(struct inode *inode) { struct btrfs_root *root = BTRFS_I(inode)->root; - if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0) generic_delete_inode(inode); else @@ -5788,18 +5740,15 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end, struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_key ins; - u64 alloc_size; u64 cur_offset = start; u64 num_bytes = end - start; int ret = 0; u64 i_size; while (num_bytes > 0) { - alloc_size = min(num_bytes, root->fs_info->max_extent); - trans = btrfs_start_transaction(root, 1); - ret = btrfs_reserve_extent(trans, root, alloc_size, + ret = btrfs_reserve_extent(trans, root, num_bytes, root->sectorsize, 0, alloc_hint, (u64)-1, &ins, 1); if (ret) { diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index a8ffecd0b491..5c99882b9763 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -303,6 +303,7 @@ static int __btrfs_remove_ordered_extent(struct inode *inode, struct btrfs_ordered_extent *entry) { struct btrfs_ordered_inode_tree *tree; + struct btrfs_root *root = BTRFS_I(inode)->root; struct rb_node *node; tree = &BTRFS_I(inode)->ordered_tree; @@ -312,12 +313,13 @@ static int __btrfs_remove_ordered_extent(struct inode *inode, set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); spin_lock(&BTRFS_I(inode)->accounting_lock); + WARN_ON(!BTRFS_I(inode)->outstanding_extents); BTRFS_I(inode)->outstanding_extents--; spin_unlock(&BTRFS_I(inode)->accounting_lock); btrfs_unreserve_metadata_for_delalloc(BTRFS_I(inode)->root, inode, 1); - spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); + spin_lock(&root->fs_info->ordered_extent_lock); list_del_init(&entry->root_extent_list); /* @@ -329,7 +331,7 @@ static int __btrfs_remove_ordered_extent(struct inode *inode, !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) { list_del_init(&BTRFS_I(inode)->ordered_operations); } - spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); + spin_unlock(&root->fs_info->ordered_extent_lock); return 0; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 9ac612e6ca60..d11b12fc086b 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -64,10 +64,9 @@ static void btrfs_put_super(struct super_block *sb) enum { Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum, - Opt_nodatacow, Opt_max_extent, Opt_max_inline, Opt_alloc_start, - Opt_nobarrier, Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_thread_pool, - Opt_noacl, Opt_compress, Opt_compress_force, Opt_notreelog, Opt_ratio, - Opt_flushoncommit, + Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, + Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, + Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, Opt_err, }; @@ -79,7 +78,6 @@ static match_table_t tokens = { {Opt_nodatasum, "nodatasum"}, {Opt_nodatacow, "nodatacow"}, {Opt_nobarrier, "nobarrier"}, - {Opt_max_extent, "max_extent=%s"}, {Opt_max_inline, "max_inline=%s"}, {Opt_alloc_start, "alloc_start=%s"}, {Opt_thread_pool, "thread_pool=%d"}, @@ -188,18 +186,6 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) info->thread_pool_size); } break; - case Opt_max_extent: - num = match_strdup(&args[0]); - if (num) { - info->max_extent = memparse(num, NULL); - kfree(num); - - info->max_extent = max_t(u64, - info->max_extent, root->sectorsize); - printk(KERN_INFO "btrfs: max_extent at %llu\n", - (unsigned long long)info->max_extent); - } - break; case Opt_max_inline: num = match_strdup(&args[0]); if (num) { @@ -529,9 +515,6 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_puts(seq, ",nodatacow"); if (btrfs_test_opt(root, NOBARRIER)) seq_puts(seq, ",nobarrier"); - if (info->max_extent != (u64)-1) - seq_printf(seq, ",max_extent=%llu", - (unsigned long long)info->max_extent); if (info->max_inline != 8192 * 1024) seq_printf(seq, ",max_inline=%llu", (unsigned long long)info->max_inline); -- cgit v1.2.3-70-g09d2 From 0cad8a1130f77c7c445e3298c0e3593b3c0ef439 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 17 Mar 2010 20:45:56 +0000 Subject: Btrfs: fix chunk allocate size calculation If the amount of free space left in a device is less than what we think should be the minimum size, just ignore the minimum size and use the amount we have. I ran into this running tests on a 600mb volume, the chunk allocator wouldn't let me allocate the last 52mb of the disk for data because we want to have at least 64mb chunks for data. This patch fixes that problem. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index bf3bec3e4130..9bf1f581b872 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2244,8 +2244,10 @@ again: do_div(calc_size, stripe_len); calc_size *= stripe_len; } + /* we don't want tiny stripes */ - calc_size = max_t(u64, min_stripe_size, calc_size); + if (!looped) + calc_size = max_t(u64, min_stripe_size, calc_size); do_div(calc_size, stripe_len); calc_size *= stripe_len; -- cgit v1.2.3-70-g09d2 From 28ecb60906e86e74e9ad4ac7e0218d8631e73a94 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 17 Mar 2010 13:31:04 +0000 Subject: Btrfs: use add_to_page_cache_lru, use __page_cache_alloc Pagecache pages should be allocated with __page_cache_alloc, so they obey pagecache memory policies. add_to_page_cache_lru is exported, so it should be used. Benefits over using a private pagevec: neater code, 128 bytes fewer stack used, percpu lru ordering is preserved, and finally don't need to flush pagevec before returning so batching may be shared with other LRU insertions. Signed-off-by: Nick Piggin : Signed-off-by: Chris Mason --- fs/btrfs/compression.c | 22 ++++------------------ fs/btrfs/extent_io.c | 15 +-------------- 2 files changed, 5 insertions(+), 32 deletions(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 28b92a7218ab..1d54c5308df5 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -31,7 +31,6 @@ #include #include #include -#include #include "compat.h" #include "ctree.h" #include "disk-io.h" @@ -445,7 +444,6 @@ static noinline int add_ra_bio_pages(struct inode *inode, unsigned long nr_pages = 0; struct extent_map *em; struct address_space *mapping = inode->i_mapping; - struct pagevec pvec; struct extent_map_tree *em_tree; struct extent_io_tree *tree; u64 end; @@ -461,7 +459,6 @@ static noinline int add_ra_bio_pages(struct inode *inode, end_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; - pagevec_init(&pvec, 0); while (last_offset < compressed_end) { page_index = last_offset >> PAGE_CACHE_SHIFT; @@ -478,26 +475,17 @@ static noinline int add_ra_bio_pages(struct inode *inode, goto next; } - page = alloc_page(mapping_gfp_mask(mapping) & ~__GFP_FS); + page = __page_cache_alloc(mapping_gfp_mask(mapping) & + ~__GFP_FS); if (!page) break; - page->index = page_index; - /* - * what we want to do here is call add_to_page_cache_lru, - * but that isn't exported, so we reproduce it here - */ - if (add_to_page_cache(page, mapping, - page->index, GFP_NOFS)) { + if (add_to_page_cache_lru(page, mapping, page_index, + GFP_NOFS)) { page_cache_release(page); goto next; } - /* open coding of lru_cache_add, also not exported */ - page_cache_get(page); - if (!pagevec_add(&pvec, page)) - __pagevec_lru_add_file(&pvec); - end = last_offset + PAGE_CACHE_SIZE - 1; /* * at this point, we have a locked page in the page cache @@ -551,8 +539,6 @@ static noinline int add_ra_bio_pages(struct inode *inode, next: last_offset += PAGE_CACHE_SIZE; } - if (pagevec_count(&pvec)) - __pagevec_lru_add_file(&pvec); return 0; } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c99121ac5d6b..fc742e59815e 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2679,33 +2679,20 @@ int extent_readpages(struct extent_io_tree *tree, { struct bio *bio = NULL; unsigned page_idx; - struct pagevec pvec; unsigned long bio_flags = 0; - pagevec_init(&pvec, 0); for (page_idx = 0; page_idx < nr_pages; page_idx++) { struct page *page = list_entry(pages->prev, struct page, lru); prefetchw(&page->flags); list_del(&page->lru); - /* - * what we want to do here is call add_to_page_cache_lru, - * but that isn't exported, so we reproduce it here - */ - if (!add_to_page_cache(page, mapping, + if (!add_to_page_cache_lru(page, mapping, page->index, GFP_KERNEL)) { - - /* open coding of lru_cache_add, also not exported */ - page_cache_get(page); - if (!pagevec_add(&pvec, page)) - __pagevec_lru_add_file(&pvec); __extent_read_full_page(tree, page, get_extent, &bio, 0, &bio_flags); } page_cache_release(page); } - if (pagevec_count(&pvec)) - __pagevec_lru_add_file(&pvec); BUG_ON(!list_empty(pages)); if (bio) submit_one_bio(READ, bio, 0, bio_flags); -- cgit v1.2.3-70-g09d2 From b5cb160084fad438c513d0952849e597ffe9e3d9 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 12 Mar 2010 19:28:18 +0000 Subject: Btrfs: fix small race with delalloc flushing waitqueue's Everytime we start a new flushing thread, we init the waitqueue if there isn't a flushing thread running. The problem with this is we check space_info->flushing, which we clear right before doing a wake_up on the flushing waitqueue, which causes problems if we init the waitqueue in the middle of clearing the flushing flagh and calling wake_up. This is hard to hit, but the code is wrong anyway, so init the flushing/allocating waitqueue when creating the space info and let it be. I haven't seen the panic since I've been using this patch. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5f6bc283c0c8..101041d4d2b2 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2676,6 +2676,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, INIT_LIST_HEAD(&found->block_groups); init_rwsem(&found->groups_sem); + init_waitqueue_head(&found->flush_wait); + init_waitqueue_head(&found->allocate_wait); spin_lock_init(&found->lock); found->flags = flags; found->total_bytes = total_bytes; @@ -2944,12 +2946,10 @@ static void flush_delalloc(struct btrfs_root *root, spin_lock(&info->lock); - if (!info->flushing) { + if (!info->flushing) info->flushing = 1; - init_waitqueue_head(&info->flush_wait); - } else { + else wait = true; - } spin_unlock(&info->lock); @@ -3011,7 +3011,6 @@ static int maybe_allocate_chunk(struct btrfs_root *root, if (!info->allocating_chunk) { info->force_alloc = 1; info->allocating_chunk = 1; - init_waitqueue_head(&info->allocate_wait); } else { wait = true; } -- cgit v1.2.3-70-g09d2 From 6bdb72ded1e281cd8844918c39d00cdd0e59f655 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 15 Mar 2010 17:27:13 +0000 Subject: Btrfs: create snapshot references in same commit as snapshot This creates the reference to a new snapshot in the same commit as the snapshot itself. This avoids the need for a second commit in order for a snapshot to be persistent, and also avoids the problem of "leaking" a new snapshot tree root if the host crashes before the second commit takes place. It is not at all clear to me why it wasn't always done this way. If there is still a reason for the two-stage {create,finish}_pending_snapshots() approach I'm missing something! :) I've been running this for a couple weeks under pretty heavy usage (a few snapshots per minute) without obvious problems. Signed-off-by: Sage Weil Signed-off-by: Chris Mason --- fs/btrfs/transaction.c | 97 ++++++++++++++++---------------------------------- 1 file changed, 31 insertions(+), 66 deletions(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 43054285f638..01cebd661997 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -755,10 +755,17 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root_item *new_root_item; struct btrfs_root *tree_root = fs_info->tree_root; struct btrfs_root *root = pending->root; + struct btrfs_root *parent_root; + struct inode *parent_inode; struct extent_buffer *tmp; struct extent_buffer *old; int ret; u64 objectid; + int namelen; + u64 index = 0; + + parent_inode = pending->dentry->d_parent->d_inode; + parent_root = BTRFS_I(parent_inode)->root; new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); if (!new_root_item) { @@ -769,79 +776,59 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, if (ret) goto fail; - record_root_in_trans(trans, root); - btrfs_set_root_last_snapshot(&root->root_item, trans->transid); - memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); - key.objectid = objectid; /* record when the snapshot was created in key.offset */ key.offset = trans->transid; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); - old = btrfs_lock_root_node(root); - btrfs_cow_block(trans, root, old, NULL, 0, &old); - btrfs_set_lock_blocking(old); - - btrfs_copy_root(trans, root, old, &tmp, objectid); - btrfs_tree_unlock(old); - free_extent_buffer(old); - - btrfs_set_root_node(new_root_item, tmp); - ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, - new_root_item); - btrfs_tree_unlock(tmp); - free_extent_buffer(tmp); - if (ret) - goto fail; - - key.offset = (u64)-1; memcpy(&pending->root_key, &key, sizeof(key)); -fail: - kfree(new_root_item); - return ret; -} - -static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info, - struct btrfs_pending_snapshot *pending) -{ - int ret; - int namelen; - u64 index = 0; - struct btrfs_trans_handle *trans; - struct inode *parent_inode; - struct btrfs_root *parent_root; - - parent_inode = pending->dentry->d_parent->d_inode; - parent_root = BTRFS_I(parent_inode)->root; - trans = btrfs_join_transaction(parent_root, 1); + pending->root_key.offset = (u64)-1; + record_root_in_trans(trans, parent_root); /* * insert the directory item */ namelen = strlen(pending->name); ret = btrfs_set_inode_index(parent_inode, &index); + BUG_ON(ret); ret = btrfs_insert_dir_item(trans, parent_root, pending->name, namelen, parent_inode->i_ino, &pending->root_key, BTRFS_FT_DIR, index); - - if (ret) - goto fail; + BUG_ON(ret); btrfs_i_size_write(parent_inode, parent_inode->i_size + namelen * 2); ret = btrfs_update_inode(trans, parent_root, parent_inode); BUG_ON(ret); + record_root_in_trans(trans, root); + btrfs_set_root_last_snapshot(&root->root_item, trans->transid); + memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); + + old = btrfs_lock_root_node(root); + btrfs_cow_block(trans, root, old, NULL, 0, &old); + btrfs_set_lock_blocking(old); + + btrfs_copy_root(trans, root, old, &tmp, objectid); + btrfs_tree_unlock(old); + free_extent_buffer(old); + + btrfs_set_root_node(new_root_item, tmp); + ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, + new_root_item); + BUG_ON(ret); + btrfs_tree_unlock(tmp); + free_extent_buffer(tmp); + ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root, pending->root_key.objectid, parent_root->root_key.objectid, parent_inode->i_ino, index, pending->name, namelen); - BUG_ON(ret); fail: - btrfs_end_transaction(trans, fs_info->fs_root); + kfree(new_root_item); return ret; } @@ -862,25 +849,6 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans, return 0; } -static noinline int finish_pending_snapshots(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) -{ - struct btrfs_pending_snapshot *pending; - struct list_head *head = &trans->transaction->pending_snapshots; - int ret; - - while (!list_empty(head)) { - pending = list_entry(head->next, - struct btrfs_pending_snapshot, list); - ret = finish_pending_snapshot(fs_info, pending); - BUG_ON(ret); - list_del(&pending->list); - kfree(pending->name); - kfree(pending); - } - return 0; -} - static void update_super_roots(struct btrfs_root *root) { struct btrfs_root_item *root_item; @@ -1092,9 +1060,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_finish_extent_commit(trans, root); - /* do the directory inserts of any pending snapshot creations */ - finish_pending_snapshots(trans, root->fs_info); - mutex_lock(&root->fs_info->trans_mutex); cur_trans->commit_done = 1; -- cgit v1.2.3-70-g09d2 From 109f6aef5fc436f355ad027f4d97bd696df2049a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 2 Apr 2010 09:20:18 -0400 Subject: Btrfs: add check for changed leaves in setup_leaf_for_split setup_leaf_for_split needs to drop the path and search again, and has checks to see if the item we want to split changed size. But, it misses the case where the leaf changed and now has enough room for the item we want to insert. This adds an extra check to make sure the leaf really needs splitting before we call btrfs_split_leaf(), which keeps us from trying to split a leaf with a single item. btrfs_split_leaf() will blindly split the single item leaf, leaving us with one good leaf and one empty leaf and then a crash. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/btrfs') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index c4bc570a396e..babf7fbaec84 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -3040,6 +3040,10 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans, if (ret > 0 || item_size != btrfs_item_size_nr(leaf, path->slots[0])) goto err; + /* the leaf has changed, it now has room. return now */ + if (btrfs_leaf_free_space(root, path->nodes[0]) >= ins_len) + goto err; + if (key.type == BTRFS_EXTENT_DATA_KEY) { fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); -- cgit v1.2.3-70-g09d2