From 0040e606e35a0db80fc3fac04ccc7c7176a8e2b1 Mon Sep 17 00:00:00 2001 From: Christoph Jaeger Date: Sat, 12 Apr 2014 13:33:13 +0200 Subject: btrfs: fix use-after-free in mount_subvol() Pointer 'newargs' is used after the memory that it points to has already been freed. Picked up by Coverity - CID 1201425. Fixes: 0723a0473f ("btrfs: allow mounting btrfs subvolumes with different ro/rw options") Signed-off-by: Christoph Jaeger Signed-off-by: Chris Mason --- fs/btrfs/super.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 994c40955315..53bc3733d483 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1186,7 +1186,6 @@ static struct dentry *mount_subvol(const char *subvol_name, int flags, return ERR_PTR(-ENOMEM); mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name, newargs); - kfree(newargs); if (PTR_RET(mnt) == -EBUSY) { if (flags & MS_RDONLY) { @@ -1196,17 +1195,22 @@ static struct dentry *mount_subvol(const char *subvol_name, int flags, int r; mnt = vfs_kern_mount(&btrfs_fs_type, flags | MS_RDONLY, device_name, newargs); - if (IS_ERR(mnt)) + if (IS_ERR(mnt)) { + kfree(newargs); return ERR_CAST(mnt); + } r = btrfs_remount(mnt->mnt_sb, &flags, NULL); if (r < 0) { /* FIXME: release vfsmount mnt ??*/ + kfree(newargs); return ERR_PTR(r); } } } + kfree(newargs); + if (IS_ERR(mnt)) return ERR_CAST(mnt); -- cgit v1.2.3-70-g09d2 From c5f7d0bb29df2e1848a236e58e201daf5b4e0f21 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 15 Apr 2014 10:41:00 +0800 Subject: btrfs: Change the hole range to a more accurate value. Commit 3ac0d7b96a268a98bd474cab8bce3a9f125aaccf fixed the btrfs expanding write problem but the hole punched is sometimes too large for some iovec, which has unmapped data ranges. This patch will change to hole range to a more accurate value using the counts checked by the write check routines. Reported-by: Al Viro Signed-off-by: Qu Wenruo Signed-off-by: Chris Mason --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 23f6a9d9f104..e7e78fa9085e 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1783,7 +1783,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, start_pos = round_down(pos, root->sectorsize); if (start_pos > i_size_read(inode)) { /* Expand hole size to cover write data, preventing empty gap */ - end_pos = round_up(pos + iov->iov_len, root->sectorsize); + end_pos = round_up(pos + count, root->sectorsize); err = btrfs_cont_expand(inode, i_size_read(inode), end_pos); if (err) { mutex_unlock(&inode->i_mutex); -- cgit v1.2.3-70-g09d2 From 3f9e3df8da3c51649c15db249978a10f7374236a Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 15 Apr 2014 18:50:17 +0200 Subject: btrfs: replace error code from btrfs_drop_extents There's a case which clone does not handle and used to BUG_ON instead, (testcase xfstests/btrfs/035), now returns EINVAL. This error code is confusing to the ioctl caller, as it normally signifies errorneous arguments. Change it to ENOPNOTSUPP which allows a fall back to copy instead of clone. This does not affect the common reflink operation. Signed-off-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/file.c | 6 +++--- fs/btrfs/ioctl.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e7e78fa9085e..1eee3f79d75f 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -805,7 +805,7 @@ next_slot: if (start > key.offset && end < extent_end) { BUG_ON(del_nr > 0); if (extent_type == BTRFS_FILE_EXTENT_INLINE) { - ret = -EINVAL; + ret = -EOPNOTSUPP; break; } @@ -851,7 +851,7 @@ next_slot: */ if (start <= key.offset && end < extent_end) { if (extent_type == BTRFS_FILE_EXTENT_INLINE) { - ret = -EINVAL; + ret = -EOPNOTSUPP; break; } @@ -877,7 +877,7 @@ next_slot: if (start > key.offset && end >= extent_end) { BUG_ON(del_nr > 0); if (extent_type == BTRFS_FILE_EXTENT_INLINE) { - ret = -EINVAL; + ret = -EOPNOTSUPP; break; } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index f2e8fcc279a3..7b001abc73c7 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3088,7 +3088,7 @@ process_slot: new_key.offset + datal, 1); if (ret) { - if (ret != -EINVAL) + if (ret != -EOPNOTSUPP) btrfs_abort_transaction(trans, root, ret); btrfs_end_transaction(trans, root); @@ -3163,7 +3163,7 @@ process_slot: new_key.offset + datal, 1); if (ret) { - if (ret != -EINVAL) + if (ret != -EOPNOTSUPP) btrfs_abort_transaction(trans, root, ret); btrfs_end_transaction(trans, root); -- cgit v1.2.3-70-g09d2 From 9d89ce658718d9d21465666127a15ca2c82c4ea7 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Wed, 23 Apr 2014 19:33:33 +0800 Subject: Btrfs: move btrfs_{set,clear}_and_info() to ctree.h Signed-off-by: Wang Shilong Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 14 ++++++++++++++ fs/btrfs/super.c | 14 -------------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ad1a5943a923..e7c9e1c540f4 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2058,6 +2058,20 @@ struct btrfs_ioctl_defrag_range_args { #define btrfs_raw_test_opt(o, opt) ((o) & BTRFS_MOUNT_##opt) #define btrfs_test_opt(root, opt) ((root)->fs_info->mount_opt & \ BTRFS_MOUNT_##opt) +#define btrfs_set_and_info(root, opt, fmt, args...) \ +{ \ + if (!btrfs_test_opt(root, opt)) \ + btrfs_info(root->fs_info, fmt, ##args); \ + btrfs_set_opt(root->fs_info->mount_opt, opt); \ +} + +#define btrfs_clear_and_info(root, opt, fmt, args...) \ +{ \ + if (btrfs_test_opt(root, opt)) \ + btrfs_info(root->fs_info, fmt, ##args); \ + btrfs_clear_opt(root->fs_info->mount_opt, opt); \ +} + /* * Inode flags */ diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 53bc3733d483..363404b9713f 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -385,20 +385,6 @@ static match_table_t tokens = { {Opt_err, NULL}, }; -#define btrfs_set_and_info(root, opt, fmt, args...) \ -{ \ - if (!btrfs_test_opt(root, opt)) \ - btrfs_info(root->fs_info, fmt, ##args); \ - btrfs_set_opt(root->fs_info->mount_opt, opt); \ -} - -#define btrfs_clear_and_info(root, opt, fmt, args...) \ -{ \ - if (btrfs_test_opt(root, opt)) \ - btrfs_info(root->fs_info, fmt, ##args); \ - btrfs_clear_opt(root->fs_info->mount_opt, opt); \ -} - /* * Regular mount options parser. Everything that is needed only when * reading in a new superblock is parsed here. -- cgit v1.2.3-70-g09d2 From e60efa84252c059bde5f65fccc6af94478d39e3b Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Wed, 23 Apr 2014 19:33:34 +0800 Subject: Btrfs: avoid triggering bug_on() when we fail to start inode caching task When running stress test(including snapshots,balance,fstress), we trigger the following BUG_ON() which is because we fail to start inode caching task. [ 181.131945] kernel BUG at fs/btrfs/inode-map.c:179! [ 181.137963] invalid opcode: 0000 [#1] SMP [ 181.217096] CPU: 11 PID: 2532 Comm: btrfs Not tainted 3.14.0 #1 [ 181.240521] task: ffff88013b621b30 ti: ffff8800b6ada000 task.ti: ffff8800b6ada000 [ 181.367506] Call Trace: [ 181.371107] [] btrfs_return_ino+0x9e/0x110 [btrfs] [ 181.379191] [] btrfs_evict_inode+0x46b/0x4c0 [btrfs] [ 181.387464] [] ? autoremove_wake_function+0x40/0x40 [ 181.395642] [] evict+0x9e/0x190 [ 181.401882] [] iput+0xf3/0x180 [ 181.408025] [] btrfs_orphan_cleanup+0x1ee/0x430 [btrfs] [ 181.416614] [] btrfs_mksubvol.isra.29+0x3bd/0x450 [btrfs] [ 181.425399] [] btrfs_ioctl_snap_create_transid+0x186/0x190 [btrfs] [ 181.435059] [] btrfs_ioctl_snap_create_v2+0xeb/0x130 [btrfs] [ 181.444148] [] btrfs_ioctl+0xf76/0x2b90 [btrfs] [ 181.451971] [] ? handle_mm_fault+0x475/0xe80 [ 181.459509] [] ? __do_page_fault+0x1ec/0x520 [ 181.467046] [] ? do_mmap_pgoff+0x2f5/0x3c0 [ 181.474393] [] do_vfs_ioctl+0x2d8/0x4b0 [ 181.481450] [] SyS_ioctl+0x81/0xa0 [ 181.488021] [] system_call_fastpath+0x16/0x1b We should avoid triggering BUG_ON() here, instead, we output warning messages and clear inode_cache option. Signed-off-by: Wang Shilong Signed-off-by: Chris Mason --- fs/btrfs/inode-map.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index cc8ca193d830..8ad529e3e67e 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -176,7 +176,11 @@ static void start_caching(struct btrfs_root *root) tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu\n", root->root_key.objectid); - BUG_ON(IS_ERR(tsk)); /* -ENOMEM */ + if (IS_ERR(tsk)) { + btrfs_warn(root->fs_info, "failed to start inode caching task"); + btrfs_clear_and_info(root, CHANGE_INODE_CACHE, + "disabling inode map caching"); + } } int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid) -- cgit v1.2.3-70-g09d2 From 28c16cbbc32781224309e50cc99c684f2498bc59 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Wed, 23 Apr 2014 19:33:35 +0800 Subject: Btrfs: fix possible memory leaks in open_ctree() Fix possible memory leaks in the following error handling paths: read_tree_block() btrfs_recover_log_trees btrfs_commit_super() btrfs_find_orphan_roots() btrfs_cleanup_fs_roots() Signed-off-by: Wang Shilong Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6d1ac7d46f81..0e4fb4a5c7f0 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2864,7 +2864,7 @@ retry_root_backup: printk(KERN_ERR "BTRFS: failed to read log tree\n"); free_extent_buffer(log_tree_root->node); kfree(log_tree_root); - goto fail_trans_kthread; + goto fail_qgroup; } /* returns with log_tree_root freed on success */ ret = btrfs_recover_log_trees(log_tree_root); @@ -2873,24 +2873,24 @@ retry_root_backup: "Failed to recover log tree"); free_extent_buffer(log_tree_root->node); kfree(log_tree_root); - goto fail_trans_kthread; + goto fail_qgroup; } if (sb->s_flags & MS_RDONLY) { ret = btrfs_commit_super(tree_root); if (ret) - goto fail_trans_kthread; + goto fail_qgroup; } } ret = btrfs_find_orphan_roots(tree_root); if (ret) - goto fail_trans_kthread; + goto fail_qgroup; if (!(sb->s_flags & MS_RDONLY)) { ret = btrfs_cleanup_fs_roots(fs_info); if (ret) - goto fail_trans_kthread; + goto fail_qgroup; ret = btrfs_recover_relocation(tree_root); if (ret < 0) { -- cgit v1.2.3-70-g09d2 From 1c70d8fb4dfa95bee491816b2a6767b5ca1080e7 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 23 Apr 2014 19:33:36 +0800 Subject: Btrfs: fix inode caching vs tree log Currently, with inode cache enabled, we will reuse its inode id immediately after unlinking file, we may hit something like following: |->iput inode |->return inode id into inode cache |->create dir,fsync |->power off An easy way to reproduce this problem is: mkfs.btrfs -f /dev/sdb mount /dev/sdb /mnt -o inode_cache,commit=100 dd if=/dev/zero of=/mnt/data bs=1M count=10 oflag=sync inode_id=`ls -i /mnt/data | awk '{print $1}'` rm -f /mnt/data i=1 while [ 1 ] do mkdir /mnt/dir_$i test1=`stat /mnt/dir_$i | grep Inode: | awk '{print $4}'` if [ $test1 -eq $inode_id ] then dd if=/dev/zero of=/mnt/dir_$i/data bs=1M count=1 oflag=sync echo b > /proc/sysrq-trigger fi sleep 1 i=$(($i+1)) done mount /dev/sdb /mnt umount /dev/sdb btrfs check /dev/sdb We fix this problem by adding unlinked inode's id into pinned tree, and we can not reuse them until committing transaction. Cc: stable@vger.kernel.org Signed-off-by: Miao Xie Signed-off-by: Wang Shilong Signed-off-by: Chris Mason --- fs/btrfs/inode-map.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 8ad529e3e67e..86935f5ae291 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -209,24 +209,14 @@ again: void btrfs_return_ino(struct btrfs_root *root, u64 objectid) { - struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; struct btrfs_free_space_ctl *pinned = root->free_ino_pinned; if (!btrfs_test_opt(root, INODE_MAP_CACHE)) return; - again: if (root->cached == BTRFS_CACHE_FINISHED) { - __btrfs_add_free_space(ctl, objectid, 1); + __btrfs_add_free_space(pinned, objectid, 1); } else { - /* - * If we are in the process of caching free ino chunks, - * to avoid adding the same inode number to the free_ino - * tree twice due to cross transaction, we'll leave it - * in the pinned tree until a transaction is committed - * or the caching work is done. - */ - down_write(&root->fs_info->commit_root_sem); spin_lock(&root->cache_lock); if (root->cached == BTRFS_CACHE_FINISHED) { @@ -238,11 +228,7 @@ again: start_caching(root); - if (objectid <= root->cache_progress || - objectid >= root->highest_objectid) - __btrfs_add_free_space(ctl, objectid, 1); - else - __btrfs_add_free_space(pinned, objectid, 1); + __btrfs_add_free_space(pinned, objectid, 1); up_write(&root->fs_info->commit_root_sem); } -- cgit v1.2.3-70-g09d2 From 9ce49a0b4ff7f13961d8d106ffae959823d2e758 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 24 Apr 2014 15:15:28 +0100 Subject: Btrfs: use correct key when repeating search for extent item If skinny metadata is enabled and our first tree search fails to find a skinny extent item, we may repeat a tree search for a "fat" extent item (if the previous item in the leaf is not the "fat" extent we're looking for). However we were not setting the new key's objectid to the right value, as we previously used the same key variable to peek at the previous item in the leaf, which has a different objectid. So just set the right objectid to avoid modifying/deleting a wrong item if we repeat the tree search. Signed-off-by: Filipe David Borba Manana Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1306487c82cf..678cb352902c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1542,6 +1542,7 @@ again: ret = 0; } if (ret) { + key.objectid = bytenr; key.type = BTRFS_EXTENT_ITEM_KEY; key.offset = num_bytes; btrfs_release_path(path); @@ -5719,6 +5720,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, if (ret > 0 && skinny_metadata) { skinny_metadata = false; + key.objectid = bytenr; key.type = BTRFS_EXTENT_ITEM_KEY; key.offset = num_bytes; btrfs_release_path(path); -- cgit v1.2.3-70-g09d2 From f8213bdc89719bad895a02c62c4a85066ff76720 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 24 Apr 2014 15:15:29 +0100 Subject: Btrfs: correctly set profile flags on seqlock retry If we had to retry on the profiles seqlock (due to a concurrent write), we would set bits on the input flags that corresponded both to the current profile and to previous values of the profile. Signed-off-by: Filipe David Borba Manana Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 678cb352902c..5590af92094b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3543,11 +3543,13 @@ static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) return extended_to_chunk(flags | tmp); } -static u64 get_alloc_profile(struct btrfs_root *root, u64 flags) +static u64 get_alloc_profile(struct btrfs_root *root, u64 orig_flags) { unsigned seq; + u64 flags; do { + flags = orig_flags; seq = read_seqbegin(&root->fs_info->profiles_lock); if (flags & BTRFS_BLOCK_GROUP_DATA) -- cgit v1.2.3-70-g09d2 From cfd4a535b68faf651b238586011f5bae128391c4 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sat, 26 Apr 2014 05:02:03 -0700 Subject: Btrfs: limit the path size in send to PATH_MAX fs_path_ensure_buf is used to make sure our path buffers for send are big enough for the path names as we construct them. The buffer size is limited to 32K by the length field in the struct. But bugs in the path construction can end up trying to build a huge buffer, and we'll do invalid memmmoves when the buffer length field wraps. This patch is step one, preventing the overflows. Signed-off-by: Chris Mason --- fs/btrfs/send.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 1ac3ca98c429..eb6537a08c1b 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -349,6 +349,11 @@ static int fs_path_ensure_buf(struct fs_path *p, int len) if (p->buf_len >= len) return 0; + if (len > PATH_MAX) { + WARN_ON(1); + return -ENOMEM; + } + path_len = p->end - p->start; old_buf_len = p->buf_len; -- cgit v1.2.3-70-g09d2