diff options
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- | fs/btrfs/block-rsv.c | 3 | ||||
-rw-r--r-- | fs/btrfs/ctree.h | 5 | ||||
-rw-r--r-- | fs/btrfs/dev-replace.c | 26 | ||||
-rw-r--r-- | fs/btrfs/file.c | 57 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 58 | ||||
-rw-r--r-- | fs/btrfs/ioctl.c | 10 | ||||
-rw-r--r-- | fs/btrfs/qgroup.c | 100 | ||||
-rw-r--r-- | fs/btrfs/ref-verify.c | 1 | ||||
-rw-r--r-- | fs/btrfs/relocation.c | 4 | ||||
-rw-r--r-- | fs/btrfs/scrub.c | 5 | ||||
-rw-r--r-- | fs/btrfs/tests/inode-tests.c | 12 | ||||
-rw-r--r-- | fs/btrfs/tree-checker.c | 3 | ||||
-rw-r--r-- | fs/btrfs/volumes.c | 34 |
13 files changed, 206 insertions, 112 deletions
diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c index 7e1549a84fcc..bc920afe23bf 100644 --- a/fs/btrfs/block-rsv.c +++ b/fs/btrfs/block-rsv.c @@ -511,7 +511,8 @@ again: /*DEFAULT_RATELIMIT_BURST*/ 1); if (__ratelimit(&_rs)) WARN(1, KERN_DEBUG - "BTRFS: block rsv returned %d\n", ret); + "BTRFS: block rsv %d returned %d\n", + block_rsv->type, ret); } try_reserve: ret = btrfs_reserve_metadata_bytes(root, block_rsv, blocksize, diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0378933d163c..0b29bdb25105 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -878,7 +878,10 @@ struct btrfs_fs_info { */ struct ulist *qgroup_ulist; - /* protect user change for quota operations */ + /* + * Protect user change for quota operations. If a transaction is needed, + * it must be started before locking this lock. + */ struct mutex qgroup_ioctl_lock; /* list of dirty qgroups to be written at next commit */ diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 5b9e3f3ace22..10638537b9ef 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -91,6 +91,17 @@ int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info) ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0); if (ret) { no_valid_dev_replace_entry_found: + /* + * We don't have a replace item or it's corrupted. If there is + * a replace target, fail the mount. + */ + if (btrfs_find_device(fs_info->fs_devices, + BTRFS_DEV_REPLACE_DEVID, NULL, NULL, false)) { + btrfs_err(fs_info, + "found replace target device without a valid replace item"); + ret = -EUCLEAN; + goto out; + } ret = 0; dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED; @@ -143,8 +154,19 @@ no_valid_dev_replace_entry_found: case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED: - dev_replace->srcdev = NULL; - dev_replace->tgtdev = NULL; + /* + * We don't have an active replace item but if there is a + * replace target, fail the mount. + */ + if (btrfs_find_device(fs_info->fs_devices, + BTRFS_DEV_REPLACE_DEVID, NULL, NULL, false)) { + btrfs_err(fs_info, + "replace devid present without an active replace item"); + ret = -EUCLEAN; + } else { + dev_replace->srcdev = NULL; + dev_replace->tgtdev = NULL; + } break; case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 87355a38a654..4373da7bcc0d 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -452,46 +452,6 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages) } } -static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode, - const u64 start, - const u64 len, - struct extent_state **cached_state) -{ - u64 search_start = start; - const u64 end = start + len - 1; - - while (search_start < end) { - const u64 search_len = end - search_start + 1; - struct extent_map *em; - u64 em_len; - int ret = 0; - - em = btrfs_get_extent(inode, NULL, 0, search_start, search_len); - if (IS_ERR(em)) - return PTR_ERR(em); - - if (em->block_start != EXTENT_MAP_HOLE) - goto next; - - em_len = em->len; - if (em->start < search_start) - em_len -= search_start - em->start; - if (em_len > search_len) - em_len = search_len; - - ret = set_extent_bit(&inode->io_tree, search_start, - search_start + em_len - 1, - EXTENT_DELALLOC_NEW, - NULL, cached_state, GFP_NOFS); -next: - search_start = extent_map_end(em); - free_extent_map(em); - if (ret) - return ret; - } - return 0; -} - /* * after copy_from_user, pages need to be dirtied and we need to make * sure holes are created between the current EOF and the start of @@ -528,23 +488,6 @@ int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages, EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0, cached); - if (!btrfs_is_free_space_inode(inode)) { - if (start_pos >= isize && - !(inode->flags & BTRFS_INODE_PREALLOC)) { - /* - * There can't be any extents following eof in this case - * so just set the delalloc new bit for the range - * directly. - */ - extra_bits |= EXTENT_DELALLOC_NEW; - } else { - err = btrfs_find_new_delalloc_bytes(inode, start_pos, - num_bytes, cached); - if (err) - return err; - } - } - err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, extra_bits, cached); if (err) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index da58c58ef9aa..7e8d8169779d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2253,11 +2253,69 @@ static int add_pending_csums(struct btrfs_trans_handle *trans, return 0; } +static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode, + const u64 start, + const u64 len, + struct extent_state **cached_state) +{ + u64 search_start = start; + const u64 end = start + len - 1; + + while (search_start < end) { + const u64 search_len = end - search_start + 1; + struct extent_map *em; + u64 em_len; + int ret = 0; + + em = btrfs_get_extent(inode, NULL, 0, search_start, search_len); + if (IS_ERR(em)) + return PTR_ERR(em); + + if (em->block_start != EXTENT_MAP_HOLE) + goto next; + + em_len = em->len; + if (em->start < search_start) + em_len -= search_start - em->start; + if (em_len > search_len) + em_len = search_len; + + ret = set_extent_bit(&inode->io_tree, search_start, + search_start + em_len - 1, + EXTENT_DELALLOC_NEW, + NULL, cached_state, GFP_NOFS); +next: + search_start = extent_map_end(em); + free_extent_map(em); + if (ret) + return ret; + } + return 0; +} + int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end, unsigned int extra_bits, struct extent_state **cached_state) { WARN_ON(PAGE_ALIGNED(end)); + + if (start >= i_size_read(&inode->vfs_inode) && + !(inode->flags & BTRFS_INODE_PREALLOC)) { + /* + * There can't be any extents following eof in this case so just + * set the delalloc new bit for the range directly. + */ + extra_bits |= EXTENT_DELALLOC_NEW; + } else { + int ret; + + ret = btrfs_find_new_delalloc_bytes(inode, start, + end + 1 - start, + cached_state); + if (ret) + return ret; + } + return set_extent_delalloc(&inode->io_tree, start, end, extra_bits, cached_state); } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index ab408a23ba32..69a384145dc6 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1274,6 +1274,7 @@ static int cluster_pages_for_defrag(struct inode *inode, u64 page_start; u64 page_end; u64 page_cnt; + u64 start = (u64)start_index << PAGE_SHIFT; int ret; int i; int i_done; @@ -1290,8 +1291,7 @@ static int cluster_pages_for_defrag(struct inode *inode, page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1); ret = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved, - start_index << PAGE_SHIFT, - page_cnt << PAGE_SHIFT); + start, page_cnt << PAGE_SHIFT); if (ret) return ret; i_done = 0; @@ -1380,8 +1380,7 @@ again: btrfs_mod_outstanding_extents(BTRFS_I(inode), 1); spin_unlock(&BTRFS_I(inode)->lock); btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, - start_index << PAGE_SHIFT, - (page_cnt - i_done) << PAGE_SHIFT, true); + start, (page_cnt - i_done) << PAGE_SHIFT, true); } @@ -1408,8 +1407,7 @@ out: put_page(pages[i]); } btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, - start_index << PAGE_SHIFT, - page_cnt << PAGE_SHIFT, true); + start, page_cnt << PAGE_SHIFT, true); btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT); extent_changeset_free(data_reserved); return ret; diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index c54ea6586632..87bd37b70738 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -11,6 +11,7 @@ #include <linux/slab.h> #include <linux/workqueue.h> #include <linux/btrfs.h> +#include <linux/sched/mm.h> #include "ctree.h" #include "transaction.h" @@ -497,13 +498,13 @@ next2: break; } out: + btrfs_free_path(path); fs_info->qgroup_flags |= flags; if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN && ret >= 0) ret = qgroup_rescan_init(fs_info, rescan_progress, 0); - btrfs_free_path(path); if (ret < 0) { ulist_free(fs_info->qgroup_ulist); @@ -936,6 +937,7 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info) struct btrfs_key found_key; struct btrfs_qgroup *qgroup = NULL; struct btrfs_trans_handle *trans = NULL; + struct ulist *ulist = NULL; int ret = 0; int slot; @@ -943,8 +945,8 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info) if (fs_info->quota_root) goto out; - fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL); - if (!fs_info->qgroup_ulist) { + ulist = ulist_alloc(GFP_KERNEL); + if (!ulist) { ret = -ENOMEM; goto out; } @@ -952,6 +954,22 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info) ret = btrfs_sysfs_add_qgroups(fs_info); if (ret < 0) goto out; + + /* + * Unlock qgroup_ioctl_lock before starting the transaction. This is to + * avoid lock acquisition inversion problems (reported by lockdep) between + * qgroup_ioctl_lock and the vfs freeze semaphores, acquired when we + * start a transaction. + * After we started the transaction lock qgroup_ioctl_lock again and + * check if someone else created the quota root in the meanwhile. If so, + * just return success and release the transaction handle. + * + * Also we don't need to worry about someone else calling + * btrfs_sysfs_add_qgroups() after we unlock and getting an error because + * that function returns 0 (success) when the sysfs entries already exist. + */ + mutex_unlock(&fs_info->qgroup_ioctl_lock); + /* * 1 for quota root item * 1 for BTRFS_QGROUP_STATUS item @@ -961,12 +979,20 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info) * would be a lot of overkill. */ trans = btrfs_start_transaction(tree_root, 2); + + mutex_lock(&fs_info->qgroup_ioctl_lock); if (IS_ERR(trans)) { ret = PTR_ERR(trans); trans = NULL; goto out; } + if (fs_info->quota_root) + goto out; + + fs_info->qgroup_ulist = ulist; + ulist = NULL; + /* * initially create the quota tree */ @@ -1124,11 +1150,14 @@ out: if (ret) { ulist_free(fs_info->qgroup_ulist); fs_info->qgroup_ulist = NULL; - if (trans) - btrfs_end_transaction(trans); btrfs_sysfs_del_qgroups(fs_info); } mutex_unlock(&fs_info->qgroup_ioctl_lock); + if (ret && trans) + btrfs_end_transaction(trans); + else if (trans) + ret = btrfs_end_transaction(trans); + ulist_free(ulist); return ret; } @@ -1141,19 +1170,29 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info) mutex_lock(&fs_info->qgroup_ioctl_lock); if (!fs_info->quota_root) goto out; + mutex_unlock(&fs_info->qgroup_ioctl_lock); /* * 1 For the root item * * We should also reserve enough items for the quota tree deletion in * btrfs_clean_quota_tree but this is not done. + * + * Also, we must always start a transaction without holding the mutex + * qgroup_ioctl_lock, see btrfs_quota_enable(). */ trans = btrfs_start_transaction(fs_info->tree_root, 1); + + mutex_lock(&fs_info->qgroup_ioctl_lock); if (IS_ERR(trans)) { ret = PTR_ERR(trans); + trans = NULL; goto out; } + if (!fs_info->quota_root) + goto out; + clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); btrfs_qgroup_wait_for_completion(fs_info, false); spin_lock(&fs_info->qgroup_lock); @@ -1167,13 +1206,13 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info) ret = btrfs_clean_quota_tree(trans, quota_root); if (ret) { btrfs_abort_transaction(trans, ret); - goto end_trans; + goto out; } ret = btrfs_del_root(trans, "a_root->root_key); if (ret) { btrfs_abort_transaction(trans, ret); - goto end_trans; + goto out; } list_del("a_root->dirty_list); @@ -1185,10 +1224,13 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info) btrfs_put_root(quota_root); -end_trans: - ret = btrfs_end_transaction(trans); out: mutex_unlock(&fs_info->qgroup_ioctl_lock); + if (ret && trans) + btrfs_end_transaction(trans); + else if (trans) + ret = btrfs_end_transaction(trans); + return ret; } @@ -1324,13 +1366,17 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, struct btrfs_qgroup *member; struct btrfs_qgroup_list *list; struct ulist *tmp; + unsigned int nofs_flag; int ret = 0; /* Check the level of src and dst first */ if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst)) return -EINVAL; + /* We hold a transaction handle open, must do a NOFS allocation. */ + nofs_flag = memalloc_nofs_save(); tmp = ulist_alloc(GFP_KERNEL); + memalloc_nofs_restore(nofs_flag); if (!tmp) return -ENOMEM; @@ -1387,10 +1433,14 @@ static int __del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, struct btrfs_qgroup_list *list; struct ulist *tmp; bool found = false; + unsigned int nofs_flag; int ret = 0; int ret2; + /* We hold a transaction handle open, must do a NOFS allocation. */ + nofs_flag = memalloc_nofs_save(); tmp = ulist_alloc(GFP_KERNEL); + memalloc_nofs_restore(nofs_flag); if (!tmp) return -ENOMEM; @@ -3435,24 +3485,20 @@ static int qgroup_unreserve_range(struct btrfs_inode *inode, { struct rb_node *node; struct rb_node *next; - struct ulist_node *entry = NULL; + struct ulist_node *entry; int ret = 0; node = reserved->range_changed.root.rb_node; + if (!node) + return 0; while (node) { entry = rb_entry(node, struct ulist_node, rb_node); if (entry->val < start) node = node->rb_right; - else if (entry) - node = node->rb_left; else - break; + node = node->rb_left; } - /* Empty changeset */ - if (!entry) - return 0; - if (entry->val > start && rb_prev(&entry->rb_node)) entry = rb_entry(rb_prev(&entry->rb_node), struct ulist_node, rb_node); @@ -3516,6 +3562,7 @@ static int try_flush_qgroup(struct btrfs_root *root) { struct btrfs_trans_handle *trans; int ret; + bool can_commit = true; /* * We don't want to run flush again and again, so if there is a running @@ -3527,6 +3574,20 @@ static int try_flush_qgroup(struct btrfs_root *root) return 0; } + /* + * If current process holds a transaction, we shouldn't flush, as we + * assume all space reservation happens before a transaction handle is + * held. + * + * But there are cases like btrfs_delayed_item_reserve_metadata() where + * we try to reserve space with one transction handle already held. + * In that case we can't commit transaction, but at least try to end it + * and hope the started data writes can free some space. + */ + if (current->journal_info && + current->journal_info != BTRFS_SEND_TRANS_STUB) + can_commit = false; + ret = btrfs_start_delalloc_snapshot(root); if (ret < 0) goto out; @@ -3538,7 +3599,10 @@ static int try_flush_qgroup(struct btrfs_root *root) goto out; } - ret = btrfs_commit_transaction(trans); + if (can_commit) + ret = btrfs_commit_transaction(trans); + else + ret = btrfs_end_transaction(trans); out: clear_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state); wake_up(&root->qgroup_flush_wait); diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c index 7f03dbe5b609..78693d3dd15b 100644 --- a/fs/btrfs/ref-verify.c +++ b/fs/btrfs/ref-verify.c @@ -860,6 +860,7 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info, "dropping a ref for a root that doesn't have a ref on the block"); dump_block_entry(fs_info, be); dump_ref_action(fs_info, ra); + kfree(ref); kfree(ra); goto out_unlock; } diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 3602806d71bd..9ba92d86da0b 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1648,6 +1648,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, struct btrfs_root_item *root_item; struct btrfs_path *path; struct extent_buffer *leaf; + int reserve_level; int level; int max_level; int replaced = 0; @@ -1696,7 +1697,8 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, * Thus the needed metadata size is at most root_level * nodesize, * and * 2 since we have two trees to COW. */ - min_reserved = fs_info->nodesize * btrfs_root_level(root_item) * 2; + reserve_level = max_t(int, 1, btrfs_root_level(root_item)); + min_reserved = fs_info->nodesize * reserve_level * 2; memset(&next_key, 0, sizeof(next_key)); while (1) { diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index cf63f1e27a27..e71e7586e9eb 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -3866,8 +3866,9 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, if (!is_dev_replace && !readonly && !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) { mutex_unlock(&fs_info->fs_devices->device_list_mutex); - btrfs_err_in_rcu(fs_info, "scrub: device %s is not writable", - rcu_str_deref(dev->name)); + btrfs_err_in_rcu(fs_info, + "scrub on devid %llu: filesystem on %s is not writable", + devid, rcu_str_deref(dev->name)); ret = -EROFS; goto out; } diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index e6719f7db386..04022069761d 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c @@ -983,7 +983,8 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize) ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, BTRFS_MAX_EXTENT_SIZE >> 1, (BTRFS_MAX_EXTENT_SIZE >> 1) + sectorsize - 1, - EXTENT_DELALLOC | EXTENT_UPTODATE, 0, 0, NULL); + EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | + EXTENT_UPTODATE, 0, 0, NULL); if (ret) { test_err("clear_extent_bit returned %d", ret); goto out; @@ -1050,7 +1051,8 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize) ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, BTRFS_MAX_EXTENT_SIZE + sectorsize, BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, - EXTENT_DELALLOC | EXTENT_UPTODATE, 0, 0, NULL); + EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | + EXTENT_UPTODATE, 0, 0, NULL); if (ret) { test_err("clear_extent_bit returned %d", ret); goto out; @@ -1082,7 +1084,8 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize) /* Empty */ ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1, - EXTENT_DELALLOC | EXTENT_UPTODATE, 0, 0, NULL); + EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | + EXTENT_UPTODATE, 0, 0, NULL); if (ret) { test_err("clear_extent_bit returned %d", ret); goto out; @@ -1097,7 +1100,8 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize) out: if (ret) clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1, - EXTENT_DELALLOC | EXTENT_UPTODATE, 0, 0, NULL); + EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | + EXTENT_UPTODATE, 0, 0, NULL); iput(inode); btrfs_free_dummy_root(root); btrfs_free_dummy_fs_info(fs_info); diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 8784b74f5232..ea2bb4cb5890 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -1068,6 +1068,7 @@ static int check_root_item(struct extent_buffer *leaf, struct btrfs_key *key, "invalid root item size, have %u expect %zu or %u", btrfs_item_size_nr(leaf, slot), sizeof(ri), btrfs_legacy_root_item_size()); + return -EUCLEAN; } /* @@ -1423,6 +1424,7 @@ static int check_extent_data_ref(struct extent_buffer *leaf, "invalid item size, have %u expect aligned to %zu for key type %u", btrfs_item_size_nr(leaf, slot), sizeof(*dref), key->type); + return -EUCLEAN; } if (!IS_ALIGNED(key->objectid, leaf->fs_info->sectorsize)) { generic_err(leaf, slot, @@ -1451,6 +1453,7 @@ static int check_extent_data_ref(struct extent_buffer *leaf, extent_err(leaf, slot, "invalid extent data backref offset, have %llu expect aligned to %u", offset, leaf->fs_info->sectorsize); + return -EUCLEAN; } } return 0; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index b1e48078c318..78637665166e 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -940,7 +940,13 @@ static noinline struct btrfs_device *device_list_add(const char *path, if (device->bdev != path_bdev) { bdput(path_bdev); mutex_unlock(&fs_devices->device_list_mutex); - btrfs_warn_in_rcu(device->fs_info, + /* + * device->fs_info may not be reliable here, so + * pass in a NULL instead. This avoids a + * possible use-after-free when the fs_info and + * fs_info->sb are already torn down. + */ + btrfs_warn_in_rcu(NULL, "duplicate device %s devid %llu generation %llu scanned by %s (%d)", path, devid, found_transid, current->comm, @@ -1056,22 +1062,13 @@ static void __btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, continue; } - if (device->devid == BTRFS_DEV_REPLACE_DEVID) { - /* - * In the first step, keep the device which has - * the correct fsid and the devid that is used - * for the dev_replace procedure. - * In the second step, the dev_replace state is - * read from the device tree and it is known - * whether the procedure is really active or - * not, which means whether this device is - * used or whether it should be removed. - */ - if (step == 0 || test_bit(BTRFS_DEV_STATE_REPLACE_TGT, - &device->dev_state)) { - continue; - } - } + /* + * We have already validated the presence of BTRFS_DEV_REPLACE_DEVID, + * in btrfs_init_dev_replace() so just continue. + */ + if (device->devid == BTRFS_DEV_REPLACE_DEVID) + continue; + if (device->bdev) { blkdev_put(device->bdev, device->mode); device->bdev = NULL; @@ -1080,9 +1077,6 @@ static void __btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) { list_del_init(&device->dev_alloc_list); clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state); - if (!test_bit(BTRFS_DEV_STATE_REPLACE_TGT, - &device->dev_state)) - fs_devices->rw_devices--; } list_del_init(&device->dev_list); fs_devices->num_devices--; |