summaryrefslogtreecommitdiff
path: root/fs/btrfs/delayed-inode.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-09-12 11:28:00 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-09-12 11:28:00 -0700
commit3669558bdf354cd352be955ef2764cde6a9bf5ec (patch)
tree4a17f3cf6733942ec5e63e23ce2870f3600afda0 /fs/btrfs/delayed-inode.c
parent2c758cef66865310b59959679dbd5d450174d15d (diff)
parent5facccc9402301d67d48bef06159b91f7e41efc0 (diff)
Merge tag 'for-6.6-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba: - several fixes for handling directory item (inserting, removing, iteration, error handling) - fix transaction commit stalls when auto relocation is running and blocks other tasks that want to commit - fix a build error when DEBUG is enabled - fix lockdep warning in inode number lookup ioctl - fix race when finishing block group creation - remove link to obsolete wiki in several files * tag 'for-6.6-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: MAINTAINERS: remove links to obsolete btrfs.wiki.kernel.org btrfs: assert delayed node locked when removing delayed item btrfs: remove BUG() after failure to insert delayed dir index item btrfs: improve error message after failure to add delayed dir index item btrfs: fix a compilation error if DEBUG is defined in btree_dirty_folio btrfs: check for BTRFS_FS_ERROR in pending ordered assert btrfs: fix lockdep splat and potential deadlock after failure running delayed items btrfs: do not block starts waiting on previous transaction commit btrfs: release path before inode lookup during the ino lookup ioctl btrfs: fix race between finishing block group creation and its item update
Diffstat (limited to 'fs/btrfs/delayed-inode.c')
-rw-r--r--fs/btrfs/delayed-inode.c104
1 files changed, 71 insertions, 33 deletions
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 53c1211dd60b..caf0bbd028d1 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -412,6 +412,7 @@ static void finish_one_item(struct btrfs_delayed_root *delayed_root)
static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
{
+ struct btrfs_delayed_node *delayed_node = delayed_item->delayed_node;
struct rb_root_cached *root;
struct btrfs_delayed_root *delayed_root;
@@ -419,18 +420,21 @@ static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
if (RB_EMPTY_NODE(&delayed_item->rb_node))
return;
- delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root;
+ /* If it's in a rbtree, then we need to have delayed node locked. */
+ lockdep_assert_held(&delayed_node->mutex);
+
+ delayed_root = delayed_node->root->fs_info->delayed_root;
BUG_ON(!delayed_root);
if (delayed_item->type == BTRFS_DELAYED_INSERTION_ITEM)
- root = &delayed_item->delayed_node->ins_root;
+ root = &delayed_node->ins_root;
else
- root = &delayed_item->delayed_node->del_root;
+ root = &delayed_node->del_root;
rb_erase_cached(&delayed_item->rb_node, root);
RB_CLEAR_NODE(&delayed_item->rb_node);
- delayed_item->delayed_node->count--;
+ delayed_node->count--;
finish_one_item(delayed_root);
}
@@ -1153,20 +1157,33 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, int nr)
ret = __btrfs_commit_inode_delayed_items(trans, path,
curr_node);
if (ret) {
- btrfs_release_delayed_node(curr_node);
- curr_node = NULL;
btrfs_abort_transaction(trans, ret);
break;
}
prev_node = curr_node;
curr_node = btrfs_next_delayed_node(curr_node);
+ /*
+ * See the comment below about releasing path before releasing
+ * node. If the commit of delayed items was successful the path
+ * should always be released, but in case of an error, it may
+ * point to locked extent buffers (a leaf at the very least).
+ */
+ ASSERT(path->nodes[0] == NULL);
btrfs_release_delayed_node(prev_node);
}
+ /*
+ * Release the path to avoid a potential deadlock and lockdep splat when
+ * releasing the delayed node, as that requires taking the delayed node's
+ * mutex. If another task starts running delayed items before we take
+ * the mutex, it will first lock the mutex and then it may try to lock
+ * the same btree path (leaf).
+ */
+ btrfs_free_path(path);
+
if (curr_node)
btrfs_release_delayed_node(curr_node);
- btrfs_free_path(path);
trans->block_rsv = block_rsv;
return ret;
@@ -1413,7 +1430,29 @@ void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info)
btrfs_wq_run_delayed_node(delayed_root, fs_info, BTRFS_DELAYED_BATCH);
}
-/* Will return 0 or -ENOMEM */
+static void btrfs_release_dir_index_item_space(struct btrfs_trans_handle *trans)
+{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+ const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
+
+ if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
+ return;
+
+ /*
+ * Adding the new dir index item does not require touching another
+ * leaf, so we can release 1 unit of metadata that was previously
+ * reserved when starting the transaction. This applies only to
+ * the case where we had a transaction start and excludes the
+ * transaction join case (when replaying log trees).
+ */
+ trace_btrfs_space_reservation(fs_info, "transaction",
+ trans->transid, bytes, 0);
+ btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL);
+ ASSERT(trans->bytes_reserved >= bytes);
+ trans->bytes_reserved -= bytes;
+}
+
+/* Will return 0, -ENOMEM or -EEXIST (index number collision, unexpected). */
int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
const char *name, int name_len,
struct btrfs_inode *dir,
@@ -1455,6 +1494,27 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
mutex_lock(&delayed_node->mutex);
+ /*
+ * First attempt to insert the delayed item. This is to make the error
+ * handling path simpler in case we fail (-EEXIST). There's no risk of
+ * any other task coming in and running the delayed item before we do
+ * the metadata space reservation below, because we are holding the
+ * delayed node's mutex and that mutex must also be locked before the
+ * node's delayed items can be run.
+ */
+ ret = __btrfs_add_delayed_item(delayed_node, delayed_item);
+ if (unlikely(ret)) {
+ btrfs_err(trans->fs_info,
+"error adding delayed dir index item, name: %.*s, index: %llu, root: %llu, dir: %llu, dir->index_cnt: %llu, delayed_node->index_cnt: %llu, error: %d",
+ name_len, name, index, btrfs_root_id(delayed_node->root),
+ delayed_node->inode_id, dir->index_cnt,
+ delayed_node->index_cnt, ret);
+ btrfs_release_delayed_item(delayed_item);
+ btrfs_release_dir_index_item_space(trans);
+ mutex_unlock(&delayed_node->mutex);
+ goto release_node;
+ }
+
if (delayed_node->index_item_leaves == 0 ||
delayed_node->curr_index_batch_size + data_len > leaf_data_size) {
delayed_node->curr_index_batch_size = data_len;
@@ -1472,36 +1532,14 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
* impossible.
*/
if (WARN_ON(ret)) {
- mutex_unlock(&delayed_node->mutex);
btrfs_release_delayed_item(delayed_item);
+ mutex_unlock(&delayed_node->mutex);
goto release_node;
}
delayed_node->index_item_leaves++;
- } else if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) {
- const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
-
- /*
- * Adding the new dir index item does not require touching another
- * leaf, so we can release 1 unit of metadata that was previously
- * reserved when starting the transaction. This applies only to
- * the case where we had a transaction start and excludes the
- * transaction join case (when replaying log trees).
- */
- trace_btrfs_space_reservation(fs_info, "transaction",
- trans->transid, bytes, 0);
- btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL);
- ASSERT(trans->bytes_reserved >= bytes);
- trans->bytes_reserved -= bytes;
- }
-
- ret = __btrfs_add_delayed_item(delayed_node, delayed_item);
- if (unlikely(ret)) {
- btrfs_err(trans->fs_info,
- "err add delayed dir index item(name: %.*s) into the insertion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
- name_len, name, delayed_node->root->root_key.objectid,
- delayed_node->inode_id, ret);
- BUG();
+ } else {
+ btrfs_release_dir_index_item_space(trans);
}
mutex_unlock(&delayed_node->mutex);