summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-03-12 12:28:34 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2024-03-12 12:28:34 -0700
commit43a7548e28a6df12a6170421d9d016c576010baa (patch)
treef956b1fc70acceca74f4c69502e592fcbad7ec77 /fs
parent35d4aeea10558d12022d752b20be371aced557da (diff)
parent1cab1375ba6d5337a25acb346996106c12bb2dd0 (diff)
Merge tag 'for-6.9-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba: "Mostly stabilization, refactoring and cleanup changes. There rest are minor performance optimizations due to caching or lock contention reduction and a few notable fixes. Performance improvements: - minor speedup in logging when repeatedly allocated structure is preallocated only once, improves latency and decreases lock contention - minor throughput increase (+6%), reduced lock contention after clearing delayed allocation bits, applies to several common workload types - skip full quota rescan if a new relation is added in the same transaction Fixes: - zstd fix for inline compressed file in subpage mode, updated version from the 6.8 time - proper qgroup inheritance ioctl parameter validation - more fiemap followup fixes after reduced locking done in 6.8: - fix race when detecting delalloc ranges Core changes: - more debugging code: - added assertions for a very rare crash in raid56 calculation - tree-checker dumps page state to give more insights into possible reference counting issues - add checksum calculation offloading sysfs knob, for now enabled under DEBUG only to determine a good heuristic for deciding the offload or synchronous, depends on various factors (block group profile, device speed) and is not as clear as initially thought (checksum type) - error handling improvements, added assertions - more page to folio conversion (defrag, truncate), cached size and shift - preparation for more fine grained locking of sectors in subpage mode - cleanups and refactoring: - include cleanups, forward declarations - pointer-to-structure helpers - redundant argument removals - removed unused code - slab cache updates, last use of SLAB_MEM_SPREAD removed" * tag 'for-6.9-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (114 commits) btrfs: reuse cloned extent buffer during fiemap to avoid re-allocations btrfs: fix race when detecting delalloc ranges during fiemap btrfs: fix off-by-one chunk length calculation at contains_pending_extent() btrfs: qgroup: allow quick inherit if snapshot is created and added to the same parent btrfs: qgroup: validate btrfs_qgroup_inherit parameter btrfs: include device major and minor numbers in the device scan notice btrfs: mark btrfs_put_caching_control() static btrfs: remove SLAB_MEM_SPREAD flag use btrfs: qgroup: always free reserved space for extent records btrfs: tree-checker: dump the page status if hit something wrong btrfs: compression: remove dead comments in btrfs_compress_heuristic() btrfs: subpage: make writer lock utilize bitmap btrfs: subpage: make reader lock utilize bitmap btrfs: unexport btrfs_subpage_start_writer() and btrfs_subpage_end_and_test_writer() btrfs: pass a valid extent map cache pointer to __get_extent_map() btrfs: merge btrfs_del_delalloc_inode() helpers btrfs: pass btrfs_device to btrfs_scratch_superblocks() btrfs: handle transaction commit errors in flush_reservations() btrfs: use KMEM_CACHE() to create btrfs_free_space cache btrfs: use KMEM_CACHE() to create delayed ref caches ...
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/accessors.c15
-rw-r--r--fs/btrfs/accessors.h50
-rw-r--r--fs/btrfs/acl.c1
-rw-r--r--fs/btrfs/acl.h11
-rw-r--r--fs/btrfs/async-thread.c1
-rw-r--r--fs/btrfs/async-thread.h3
-rw-r--r--fs/btrfs/backref.c119
-rw-r--r--fs/btrfs/backref.h136
-rw-r--r--fs/btrfs/bio.c17
-rw-r--r--fs/btrfs/bio.h2
-rw-r--r--fs/btrfs/block-group.c15
-rw-r--r--fs/btrfs/block-group.h14
-rw-r--r--fs/btrfs/block-rsv.c1
-rw-r--r--fs/btrfs/block-rsv.h7
-rw-r--r--fs/btrfs/btrfs_inode.h25
-rw-r--r--fs/btrfs/compression.c18
-rw-r--r--fs/btrfs/compression.h12
-rw-r--r--fs/btrfs/ctree.c10
-rw-r--r--fs/btrfs/ctree.h28
-rw-r--r--fs/btrfs/defrag.c104
-rw-r--r--fs/btrfs/defrag.h10
-rw-r--r--fs/btrfs/delalloc-space.c2
-rw-r--r--fs/btrfs/delalloc-space.h4
-rw-r--r--fs/btrfs/delayed-inode.c21
-rw-r--r--fs/btrfs/delayed-inode.h21
-rw-r--r--fs/btrfs/delayed-ref.c85
-rw-r--r--fs/btrfs/delayed-ref.h82
-rw-r--r--fs/btrfs/dev-replace.c5
-rw-r--r--fs/btrfs/dev-replace.h4
-rw-r--r--fs/btrfs/dir-item.h6
-rw-r--r--fs/btrfs/disk-io.c30
-rw-r--r--fs/btrfs/disk-io.h20
-rw-r--r--fs/btrfs/export.c12
-rw-r--r--fs/btrfs/export.h4
-rw-r--r--fs/btrfs/extent-io-tree.c6
-rw-r--r--fs/btrfs/extent-io-tree.h7
-rw-r--r--fs/btrfs/extent-tree.c51
-rw-r--r--fs/btrfs/extent-tree.h10
-rw-r--r--fs/btrfs/extent_io.c387
-rw-r--r--fs/btrfs/extent_io.h44
-rw-r--r--fs/btrfs/extent_map.c23
-rw-r--r--fs/btrfs/extent_map.h8
-rw-r--r--fs/btrfs/file-item.c6
-rw-r--r--fs/btrfs/file-item.h13
-rw-r--r--fs/btrfs/file.c43
-rw-r--r--fs/btrfs/file.h15
-rw-r--r--fs/btrfs/free-space-cache.c12
-rw-r--r--fs/btrfs/free-space-cache.h15
-rw-r--r--fs/btrfs/free-space-tree.c56
-rw-r--r--fs/btrfs/free-space-tree.h6
-rw-r--r--fs/btrfs/fs.h59
-rw-r--r--fs/btrfs/inode-item.c1
-rw-r--r--fs/btrfs/inode-item.h5
-rw-r--r--fs/btrfs/inode.c238
-rw-r--r--fs/btrfs/ioctl.c120
-rw-r--r--fs/btrfs/ioctl.h9
-rw-r--r--fs/btrfs/locking.c3
-rw-r--r--fs/btrfs/locking.h8
-rw-r--r--fs/btrfs/lru_cache.h7
-rw-r--r--fs/btrfs/lzo.c4
-rw-r--r--fs/btrfs/messages.c2
-rw-r--r--fs/btrfs/misc.h2
-rw-r--r--fs/btrfs/ordered-data.c6
-rw-r--r--fs/btrfs/ordered-data.h15
-rw-r--r--fs/btrfs/orphan.c1
-rw-r--r--fs/btrfs/orphan.h5
-rw-r--r--fs/btrfs/print-tree.h3
-rw-r--r--fs/btrfs/props.c3
-rw-r--r--fs/btrfs/props.h7
-rw-r--r--fs/btrfs/qgroup.c148
-rw-r--r--fs/btrfs/qgroup.h20
-rw-r--r--fs/btrfs/raid-stripe-tree.c1
-rw-r--r--fs/btrfs/raid-stripe-tree.h5
-rw-r--r--fs/btrfs/raid56.c31
-rw-r--r--fs/btrfs/raid56.h9
-rw-r--r--fs/btrfs/rcu-string.h6
-rw-r--r--fs/btrfs/ref-verify.h9
-rw-r--r--fs/btrfs/reflink.c12
-rw-r--r--fs/btrfs/reflink.h4
-rw-r--r--fs/btrfs/relocation.c5
-rw-r--r--fs/btrfs/relocation.h9
-rw-r--r--fs/btrfs/root-tree.c17
-rw-r--r--fs/btrfs/root-tree.h10
-rw-r--r--fs/btrfs/scrub.c9
-rw-r--r--fs/btrfs/scrub.h6
-rw-r--r--fs/btrfs/send.c64
-rw-r--r--fs/btrfs/send.h8
-rw-r--r--fs/btrfs/space-info.c1
-rw-r--r--fs/btrfs/space-info.h9
-rw-r--r--fs/btrfs/subpage.c74
-rw-r--r--fs/btrfs/subpage.h21
-rw-r--r--fs/btrfs/super.c9
-rw-r--r--fs/btrfs/super.h7
-rw-r--r--fs/btrfs/sysfs.c53
-rw-r--r--fs/btrfs/sysfs.h9
-rw-r--r--fs/btrfs/tests/extent-io-tests.c28
-rw-r--r--fs/btrfs/tests/inode-tests.c40
-rw-r--r--fs/btrfs/transaction.c19
-rw-r--r--fs/btrfs/transaction.h18
-rw-r--r--fs/btrfs/tree-checker.c8
-rw-r--r--fs/btrfs/tree-checker.h2
-rw-r--r--fs/btrfs/tree-log.c141
-rw-r--r--fs/btrfs/tree-log.h49
-rw-r--r--fs/btrfs/tree-mod-log.c13
-rw-r--r--fs/btrfs/tree-mod-log.h8
-rw-r--r--fs/btrfs/ulist.c1
-rw-r--r--fs/btrfs/ulist.h1
-rw-r--r--fs/btrfs/uuid-tree.c3
-rw-r--r--fs/btrfs/uuid-tree.h5
-rw-r--r--fs/btrfs/verity.c1
-rw-r--r--fs/btrfs/verity.h7
-rw-r--r--fs/btrfs/volumes.c98
-rw-r--r--fs/btrfs/volumes.h53
-rw-r--r--fs/btrfs/xattr.h6
-rw-r--r--fs/btrfs/zlib.c2
-rw-r--r--fs/btrfs/zoned.c2
-rw-r--r--fs/btrfs/zoned.h15
-rw-r--r--fs/btrfs/zstd.c77
118 files changed, 2131 insertions, 1117 deletions
diff --git a/fs/btrfs/accessors.c b/fs/btrfs/accessors.c
index 1925a0919ca6..79026917db19 100644
--- a/fs/btrfs/accessors.c
+++ b/fs/btrfs/accessors.c
@@ -5,7 +5,8 @@
#include <asm/unaligned.h>
#include "messages.h"
-#include "ctree.h"
+#include "extent_io.h"
+#include "fs.h"
#include "accessors.h"
static bool check_setget_bounds(const struct extent_buffer *eb,
@@ -63,8 +64,8 @@ u##bits btrfs_get_token_##bits(struct btrfs_map_token *token, \
const unsigned long idx = get_eb_folio_index(token->eb, member_offset); \
const unsigned long oil = get_eb_offset_in_folio(token->eb, \
member_offset);\
- const int unit_size = folio_size(token->eb->folios[0]); \
- const int unit_shift = folio_shift(token->eb->folios[0]); \
+ const int unit_size = token->eb->folio_size; \
+ const int unit_shift = token->eb->folio_shift; \
const int size = sizeof(u##bits); \
u8 lebytes[sizeof(u##bits)]; \
const int part = unit_size - oil; \
@@ -94,7 +95,7 @@ u##bits btrfs_get_##bits(const struct extent_buffer *eb, \
const unsigned long idx = get_eb_folio_index(eb, member_offset);\
const unsigned long oil = get_eb_offset_in_folio(eb, \
member_offset);\
- const int unit_size = folio_size(eb->folios[0]); \
+ const int unit_size = eb->folio_size; \
char *kaddr = folio_address(eb->folios[idx]); \
const int size = sizeof(u##bits); \
const int part = unit_size - oil; \
@@ -117,8 +118,8 @@ void btrfs_set_token_##bits(struct btrfs_map_token *token, \
const unsigned long idx = get_eb_folio_index(token->eb, member_offset); \
const unsigned long oil = get_eb_offset_in_folio(token->eb, \
member_offset);\
- const int unit_size = folio_size(token->eb->folios[0]); \
- const int unit_shift = folio_shift(token->eb->folios[0]); \
+ const int unit_size = token->eb->folio_size; \
+ const int unit_shift = token->eb->folio_shift; \
const int size = sizeof(u##bits); \
u8 lebytes[sizeof(u##bits)]; \
const int part = unit_size - oil; \
@@ -151,7 +152,7 @@ void btrfs_set_##bits(const struct extent_buffer *eb, void *ptr, \
const unsigned long idx = get_eb_folio_index(eb, member_offset);\
const unsigned long oil = get_eb_offset_in_folio(eb, \
member_offset);\
- const int unit_size = folio_size(eb->folios[0]); \
+ const int unit_size = eb->folio_size; \
char *kaddr = folio_address(eb->folios[idx]); \
const int size = sizeof(u##bits); \
const int part = unit_size - oil; \
diff --git a/fs/btrfs/accessors.h b/fs/btrfs/accessors.h
index ed7aa32972ad..6fce3e8d3dac 100644
--- a/fs/btrfs/accessors.h
+++ b/fs/btrfs/accessors.h
@@ -3,8 +3,17 @@
#ifndef BTRFS_ACCESSORS_H
#define BTRFS_ACCESSORS_H
-#include <linux/stddef.h>
#include <asm/unaligned.h>
+#include <linux/stddef.h>
+#include <linux/types.h>
+#include <linux/align.h>
+#include <linux/build_bug.h>
+#include <linux/compiler.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <uapi/linux/btrfs_tree.h>
+
+struct extent_buffer;
struct btrfs_map_token {
struct extent_buffer *eb;
@@ -844,45 +853,6 @@ static inline void btrfs_set_balance_sys(struct extent_buffer *eb,
write_eb_member(eb, bi, struct btrfs_balance_item, sys, ba);
}
-static inline void btrfs_disk_balance_args_to_cpu(struct btrfs_balance_args *cpu,
- const struct btrfs_disk_balance_args *disk)
-{
- memset(cpu, 0, sizeof(*cpu));
-
- cpu->profiles = le64_to_cpu(disk->profiles);
- cpu->usage = le64_to_cpu(disk->usage);
- cpu->devid = le64_to_cpu(disk->devid);
- cpu->pstart = le64_to_cpu(disk->pstart);
- cpu->pend = le64_to_cpu(disk->pend);
- cpu->vstart = le64_to_cpu(disk->vstart);
- cpu->vend = le64_to_cpu(disk->vend);
- cpu->target = le64_to_cpu(disk->target);
- cpu->flags = le64_to_cpu(disk->flags);
- cpu->limit = le64_to_cpu(disk->limit);
- cpu->stripes_min = le32_to_cpu(disk->stripes_min);
- cpu->stripes_max = le32_to_cpu(disk->stripes_max);
-}
-
-static inline void btrfs_cpu_balance_args_to_disk(
- struct btrfs_disk_balance_args *disk,
- const struct btrfs_balance_args *cpu)
-{
- memset(disk, 0, sizeof(*disk));
-
- disk->profiles = cpu_to_le64(cpu->profiles);
- disk->usage = cpu_to_le64(cpu->usage);
- disk->devid = cpu_to_le64(cpu->devid);
- disk->pstart = cpu_to_le64(cpu->pstart);
- disk->pend = cpu_to_le64(cpu->pend);
- disk->vstart = cpu_to_le64(cpu->vstart);
- disk->vend = cpu_to_le64(cpu->vend);
- disk->target = cpu_to_le64(cpu->target);
- disk->flags = cpu_to_le64(cpu->flags);
- disk->limit = cpu_to_le64(cpu->limit);
- disk->stripes_min = cpu_to_le32(cpu->stripes_min);
- disk->stripes_max = cpu_to_le32(cpu->stripes_max);
-}
-
/* struct btrfs_super_block */
BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64);
BTRFS_SETGET_STACK_FUNCS(super_flags, struct btrfs_super_block, flags, 64);
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 7427449a04a3..e0ba00d64ea0 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -12,7 +12,6 @@
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include "ctree.h"
-#include "btrfs_inode.h"
#include "xattr.h"
#include "acl.h"
diff --git a/fs/btrfs/acl.h b/fs/btrfs/acl.h
index a270e71ec05f..48b9ddae4a46 100644
--- a/fs/btrfs/acl.h
+++ b/fs/btrfs/acl.h
@@ -3,8 +3,15 @@
#ifndef BTRFS_ACL_H
#define BTRFS_ACL_H
+struct posix_acl;
+struct inode;
+struct btrfs_trans_handle;
+
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
+struct mnt_idmap;
+struct dentry;
+
struct posix_acl *btrfs_get_acl(struct inode *inode, int type, bool rcu);
int btrfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type);
@@ -13,6 +20,10 @@ int __btrfs_set_acl(struct btrfs_trans_handle *trans, struct inode *inode,
#else
+#include <linux/errno.h>
+
+struct btrfs_trans_handle;
+
#define btrfs_get_acl NULL
#define btrfs_set_acl NULL
static inline int __btrfs_set_acl(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 9e261aac671e..361a866c1995 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -11,7 +11,6 @@
#include <linux/freezer.h>
#include <trace/events/btrfs.h>
#include "async-thread.h"
-#include "ctree.h"
enum {
WORK_DONE_BIT,
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index 62b8a0d57898..04c2f3175828 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -7,11 +7,14 @@
#ifndef BTRFS_ASYNC_THREAD_H
#define BTRFS_ASYNC_THREAD_H
+#include <linux/compiler_types.h>
#include <linux/workqueue.h>
+#include <linux/list.h>
struct btrfs_fs_info;
struct btrfs_workqueue;
struct btrfs_work;
+
typedef void (*btrfs_func_t)(struct btrfs_work *arg);
typedef void (*btrfs_ordered_func_t)(struct btrfs_work *arg, bool);
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index beed7e459dab..c1e6a5bbeeaf 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -198,10 +198,7 @@ static struct kmem_cache *btrfs_prelim_ref_cache;
int __init btrfs_prelim_ref_init(void)
{
btrfs_prelim_ref_cache = kmem_cache_create("btrfs_prelim_ref",
- sizeof(struct prelim_ref),
- 0,
- SLAB_MEM_SPREAD,
- NULL);
+ sizeof(struct prelim_ref), 0, 0, NULL);
if (!btrfs_prelim_ref_cache)
return -ENOMEM;
return 0;
@@ -1036,8 +1033,6 @@ static int add_inline_refs(struct btrfs_backref_walk_ctx *ctx,
slot = path->slots[0];
item_size = btrfs_item_size(leaf, slot);
- BUG_ON(item_size < sizeof(*ei));
-
ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
if (ctx->check_extent_item) {
@@ -1435,8 +1430,10 @@ again:
if (ret < 0)
goto out;
if (ret == 0) {
- /* This shouldn't happen, indicates a bug or fs corruption. */
- ASSERT(ret != 0);
+ /*
+ * Key with offset -1 found, there would have to exist an extent
+ * item with such offset, but this is out of the valid range.
+ */
ret = -EUCLEAN;
goto out;
}
@@ -2225,6 +2222,13 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
if (ret < 0)
return ret;
+ if (ret == 0) {
+ /*
+ * Key with offset -1 found, there would have to exist an extent
+ * item with such offset, but this is out of the valid range.
+ */
+ return -EUCLEAN;
+ }
ret = btrfs_previous_extent_item(extent_root, path, 0);
if (ret) {
@@ -2247,7 +2251,6 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
eb = path->nodes[0];
item_size = btrfs_item_size(eb, path->slots[0]);
- BUG_ON(item_size < sizeof(*ei));
ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
flags = btrfs_extent_flags(eb, ei);
@@ -2850,6 +2853,16 @@ struct btrfs_backref_iter *btrfs_backref_iter_alloc(struct btrfs_fs_info *fs_inf
return ret;
}
+static void btrfs_backref_iter_release(struct btrfs_backref_iter *iter)
+{
+ iter->bytenr = 0;
+ iter->item_ptr = 0;
+ iter->cur_ptr = 0;
+ iter->end_ptr = 0;
+ btrfs_release_path(iter->path);
+ memset(&iter->cur_key, 0, sizeof(iter->cur_key));
+}
+
int btrfs_backref_iter_start(struct btrfs_backref_iter *iter, u64 bytenr)
{
struct btrfs_fs_info *fs_info = iter->fs_info;
@@ -2868,6 +2881,10 @@ int btrfs_backref_iter_start(struct btrfs_backref_iter *iter, u64 bytenr)
if (ret < 0)
return ret;
if (ret == 0) {
+ /*
+ * Key with offset -1 found, there would have to exist an extent
+ * item with such offset, but this is out of the valid range.
+ */
ret = -EUCLEAN;
goto release;
}
@@ -2938,6 +2955,14 @@ release:
return ret;
}
+static bool btrfs_backref_iter_is_inline_ref(struct btrfs_backref_iter *iter)
+{
+ if (iter->cur_key.type == BTRFS_EXTENT_ITEM_KEY ||
+ iter->cur_key.type == BTRFS_METADATA_ITEM_KEY)
+ return true;
+ return false;
+}
+
/*
* Go to the next backref item of current bytenr, can be either inlined or
* keyed.
@@ -2950,7 +2975,7 @@ release:
*/
int btrfs_backref_iter_next(struct btrfs_backref_iter *iter)
{
- struct extent_buffer *eb = btrfs_backref_get_eb(iter);
+ struct extent_buffer *eb = iter->path->nodes[0];
struct btrfs_root *extent_root;
struct btrfs_path *path = iter->path;
struct btrfs_extent_inline_ref *iref;
@@ -3038,6 +3063,19 @@ struct btrfs_backref_node *btrfs_backref_alloc_node(
return node;
}
+void btrfs_backref_free_node(struct btrfs_backref_cache *cache,
+ struct btrfs_backref_node *node)
+{
+ if (node) {
+ ASSERT(list_empty(&node->list));
+ ASSERT(list_empty(&node->lower));
+ ASSERT(node->eb == NULL);
+ cache->nr_nodes--;
+ btrfs_put_root(node->root);
+ kfree(node);
+ }
+}
+
struct btrfs_backref_edge *btrfs_backref_alloc_edge(
struct btrfs_backref_cache *cache)
{
@@ -3049,6 +3087,52 @@ struct btrfs_backref_edge *btrfs_backref_alloc_edge(
return edge;
}
+void btrfs_backref_free_edge(struct btrfs_backref_cache *cache,
+ struct btrfs_backref_edge *edge)
+{
+ if (edge) {
+ cache->nr_edges--;
+ kfree(edge);
+ }
+}
+
+void btrfs_backref_unlock_node_buffer(struct btrfs_backref_node *node)
+{
+ if (node->locked) {
+ btrfs_tree_unlock(node->eb);
+ node->locked = 0;
+ }
+}
+
+void btrfs_backref_drop_node_buffer(struct btrfs_backref_node *node)
+{
+ if (node->eb) {
+ btrfs_backref_unlock_node_buffer(node);
+ free_extent_buffer(node->eb);
+ node->eb = NULL;
+ }
+}
+
+/*
+ * Drop the backref node from cache without cleaning up its children
+ * edges.
+ *
+ * This can only be called on node without parent edges.
+ * The children edges are still kept as is.
+ */
+void btrfs_backref_drop_node(struct btrfs_backref_cache *tree,
+ struct btrfs_backref_node *node)
+{
+ ASSERT(list_empty(&node->upper));
+
+ btrfs_backref_drop_node_buffer(node);
+ list_del_init(&node->list);
+ list_del_init(&node->lower);
+ if (!RB_EMPTY_NODE(&node->rb_node))
+ rb_erase(&node->rb_node, &tree->rb_root);
+ btrfs_backref_free_node(tree, node);
+}
+
/*
* Drop the backref node from cache, also cleaning up all its
* upper edges and any uncached nodes in the path.
@@ -3120,6 +3204,19 @@ void btrfs_backref_release_cache(struct btrfs_backref_cache *cache)
ASSERT(!cache->nr_edges);
}
+void btrfs_backref_link_edge(struct btrfs_backref_edge *edge,
+ struct btrfs_backref_node *lower,
+ struct btrfs_backref_node *upper,
+ int link_which)
+{
+ ASSERT(upper && lower && upper->level == lower->level + 1);
+ edge->node[LOWER] = lower;
+ edge->node[UPPER] = upper;
+ if (link_which & LINK_LOWER)
+ list_add_tail(&edge->list[LOWER], &lower->upper);
+ if (link_which & LINK_UPPER)
+ list_add_tail(&edge->list[UPPER], &upper->lower);
+}
/*
* Handle direct tree backref
*
@@ -3428,7 +3525,7 @@ int btrfs_backref_add_tree_node(struct btrfs_trans_handle *trans,
int type;
cond_resched();
- eb = btrfs_backref_get_eb(iter);
+ eb = iter->path->nodes[0];
key.objectid = iter->bytenr;
if (btrfs_backref_iter_is_inline_ref(iter)) {
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index ab4ca0eda605..e8c22cccb5c1 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -6,11 +6,23 @@
#ifndef BTRFS_BACKREF_H
#define BTRFS_BACKREF_H
-#include <linux/btrfs.h>
+#include <linux/types.h>
+#include <linux/rbtree.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <uapi/linux/btrfs.h>
+#include <uapi/linux/btrfs_tree.h>
#include "messages.h"
-#include "ulist.h"
+#include "locking.h"
#include "disk-io.h"
#include "extent_io.h"
+#include "ctree.h"
+
+struct extent_inode_elem;
+struct ulist;
+struct btrfs_extent_item;
+struct btrfs_trans_handle;
+struct btrfs_fs_info;
/*
* Used by implementations of iterate_extent_inodes_t (see definition below) to
@@ -271,22 +283,6 @@ struct btrfs_backref_iter {
struct btrfs_backref_iter *btrfs_backref_iter_alloc(struct btrfs_fs_info *fs_info);
-static inline void btrfs_backref_iter_free(struct btrfs_backref_iter *iter)
-{
- if (!iter)
- return;
- btrfs_free_path(iter->path);
- kfree(iter);
-}
-
-static inline struct extent_buffer *btrfs_backref_get_eb(
- struct btrfs_backref_iter *iter)
-{
- if (!iter)
- return NULL;
- return iter->path->nodes[0];
-}
-
/*
* For metadata with EXTENT_ITEM key (non-skinny) case, the first inline data
* is btrfs_tree_block_info, without a btrfs_extent_inline_ref header.
@@ -306,25 +302,6 @@ int btrfs_backref_iter_start(struct btrfs_backref_iter *iter, u64 bytenr);
int btrfs_backref_iter_next(struct btrfs_backref_iter *iter);
-static inline bool btrfs_backref_iter_is_inline_ref(
- struct btrfs_backref_iter *iter)
-{
- if (iter->cur_key.type == BTRFS_EXTENT_ITEM_KEY ||
- iter->cur_key.type == BTRFS_METADATA_ITEM_KEY)
- return true;
- return false;
-}
-
-static inline void btrfs_backref_iter_release(struct btrfs_backref_iter *iter)
-{
- iter->bytenr = 0;
- iter->item_ptr = 0;
- iter->cur_ptr = 0;
- iter->end_ptr = 0;
- btrfs_release_path(iter->path);
- memset(&iter->cur_key, 0, sizeof(iter->cur_key));
-}
-
/*
* Backref cache related structures
*
@@ -452,83 +429,22 @@ struct btrfs_backref_edge *btrfs_backref_alloc_edge(
#define LINK_LOWER (1 << 0)
#define LINK_UPPER (1 << 1)
-static inline void btrfs_backref_link_edge(struct btrfs_backref_edge *edge,
- struct btrfs_backref_node *lower,
- struct btrfs_backref_node *upper,
- int link_which)
-{
- ASSERT(upper && lower && upper->level == lower->level + 1);
- edge->node[LOWER] = lower;
- edge->node[UPPER] = upper;
- if (link_which & LINK_LOWER)
- list_add_tail(&edge->list[LOWER], &lower->upper);
- if (link_which & LINK_UPPER)
- list_add_tail(&edge->list[UPPER], &upper->lower);
-}
-
-static inline void btrfs_backref_free_node(struct btrfs_backref_cache *cache,
- struct btrfs_backref_node *node)
-{
- if (node) {
- ASSERT(list_empty(&node->list));
- ASSERT(list_empty(&node->lower));
- ASSERT(node->eb == NULL);
- cache->nr_nodes--;
- btrfs_put_root(node->root);
- kfree(node);
- }
-}
-
-static inline void btrfs_backref_free_edge(struct btrfs_backref_cache *cache,
- struct btrfs_backref_edge *edge)
-{
- if (edge) {
- cache->nr_edges--;
- kfree(edge);
- }
-}
-
-static inline void btrfs_backref_unlock_node_buffer(
- struct btrfs_backref_node *node)
-{
- if (node->locked) {
- btrfs_tree_unlock(node->eb);
- node->locked = 0;
- }
-}
-static inline void btrfs_backref_drop_node_buffer(
- struct btrfs_backref_node *node)
-{
- if (node->eb) {
- btrfs_backref_unlock_node_buffer(node);
- free_extent_buffer(node->eb);
- node->eb = NULL;
- }
-}
-
-/*
- * Drop the backref node from cache without cleaning up its children
- * edges.
- *
- * This can only be called on node without parent edges.
- * The children edges are still kept as is.
- */
-static inline void btrfs_backref_drop_node(struct btrfs_backref_cache *tree,
- struct btrfs_backref_node *node)
-{
- ASSERT(list_empty(&node->upper));
-
- btrfs_backref_drop_node_buffer(node);
- list_del_init(&node->list);
- list_del_init(&node->lower);
- if (!RB_EMPTY_NODE(&node->rb_node))
- rb_erase(&node->rb_node, &tree->rb_root);
- btrfs_backref_free_node(tree, node);
-}
+void btrfs_backref_link_edge(struct btrfs_backref_edge *edge,
+ struct btrfs_backref_node *lower,
+ struct btrfs_backref_node *upper,
+ int link_which);
+void btrfs_backref_free_node(struct btrfs_backref_cache *cache,
+ struct btrfs_backref_node *node);
+void btrfs_backref_free_edge(struct btrfs_backref_cache *cache,
+ struct btrfs_backref_edge *edge);
+void btrfs_backref_unlock_node_buffer(struct btrfs_backref_node *node);
+void btrfs_backref_drop_node_buffer(struct btrfs_backref_node *node);
void btrfs_backref_cleanup_node(struct btrfs_backref_cache *cache,
struct btrfs_backref_node *node);
+void btrfs_backref_drop_node(struct btrfs_backref_cache *tree,
+ struct btrfs_backref_node *node);
void btrfs_backref_release_cache(struct btrfs_backref_cache *cache);
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index 928f512cdb4a..477f350a8bd0 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -11,7 +11,6 @@
#include "raid56.h"
#include "async-thread.h"
#include "dev-replace.h"
-#include "rcu-string.h"
#include "zoned.h"
#include "file-item.h"
#include "raid-stripe-tree.h"
@@ -509,8 +508,6 @@ static void __btrfs_submit_bio(struct bio *bio, struct btrfs_io_context *bioc,
if (!bioc) {
/* Single mirror read/write fast path. */
btrfs_bio(bio)->mirror_num = mirror_num;
- if (bio_op(bio) != REQ_OP_READ)
- btrfs_bio(bio)->orig_physical = smap->physical;
bio->bi_iter.bi_sector = smap->physical >> SECTOR_SHIFT;
if (bio_op(bio) != REQ_OP_READ)
btrfs_bio(bio)->orig_physical = smap->physical;
@@ -611,8 +608,20 @@ static void run_one_async_done(struct btrfs_work *work, bool do_free)
static bool should_async_write(struct btrfs_bio *bbio)
{
+ bool auto_csum_mode = true;
+
+#ifdef CONFIG_BTRFS_DEBUG
+ struct btrfs_fs_devices *fs_devices = bbio->fs_info->fs_devices;
+ enum btrfs_offload_csum_mode csum_mode = READ_ONCE(fs_devices->offload_csum_mode);
+
+ if (csum_mode == BTRFS_OFFLOAD_CSUM_FORCE_OFF)
+ return false;
+
+ auto_csum_mode = (csum_mode == BTRFS_OFFLOAD_CSUM_AUTO);
+#endif
+
/* Submit synchronously if the checksum implementation is fast. */
- if (test_bit(BTRFS_FS_CSUM_IMPL_FAST, &bbio->fs_info->flags))
+ if (auto_csum_mode && test_bit(BTRFS_FS_CSUM_IMPL_FAST, &bbio->fs_info->flags))
return false;
/*
diff --git a/fs/btrfs/bio.h b/fs/btrfs/bio.h
index bbaed317161a..d9dd5276093d 100644
--- a/fs/btrfs/bio.h
+++ b/fs/btrfs/bio.h
@@ -7,12 +7,14 @@
#ifndef BTRFS_BIO_H
#define BTRFS_BIO_H
+#include <linux/types.h>
#include <linux/bio.h>
#include <linux/workqueue.h>
#include "tree-checker.h"
struct btrfs_bio;
struct btrfs_fs_info;
+struct btrfs_inode;
#define BTRFS_BIO_INLINE_CSUM_SIZE 64
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 378d9103a207..5f7587ca1ca7 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -418,7 +418,7 @@ struct btrfs_caching_control *btrfs_get_caching_control(
return ctl;
}
-void btrfs_put_caching_control(struct btrfs_caching_control *ctl)
+static void btrfs_put_caching_control(struct btrfs_caching_control *ctl)
{
if (refcount_dec_and_test(&ctl->count))
kfree(ctl);
@@ -1063,7 +1063,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
bool remove_rsv = false;
block_group = btrfs_lookup_block_group(fs_info, map->start);
- BUG_ON(!block_group);
+ if (!block_group)
+ return -ENOENT;
+
BUG_ON(!block_group->ro);
trace_btrfs_remove_block_group(block_group);
@@ -1429,7 +1431,7 @@ static bool clean_pinned_extents(struct btrfs_trans_handle *trans,
* group in pinned_extents before we were able to clear the whole block
* group range from pinned_extents. This means that task can lookup for
* the block group after we unpinned it from pinned_extents and removed
- * it, leading to a BUG_ON() at unpin_extent_range().
+ * it, leading to an error at unpin_extent_range().
*/
mutex_lock(&fs_info->unused_bg_unpin_mutex);
if (prev_trans) {
@@ -1522,6 +1524,13 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
* outstanding allocations in this block group. We do
* the ro check in case balance is currently acting on
* this block group.
+ *
+ * Also bail out if this is the only block group for its
+ * type, because otherwise we would lose profile
+ * information from fs_info->avail_*_alloc_bits and the
+ * next block group of this type would be created with a
+ * "single" profile (even if we're in a raid fs) because
+ * fs_info->avail_*_alloc_bits would be 0.
*/
trace_btrfs_skip_unused_block_group(block_group);
spin_unlock(&block_group->lock);
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index 962b11983901..85e2d4cd12dc 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -3,9 +3,22 @@
#ifndef BTRFS_BLOCK_GROUP_H
#define BTRFS_BLOCK_GROUP_H
+#include <linux/atomic.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/refcount.h>
+#include <linux/wait.h>
+#include <linux/sizes.h>
+#include <linux/rwsem.h>
+#include <linux/rbtree.h>
+#include <uapi/linux/btrfs_tree.h>
#include "free-space-cache.h"
struct btrfs_chunk_map;
+struct btrfs_fs_info;
+struct btrfs_inode;
+struct btrfs_trans_handle;
enum btrfs_disk_cache_state {
BTRFS_DC_WRITTEN,
@@ -297,7 +310,6 @@ void btrfs_wait_nocow_writers(struct btrfs_block_group *bg);
void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache,
u64 num_bytes);
int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait);
-void btrfs_put_caching_control(struct btrfs_caching_control *ctl);
struct btrfs_caching_control *btrfs_get_caching_control(
struct btrfs_block_group *cache);
int btrfs_add_new_free_space(struct btrfs_block_group *block_group,
diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c
index 1043a8142351..95c174f9fd4f 100644
--- a/fs/btrfs/block-rsv.c
+++ b/fs/btrfs/block-rsv.c
@@ -6,7 +6,6 @@
#include "space-info.h"
#include "transaction.h"
#include "block-group.h"
-#include "disk-io.h"
#include "fs.h"
#include "accessors.h"
diff --git a/fs/btrfs/block-rsv.h b/fs/btrfs/block-rsv.h
index 43a9a6b5a79f..1f53b967d069 100644
--- a/fs/btrfs/block-rsv.h
+++ b/fs/btrfs/block-rsv.h
@@ -3,8 +3,15 @@
#ifndef BTRFS_BLOCK_RSV_H
#define BTRFS_BLOCK_RSV_H
+#include <linux/types.h>
+#include <linux/compiler.h>
+#include <linux/spinlock.h>
+
struct btrfs_trans_handle;
struct btrfs_root;
+struct btrfs_space_info;
+struct btrfs_block_rsv;
+struct btrfs_fs_info;
enum btrfs_reserve_flush_enum;
/*
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 7f7c5a92d2b8..100020ca4658 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -8,13 +8,32 @@
#include <linux/hash.h>
#include <linux/refcount.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/compiler.h>
#include <linux/fscrypt.h>
+#include <linux/lockdep.h>
+#include <uapi/linux/btrfs_tree.h>
#include <trace/events/btrfs.h>
+#include "block-rsv.h"
+#include "btrfs_inode.h"
#include "extent_map.h"
#include "extent_io.h"
+#include "extent-io-tree.h"
#include "ordered-data.h"
#include "delayed-inode.h"
+struct extent_state;
+struct posix_acl;
+struct iov_iter;
+struct writeback_control;
+struct btrfs_root;
+struct btrfs_fs_info;
+struct btrfs_trans_handle;
+
/*
* Since we search a directory based on f_pos (struct dir_context::pos) we have
* to start at 2 since '.' and '..' have f_pos of 0 and 1 respectively, so
@@ -41,7 +60,6 @@ enum {
*/
BTRFS_INODE_NEEDS_FULL_SYNC,
BTRFS_INODE_COPY_EVERYTHING,
- BTRFS_INODE_IN_DELALLOC_LIST,
BTRFS_INODE_HAS_PROPS,
BTRFS_INODE_SNAPSHOT_FLUSH,
/*
@@ -428,7 +446,7 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
u64 *orig_start, u64 *orig_block_len,
u64 *ram_bytes, bool nowait, bool strict);
-void __btrfs_del_delalloc_inode(struct btrfs_root *root, struct btrfs_inode *inode);
+void btrfs_del_delalloc_inode(struct btrfs_inode *inode);
struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry);
int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index);
int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
@@ -490,8 +508,7 @@ struct inode *btrfs_iget_path(struct super_block *s, u64 ino,
struct btrfs_root *root, struct btrfs_path *path);
struct inode *btrfs_iget(struct super_block *s, u64 ino, struct btrfs_root *root);
struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
- struct page *page, size_t pg_offset,
- u64 start, u64 len);
+ struct page *page, u64 start, u64 len);
int btrfs_update_inode(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode);
int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 68345f73d429..b2b94009959d 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -25,8 +25,6 @@
#include "misc.h"
#include "ctree.h"
#include "fs.h"
-#include "disk-io.h"
-#include "transaction.h"
#include "btrfs_inode.h"
#include "bio.h"
#include "ordered-data.h"
@@ -34,8 +32,7 @@
#include "extent_io.h"
#include "extent_map.h"
#include "subpage.h"
-#include "zoned.h"
-#include "file-item.h"
+#include "messages.h"
#include "super.h"
static struct bio_set btrfs_compressed_bioset;
@@ -284,7 +281,7 @@ static void end_bbio_comprssed_read(struct btrfs_bio *bbio)
static noinline void end_compressed_writeback(const struct compressed_bio *cb)
{
struct inode *inode = &cb->bbio.inode->vfs_inode;
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
unsigned long index = cb->start >> PAGE_SHIFT;
unsigned long end_index = (cb->start + cb->len - 1) >> PAGE_SHIFT;
struct folio_batch fbatch;
@@ -415,7 +412,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
struct compressed_bio *cb,
int *memstall, unsigned long *pflags)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
unsigned long end_index;
struct bio *orig_bio = &cb->orig_bbio->bio;
u64 cur = cb->orig_bbio->file_offset + orig_bio->bi_iter.bi_size;
@@ -441,7 +438,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
* This makes readahead less effective, so here disable readahead for
* subpage for now, until full compressed write is supported.
*/
- if (btrfs_sb(inode->i_sb)->sectorsize < PAGE_SIZE)
+ if (fs_info->sectorsize < PAGE_SIZE)
return 0;
end_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;
@@ -1039,7 +1036,7 @@ static int btrfs_decompress_bio(struct compressed_bio *cb)
int btrfs_decompress(int type, const u8 *data_in, struct page *dest_page,
unsigned long dest_pgoff, size_t srclen, size_t destlen)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(dest_page->mapping->host->i_sb);
+ struct btrfs_fs_info *fs_info = page_to_fs_info(dest_page);
struct list_head *workspace;
const u32 sectorsize = fs_info->sectorsize;
int ret;
@@ -1479,11 +1476,6 @@ static void heuristic_collect_sample(struct inode *inode, u64 start, u64 end,
/*
* Compression heuristic.
*
- * For now is's a naive and optimistic 'return true', we'll extend the logic to
- * quickly (compared to direct compression) detect data characteristics
- * (compressible/incompressible) to avoid wasting CPU time on incompressible
- * data.
- *
* The following types of analysis can be performed:
* - detect mostly zero data
* - detect data with low "byte set" size (text, etc)
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index afd7e50d073d..4691a84ca838 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -7,10 +7,18 @@
#define BTRFS_COMPRESSION_H
#include <linux/sizes.h>
+#include <linux/mm.h>
+#include <linux/list.h>
+#include <linux/workqueue.h>
+#include <linux/wait.h>
#include "bio.h"
+struct address_space;
+struct page;
+struct inode;
struct btrfs_inode;
struct btrfs_ordered_extent;
+struct btrfs_bio;
/*
* We want to make sure that amount of RAM required to uncompress an extent is
@@ -32,8 +40,6 @@ static_assert((BTRFS_MAX_COMPRESSED % PAGE_SIZE) == 0);
#define BTRFS_ZLIB_DEFAULT_LEVEL 3
-struct page;
-
struct compressed_bio {
/* Number of compressed pages in the array */
unsigned int nr_pages;
@@ -169,7 +175,7 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping,
unsigned long *total_in, unsigned long *total_out);
int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb);
int zstd_decompress(struct list_head *ws, const u8 *data_in,
- struct page *dest_page, unsigned long start_byte, size_t srclen,
+ struct page *dest_page, unsigned long dest_pgoff, size_t srclen,
size_t destlen);
void zstd_init_workspace_manager(void);
void zstd_cleanup_workspace_manager(void);
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index e65e012bac55..aaf53fd84358 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -820,7 +820,7 @@ int btrfs_bin_search(struct extent_buffer *eb, int first_slot,
}
while (low < high) {
- const int unit_size = folio_size(eb->folios[0]);
+ const int unit_size = eb->folio_size;
unsigned long oil;
unsigned long offset;
struct btrfs_disk_key *tmp;
@@ -4280,6 +4280,10 @@ void btrfs_setup_item_for_insert(struct btrfs_trans_handle *trans,
/*
* Given a key and some data, insert items into the tree.
* This does all the path init required, making room in the tree if needed.
+ *
+ * Returns: 0 on success
+ * -EEXIST if the first key already exists
+ * < 0 on other errors
*/
int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
@@ -5082,9 +5086,7 @@ int btrfs_previous_extent_item(struct btrfs_root *root,
int __init btrfs_ctree_init(void)
{
- btrfs_path_cachep = kmem_cache_create("btrfs_path",
- sizeof(struct btrfs_path), 0,
- SLAB_MEM_SPREAD, NULL);
+ btrfs_path_cachep = KMEM_CACHE(btrfs_path, 0);
if (!btrfs_path_cachep)
return -ENOMEM;
return 0;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 70e828d33177..c03c58246033 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -7,25 +7,24 @@
#define BTRFS_CTREE_H
#include <linux/pagemap.h>
+#include <linux/spinlock.h>
+#include <linux/rbtree.h>
+#include <linux/mutex.h>
+#include <linux/wait.h>
+#include <linux/list.h>
+#include <linux/atomic.h>
+#include <linux/xarray.h>
+#include <linux/refcount.h>
+#include <uapi/linux/btrfs_tree.h>
#include "locking.h"
#include "fs.h"
#include "accessors.h"
+#include "extent-io-tree.h"
+struct extent_buffer;
+struct btrfs_block_rsv;
struct btrfs_trans_handle;
-struct btrfs_transaction;
-struct btrfs_pending_snapshot;
-struct btrfs_delayed_ref_root;
-struct btrfs_space_info;
struct btrfs_block_group;
-struct btrfs_ordered_sum;
-struct btrfs_ref;
-struct btrfs_bio;
-struct btrfs_ioctl_encoded_io_args;
-struct btrfs_device;
-struct btrfs_fs_devices;
-struct btrfs_balance_control;
-struct btrfs_delayed_root;
-struct reloc_control;
/* Read ahead values for struct btrfs_path.reada */
enum {
@@ -478,8 +477,7 @@ static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping)
return mapping_gfp_constraint(mapping, ~__GFP_FS);
}
-int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
- u64 start, u64 end);
+void btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info, u64 start, u64 end);
int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
u64 num_bytes, u64 *actual_bytes);
int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range);
diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c
index 5b0b64571418..f015fa1b6301 100644
--- a/fs/btrfs/defrag.c
+++ b/fs/btrfs/defrag.c
@@ -6,7 +6,6 @@
#include <linux/sched.h>
#include "ctree.h"
#include "disk-io.h"
-#include "print-tree.h"
#include "transaction.h"
#include "locking.h"
#include "accessors.h"
@@ -521,7 +520,7 @@ static int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
* keep_locks set and lowest_level is 1, regardless of the value of
* path->slots[1].
*/
- BUG_ON(path->locks[1] == 0);
+ ASSERT(path->locks[1] != 0);
ret = btrfs_realloc_node(trans, root,
path->nodes[1], 0,
&last_ret,
@@ -810,7 +809,7 @@ static u32 get_extent_max_capacity(const struct btrfs_fs_info *fs_info,
static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em,
u32 extent_thresh, u64 newer_than, bool locked)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct extent_map *next;
bool ret = false;
@@ -861,20 +860,21 @@ out:
* NOTE: Caller should also wait for page writeback after the cluster is
* prepared, here we don't do writeback wait for each page.
*/
-static struct page *defrag_prepare_one_page(struct btrfs_inode *inode, pgoff_t index)
+static struct folio *defrag_prepare_one_folio(struct btrfs_inode *inode, pgoff_t index)
{
struct address_space *mapping = inode->vfs_inode.i_mapping;
gfp_t mask = btrfs_alloc_write_mask(mapping);
u64 page_start = (u64)index << PAGE_SHIFT;
u64 page_end = page_start + PAGE_SIZE - 1;
struct extent_state *cached_state = NULL;
- struct page *page;
+ struct folio *folio;
int ret;
again:
- page = find_or_create_page(mapping, index, mask);
- if (!page)
- return ERR_PTR(-ENOMEM);
+ folio = __filemap_get_folio(mapping, index,
+ FGP_LOCK | FGP_ACCESSED | FGP_CREAT, mask);
+ if (IS_ERR(folio))
+ return folio;
/*
* Since we can defragment files opened read-only, we can encounter
@@ -884,16 +884,16 @@ again:
* executables that explicitly enable them, so this isn't very
* restrictive.
*/
- if (PageCompound(page)) {
- unlock_page(page);
- put_page(page);
+ if (folio_test_large(folio)) {
+ folio_unlock(folio);
+ folio_put(folio);
return ERR_PTR(-ETXTBSY);
}
- ret = set_page_extent_mapped(page);
+ ret = set_folio_extent_mapped(folio);
if (ret < 0) {
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
return ERR_PTR(ret);
}
@@ -908,17 +908,17 @@ again:
if (!ordered)
break;
- unlock_page(page);
+ folio_unlock(folio);
btrfs_start_ordered_extent(ordered);
btrfs_put_ordered_extent(ordered);
- lock_page(page);
+ folio_lock(folio);
/*
- * We unlocked the page above, so we need check if it was
+ * We unlocked the folio above, so we need check if it was
* released or not.
*/
- if (page->mapping != mapping || !PagePrivate(page)) {
- unlock_page(page);
- put_page(page);
+ if (folio->mapping != mapping || !folio->private) {
+ folio_unlock(folio);
+ folio_put(folio);
goto again;
}
}
@@ -927,21 +927,21 @@ again:
* Now the page range has no ordered extent any more. Read the page to
* make it uptodate.
*/
- if (!PageUptodate(page)) {
- btrfs_read_folio(NULL, page_folio(page));
- lock_page(page);
- if (page->mapping != mapping || !PagePrivate(page)) {
- unlock_page(page);
- put_page(page);
+ if (!folio_test_uptodate(folio)) {
+ btrfs_read_folio(NULL, folio);
+ folio_lock(folio);
+ if (folio->mapping != mapping || !folio->private) {
+ folio_unlock(folio);
+ folio_put(folio);
goto again;
}
- if (!PageUptodate(page)) {
- unlock_page(page);
- put_page(page);
+ if (!folio_test_uptodate(folio)) {
+ folio_unlock(folio);
+ folio_put(folio);
return ERR_PTR(-EIO);
}
}
- return page;
+ return folio;
}
struct defrag_target_range {
@@ -1162,7 +1162,7 @@ static_assert(PAGE_ALIGNED(CLUSTER_SIZE));
*/
static int defrag_one_locked_target(struct btrfs_inode *inode,
struct defrag_target_range *target,
- struct page **pages, int nr_pages,
+ struct folio **folios, int nr_pages,
struct extent_state **cached_state)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
@@ -1171,7 +1171,7 @@ static int defrag_one_locked_target(struct btrfs_inode *inode,
const u64 len = target->len;
unsigned long last_index = (start + len - 1) >> PAGE_SHIFT;
unsigned long start_index = start >> PAGE_SHIFT;
- unsigned long first_index = page_index(pages[0]);
+ unsigned long first_index = folios[0]->index;
int ret = 0;
int i;
@@ -1188,8 +1188,8 @@ static int defrag_one_locked_target(struct btrfs_inode *inode,
/* Update the page status */
for (i = start_index - first_index; i <= last_index - first_index; i++) {
- ClearPageChecked(pages[i]);
- btrfs_folio_clamp_set_dirty(fs_info, page_folio(pages[i]), start, len);
+ folio_clear_checked(folios[i]);
+ btrfs_folio_clamp_set_dirty(fs_info, folios[i], start, len);
}
btrfs_delalloc_release_extents(inode, len);
extent_changeset_free(data_reserved);
@@ -1205,7 +1205,7 @@ static int defrag_one_range(struct btrfs_inode *inode, u64 start, u32 len,
struct defrag_target_range *entry;
struct defrag_target_range *tmp;
LIST_HEAD(target_list);
- struct page **pages;
+ struct folio **folios;
const u32 sectorsize = inode->root->fs_info->sectorsize;
u64 last_index = (start + len - 1) >> PAGE_SHIFT;
u64 start_index = start >> PAGE_SHIFT;
@@ -1216,21 +1216,21 @@ static int defrag_one_range(struct btrfs_inode *inode, u64 start, u32 len,
ASSERT(nr_pages <= CLUSTER_SIZE / PAGE_SIZE);
ASSERT(IS_ALIGNED(start, sectorsize) && IS_ALIGNED(len, sectorsize));
- pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
- if (!pages)
+ folios = kcalloc(nr_pages, sizeof(struct folio *), GFP_NOFS);
+ if (!folios)
return -ENOMEM;
/* Prepare all pages */
for (i = 0; i < nr_pages; i++) {
- pages[i] = defrag_prepare_one_page(inode, start_index + i);
- if (IS_ERR(pages[i])) {
- ret = PTR_ERR(pages[i]);
- pages[i] = NULL;
- goto free_pages;
+ folios[i] = defrag_prepare_one_folio(inode, start_index + i);
+ if (IS_ERR(folios[i])) {
+ ret = PTR_ERR(folios[i]);
+ nr_pages = i;
+ goto free_folios;
}
}
for (i = 0; i < nr_pages; i++)
- wait_on_page_writeback(pages[i]);
+ folio_wait_writeback(folios[i]);
/* Lock the pages range */
lock_extent(&inode->io_tree, start_index << PAGE_SHIFT,
@@ -1250,7 +1250,7 @@ static int defrag_one_range(struct btrfs_inode *inode, u64 start, u32 len,
goto unlock_extent;
list_for_each_entry(entry, &target_list, list) {
- ret = defrag_one_locked_target(inode, entry, pages, nr_pages,
+ ret = defrag_one_locked_target(inode, entry, folios, nr_pages,
&cached_state);
if (ret < 0)
break;
@@ -1264,14 +1264,12 @@ unlock_extent:
unlock_extent(&inode->io_tree, start_index << PAGE_SHIFT,
(last_index << PAGE_SHIFT) + PAGE_SIZE - 1,
&cached_state);
-free_pages:
+free_folios:
for (i = 0; i < nr_pages; i++) {
- if (pages[i]) {
- unlock_page(pages[i]);
- put_page(pages[i]);
- }
+ folio_unlock(folios[i]);
+ folio_put(folios[i]);
}
- kfree(pages);
+ kfree(folios);
return ret;
}
@@ -1366,7 +1364,7 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
struct btrfs_ioctl_defrag_range_args *range,
u64 newer_than, unsigned long max_to_defrag)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
unsigned long sectors_defragged = 0;
u64 isize = i_size_read(inode);
u64 cur;
@@ -1512,9 +1510,7 @@ void __cold btrfs_auto_defrag_exit(void)
int __init btrfs_auto_defrag_init(void)
{
btrfs_inode_defrag_cachep = kmem_cache_create("btrfs_inode_defrag",
- sizeof(struct inode_defrag), 0,
- SLAB_MEM_SPREAD,
- NULL);
+ sizeof(struct inode_defrag), 0, 0, NULL);
if (!btrfs_inode_defrag_cachep)
return -ENOMEM;
diff --git a/fs/btrfs/defrag.h b/fs/btrfs/defrag.h
index 5a62763528d1..878528e086fb 100644
--- a/fs/btrfs/defrag.h
+++ b/fs/btrfs/defrag.h
@@ -3,6 +3,16 @@
#ifndef BTRFS_DEFRAG_H
#define BTRFS_DEFRAG_H
+#include <linux/types.h>
+#include <linux/compiler_types.h>
+
+struct inode;
+struct file_ra_state;
+struct btrfs_fs_info;
+struct btrfs_root;
+struct btrfs_trans_handle;
+struct btrfs_ioctl_defrag_range_args;
+
int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
struct btrfs_ioctl_defrag_range_args *range,
u64 newer_than, unsigned long max_to_defrag);
diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c
index acf9f4b6c044..b3527efd0b4b 100644
--- a/fs/btrfs/delalloc-space.c
+++ b/fs/btrfs/delalloc-space.c
@@ -6,9 +6,7 @@
#include "block-rsv.h"
#include "btrfs_inode.h"
#include "space-info.h"
-#include "transaction.h"
#include "qgroup.h"
-#include "block-group.h"
#include "fs.h"
/*
diff --git a/fs/btrfs/delalloc-space.h b/fs/btrfs/delalloc-space.h
index c5d573f2366e..ce4f889e4f17 100644
--- a/fs/btrfs/delalloc-space.h
+++ b/fs/btrfs/delalloc-space.h
@@ -3,7 +3,11 @@
#ifndef BTRFS_DELALLOC_SPACE_H
#define BTRFS_DELALLOC_SPACE_H
+#include <linux/types.h>
+
struct extent_changeset;
+struct btrfs_inode;
+struct btrfs_fs_info;
int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes);
int btrfs_check_data_free_space(struct btrfs_inode *inode,
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 08102883f560..dd6f566a383f 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -28,11 +28,7 @@ static struct kmem_cache *delayed_node_cache;
int __init btrfs_delayed_inode_init(void)
{
- delayed_node_cache = kmem_cache_create("btrfs_delayed_node",
- sizeof(struct btrfs_delayed_node),
- 0,
- SLAB_MEM_SPREAD,
- NULL);
+ delayed_node_cache = KMEM_CACHE(btrfs_delayed_node, 0);
if (!delayed_node_cache)
return -ENOMEM;
return 0;
@@ -43,6 +39,17 @@ void __cold btrfs_delayed_inode_exit(void)
kmem_cache_destroy(delayed_node_cache);
}
+void btrfs_init_delayed_root(struct btrfs_delayed_root *delayed_root)
+{
+ atomic_set(&delayed_root->items, 0);
+ atomic_set(&delayed_root->items_seq, 0);
+ delayed_root->nodes = 0;
+ spin_lock_init(&delayed_root->lock);
+ init_waitqueue_head(&delayed_root->wait);
+ INIT_LIST_HEAD(&delayed_root->node_list);
+ INIT_LIST_HEAD(&delayed_root->prepare_list);
+}
+
static inline void btrfs_init_delayed_node(
struct btrfs_delayed_node *delayed_node,
struct btrfs_root *root, u64 inode_id)
@@ -430,8 +437,6 @@ static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
delayed_root = delayed_node->root->fs_info->delayed_root;
- BUG_ON(!delayed_root);
-
if (delayed_item->type == BTRFS_DELAYED_INSERTION_ITEM)
root = &delayed_node->ins_root;
else
@@ -980,7 +985,7 @@ static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
if (delayed_node &&
test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
- BUG_ON(!delayed_node->root);
+ ASSERT(delayed_node->root);
clear_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
delayed_node->count--;
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index 5cceb31bbd16..64e115d97499 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -7,15 +7,23 @@
#ifndef BTRFS_DELAYED_INODE_H
#define BTRFS_DELAYED_INODE_H
+#include <linux/types.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/wait.h>
+#include <linux/fs.h>
#include <linux/atomic.h>
#include <linux/refcount.h>
#include "ctree.h"
+struct btrfs_disk_key;
+struct btrfs_fs_info;
+struct btrfs_inode;
+struct btrfs_root;
+struct btrfs_trans_handle;
+
enum btrfs_delayed_item_type {
BTRFS_DELAYED_INSERTION_ITEM,
BTRFS_DELAYED_DELETION_ITEM
@@ -98,18 +106,7 @@ struct btrfs_delayed_item {
char data[] __counted_by(data_len);
};
-static inline void btrfs_init_delayed_root(
- struct btrfs_delayed_root *delayed_root)
-{
- atomic_set(&delayed_root->items, 0);
- atomic_set(&delayed_root->items_seq, 0);
- delayed_root->nodes = 0;
- spin_lock_init(&delayed_root->lock);
- init_waitqueue_head(&delayed_root->wait);
- INIT_LIST_HEAD(&delayed_root->node_list);
- INIT_LIST_HEAD(&delayed_root->prepare_list);
-}
-
+void btrfs_init_delayed_root(struct btrfs_delayed_root *delayed_root);
int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
const char *name, int name_len,
struct btrfs_inode *dir,
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 891ea2fa263c..e44e62cf76bc 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -1004,6 +1004,52 @@ static void init_delayed_ref_common(struct btrfs_fs_info *fs_info,
INIT_LIST_HEAD(&ref->add_list);
}
+void btrfs_init_generic_ref(struct btrfs_ref *generic_ref, int action, u64 bytenr,
+ u64 len, u64 parent, u64 owning_root)
+{
+ generic_ref->action = action;
+ generic_ref->bytenr = bytenr;
+ generic_ref->len = len;
+ generic_ref->parent = parent;
+ generic_ref->owning_root = owning_root;
+}
+
+void btrfs_init_tree_ref(struct btrfs_ref *generic_ref, int level, u64 root,
+ u64 mod_root, bool skip_qgroup)
+{
+#ifdef CONFIG_BTRFS_FS_REF_VERIFY
+ /* If @real_root not set, use @root as fallback */
+ generic_ref->real_root = mod_root ?: root;
+#endif
+ generic_ref->tree_ref.level = level;
+ generic_ref->tree_ref.ref_root = root;
+ generic_ref->type = BTRFS_REF_METADATA;
+ if (skip_qgroup || !(is_fstree(root) &&
+ (!mod_root || is_fstree(mod_root))))
+ generic_ref->skip_qgroup = true;
+ else
+ generic_ref->skip_qgroup = false;
+
+}
+
+void btrfs_init_data_ref(struct btrfs_ref *generic_ref, u64 ref_root, u64 ino,
+ u64 offset, u64 mod_root, bool skip_qgroup)
+{
+#ifdef CONFIG_BTRFS_FS_REF_VERIFY
+ /* If @real_root not set, use @root as fallback */
+ generic_ref->real_root = mod_root ?: ref_root;
+#endif
+ generic_ref->data_ref.ref_root = ref_root;
+ generic_ref->data_ref.ino = ino;
+ generic_ref->data_ref.offset = offset;
+ generic_ref->type = BTRFS_REF_DATA;
+ if (skip_qgroup || !(is_fstree(ref_root) &&
+ (!mod_root || is_fstree(mod_root))))
+ generic_ref->skip_qgroup = true;
+ else
+ generic_ref->skip_qgroup = false;
+}
+
/*
* add a delayed tree ref. This does all of the accounting required
* to make sure the delayed ref is eventually processed before this
@@ -1220,6 +1266,25 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
return 0;
}
+void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
+{
+ if (refcount_dec_and_test(&ref->refs)) {
+ WARN_ON(!RB_EMPTY_NODE(&ref->ref_node));
+ switch (ref->type) {
+ case BTRFS_TREE_BLOCK_REF_KEY:
+ case BTRFS_SHARED_BLOCK_REF_KEY:
+ kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
+ break;
+ case BTRFS_EXTENT_DATA_REF_KEY:
+ case BTRFS_SHARED_DATA_REF_KEY:
+ kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
+ break;
+ default:
+ BUG();
+ }
+ }
+}
+
/*
* This does a simple search for the head node for a given extent. Returns the
* head node if found, or NULL if not.
@@ -1242,31 +1307,19 @@ void __cold btrfs_delayed_ref_exit(void)
int __init btrfs_delayed_ref_init(void)
{
- btrfs_delayed_ref_head_cachep = kmem_cache_create(
- "btrfs_delayed_ref_head",
- sizeof(struct btrfs_delayed_ref_head), 0,
- SLAB_MEM_SPREAD, NULL);
+ btrfs_delayed_ref_head_cachep = KMEM_CACHE(btrfs_delayed_ref_head, 0);
if (!btrfs_delayed_ref_head_cachep)
goto fail;
- btrfs_delayed_tree_ref_cachep = kmem_cache_create(
- "btrfs_delayed_tree_ref",
- sizeof(struct btrfs_delayed_tree_ref), 0,
- SLAB_MEM_SPREAD, NULL);
+ btrfs_delayed_tree_ref_cachep = KMEM_CACHE(btrfs_delayed_tree_ref, 0);
if (!btrfs_delayed_tree_ref_cachep)
goto fail;
- btrfs_delayed_data_ref_cachep = kmem_cache_create(
- "btrfs_delayed_data_ref",
- sizeof(struct btrfs_delayed_data_ref), 0,
- SLAB_MEM_SPREAD, NULL);
+ btrfs_delayed_data_ref_cachep = KMEM_CACHE(btrfs_delayed_data_ref, 0);
if (!btrfs_delayed_data_ref_cachep)
goto fail;
- btrfs_delayed_extent_op_cachep = kmem_cache_create(
- "btrfs_delayed_extent_op",
- sizeof(struct btrfs_delayed_extent_op), 0,
- SLAB_MEM_SPREAD, NULL);
+ btrfs_delayed_extent_op_cachep = KMEM_CACHE(btrfs_delayed_extent_op, 0);
if (!btrfs_delayed_extent_op_cachep)
goto fail;
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 62d679d40f4f..b291147cb8ab 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -6,7 +6,17 @@
#ifndef BTRFS_DELAYED_REF_H
#define BTRFS_DELAYED_REF_H
+#include <linux/types.h>
#include <linux/refcount.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <uapi/linux/btrfs_tree.h>
+
+struct btrfs_trans_handle;
+struct btrfs_fs_info;
/* these are the possible values of struct btrfs_delayed_ref_node->action */
enum btrfs_delayed_ref_action {
@@ -308,53 +318,12 @@ static inline u64 btrfs_calc_delayed_ref_csum_bytes(const struct btrfs_fs_info *
return btrfs_calc_metadata_size(fs_info, num_csum_items);
}
-static inline void btrfs_init_generic_ref(struct btrfs_ref *generic_ref,
- int action, u64 bytenr, u64 len,
- u64 parent, u64 owning_root)
-{
- generic_ref->action = action;
- generic_ref->bytenr = bytenr;
- generic_ref->len = len;
- generic_ref->parent = parent;
- generic_ref->owning_root = owning_root;
-}
-
-static inline void btrfs_init_tree_ref(struct btrfs_ref *generic_ref, int level,
- u64 root, u64 mod_root, bool skip_qgroup)
-{
-#ifdef CONFIG_BTRFS_FS_REF_VERIFY
- /* If @real_root not set, use @root as fallback */
- generic_ref->real_root = mod_root ?: root;
-#endif
- generic_ref->tree_ref.level = level;
- generic_ref->tree_ref.ref_root = root;
- generic_ref->type = BTRFS_REF_METADATA;
- if (skip_qgroup || !(is_fstree(root) &&
- (!mod_root || is_fstree(mod_root))))
- generic_ref->skip_qgroup = true;
- else
- generic_ref->skip_qgroup = false;
-
-}
-
-static inline void btrfs_init_data_ref(struct btrfs_ref *generic_ref,
- u64 ref_root, u64 ino, u64 offset, u64 mod_root,
- bool skip_qgroup)
-{
-#ifdef CONFIG_BTRFS_FS_REF_VERIFY
- /* If @real_root not set, use @root as fallback */
- generic_ref->real_root = mod_root ?: ref_root;
-#endif
- generic_ref->data_ref.ref_root = ref_root;
- generic_ref->data_ref.ino = ino;
- generic_ref->data_ref.offset = offset;
- generic_ref->type = BTRFS_REF_DATA;
- if (skip_qgroup || !(is_fstree(ref_root) &&
- (!mod_root || is_fstree(mod_root))))
- generic_ref->skip_qgroup = true;
- else
- generic_ref->skip_qgroup = false;
-}
+void btrfs_init_generic_ref(struct btrfs_ref *generic_ref, int action, u64 bytenr,
+ u64 len, u64 parent, u64 owning_root);
+void btrfs_init_tree_ref(struct btrfs_ref *generic_ref, int level, u64 root,
+ u64 mod_root, bool skip_qgroup);
+void btrfs_init_data_ref(struct btrfs_ref *generic_ref, u64 ref_root, u64 ino,
+ u64 offset, u64 mod_root, bool skip_qgroup);
static inline struct btrfs_delayed_extent_op *
btrfs_alloc_delayed_extent_op(void)
@@ -369,24 +338,7 @@ btrfs_free_delayed_extent_op(struct btrfs_delayed_extent_op *op)
kmem_cache_free(btrfs_delayed_extent_op_cachep, op);
}
-static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
-{
- if (refcount_dec_and_test(&ref->refs)) {
- WARN_ON(!RB_EMPTY_NODE(&ref->ref_node));
- switch (ref->type) {
- case BTRFS_TREE_BLOCK_REF_KEY:
- case BTRFS_SHARED_BLOCK_REF_KEY:
- kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
- break;
- case BTRFS_EXTENT_DATA_REF_KEY:
- case BTRFS_SHARED_DATA_REF_KEY:
- kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
- break;
- default:
- BUG();
- }
- }
-}
+void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref);
static inline u64 btrfs_ref_head_to_space_flags(
struct btrfs_delayed_ref_head *head_ref)
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index fb33027e5a4c..7696beec4c21 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -11,10 +11,8 @@
#include <linux/math64.h>
#include "misc.h"
#include "ctree.h"
-#include "extent_map.h"
#include "disk-io.h"
#include "transaction.h"
-#include "print-tree.h"
#include "volumes.h"
#include "async-thread.h"
#include "dev-replace.h"
@@ -1000,8 +998,7 @@ error:
btrfs_sysfs_remove_device(src_device);
btrfs_sysfs_update_devid(tgt_device);
if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &src_device->dev_state))
- btrfs_scratch_superblocks(fs_info, src_device->bdev,
- src_device->name->str);
+ btrfs_scratch_superblocks(fs_info, src_device);
/* write back the superblocks */
trans = btrfs_start_transaction(root, 0);
diff --git a/fs/btrfs/dev-replace.h b/fs/btrfs/dev-replace.h
index 675082ccec89..23e480efe5e6 100644
--- a/fs/btrfs/dev-replace.h
+++ b/fs/btrfs/dev-replace.h
@@ -6,11 +6,15 @@
#ifndef BTRFS_DEV_REPLACE_H
#define BTRFS_DEV_REPLACE_H
+#include <linux/types.h>
+#include <linux/compiler_types.h>
+
struct btrfs_ioctl_dev_replace_args;
struct btrfs_fs_info;
struct btrfs_trans_handle;
struct btrfs_dev_replace;
struct btrfs_block_group;
+struct btrfs_device;
int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info);
int btrfs_run_dev_replace(struct btrfs_trans_handle *trans);
diff --git a/fs/btrfs/dir-item.h b/fs/btrfs/dir-item.h
index e40a226373d7..00b3d83d7569 100644
--- a/fs/btrfs/dir-item.h
+++ b/fs/btrfs/dir-item.h
@@ -3,9 +3,15 @@
#ifndef BTRFS_DIR_ITEM_H
#define BTRFS_DIR_ITEM_H
+#include <linux/types.h>
#include <linux/crc32c.h>
struct fscrypt_str;
+struct btrfs_fs_info;
+struct btrfs_key;
+struct btrfs_path;
+struct btrfs_root;
+struct btrfs_trans_handle;
int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
const struct fscrypt_str *name);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index c843563914ca..3df5477d48a8 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -29,7 +29,6 @@
#include "tree-log.h"
#include "free-space-cache.h"
#include "free-space-tree.h"
-#include "rcu-string.h"
#include "dev-replace.h"
#include "raid56.h"
#include "sysfs.h"
@@ -193,7 +192,7 @@ static int btrfs_repair_eb_io_failure(const struct extent_buffer *eb,
struct folio *folio = eb->folios[i];
u64 start = max_t(u64, eb->start, folio_pos(folio));
u64 end = min_t(u64, eb->start + eb->len,
- folio_pos(folio) + folio_size(folio));
+ folio_pos(folio) + eb->folio_size);
u32 len = end - start;
ret = btrfs_repair_io_failure(fs_info, 0, start, len,
@@ -498,15 +497,15 @@ static int btree_migrate_folio(struct address_space *mapping,
static int btree_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
- struct btrfs_fs_info *fs_info;
int ret;
if (wbc->sync_mode == WB_SYNC_NONE) {
+ struct btrfs_fs_info *fs_info;
if (wbc->for_kupdate)
return 0;
- fs_info = BTRFS_I(mapping->host)->root->fs_info;
+ fs_info = inode_to_fs_info(mapping->host);
/* this is a bit racy, but that's ok */
ret = __percpu_counter_compare(&fs_info->dirty_metadata_bytes,
BTRFS_DIRTY_METADATA_THRESH,
@@ -529,11 +528,12 @@ static void btree_invalidate_folio(struct folio *folio, size_t offset,
size_t length)
{
struct extent_io_tree *tree;
- tree = &BTRFS_I(folio->mapping->host)->io_tree;
+
+ tree = &folio_to_inode(folio)->io_tree;
extent_invalidate_folio(tree, folio, offset);
btree_release_folio(folio, GFP_NOFS);
if (folio_get_private(folio)) {
- btrfs_warn(BTRFS_I(folio->mapping->host)->root->fs_info,
+ btrfs_warn(folio_to_fs_info(folio),
"folio private not zero on folio %llu",
(unsigned long long)folio_pos(folio));
folio_detach_private(folio);
@@ -544,7 +544,7 @@ static void btree_invalidate_folio(struct folio *folio, size_t offset,
static bool btree_dirty_folio(struct address_space *mapping,
struct folio *folio)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(mapping->host->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(mapping->host);
struct btrfs_subpage_info *spi = fs_info->subpage_info;
struct btrfs_subpage *subpage;
struct extent_buffer *eb;
@@ -1244,6 +1244,7 @@ void btrfs_check_leaked_roots(struct btrfs_fs_info *fs_info)
btrfs_err(fs_info, "leaked root %s refcount %d",
btrfs_root_name(&root->root_key, buf),
refcount_read(&root->refs));
+ WARN_ON_ONCE(1);
while (refcount_read(&root->refs) > 1)
btrfs_put_root(root);
btrfs_put_root(root);
@@ -2239,7 +2240,7 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
struct btrfs_key location;
int ret;
- BUG_ON(!fs_info->tree_root);
+ ASSERT(fs_info->tree_root);
ret = load_global_roots(tree_root);
if (ret)
@@ -2839,6 +2840,7 @@ static int init_mount_fs_info(struct btrfs_fs_info *fs_info, struct super_block
int ret;
fs_info->sb = sb;
+ /* Temporary fixed values for block size until we read the superblock. */
sb->s_blocksize = BTRFS_BDEV_BLOCKSIZE;
sb->s_blocksize_bits = blksize_bits(BTRFS_BDEV_BLOCKSIZE);
@@ -3356,6 +3358,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
sb->s_bdi->ra_pages *= btrfs_super_num_devices(disk_super);
sb->s_bdi->ra_pages = max(sb->s_bdi->ra_pages, SZ_4M / PAGE_SIZE);
+ /* Update the values for the current filesystem. */
sb->s_blocksize = sectorsize;
sb->s_blocksize_bits = blksize_bits(sectorsize);
memcpy(&sb->s_uuid, fs_info->fs_devices->fsid, BTRFS_FSID_SIZE);
@@ -4626,7 +4629,7 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
struct inode *inode = NULL;
btrfs_inode = list_first_entry(&splice, struct btrfs_inode,
delalloc_inodes);
- __btrfs_del_delalloc_inode(root, btrfs_inode);
+ btrfs_del_delalloc_inode(btrfs_inode);
spin_unlock(&root->delalloc_lock);
/*
@@ -4925,7 +4928,14 @@ int btrfs_init_root_free_objectid(struct btrfs_root *root)
ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
if (ret < 0)
goto error;
- BUG_ON(ret == 0); /* Corruption */
+ if (ret == 0) {
+ /*
+ * Key with offset -1 found, there would have to exist a root
+ * with such id, but this is out of valid range.
+ */
+ ret = -EUCLEAN;
+ goto error;
+ }
if (path->slots[0] > 0) {
slot = path->slots[0] - 1;
l = path->nodes[0];
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index eb3473d1c1ac..76eb53fe7a11 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -6,6 +6,22 @@
#ifndef BTRFS_DISK_IO_H
#define BTRFS_DISK_IO_H
+#include <linux/sizes.h>
+#include <linux/compiler_types.h>
+#include "ctree.h"
+#include "fs.h"
+
+struct block_device;
+struct super_block;
+struct extent_buffer;
+struct btrfs_device;
+struct btrfs_fs_devices;
+struct btrfs_fs_info;
+struct btrfs_super_block;
+struct btrfs_trans_handle;
+struct btrfs_tree_parent_check;
+struct btrfs_transaction;
+
#define BTRFS_SUPER_MIRROR_MAX 3
#define BTRFS_SUPER_MIRROR_SHIFT 12
@@ -25,10 +41,6 @@ static inline u64 btrfs_sb_offset(int mirror)
return BTRFS_SUPER_INFO_OFFSET;
}
-struct btrfs_device;
-struct btrfs_fs_devices;
-struct btrfs_tree_parent_check;
-
void btrfs_check_leaked_roots(struct btrfs_fs_info *fs_info);
void btrfs_init_fs_info(struct btrfs_fs_info *fs_info);
struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 744a02b7fd67..8398d345ec5b 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -5,7 +5,6 @@
#include "ctree.h"
#include "disk-io.h"
#include "btrfs_inode.h"
-#include "print-tree.h"
#include "export.h"
#include "accessors.h"
#include "super.h"
@@ -174,8 +173,15 @@ struct dentry *btrfs_get_parent(struct dentry *child)
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
goto fail;
+ if (ret == 0) {
+ /*
+ * Key with offset of -1 found, there would have to exist an
+ * inode with such number or a root with such id.
+ */
+ ret = -EUCLEAN;
+ goto fail;
+ }
- BUG_ON(ret == 0); /* Key with offset of -1 found */
if (path->slots[0] == 0) {
ret = -ENOENT;
goto fail;
@@ -215,7 +221,7 @@ static int btrfs_get_name(struct dentry *parent, char *name,
{
struct inode *inode = d_inode(child);
struct inode *dir = d_inode(parent);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_path *path;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_inode_ref *iref;
diff --git a/fs/btrfs/export.h b/fs/btrfs/export.h
index eba6bc4f5a61..464582273af9 100644
--- a/fs/btrfs/export.h
+++ b/fs/btrfs/export.h
@@ -4,6 +4,10 @@
#define BTRFS_EXPORT_H
#include <linux/exportfs.h>
+#include <linux/types.h>
+
+struct dentry;
+struct super_block;
extern const struct export_operations btrfs_export_ops;
diff --git a/fs/btrfs/extent-io-tree.c b/fs/btrfs/extent-io-tree.c
index e3ee5449cc4a..c09b428823d7 100644
--- a/fs/btrfs/extent-io-tree.c
+++ b/fs/btrfs/extent-io-tree.c
@@ -6,7 +6,6 @@
#include "ctree.h"
#include "extent-io-tree.h"
#include "btrfs_inode.h"
-#include "misc.h"
static struct kmem_cache *extent_state_cache;
@@ -48,6 +47,7 @@ static inline void btrfs_extent_state_leak_debug_check(void)
extent_state_in_tree(state),
refcount_read(&state->refs));
list_del(&state->leak_list);
+ WARN_ON_ONCE(1);
kmem_cache_free(extent_state_cache, state);
}
}
@@ -1883,8 +1883,8 @@ void __cold extent_state_free_cachep(void)
int __init extent_state_init_cachep(void)
{
extent_state_cache = kmem_cache_create("btrfs_extent_state",
- sizeof(struct extent_state), 0,
- SLAB_MEM_SPREAD, NULL);
+ sizeof(struct extent_state), 0, 0,
+ NULL);
if (!extent_state_cache)
return -ENOMEM;
diff --git a/fs/btrfs/extent-io-tree.h b/fs/btrfs/extent-io-tree.h
index ebe6390d65e9..9d3a52d8f59a 100644
--- a/fs/btrfs/extent-io-tree.h
+++ b/fs/btrfs/extent-io-tree.h
@@ -3,9 +3,16 @@
#ifndef BTRFS_EXTENT_IO_TREE_H
#define BTRFS_EXTENT_IO_TREE_H
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/refcount.h>
+#include <linux/list.h>
+#include <linux/wait.h>
#include "misc.h"
struct extent_changeset;
+struct btrfs_fs_info;
+struct btrfs_inode;
/* Bits for the extent state */
enum {
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 8e8cc1111277..beedd6ed64d3 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -18,7 +18,7 @@
#include <linux/crc32c.h>
#include "ctree.h"
#include "extent-tree.h"
-#include "tree-log.h"
+#include "transaction.h"
#include "disk-io.h"
#include "print-tree.h"
#include "volumes.h"
@@ -26,14 +26,11 @@
#include "locking.h"
#include "free-space-cache.h"
#include "free-space-tree.h"
-#include "sysfs.h"
#include "qgroup.h"
#include "ref-verify.h"
#include "space-info.h"
#include "block-rsv.h"
-#include "delalloc-space.h"
#include "discard.h"
-#include "rcu-string.h"
#include "zoned.h"
#include "dev-replace.h"
#include "fs.h"
@@ -2399,7 +2396,14 @@ static noinline int check_committed_ref(struct btrfs_root *root,
ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
if (ret < 0)
goto out;
- BUG_ON(ret == 0); /* Corruption */
+ if (ret == 0) {
+ /*
+ * Key with offset -1 found, there would have to exist an extent
+ * item with such offset, but this is out of the valid range.
+ */
+ ret = -EUCLEAN;
+ goto out;
+ }
ret = -ENOENT;
if (path->slots[0] == 0)
@@ -2780,6 +2784,7 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
u64 total_unpinned = 0;
u64 empty_cluster = 0;
bool readonly;
+ int ret = 0;
while (start <= end) {
readonly = false;
@@ -2789,7 +2794,11 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
btrfs_put_block_group(cache);
total_unpinned = 0;
cache = btrfs_lookup_block_group(fs_info, start);
- BUG_ON(!cache); /* Logic error */
+ if (cache == NULL) {
+ /* Logic error, something removed the block group. */
+ ret = -EUCLEAN;
+ goto out;
+ }
cluster = fetch_cluster_info(fs_info,
cache->space_info,
@@ -2858,7 +2867,8 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
if (cache)
btrfs_put_block_group(cache);
- return 0;
+out:
+ return ret;
}
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
@@ -2888,7 +2898,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
end + 1 - start, NULL);
clear_extent_dirty(unpin, start, end, &cached_state);
- unpin_extent_range(fs_info, start, end, true);
+ ret = unpin_extent_range(fs_info, start, end, true);
+ BUG_ON(ret);
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
free_extent_state(cached_state);
cond_resched();
@@ -3447,16 +3458,17 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
u64 parent, int last_ref)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
- struct btrfs_ref generic_ref = { 0 };
struct btrfs_block_group *bg;
int ret;
- btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF,
- buf->start, buf->len, parent, btrfs_header_owner(buf));
- btrfs_init_tree_ref(&generic_ref, btrfs_header_level(buf),
- root_id, 0, false);
-
if (root_id != BTRFS_TREE_LOG_OBJECTID) {
+ struct btrfs_ref generic_ref = { 0 };
+
+ btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF,
+ buf->start, buf->len, parent,
+ btrfs_header_owner(buf));
+ btrfs_init_tree_ref(&generic_ref, btrfs_header_level(buf),
+ root_id, 0, false);
btrfs_ref_tree_mod(fs_info, &generic_ref);
ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, NULL);
BUG_ON(ret); /* -ENOMEM */
@@ -4950,7 +4962,7 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
u64 root_objectid = root->root_key.objectid;
u64 owning_root = root_objectid;
- BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID);
+ ASSERT(root_objectid != BTRFS_TREE_LOG_OBJECTID);
if (btrfs_is_data_reloc_root(root) && is_fstree(root->relocation_src_root))
owning_root = root->relocation_src_root;
@@ -6167,10 +6179,13 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
return ret;
}
-int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
- u64 start, u64 end)
+/*
+ * Unpin the extent range in an error context and don't add the space back.
+ * Errors are not propagated further.
+ */
+void btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info, u64 start, u64 end)
{
- return unpin_extent_range(fs_info, start, end, false);
+ unpin_extent_range(fs_info, start, end, false);
}
/*
diff --git a/fs/btrfs/extent-tree.h b/fs/btrfs/extent-tree.h
index 2e066035ccee..af9f8800d5ac 100644
--- a/fs/btrfs/extent-tree.h
+++ b/fs/btrfs/extent-tree.h
@@ -3,11 +3,21 @@
#ifndef BTRFS_EXTENT_TREE_H
#define BTRFS_EXTENT_TREE_H
+#include <linux/types.h>
#include "misc.h"
#include "block-group.h"
+#include "locking.h"
+struct extent_buffer;
struct btrfs_free_cluster;
+struct btrfs_fs_info;
+struct btrfs_root;
+struct btrfs_path;
+struct btrfs_ref;
+struct btrfs_disk_key;
struct btrfs_delayed_ref_head;
+struct btrfs_delayed_ref_root;
+struct btrfs_extent_inline_ref;
enum btrfs_extent_allocation_policy {
BTRFS_EXTENT_ALLOC_CLUSTERED,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 8b4bef05e222..7441245b1ceb 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -14,7 +14,6 @@
#include <linux/pagevec.h>
#include <linux/prefetch.h>
#include <linux/fsverity.h>
-#include "misc.h"
#include "extent_io.h"
#include "extent-io-tree.h"
#include "extent_map.h"
@@ -22,7 +21,6 @@
#include "btrfs_inode.h"
#include "bio.h"
#include "locking.h"
-#include "rcu-string.h"
#include "backref.h"
#include "disk-io.h"
#include "subpage.h"
@@ -78,10 +76,11 @@ void btrfs_extent_buffer_leak_debug_check(struct btrfs_fs_info *fs_info)
eb = list_first_entry(&fs_info->allocated_ebs,
struct extent_buffer, leak_list);
pr_err(
- "BTRFS: buffer leak start %llu len %lu refs %d bflags %lu owner %llu\n",
+ "BTRFS: buffer leak start %llu len %u refs %d bflags %lu owner %llu\n",
eb->start, eb->len, atomic_read(&eb->refs), eb->bflags,
btrfs_header_owner(eb));
list_del(&eb->leak_list);
+ WARN_ON_ONCE(1);
kmem_cache_free(extent_buffer_cache, eb);
}
spin_unlock_irqrestore(&fs_info->eb_leak_lock, flags);
@@ -147,8 +146,8 @@ static void submit_write_bio(struct btrfs_bio_ctrl *bio_ctrl, int ret)
int __init extent_buffer_init_cachep(void)
{
extent_buffer_cache = kmem_cache_create("btrfs_extent_buffer",
- sizeof(struct extent_buffer), 0,
- SLAB_MEM_SPREAD, NULL);
+ sizeof(struct extent_buffer), 0, 0,
+ NULL);
if (!extent_buffer_cache)
return -ENOMEM;
@@ -207,7 +206,7 @@ static void __process_pages_contig(struct address_space *mapping,
struct page *locked_page, u64 start, u64 end,
unsigned long page_ops)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(mapping->host->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(mapping->host);
pgoff_t start_index = start >> PAGE_SHIFT;
pgoff_t end_index = end >> PAGE_SHIFT;
pgoff_t index = start_index;
@@ -251,7 +250,7 @@ static noinline int lock_delalloc_pages(struct inode *inode,
u64 start,
u64 end)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct address_space *mapping = inode->i_mapping;
pgoff_t start_index = start >> PAGE_SHIFT;
pgoff_t end_index = end >> PAGE_SHIFT;
@@ -323,7 +322,7 @@ noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
struct page *locked_page, u64 *start,
u64 *end)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
const u64 orig_start = *start;
const u64 orig_end = *end;
@@ -433,7 +432,7 @@ static bool btrfs_verify_page(struct page *page, u64 start)
static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
+ struct btrfs_fs_info *fs_info = page_to_fs_info(page);
struct folio *folio = page_folio(page);
ASSERT(page_offset(page) <= start &&
@@ -462,16 +461,15 @@ static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
*/
static void end_bbio_data_write(struct btrfs_bio *bbio)
{
+ struct btrfs_fs_info *fs_info = bbio->fs_info;
struct bio *bio = &bbio->bio;
int error = blk_status_to_errno(bio->bi_status);
struct folio_iter fi;
+ const u32 sectorsize = fs_info->sectorsize;
ASSERT(!bio_flagged(bio, BIO_CLONED));
bio_for_each_folio_all(fi, bio) {
struct folio *folio = fi.folio;
- struct inode *inode = folio->mapping->host;
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- const u32 sectorsize = fs_info->sectorsize;
u64 start = folio_pos(folio) + fi.offset;
u32 len = fi.length;
@@ -593,22 +591,17 @@ static void begin_page_read(struct btrfs_fs_info *fs_info, struct page *page)
*/
static void end_bbio_data_read(struct btrfs_bio *bbio)
{
+ struct btrfs_fs_info *fs_info = bbio->fs_info;
struct bio *bio = &bbio->bio;
struct processed_extent processed = { 0 };
struct folio_iter fi;
- /*
- * The offset to the beginning of a bio, since one bio can never be
- * larger than UINT_MAX, u32 here is enough.
- */
- u32 bio_offset = 0;
+ const u32 sectorsize = fs_info->sectorsize;
ASSERT(!bio_flagged(bio, BIO_CLONED));
bio_for_each_folio_all(fi, &bbio->bio) {
bool uptodate = !bio->bi_status;
struct folio *folio = fi.folio;
struct inode *inode = folio->mapping->host;
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- const u32 sectorsize = fs_info->sectorsize;
u64 start;
u64 end;
u32 len;
@@ -667,10 +660,6 @@ static void end_bbio_data_read(struct btrfs_bio *bbio)
end_page_read(folio_page(folio, 0), uptodate, start, len);
endio_readpage_release_extent(&processed, BTRFS_I(inode),
start, end, uptodate);
-
- ASSERT(bio_offset + len > bio_offset);
- bio_offset += len;
-
}
/* Release the last extent */
endio_readpage_release_extent(&processed, NULL, 0, 0, false);
@@ -738,6 +727,8 @@ static int alloc_eb_folio_array(struct extent_buffer *eb, gfp_t extra_gfp)
for (int i = 0; i < num_pages; i++)
eb->folios[i] = page_folio(page_array[i]);
+ eb->folio_size = PAGE_SIZE;
+ eb->folio_shift = PAGE_SHIFT;
return 0;
}
@@ -827,7 +818,7 @@ static void submit_extent_page(struct btrfs_bio_ctrl *bio_ctrl,
u64 disk_bytenr, struct page *page,
size_t size, unsigned long pg_offset)
{
- struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
+ struct btrfs_inode *inode = page_to_inode(page);
ASSERT(pg_offset + size <= PAGE_SIZE);
ASSERT(bio_ctrl->end_io_func);
@@ -936,17 +927,21 @@ static int attach_extent_buffer_folio(struct extent_buffer *eb,
int set_page_extent_mapped(struct page *page)
{
- struct folio *folio = page_folio(page);
+ return set_folio_extent_mapped(page_folio(page));
+}
+
+int set_folio_extent_mapped(struct folio *folio)
+{
struct btrfs_fs_info *fs_info;
- ASSERT(page->mapping);
+ ASSERT(folio->mapping);
if (folio_test_private(folio))
return 0;
- fs_info = btrfs_sb(page->mapping->host->i_sb);
+ fs_info = folio_to_fs_info(folio);
- if (btrfs_is_subpage(fs_info, page->mapping))
+ if (btrfs_is_subpage(fs_info, folio->mapping))
return btrfs_attach_subpage(fs_info, folio, BTRFS_SUBPAGE_DATA);
folio_attach_private(folio, (void *)EXTENT_FOLIO_PRIVATE);
@@ -963,20 +958,21 @@ void clear_page_extent_mapped(struct page *page)
if (!folio_test_private(folio))
return;
- fs_info = btrfs_sb(page->mapping->host->i_sb);
+ fs_info = page_to_fs_info(page);
if (btrfs_is_subpage(fs_info, page->mapping))
return btrfs_detach_subpage(fs_info, folio);
folio_detach_private(folio);
}
-static struct extent_map *
-__get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
+static struct extent_map *__get_extent_map(struct inode *inode, struct page *page,
u64 start, u64 len, struct extent_map **em_cached)
{
struct extent_map *em;
- if (em_cached && *em_cached) {
+ ASSERT(em_cached);
+
+ if (*em_cached) {
em = *em_cached;
if (extent_map_in_tree(em) && start >= em->start &&
start < extent_map_end(em)) {
@@ -988,8 +984,8 @@ __get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
*em_cached = NULL;
}
- em = btrfs_get_extent(BTRFS_I(inode), page, pg_offset, start, len);
- if (em_cached && !IS_ERR(em)) {
+ em = btrfs_get_extent(BTRFS_I(inode), page, start, len);
+ if (!IS_ERR(em)) {
BUG_ON(*em_cached);
refcount_inc(&em->refs);
*em_cached = em;
@@ -1007,7 +1003,7 @@ static int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
struct btrfs_bio_ctrl *bio_ctrl, u64 *prev_em_start)
{
struct inode *inode = page->mapping->host;
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
u64 start = page_offset(page);
const u64 end = start + PAGE_SIZE - 1;
u64 cur = start;
@@ -1018,7 +1014,7 @@ static int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
int ret = 0;
size_t pg_offset = 0;
size_t iosize;
- size_t blocksize = inode->i_sb->s_blocksize;
+ size_t blocksize = fs_info->sectorsize;
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
ret = set_page_extent_mapped(page);
@@ -1051,8 +1047,7 @@ static int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
end_page_read(page, true, cur, iosize);
break;
}
- em = __get_extent_map(inode, page, pg_offset, cur,
- end - cur + 1, em_cached);
+ em = __get_extent_map(inode, page, cur, end - cur + 1, em_cached);
if (IS_ERR(em)) {
unlock_extent(tree, cur, end, NULL);
end_page_read(page, false, cur, end + 1 - cur);
@@ -1157,15 +1152,18 @@ static int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
int btrfs_read_folio(struct file *file, struct folio *folio)
{
struct page *page = &folio->page;
- struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
+ struct btrfs_inode *inode = page_to_inode(page);
u64 start = page_offset(page);
u64 end = start + PAGE_SIZE - 1;
struct btrfs_bio_ctrl bio_ctrl = { .opf = REQ_OP_READ };
+ struct extent_map *em_cached = NULL;
int ret;
btrfs_lock_and_flush_ordered_range(inode, start, end, NULL);
- ret = btrfs_do_readpage(page, NULL, &bio_ctrl, NULL);
+ ret = btrfs_do_readpage(page, &em_cached, &bio_ctrl, NULL);
+ free_extent_map(em_cached);
+
/*
* If btrfs_do_readpage() failed we will want to submit the assembled
* bio to do the cleanup.
@@ -1180,9 +1178,11 @@ static inline void contiguous_readpages(struct page *pages[], int nr_pages,
struct btrfs_bio_ctrl *bio_ctrl,
u64 *prev_em_start)
{
- struct btrfs_inode *inode = BTRFS_I(pages[0]->mapping->host);
+ struct btrfs_inode *inode = page_to_inode(pages[0]);
int index;
+ ASSERT(em_cached);
+
btrfs_lock_and_flush_ordered_range(inode, start, end, NULL);
for (index = 0; index < nr_pages; index++) {
@@ -1371,7 +1371,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
continue;
}
- em = btrfs_get_extent(inode, NULL, 0, cur, len);
+ em = btrfs_get_extent(inode, NULL, cur, len);
if (IS_ERR(em)) {
ret = PTR_ERR_OR_ZERO(em);
goto out_error;
@@ -1739,10 +1739,10 @@ static noinline_for_stack void write_one_eb(struct extent_buffer *eb,
folio_lock(folio);
folio_clear_dirty_for_io(folio);
folio_start_writeback(folio);
- ret = bio_add_folio(&bbio->bio, folio, folio_size(folio), 0);
+ ret = bio_add_folio(&bbio->bio, folio, eb->folio_size, 0);
ASSERT(ret);
wbc_account_cgroup_owner(wbc, folio_page(folio, 0),
- folio_size(folio));
+ eb->folio_size);
wbc->nr_to_write -= folio_nr_pages(folio);
folio_unlock(folio);
}
@@ -1766,7 +1766,7 @@ static noinline_for_stack void write_one_eb(struct extent_buffer *eb,
*/
static int submit_eb_subpage(struct page *page, struct writeback_control *wbc)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
+ struct btrfs_fs_info *fs_info = page_to_fs_info(page);
struct folio *folio = page_folio(page);
int submitted = 0;
u64 page_start = page_offset(page);
@@ -1857,7 +1857,7 @@ static int submit_eb_page(struct page *page, struct btrfs_eb_write_context *ctx)
if (!folio_test_private(folio))
return 0;
- if (btrfs_sb(page->mapping->host->i_sb)->nodesize < PAGE_SIZE)
+ if (page_to_fs_info(page)->nodesize < PAGE_SIZE)
return submit_eb_subpage(page, wbc);
spin_lock(&mapping->i_private_lock);
@@ -1915,7 +1915,7 @@ int btree_write_cache_pages(struct address_space *mapping,
struct writeback_control *wbc)
{
struct btrfs_eb_write_context ctx = { .wbc = wbc };
- struct btrfs_fs_info *fs_info = BTRFS_I(mapping->host)->root->fs_info;
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(mapping->host);
int ret = 0;
int done = 0;
int nr_to_write_done = 0;
@@ -2203,7 +2203,7 @@ void extent_write_locked_range(struct inode *inode, struct page *locked_page,
bool found_error = false;
int ret = 0;
struct address_space *mapping = inode->i_mapping;
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
const u32 sectorsize = fs_info->sectorsize;
loff_t i_size = i_size_read(inode);
u64 cur = start;
@@ -2309,7 +2309,7 @@ int extent_invalidate_folio(struct extent_io_tree *tree,
struct extent_state *cached_state = NULL;
u64 start = folio_pos(folio);
u64 end = start + folio_size(folio) - 1;
- size_t blocksize = folio->mapping->host->i_sb->s_blocksize;
+ size_t blocksize = folio_to_fs_info(folio)->sectorsize;
/* This function is only called for the btree inode */
ASSERT(tree->owner == IO_TREE_BTREE_INODE_IO);
@@ -2378,7 +2378,7 @@ int try_release_extent_mapping(struct page *page, gfp_t mask)
struct extent_map *em;
u64 start = page_offset(page);
u64 end = start + PAGE_SIZE - 1;
- struct btrfs_inode *btrfs_inode = BTRFS_I(page->mapping->host);
+ struct btrfs_inode *btrfs_inode = page_to_inode(page);
struct extent_io_tree *tree = &btrfs_inode->io_tree;
struct extent_map_tree *map = &btrfs_inode->extent_tree;
@@ -2453,12 +2453,65 @@ next:
return try_release_extent_state(tree, page, mask);
}
+struct btrfs_fiemap_entry {
+ u64 offset;
+ u64 phys;
+ u64 len;
+ u32 flags;
+};
+
/*
- * To cache previous fiemap extent
+ * Indicate the caller of emit_fiemap_extent() that it needs to unlock the file
+ * range from the inode's io tree, unlock the subvolume tree search path, flush
+ * the fiemap cache and relock the file range and research the subvolume tree.
+ * The value here is something negative that can't be confused with a valid
+ * errno value and different from 1 because that's also a return value from
+ * fiemap_fill_next_extent() and also it's often used to mean some btree search
+ * did not find a key, so make it some distinct negative value.
+ */
+#define BTRFS_FIEMAP_FLUSH_CACHE (-(MAX_ERRNO + 1))
+
+/*
+ * Used to:
+ *
+ * - Cache the next entry to be emitted to the fiemap buffer, so that we can
+ * merge extents that are contiguous and can be grouped as a single one;
*
- * Will be used for merging fiemap extent
+ * - Store extents ready to be written to the fiemap buffer in an intermediary
+ * buffer. This intermediary buffer is to ensure that in case the fiemap
+ * buffer is memory mapped to the fiemap target file, we don't deadlock
+ * during btrfs_page_mkwrite(). This is because during fiemap we are locking
+ * an extent range in order to prevent races with delalloc flushing and
+ * ordered extent completion, which is needed in order to reliably detect
+ * delalloc in holes and prealloc extents. And this can lead to a deadlock
+ * if the fiemap buffer is memory mapped to the file we are running fiemap
+ * against (a silly, useless in practice scenario, but possible) because
+ * btrfs_page_mkwrite() will try to lock the same extent range.
*/
struct fiemap_cache {
+ /* An array of ready fiemap entries. */
+ struct btrfs_fiemap_entry *entries;
+ /* Number of entries in the entries array. */
+ int entries_size;
+ /* Index of the next entry in the entries array to write to. */
+ int entries_pos;
+ /*
+ * Once the entries array is full, this indicates what's the offset for
+ * the next file extent item we must search for in the inode's subvolume
+ * tree after unlocking the extent range in the inode's io tree and
+ * releasing the search path.
+ */
+ u64 next_search_offset;
+ /*
+ * This matches struct fiemap_extent_info::fi_mapped_extents, we use it
+ * to count ourselves emitted extents and stop instead of relying on
+ * fiemap_fill_next_extent() because we buffer ready fiemap entries at
+ * the @entries array, and we want to stop as soon as we hit the max
+ * amount of extents to map, not just to save time but also to make the
+ * logic at extent_fiemap() simpler.
+ */
+ unsigned int extents_mapped;
+ /* Fields for the cached extent (unsubmitted, not ready, extent). */
u64 offset;
u64 phys;
u64 len;
@@ -2466,6 +2519,28 @@ struct fiemap_cache {
bool cached;
};
+static int flush_fiemap_cache(struct fiemap_extent_info *fieinfo,
+ struct fiemap_cache *cache)
+{
+ for (int i = 0; i < cache->entries_pos; i++) {
+ struct btrfs_fiemap_entry *entry = &cache->entries[i];
+ int ret;
+
+ ret = fiemap_fill_next_extent(fieinfo, entry->offset,
+ entry->phys, entry->len,
+ entry->flags);
+ /*
+ * Ignore 1 (reached max entries) because we keep track of that
+ * ourselves in emit_fiemap_extent().
+ */
+ if (ret < 0)
+ return ret;
+ }
+ cache->entries_pos = 0;
+
+ return 0;
+}
+
/*
* Helper to submit fiemap extent.
*
@@ -2480,8 +2555,8 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
struct fiemap_cache *cache,
u64 offset, u64 phys, u64 len, u32 flags)
{
+ struct btrfs_fiemap_entry *entry;
u64 cache_end;
- int ret = 0;
/* Set at the end of extent_fiemap(). */
ASSERT((flags & FIEMAP_EXTENT_LAST) == 0);
@@ -2494,7 +2569,9 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
* find an extent that starts at an offset behind the end offset of the
* previous extent we processed. This happens if fiemap is called
* without FIEMAP_FLAG_SYNC and there are ordered extents completing
- * while we call btrfs_next_leaf() (through fiemap_next_leaf_item()).
+ * after we had to unlock the file range, release the search path, emit
+ * the fiemap extents stored in the buffer (cache->entries array) and
+ * the lock the remainder of the range and re-search the btree.
*
* For example we are in leaf X processing its last item, which is the
* file extent item for file range [512K, 1M[, and after
@@ -2607,11 +2684,35 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
emit:
/* Not mergeable, need to submit cached one */
- ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
- cache->len, cache->flags);
- cache->cached = false;
- if (ret)
- return ret;
+
+ if (cache->entries_pos == cache->entries_size) {
+ /*
+ * We will need to research for the end offset of the last
+ * stored extent and not from the current offset, because after
+ * unlocking the range and releasing the path, if there's a hole
+ * between that end offset and this current offset, a new extent
+ * may have been inserted due to a new write, so we don't want
+ * to miss it.
+ */
+ entry = &cache->entries[cache->entries_size - 1];
+ cache->next_search_offset = entry->offset + entry->len;
+ cache->cached = false;
+
+ return BTRFS_FIEMAP_FLUSH_CACHE;
+ }
+
+ entry = &cache->entries[cache->entries_pos];
+ entry->offset = cache->offset;
+ entry->phys = cache->phys;
+ entry->len = cache->len;
+ entry->flags = cache->flags;
+ cache->entries_pos++;
+ cache->extents_mapped++;
+
+ if (cache->extents_mapped == fieinfo->fi_extents_max) {
+ cache->cached = false;
+ return 1;
+ }
assign:
cache->cached = true;
cache->offset = offset;
@@ -2651,7 +2752,7 @@ static int emit_last_fiemap_cache(struct fiemap_extent_info *fieinfo,
static int fiemap_next_leaf_item(struct btrfs_inode *inode, struct btrfs_path *path)
{
- struct extent_buffer *clone;
+ struct extent_buffer *clone = path->nodes[0];
struct btrfs_key key;
int slot;
int ret;
@@ -2660,29 +2761,45 @@ static int fiemap_next_leaf_item(struct btrfs_inode *inode, struct btrfs_path *p
if (path->slots[0] < btrfs_header_nritems(path->nodes[0]))
return 0;
+ /*
+ * Add a temporary extra ref to an already cloned extent buffer to
+ * prevent btrfs_next_leaf() freeing it, we want to reuse it to avoid
+ * the cost of allocating a new one.
+ */
+ ASSERT(test_bit(EXTENT_BUFFER_UNMAPPED, &clone->bflags));
+ atomic_inc(&clone->refs);
+
ret = btrfs_next_leaf(inode->root, path);
if (ret != 0)
- return ret;
+ goto out;
/*
* Don't bother with cloning if there are no more file extent items for
* our inode.
*/
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
- if (key.objectid != btrfs_ino(inode) || key.type != BTRFS_EXTENT_DATA_KEY)
- return 1;
+ if (key.objectid != btrfs_ino(inode) || key.type != BTRFS_EXTENT_DATA_KEY) {
+ ret = 1;
+ goto out;
+ }
/* See the comment at fiemap_search_slot() about why we clone. */
- clone = btrfs_clone_extent_buffer(path->nodes[0]);
- if (!clone)
- return -ENOMEM;
+ copy_extent_buffer_full(clone, path->nodes[0]);
+ /*
+ * Important to preserve the start field, for the optimizations when
+ * checking if extents are shared (see extent_fiemap()).
+ */
+ clone->start = path->nodes[0]->start;
slot = path->slots[0];
btrfs_release_path(path);
path->nodes[0] = clone;
path->slots[0] = slot;
+out:
+ if (ret)
+ free_extent_buffer(clone);
- return 0;
+ return ret;
}
/*
@@ -2737,8 +2854,8 @@ static int fiemap_search_slot(struct btrfs_inode *inode, struct btrfs_path *path
* neighbour leaf).
* We also need the private clone because holding a read lock on an
* extent buffer of the subvolume's b+tree will make lockdep unhappy
- * when we call fiemap_fill_next_extent(), because that may cause a page
- * fault when filling the user space buffer with fiemap data.
+ * when we check if extents are shared, as backref walking may need to
+ * lock the same leaf we are processing.
*/
clone = btrfs_clone_extent_buffer(path->nodes[0]);
if (!clone)
@@ -2778,34 +2895,16 @@ static int fiemap_process_hole(struct btrfs_inode *inode,
* it beyond i_size.
*/
while (cur_offset < end && cur_offset < i_size) {
- struct extent_state *cached_state = NULL;
u64 delalloc_start;
u64 delalloc_end;
u64 prealloc_start;
- u64 lockstart;
- u64 lockend;
u64 prealloc_len = 0;
bool delalloc;
- lockstart = round_down(cur_offset, inode->root->fs_info->sectorsize);
- lockend = round_up(end, inode->root->fs_info->sectorsize);
-
- /*
- * We are only locking for the delalloc range because that's the
- * only thing that can change here. With fiemap we have a lock
- * on the inode, so no buffered or direct writes can happen.
- *
- * However mmaps and normal page writeback will cause this to
- * change arbitrarily. We have to lock the extent lock here to
- * make sure that nobody messes with the tree while we're doing
- * btrfs_find_delalloc_in_range.
- */
- lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
delalloc = btrfs_find_delalloc_in_range(inode, cur_offset, end,
delalloc_cached_state,
&delalloc_start,
&delalloc_end);
- unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
if (!delalloc)
break;
@@ -2973,6 +3072,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
const u64 ino = btrfs_ino(inode);
+ struct extent_state *cached_state = NULL;
struct extent_state *delalloc_cached_state = NULL;
struct btrfs_path *path;
struct fiemap_cache cache = { 0 };
@@ -2985,26 +3085,33 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
bool stopped = false;
int ret;
+ cache.entries_size = PAGE_SIZE / sizeof(struct btrfs_fiemap_entry);
+ cache.entries = kmalloc_array(cache.entries_size,
+ sizeof(struct btrfs_fiemap_entry),
+ GFP_KERNEL);
backref_ctx = btrfs_alloc_backref_share_check_ctx();
path = btrfs_alloc_path();
- if (!backref_ctx || !path) {
+ if (!cache.entries || !backref_ctx || !path) {
ret = -ENOMEM;
goto out;
}
+restart:
range_start = round_down(start, sectorsize);
range_end = round_up(start + len, sectorsize);
prev_extent_end = range_start;
+ lock_extent(&inode->io_tree, range_start, range_end, &cached_state);
+
ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end);
if (ret < 0)
- goto out;
+ goto out_unlock;
btrfs_release_path(path);
path->reada = READA_FORWARD;
ret = fiemap_search_slot(inode, path, range_start);
if (ret < 0) {
- goto out;
+ goto out_unlock;
} else if (ret > 0) {
/*
* No file extent item found, but we may have delalloc between
@@ -3051,7 +3158,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
backref_ctx, 0, 0, 0,
prev_extent_end, hole_end);
if (ret < 0) {
- goto out;
+ goto out_unlock;
} else if (ret > 0) {
/* fiemap_fill_next_extent() told us to stop. */
stopped = true;
@@ -3107,7 +3214,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
extent_gen,
backref_ctx);
if (ret < 0)
- goto out;
+ goto out_unlock;
else if (ret > 0)
flags |= FIEMAP_EXTENT_SHARED;
}
@@ -3118,9 +3225,9 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
}
if (ret < 0) {
- goto out;
+ goto out_unlock;
} else if (ret > 0) {
- /* fiemap_fill_next_extent() told us to stop. */
+ /* emit_fiemap_extent() told us to stop. */
stopped = true;
break;
}
@@ -3129,12 +3236,12 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
next_item:
if (fatal_signal_pending(current)) {
ret = -EINTR;
- goto out;
+ goto out_unlock;
}
ret = fiemap_next_leaf_item(inode, path);
if (ret < 0) {
- goto out;
+ goto out_unlock;
} else if (ret > 0) {
/* No more file extent items for this inode. */
break;
@@ -3143,22 +3250,12 @@ next_item:
}
check_eof_delalloc:
- /*
- * Release (and free) the path before emitting any final entries to
- * fiemap_fill_next_extent() to keep lockdep happy. This is because
- * once we find no more file extent items exist, we may have a
- * non-cloned leaf, and fiemap_fill_next_extent() can trigger page
- * faults when copying data to the user space buffer.
- */
- btrfs_free_path(path);
- path = NULL;
-
if (!stopped && prev_extent_end < range_end) {
ret = fiemap_process_hole(inode, fieinfo, &cache,
&delalloc_cached_state, backref_ctx,
0, 0, 0, prev_extent_end, range_end - 1);
if (ret < 0)
- goto out;
+ goto out_unlock;
prev_extent_end = range_end;
}
@@ -3166,28 +3263,16 @@ check_eof_delalloc:
const u64 i_size = i_size_read(&inode->vfs_inode);
if (prev_extent_end < i_size) {
- struct extent_state *cached_state = NULL;
u64 delalloc_start;
u64 delalloc_end;
- u64 lockstart;
- u64 lockend;
bool delalloc;
- lockstart = round_down(prev_extent_end, sectorsize);
- lockend = round_up(i_size, sectorsize);
-
- /*
- * See the comment in fiemap_process_hole as to why
- * we're doing the locking here.
- */
- lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
delalloc = btrfs_find_delalloc_in_range(inode,
prev_extent_end,
i_size - 1,
&delalloc_cached_state,
&delalloc_start,
&delalloc_end);
- unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
if (!delalloc)
cache.flags |= FIEMAP_EXTENT_LAST;
} else {
@@ -3195,9 +3280,39 @@ check_eof_delalloc:
}
}
+out_unlock:
+ unlock_extent(&inode->io_tree, range_start, range_end, &cached_state);
+
+ if (ret == BTRFS_FIEMAP_FLUSH_CACHE) {
+ btrfs_release_path(path);
+ ret = flush_fiemap_cache(fieinfo, &cache);
+ if (ret)
+ goto out;
+ len -= cache.next_search_offset - start;
+ start = cache.next_search_offset;
+ goto restart;
+ } else if (ret < 0) {
+ goto out;
+ }
+
+ /*
+ * Must free the path before emitting to the fiemap buffer because we
+ * may have a non-cloned leaf and if the fiemap buffer is memory mapped
+ * to a file, a write into it (through btrfs_page_mkwrite()) may trigger
+ * waiting for an ordered extent that in order to complete needs to
+ * modify that leaf, therefore leading to a deadlock.
+ */
+ btrfs_free_path(path);
+ path = NULL;
+
+ ret = flush_fiemap_cache(fieinfo, &cache);
+ if (ret)
+ goto out;
+
ret = emit_last_fiemap_cache(fieinfo, &cache);
out:
free_extent_state(delalloc_cached_state);
+ kfree(cache.entries);
btrfs_free_backref_share_ctx(backref_ctx);
btrfs_free_path(path);
return ret;
@@ -3646,7 +3761,7 @@ retry:
/* For now, we should only have single-page folios for btree inode. */
ASSERT(folio_nr_pages(existing_folio) == 1);
- if (folio_size(existing_folio) != folio_size(eb->folios[0])) {
+ if (folio_size(existing_folio) != eb->folio_size) {
folio_unlock(existing_folio);
folio_put(existing_folio);
return -EAGAIN;
@@ -3789,6 +3904,8 @@ reallocate:
* and free the allocated page.
*/
folio = eb->folios[i];
+ eb->folio_size = folio_size(folio);
+ eb->folio_shift = folio_shift(folio);
spin_lock(&mapping->i_private_lock);
/* Should not fail, as we have preallocated the memory */
ret = attach_extent_buffer_folio(eb, folio, prealloc);
@@ -4238,7 +4355,7 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num,
for (int i = 0; i < num_folios; i++) {
struct folio *folio = eb->folios[i];
- ret = bio_add_folio(&bbio->bio, folio, folio_size(folio), 0);
+ ret = bio_add_folio(&bbio->bio, folio, eb->folio_size, 0);
ASSERT(ret);
}
}
@@ -4258,7 +4375,7 @@ static bool report_eb_range(const struct extent_buffer *eb, unsigned long start,
unsigned long len)
{
btrfs_warn(eb->fs_info,
- "access to eb bytenr %llu len %lu out of range start %lu len %lu",
+ "access to eb bytenr %llu len %u out of range start %lu len %lu",
eb->start, eb->len, start, len);
WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
@@ -4287,7 +4404,7 @@ static inline int check_eb_range(const struct extent_buffer *eb,
void read_extent_buffer(const struct extent_buffer *eb, void *dstv,
unsigned long start, unsigned long len)
{
- const int unit_size = folio_size(eb->folios[0]);
+ const int unit_size = eb->folio_size;
size_t cur;
size_t offset;
char *dst = (char *)dstv;
@@ -4327,7 +4444,7 @@ int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
void __user *dstv,
unsigned long start, unsigned long len)
{
- const int unit_size = folio_size(eb->folios[0]);
+ const int unit_size = eb->folio_size;
size_t cur;
size_t offset;
char __user *dst = (char __user *)dstv;
@@ -4367,7 +4484,7 @@ int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
unsigned long start, unsigned long len)
{
- const int unit_size = folio_size(eb->folios[0]);
+ const int unit_size = eb->folio_size;
size_t cur;
size_t offset;
char *kaddr;
@@ -4438,7 +4555,7 @@ static void __write_extent_buffer(const struct extent_buffer *eb,
const void *srcv, unsigned long start,
unsigned long len, bool use_memmove)
{
- const int unit_size = folio_size(eb->folios[0]);
+ const int unit_size = eb->folio_size;
size_t cur;
size_t offset;
char *kaddr;
@@ -4487,7 +4604,7 @@ void write_extent_buffer(const struct extent_buffer *eb, const void *srcv,
static void memset_extent_buffer(const struct extent_buffer *eb, int c,
unsigned long start, unsigned long len)
{
- const int unit_size = folio_size(eb->folios[0]);
+ const int unit_size = eb->folio_size;
unsigned long cur = start;
if (eb->addr) {
@@ -4518,7 +4635,7 @@ void memzero_extent_buffer(const struct extent_buffer *eb, unsigned long start,
void copy_extent_buffer_full(const struct extent_buffer *dst,
const struct extent_buffer *src)
{
- const int unit_size = folio_size(src->folios[0]);
+ const int unit_size = src->folio_size;
unsigned long cur = 0;
ASSERT(dst->len == src->len);
@@ -4540,7 +4657,7 @@ void copy_extent_buffer(const struct extent_buffer *dst,
unsigned long dst_offset, unsigned long src_offset,
unsigned long len)
{
- const int unit_size = folio_size(dst->folios[0]);
+ const int unit_size = dst->folio_size;
u64 dst_len = dst->len;
size_t cur;
size_t offset;
@@ -4596,10 +4713,10 @@ static inline void eb_bitmap_offset(const struct extent_buffer *eb,
* the bitmap item in the extent buffer + the offset of the byte in the
* bitmap item.
*/
- offset = start + offset_in_folio(eb->folios[0], eb->start) + byte_offset;
+ offset = start + offset_in_eb_folio(eb, eb->start) + byte_offset;
- *folio_index = offset >> folio_shift(eb->folios[0]);
- *folio_offset = offset_in_folio(eb->folios[0], offset);
+ *folio_index = offset >> eb->folio_shift;
+ *folio_offset = offset_in_eb_folio(eb, offset);
}
/*
@@ -4713,7 +4830,7 @@ void memcpy_extent_buffer(const struct extent_buffer *dst,
unsigned long dst_offset, unsigned long src_offset,
unsigned long len)
{
- const int unit_size = folio_size(dst->folios[0]);
+ const int unit_size = dst->folio_size;
unsigned long cur_off = 0;
if (check_eb_range(dst, dst_offset, len) ||
@@ -4837,7 +4954,7 @@ out:
static int try_release_subpage_extent_buffer(struct page *page)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
+ struct btrfs_fs_info *fs_info = page_to_fs_info(page);
u64 cur = page_offset(page);
const u64 end = page_offset(page) + PAGE_SIZE;
int ret;
@@ -4910,7 +5027,7 @@ int try_release_extent_buffer(struct page *page)
struct folio *folio = page_folio(page);
struct extent_buffer *eb;
- if (btrfs_sb(page->mapping->host->i_sb)->nodesize < PAGE_SIZE)
+ if (page_to_fs_info(page)->nodesize < PAGE_SIZE)
return try_release_subpage_extent_buffer(page);
/*
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 46050500529b..e3530d427e1f 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -7,11 +7,32 @@
#include <linux/refcount.h>
#include <linux/fiemap.h>
#include <linux/btrfs_tree.h>
+#include <linux/spinlock.h>
+#include <linux/atomic.h>
+#include <linux/rwsem.h>
+#include <linux/list.h>
+#include <linux/slab.h>
#include "compression.h"
+#include "messages.h"
#include "ulist.h"
#include "misc.h"
+struct page;
+struct file;
+struct folio;
+struct inode;
+struct fiemap_extent_info;
+struct readahead_control;
+struct address_space;
+struct writeback_control;
+struct extent_io_tree;
+struct extent_map_tree;
+struct btrfs_block_group;
+struct btrfs_fs_info;
+struct btrfs_inode;
+struct btrfs_root;
struct btrfs_trans_handle;
+struct btrfs_tree_parent_check;
enum {
EXTENT_BUFFER_UPTODATE,
@@ -63,11 +84,6 @@ enum {
#define BITMAP_LAST_BYTE_MASK(nbits) \
(BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1)))
-struct btrfs_root;
-struct btrfs_inode;
-struct btrfs_fs_info;
-struct extent_io_tree;
-struct btrfs_tree_parent_check;
int __init extent_buffer_init_cachep(void);
void __cold extent_buffer_free_cachep(void);
@@ -75,7 +91,8 @@ void __cold extent_buffer_free_cachep(void);
#define INLINE_EXTENT_BUFFER_PAGES (BTRFS_MAX_METADATA_BLOCKSIZE / PAGE_SIZE)
struct extent_buffer {
u64 start;
- unsigned long len;
+ u32 len;
+ u32 folio_size;
unsigned long bflags;
struct btrfs_fs_info *fs_info;
@@ -90,6 +107,7 @@ struct extent_buffer {
int read_mirror;
/* >= 0 if eb belongs to a log tree, -1 otherwise */
s8 log_index;
+ u8 folio_shift;
struct rcu_head rcu_head;
struct rw_semaphore lock;
@@ -113,6 +131,13 @@ struct btrfs_eb_write_context {
struct btrfs_block_group *zoned_bg;
};
+static inline unsigned long offset_in_eb_folio(const struct extent_buffer *eb,
+ u64 start)
+{
+ ASSERT(eb->folio_size);
+ return start & (eb->folio_size - 1);
+}
+
/*
* Get the correct offset inside the page of extent buffer.
*
@@ -151,13 +176,13 @@ static inline unsigned long get_eb_folio_index(const struct extent_buffer *eb,
* the folio_shift would be large enough to always make us
* return 0 as index.
* 1.2) Several page sized folios
- * The folio_shift() would be PAGE_SHIFT, giving us the correct
+ * The folio_shift would be PAGE_SHIFT, giving us the correct
* index.
*
* 2) sectorsize < PAGE_SIZE and nodesize < PAGE_SIZE case
* The folio would only be page sized, and always give us 0 as index.
*/
- return offset >> folio_shift(eb->folios[0]);
+ return offset >> eb->folio_shift;
}
/*
@@ -205,8 +230,6 @@ static inline void extent_changeset_free(struct extent_changeset *changeset)
kfree(changeset);
}
-struct extent_map_tree;
-
int try_release_extent_mapping(struct page *page, gfp_t mask);
int try_release_extent_buffer(struct page *page);
@@ -221,6 +244,7 @@ int btree_write_cache_pages(struct address_space *mapping,
void extent_readahead(struct readahead_control *rac);
int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len);
+int set_folio_extent_mapped(struct folio *folio);
int set_page_extent_mapped(struct page *page);
void clear_page_extent_mapped(struct page *page);
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index b61099bf97a8..347ca13d15a9 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -5,7 +5,6 @@
#include <linux/spinlock.h>
#include "messages.h"
#include "ctree.h"
-#include "volumes.h"
#include "extent_map.h"
#include "compression.h"
#include "btrfs_inode.h"
@@ -16,8 +15,7 @@ static struct kmem_cache *extent_map_cache;
int __init extent_map_init(void)
{
extent_map_cache = kmem_cache_create("btrfs_extent_map",
- sizeof(struct extent_map), 0,
- SLAB_MEM_SPREAD, NULL);
+ sizeof(struct extent_map), 0, 0, NULL);
if (!extent_map_cache)
return -ENOMEM;
return 0;
@@ -291,6 +289,10 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
* Called after an extent has been written to disk properly. Set the generation
* to the generation that actually added the file item to the inode so we know
* we need to sync this extent when we call fsync().
+ *
+ * Returns: 0 on success
+ * -ENOENT when the extent is not found in the tree
+ * -EUCLEAN if the found extent does not match the expected start
*/
int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen)
{
@@ -308,14 +310,18 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen)
"no extent map found for inode %llu (root %lld) when unpinning extent range [%llu, %llu), generation %llu",
btrfs_ino(inode), btrfs_root_id(inode->root),
start, len, gen);
+ ret = -ENOENT;
goto out;
}
- if (WARN_ON(em->start != start))
+ if (WARN_ON(em->start != start)) {
btrfs_warn(fs_info,
"found extent map for inode %llu (root %lld) with unexpected start offset %llu when unpinning extent range [%llu, %llu), generation %llu",
btrfs_ino(inode), btrfs_root_id(inode->root),
em->start, start, len, gen);
+ ret = -EUCLEAN;
+ goto out;
+ }
em->generation = gen;
em->flags &= ~EXTENT_FLAG_PINNED;
@@ -531,7 +537,8 @@ static noinline int merge_extent_mapping(struct extent_map_tree *em_tree,
u64 end;
u64 start_diff;
- BUG_ON(map_start < em->start || map_start >= extent_map_end(em));
+ if (map_start < em->start || map_start >= extent_map_end(em))
+ return -EINVAL;
if (existing->start > map_start) {
next = existing;
@@ -626,9 +633,9 @@ int btrfs_add_extent_mapping(struct btrfs_fs_info *fs_info,
free_extent_map(em);
*em_in = NULL;
WARN_ONCE(ret,
-"unexpected error %d: merge existing(start %llu len %llu) with em(start %llu len %llu)\n",
- ret, existing->start, existing->len,
- orig_start, orig_len);
+"extent map merge error existing [%llu, %llu) with em [%llu, %llu) start %llu\n",
+ existing->start, existing->len,
+ orig_start, orig_len, start);
}
free_extent_map(existing);
}
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index e380fc08bbe4..c5a098c99cc6 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -3,10 +3,18 @@
#ifndef BTRFS_EXTENT_MAP_H
#define BTRFS_EXTENT_MAP_H
+#include <linux/compiler_types.h>
+#include <linux/rwlock_types.h>
#include <linux/rbtree.h>
+#include <linux/list.h>
#include <linux/refcount.h>
+#include "misc.h"
+#include "extent_map.h"
#include "compression.h"
+struct btrfs_inode;
+struct btrfs_fs_info;
+
#define EXTENT_MAP_LAST_BYTE ((u64)-4)
#define EXTENT_MAP_HOLE ((u64)-3)
#define EXTENT_MAP_INLINE ((u64)-2)
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 81ac1d474bf1..e58fb5347e65 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -10,17 +10,14 @@
#include <linux/sched/mm.h>
#include <crypto/hash.h>
#include "messages.h"
-#include "misc.h"
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "bio.h"
-#include "print-tree.h"
#include "compression.h"
#include "fs.h"
#include "accessors.h"
#include "file-item.h"
-#include "super.h"
#define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \
sizeof(struct btrfs_item) * 2) / \
@@ -179,7 +176,6 @@ int btrfs_insert_hole_extent(struct btrfs_trans_handle *trans,
sizeof(*item));
if (ret < 0)
goto out;
- BUG_ON(ret); /* Can't happen */
leaf = path->nodes[0];
item = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
@@ -1229,8 +1225,6 @@ insert:
ins_size);
if (ret < 0)
goto out;
- if (WARN_ON(ret != 0))
- goto out;
leaf = path->nodes[0];
csum:
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
diff --git a/fs/btrfs/file-item.h b/fs/btrfs/file-item.h
index 04bd2d34efb1..15c05cc0fce6 100644
--- a/fs/btrfs/file-item.h
+++ b/fs/btrfs/file-item.h
@@ -3,8 +3,21 @@
#ifndef BTRFS_FILE_ITEM_H
#define BTRFS_FILE_ITEM_H
+#include <linux/list.h>
+#include <uapi/linux/btrfs_tree.h>
#include "accessors.h"
+struct extent_map;
+struct btrfs_file_extent_item;
+struct btrfs_fs_info;
+struct btrfs_path;
+struct btrfs_bio;
+struct btrfs_trans_handle;
+struct btrfs_root;
+struct btrfs_ordered_sum;
+struct btrfs_path;
+struct btrfs_inode;
+
#define BTRFS_FILE_EXTENT_INLINE_DATA_START \
(offsetof(struct btrfs_file_extent_item, disk_bytenr))
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 38dfcac47609..f9d76072398d 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -22,10 +22,8 @@
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
-#include "print-tree.h"
#include "tree-log.h"
#include "locking.h"
-#include "volumes.h"
#include "qgroup.h"
#include "compression.h"
#include "delalloc-space.h"
@@ -1137,7 +1135,7 @@ static int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from,
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
loff_t pos = iocb->ki_pos;
int ret;
loff_t oldsize;
@@ -1185,7 +1183,7 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
struct file *file = iocb->ki_filp;
loff_t pos;
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct page **pages = NULL;
struct extent_changeset *data_reserved = NULL;
u64 release_bytes = 0;
@@ -1461,7 +1459,7 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
loff_t pos;
ssize_t written = 0;
ssize_t written_buffered;
@@ -1787,7 +1785,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
struct dentry *dentry = file_dentry(file);
struct inode *inode = d_inode(dentry);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
struct btrfs_log_ctx ctx;
@@ -1912,6 +1910,8 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
goto out_release_extents;
}
+ btrfs_init_log_ctx_scratch_eb(&ctx);
+
/*
* We use start here because we will need to wait on the IO to complete
* in btrfs_sync_log, which could require joining a transaction (for
@@ -1931,6 +1931,15 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
trans->in_fsync = true;
ret = btrfs_log_dentry_safe(trans, dentry, &ctx);
+ /*
+ * Scratch eb no longer needed, release before syncing log or commit
+ * transaction, to avoid holding unnecessary memory during such long
+ * operations.
+ */
+ if (ctx.scratch_eb) {
+ free_extent_buffer(ctx.scratch_eb);
+ ctx.scratch_eb = NULL;
+ }
btrfs_release_log_ctx_extents(&ctx);
if (ret < 0) {
/* Fallthrough and commit/free transaction. */
@@ -2006,6 +2015,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
ret = btrfs_commit_transaction(trans);
out:
+ free_extent_buffer(ctx.scratch_eb);
ASSERT(list_empty(&ctx.list));
ASSERT(list_empty(&ctx.conflict_inodes));
err = file_check_and_advance_wb_err(file);
@@ -2176,7 +2186,7 @@ static int find_first_non_hole(struct btrfs_inode *inode, u64 *start, u64 *len)
struct extent_map *em;
int ret = 0;
- em = btrfs_get_extent(inode, NULL, 0,
+ em = btrfs_get_extent(inode, NULL,
round_down(*start, fs_info->sectorsize),
round_up(*len, fs_info->sectorsize));
if (IS_ERR(em))
@@ -2593,7 +2603,7 @@ out:
static int btrfs_punch_hole(struct file *file, loff_t offset, loff_t len)
{
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct extent_state *cached_state = NULL;
struct btrfs_path *path;
@@ -2835,7 +2845,7 @@ static int btrfs_zero_range_check_range_boundary(struct btrfs_inode *inode,
int ret;
offset = round_down(offset, sectorsize);
- em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize);
+ em = btrfs_get_extent(inode, NULL, offset, sectorsize);
if (IS_ERR(em))
return PTR_ERR(em);
@@ -2866,7 +2876,7 @@ static int btrfs_zero_range(struct inode *inode,
u64 bytes_to_reserve = 0;
bool space_reserved = false;
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, alloc_start,
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, alloc_start,
alloc_end - alloc_start);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
@@ -2909,8 +2919,7 @@ static int btrfs_zero_range(struct inode *inode,
if (BTRFS_BYTES_TO_BLKS(fs_info, offset) ==
BTRFS_BYTES_TO_BLKS(fs_info, offset + len - 1)) {
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, alloc_start,
- sectorsize);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, alloc_start, sectorsize);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
goto out;
@@ -3005,7 +3014,7 @@ reserve_space:
}
ret = btrfs_prealloc_file_range(inode, mode, alloc_start,
alloc_end - alloc_start,
- i_blocksize(inode),
+ fs_info->sectorsize,
offset + len, &alloc_hint);
unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend,
&cached_state);
@@ -3049,7 +3058,7 @@ static long btrfs_fallocate(struct file *file, int mode,
int ret;
/* Do not allow fallocate in ZONED mode */
- if (btrfs_is_zoned(btrfs_sb(inode->i_sb)))
+ if (btrfs_is_zoned(inode_to_fs_info(inode)))
return -EOPNOTSUPP;
alloc_start = round_down(offset, blocksize);
@@ -3126,7 +3135,7 @@ static long btrfs_fallocate(struct file *file, int mode,
/* First, check if we exceed the qgroup limit */
while (cur_offset < alloc_end) {
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset,
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, cur_offset,
alloc_end - cur_offset);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
@@ -3177,7 +3186,7 @@ static long btrfs_fallocate(struct file *file, int mode,
if (!ret) {
ret = btrfs_prealloc_file_range(inode, mode,
range->start,
- range->len, i_blocksize(inode),
+ range->len, blocksize,
offset + len, &alloc_hint);
/*
* btrfs_prealloc_file_range() releases space even
@@ -3754,7 +3763,7 @@ static ssize_t btrfs_direct_read(struct kiocb *iocb, struct iov_iter *to)
if (fsverity_active(inode))
return 0;
- if (check_direct_read(btrfs_sb(inode->i_sb), to, iocb->ki_pos))
+ if (check_direct_read(inode_to_fs_info(inode), to, iocb->ki_pos))
return 0;
btrfs_inode_lock(BTRFS_I(inode), BTRFS_ILOCK_SHARED);
diff --git a/fs/btrfs/file.h b/fs/btrfs/file.h
index 82b34fbb295f..77aaca208c7b 100644
--- a/fs/btrfs/file.h
+++ b/fs/btrfs/file.h
@@ -3,6 +3,21 @@
#ifndef BTRFS_FILE_H
#define BTRFS_FILE_H
+#include <linux/types.h>
+
+struct file;
+struct extent_state;
+struct kiocb;
+struct iov_iter;
+struct page;
+struct btrfs_ioctl_encoded_io_args;
+struct btrfs_drop_extents_args;
+struct btrfs_inode;
+struct btrfs_root;
+struct btrfs_path;
+struct btrfs_replace_extent_info;
+struct btrfs_trans_handle;
+
extern const struct file_operations btrfs_file_operations;
int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index d372c7ce0e6b..c8a05d5eb9cb 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -19,9 +19,7 @@
#include "transaction.h"
#include "disk-io.h"
#include "extent_io.h"
-#include "volumes.h"
#include "space-info.h"
-#include "delalloc-space.h"
#include "block-group.h"
#include "discard.h"
#include "subpage.h"
@@ -399,7 +397,7 @@ static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
return -ENOMEM;
io_ctl->num_pages = num_pages;
- io_ctl->fs_info = btrfs_sb(inode->i_sb);
+ io_ctl->fs_info = inode_to_fs_info(inode);
io_ctl->inode = inode;
return 0;
@@ -2621,7 +2619,7 @@ static void steal_from_bitmap(struct btrfs_free_space_ctl *ctl,
}
}
-int __btrfs_add_free_space(struct btrfs_block_group *block_group,
+static int __btrfs_add_free_space(struct btrfs_block_group *block_group,
u64 offset, u64 bytes,
enum btrfs_trim_state trim_state)
{
@@ -4156,15 +4154,13 @@ out:
int __init btrfs_free_space_init(void)
{
- btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space",
- sizeof(struct btrfs_free_space), 0,
- SLAB_MEM_SPREAD, NULL);
+ btrfs_free_space_cachep = KMEM_CACHE(btrfs_free_space, 0);
if (!btrfs_free_space_cachep)
return -ENOMEM;
btrfs_free_space_bitmap_cachep = kmem_cache_create("btrfs_free_space_bitmap",
PAGE_SIZE, PAGE_SIZE,
- SLAB_MEM_SPREAD, NULL);
+ 0, NULL);
if (!btrfs_free_space_bitmap_cachep) {
kmem_cache_destroy(btrfs_free_space_cachep);
return -ENOMEM;
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 33b4da3271b1..83774bfd7b3b 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -6,6 +6,19 @@
#ifndef BTRFS_FREE_SPACE_CACHE_H
#define BTRFS_FREE_SPACE_CACHE_H
+#include <linux/rbtree.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include "fs.h"
+
+struct inode;
+struct page;
+struct btrfs_fs_info;
+struct btrfs_path;
+struct btrfs_trans_handle;
+struct btrfs_trim_block_group;
+
/*
* This is the trim state of an extent or bitmap.
*
@@ -114,8 +127,6 @@ int btrfs_write_out_cache(struct btrfs_trans_handle *trans,
void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group,
struct btrfs_free_space_ctl *ctl);
-int __btrfs_add_free_space(struct btrfs_block_group *block_group, u64 bytenr,
- u64 size, enum btrfs_trim_state trim_state);
int btrfs_add_free_space(struct btrfs_block_group *block_group,
u64 bytenr, u64 size);
int btrfs_add_free_space_unused(struct btrfs_block_group *block_group,
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index 7b598b070700..90f2938bd743 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -1176,12 +1176,16 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
BTRFS_FREE_SPACE_TREE_OBJECTID);
if (IS_ERR(free_space_root)) {
ret = PTR_ERR(free_space_root);
- goto abort;
+ btrfs_abort_transaction(trans, ret);
+ btrfs_end_transaction(trans);
+ goto out_clear;
}
ret = btrfs_global_root_insert(free_space_root);
if (ret) {
btrfs_put_root(free_space_root);
- goto abort;
+ btrfs_abort_transaction(trans, ret);
+ btrfs_end_transaction(trans);
+ goto out_clear;
}
node = rb_first_cached(&fs_info->block_group_cache_tree);
@@ -1189,8 +1193,11 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
block_group = rb_entry(node, struct btrfs_block_group,
cache_node);
ret = populate_free_space_tree(trans, block_group);
- if (ret)
- goto abort;
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ btrfs_end_transaction(trans);
+ goto out_clear;
+ }
node = rb_next(node);
}
@@ -1206,11 +1213,9 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
return ret;
-abort:
+out_clear:
clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
- btrfs_abort_transaction(trans, ret);
- btrfs_end_transaction(trans);
return ret;
}
@@ -1273,12 +1278,18 @@ int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info)
btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
ret = clear_free_space_tree(trans, free_space_root);
- if (ret)
- goto abort;
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ btrfs_end_transaction(trans);
+ return ret;
+ }
ret = btrfs_del_root(trans, &free_space_root->root_key);
- if (ret)
- goto abort;
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ btrfs_end_transaction(trans);
+ return ret;
+ }
btrfs_global_root_delete(free_space_root);
@@ -1295,11 +1306,6 @@ int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info)
btrfs_put_root(free_space_root);
return btrfs_commit_transaction(trans);
-
-abort:
- btrfs_abort_transaction(trans, ret);
- btrfs_end_transaction(trans);
- return ret;
}
int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
@@ -1322,8 +1328,11 @@ int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
ret = clear_free_space_tree(trans, free_space_root);
- if (ret)
- goto abort;
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ btrfs_end_transaction(trans);
+ return ret;
+ }
node = rb_first_cached(&fs_info->block_group_cache_tree);
while (node) {
@@ -1332,8 +1341,11 @@ int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
block_group = rb_entry(node, struct btrfs_block_group,
cache_node);
ret = populate_free_space_tree(trans, block_group);
- if (ret)
- goto abort;
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ btrfs_end_transaction(trans);
+ return ret;
+ }
node = rb_next(node);
}
@@ -1344,10 +1356,6 @@ int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
ret = btrfs_commit_transaction(trans);
clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
return ret;
-abort:
- btrfs_abort_transaction(trans, ret);
- btrfs_end_transaction(trans);
- return ret;
}
static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/free-space-tree.h b/fs/btrfs/free-space-tree.h
index 6d5551d0ced8..e6c6d6f4f221 100644
--- a/fs/btrfs/free-space-tree.h
+++ b/fs/btrfs/free-space-tree.h
@@ -6,7 +6,13 @@
#ifndef BTRFS_FREE_SPACE_TREE_H
#define BTRFS_FREE_SPACE_TREE_H
+#include <linux/bits.h>
+
struct btrfs_caching_control;
+struct btrfs_fs_info;
+struct btrfs_path;
+struct btrfs_block_group;
+struct btrfs_trans_handle;
/*
* The default size for new free space bitmap items. The last bitmap in a block
diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h
index f8bb73d6ab68..93f5c57ea4e3 100644
--- a/fs/btrfs/fs.h
+++ b/fs/btrfs/fs.h
@@ -4,13 +4,50 @@
#define BTRFS_FS_H
#include <linux/blkdev.h>
-#include <linux/fs.h>
-#include <linux/btrfs_tree.h>
#include <linux/sizes.h>
+#include <linux/time64.h>
+#include <linux/compiler.h>
+#include <linux/math.h>
+#include <linux/atomic.h>
+#include <linux/blkdev.h>
+#include <linux/percpu_counter.h>
+#include <linux/completion.h>
+#include <linux/lockdep.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/rwlock_types.h>
+#include <linux/rwsem.h>
+#include <linux/semaphore.h>
+#include <linux/list.h>
+#include <linux/radix-tree.h>
+#include <linux/workqueue.h>
+#include <linux/wait.h>
+#include <linux/wait_bit.h>
+#include <linux/sched.h>
+#include <linux/rbtree.h>
+#include <uapi/linux/btrfs.h>
+#include <uapi/linux/btrfs_tree.h>
#include "extent-io-tree.h"
-#include "extent_map.h"
#include "async-thread.h"
#include "block-rsv.h"
+#include "fs.h"
+
+struct inode;
+struct super_block;
+struct kobject;
+struct reloc_control;
+struct crypto_shash;
+struct ulist;
+struct btrfs_device;
+struct btrfs_block_group;
+struct btrfs_root;
+struct btrfs_fs_devices;
+struct btrfs_transaction;
+struct btrfs_delayed_root;
+struct btrfs_balance_control;
+struct btrfs_subpage_info;
+struct btrfs_stripe_hash_table;
+struct btrfs_space_info;
#define BTRFS_MAX_EXTENT_SIZE SZ_128M
@@ -732,10 +769,13 @@ struct btrfs_fs_info {
/* Reclaim partially filled block groups in the background */
struct work_struct reclaim_bgs_work;
+ /* Protected by unused_bgs_lock. */
struct list_head reclaim_bgs;
int bg_reclaim_threshold;
+ /* Protects the lists unused_bgs and reclaim_bgs. */
spinlock_t unused_bgs_lock;
+ /* Protected by unused_bgs_lock. */
struct list_head unused_bgs;
struct mutex unused_bg_unpin_mutex;
/* Protect block groups that are going to be deleted */
@@ -829,6 +869,17 @@ struct btrfs_fs_info {
#endif
};
+#define page_to_inode(_page) (BTRFS_I(_Generic((_page), \
+ struct page *: (_page))->mapping->host))
+#define folio_to_inode(_folio) (BTRFS_I(_Generic((_folio), \
+ struct folio *: (_folio))->mapping->host))
+
+#define page_to_fs_info(_page) (page_to_inode(_page)->root->fs_info)
+#define folio_to_fs_info(_folio) (folio_to_inode(_folio)->root->fs_info)
+
+#define inode_to_fs_info(_inode) (BTRFS_I(_Generic((_inode), \
+ struct inode *: (_inode)))->root->fs_info)
+
static inline u64 btrfs_get_fs_generation(const struct btrfs_fs_info *fs_info)
{
return READ_ONCE(fs_info->generation);
@@ -922,6 +973,8 @@ void btrfs_exclop_finish(struct btrfs_fs_info *fs_info);
void btrfs_exclop_balance(struct btrfs_fs_info *fs_info,
enum btrfs_exclusive_operation op);
+int btrfs_check_ioctl_vol_args_path(const struct btrfs_ioctl_vol_args *vol_args);
+
/* Compatibility and incompatibility defines */
void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag,
const char *name);
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
index 7d734830e514..9c1394c0a6d7 100644
--- a/fs/btrfs/inode-item.c
+++ b/fs/btrfs/inode-item.c
@@ -9,7 +9,6 @@
#include "inode-item.h"
#include "disk-io.h"
#include "transaction.h"
-#include "print-tree.h"
#include "space-info.h"
#include "accessors.h"
#include "extent-tree.h"
diff --git a/fs/btrfs/inode-item.h b/fs/btrfs/inode-item.h
index 4337bb26f419..c4aded82709b 100644
--- a/fs/btrfs/inode-item.h
+++ b/fs/btrfs/inode-item.h
@@ -6,14 +6,15 @@
#include <linux/types.h>
#include <linux/crc32c.h>
+struct fscrypt_str;
+struct extent_buffer;
struct btrfs_trans_handle;
struct btrfs_root;
struct btrfs_path;
struct btrfs_key;
struct btrfs_inode_extref;
struct btrfs_inode;
-struct extent_buffer;
-struct fscrypt_str;
+struct btrfs_truncate_control;
/*
* Return this if we need to call truncate_block for the last bit of the
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 4795738d5785..37701531eeb1 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -39,14 +39,12 @@
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
-#include "print-tree.h"
#include "ordered-data.h"
#include "xattr.h"
#include "tree-log.h"
#include "bio.h"
#include "compression.h"
#include "locking.h"
-#include "free-space-cache.h"
#include "props.h"
#include "qgroup.h"
#include "delalloc-space.h"
@@ -740,7 +738,8 @@ static noinline int add_async_extent(struct async_chunk *cow,
struct async_extent *async_extent;
async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS);
- BUG_ON(!async_extent); /* -ENOMEM */
+ if (!async_extent)
+ return -ENOMEM;
async_extent->start = start;
async_extent->ram_size = ram_size;
async_extent->compressed_size = compressed_size;
@@ -1027,8 +1026,9 @@ again:
* The async work queues will take care of doing actual allocation on
* disk for these compressed pages, and will submit the bios.
*/
- add_async_extent(async_chunk, start, total_in, total_compressed, pages,
- nr_pages, compress_type);
+ ret = add_async_extent(async_chunk, start, total_in, total_compressed, pages,
+ nr_pages, compress_type);
+ BUG_ON(ret);
if (start + total_in < end) {
start += total_in;
cond_resched();
@@ -1040,8 +1040,9 @@ mark_incompressible:
if (!btrfs_test_opt(fs_info, FORCE_COMPRESS) && !inode->prop_compress)
inode->flags |= BTRFS_INODE_NOCOMPRESS;
cleanup_and_bail_uncompressed:
- add_async_extent(async_chunk, start, end - start + 1, 0, NULL, 0,
- BTRFS_COMPRESS_NONE);
+ ret = add_async_extent(async_chunk, start, end - start + 1, 0, NULL, 0,
+ BTRFS_COMPRESS_NONE);
+ BUG_ON(ret);
free_pages:
if (pages) {
for (i = 0; i < nr_pages; i++) {
@@ -2302,6 +2303,8 @@ void btrfs_split_delalloc_extent(struct btrfs_inode *inode,
struct btrfs_fs_info *fs_info = inode->root->fs_info;
u64 size;
+ lockdep_assert_held(&inode->io_tree.lock);
+
/* not delalloc, ignore it */
if (!(orig->state & EXTENT_DELALLOC))
return;
@@ -2340,6 +2343,8 @@ void btrfs_merge_delalloc_extent(struct btrfs_inode *inode, struct extent_state
u64 new_size, old_size;
u32 num_extents;
+ lockdep_assert_held(&inode->io_tree.lock);
+
/* not delalloc, ignore it */
if (!(other->state & EXTENT_DELALLOC))
return;
@@ -2387,55 +2392,50 @@ void btrfs_merge_delalloc_extent(struct btrfs_inode *inode, struct extent_state
spin_unlock(&inode->lock);
}
-static void btrfs_add_delalloc_inodes(struct btrfs_root *root,
- struct btrfs_inode *inode)
+static void btrfs_add_delalloc_inode(struct btrfs_inode *inode)
{
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ struct btrfs_root *root = inode->root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
spin_lock(&root->delalloc_lock);
- if (list_empty(&inode->delalloc_inodes)) {
- list_add_tail(&inode->delalloc_inodes, &root->delalloc_inodes);
- set_bit(BTRFS_INODE_IN_DELALLOC_LIST, &inode->runtime_flags);
- root->nr_delalloc_inodes++;
- if (root->nr_delalloc_inodes == 1) {
- spin_lock(&fs_info->delalloc_root_lock);
- BUG_ON(!list_empty(&root->delalloc_root));
- list_add_tail(&root->delalloc_root,
- &fs_info->delalloc_roots);
- spin_unlock(&fs_info->delalloc_root_lock);
- }
+ ASSERT(list_empty(&inode->delalloc_inodes));
+ list_add_tail(&inode->delalloc_inodes, &root->delalloc_inodes);
+ root->nr_delalloc_inodes++;
+ if (root->nr_delalloc_inodes == 1) {
+ spin_lock(&fs_info->delalloc_root_lock);
+ ASSERT(list_empty(&root->delalloc_root));
+ list_add_tail(&root->delalloc_root, &fs_info->delalloc_roots);
+ spin_unlock(&fs_info->delalloc_root_lock);
}
spin_unlock(&root->delalloc_lock);
}
-void __btrfs_del_delalloc_inode(struct btrfs_root *root,
- struct btrfs_inode *inode)
+void btrfs_del_delalloc_inode(struct btrfs_inode *inode)
{
+ struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
+ lockdep_assert_held(&root->delalloc_lock);
+
+ /*
+ * We may be called after the inode was already deleted from the list,
+ * namely in the transaction abort path btrfs_destroy_delalloc_inodes(),
+ * and then later through btrfs_clear_delalloc_extent() while the inode
+ * still has ->delalloc_bytes > 0.
+ */
if (!list_empty(&inode->delalloc_inodes)) {
list_del_init(&inode->delalloc_inodes);
- clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
- &inode->runtime_flags);
root->nr_delalloc_inodes--;
if (!root->nr_delalloc_inodes) {
ASSERT(list_empty(&root->delalloc_inodes));
spin_lock(&fs_info->delalloc_root_lock);
- BUG_ON(list_empty(&root->delalloc_root));
+ ASSERT(!list_empty(&root->delalloc_root));
list_del_init(&root->delalloc_root);
spin_unlock(&fs_info->delalloc_root_lock);
}
}
}
-static void btrfs_del_delalloc_inode(struct btrfs_root *root,
- struct btrfs_inode *inode)
-{
- spin_lock(&root->delalloc_lock);
- __btrfs_del_delalloc_inode(root, inode);
- spin_unlock(&root->delalloc_lock);
-}
-
/*
* Properly track delayed allocation bytes in the inode and to maintain the
* list of inodes that have pending delalloc work to be done.
@@ -2445,6 +2445,8 @@ void btrfs_set_delalloc_extent(struct btrfs_inode *inode, struct extent_state *s
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ lockdep_assert_held(&inode->io_tree.lock);
+
if ((bits & EXTENT_DEFRAG) && !(bits & EXTENT_DELALLOC))
WARN_ON(1);
/*
@@ -2453,10 +2455,9 @@ void btrfs_set_delalloc_extent(struct btrfs_inode *inode, struct extent_state *s
* bit, which is only set or cleared with irqs on
*/
if (!(state->state & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
- struct btrfs_root *root = inode->root;
u64 len = state->end + 1 - state->start;
+ u64 prev_delalloc_bytes;
u32 num_extents = count_max_extents(fs_info, len);
- bool do_list = !btrfs_is_free_space_inode(inode);
spin_lock(&inode->lock);
btrfs_mod_outstanding_extents(inode, num_extents);
@@ -2469,13 +2470,20 @@ void btrfs_set_delalloc_extent(struct btrfs_inode *inode, struct extent_state *s
percpu_counter_add_batch(&fs_info->delalloc_bytes, len,
fs_info->delalloc_batch);
spin_lock(&inode->lock);
+ prev_delalloc_bytes = inode->delalloc_bytes;
inode->delalloc_bytes += len;
if (bits & EXTENT_DEFRAG)
inode->defrag_bytes += len;
- if (do_list && !test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
- &inode->runtime_flags))
- btrfs_add_delalloc_inodes(root, inode);
spin_unlock(&inode->lock);
+
+ /*
+ * We don't need to be under the protection of the inode's lock,
+ * because we are called while holding the inode's io_tree lock
+ * and are therefore protected against concurrent calls of this
+ * function and btrfs_clear_delalloc_extent().
+ */
+ if (!btrfs_is_free_space_inode(inode) && prev_delalloc_bytes == 0)
+ btrfs_add_delalloc_inode(inode);
}
if (!(state->state & EXTENT_DELALLOC_NEW) &&
@@ -2497,6 +2505,8 @@ void btrfs_clear_delalloc_extent(struct btrfs_inode *inode,
u64 len = state->end + 1 - state->start;
u32 num_extents = count_max_extents(fs_info, len);
+ lockdep_assert_held(&inode->io_tree.lock);
+
if ((state->state & EXTENT_DEFRAG) && (bits & EXTENT_DEFRAG)) {
spin_lock(&inode->lock);
inode->defrag_bytes -= len;
@@ -2510,7 +2520,7 @@ void btrfs_clear_delalloc_extent(struct btrfs_inode *inode,
*/
if ((state->state & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = inode->root;
- bool do_list = !btrfs_is_free_space_inode(inode);
+ u64 new_delalloc_bytes;
spin_lock(&inode->lock);
btrfs_mod_outstanding_extents(inode, -num_extents);
@@ -2530,7 +2540,8 @@ void btrfs_clear_delalloc_extent(struct btrfs_inode *inode,
return;
if (!btrfs_is_data_reloc_root(root) &&
- do_list && !(state->state & EXTENT_NORESERVE) &&
+ !btrfs_is_free_space_inode(inode) &&
+ !(state->state & EXTENT_NORESERVE) &&
(bits & EXTENT_CLEAR_DATA_RESV))
btrfs_free_reserved_data_space_noquota(fs_info, len);
@@ -2538,11 +2549,20 @@ void btrfs_clear_delalloc_extent(struct btrfs_inode *inode,
fs_info->delalloc_batch);
spin_lock(&inode->lock);
inode->delalloc_bytes -= len;
- if (do_list && inode->delalloc_bytes == 0 &&
- test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
- &inode->runtime_flags))
- btrfs_del_delalloc_inode(root, inode);
+ new_delalloc_bytes = inode->delalloc_bytes;
spin_unlock(&inode->lock);
+
+ /*
+ * We don't need to be under the protection of the inode's lock,
+ * because we are called while holding the inode's io_tree lock
+ * and are therefore protected against concurrent calls of this
+ * function and btrfs_set_delalloc_extent().
+ */
+ if (!btrfs_is_free_space_inode(inode) && new_delalloc_bytes == 0) {
+ spin_lock(&root->delalloc_lock);
+ btrfs_del_delalloc_inode(inode);
+ spin_unlock(&root->delalloc_lock);
+ }
}
if ((state->state & EXTENT_DELALLOC_NEW) &&
@@ -2632,7 +2652,7 @@ static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode,
u64 em_len;
int ret = 0;
- em = btrfs_get_extent(inode, NULL, 0, search_start, search_len);
+ em = btrfs_get_extent(inode, NULL, search_start, search_len);
if (IS_ERR(em))
return PTR_ERR(em);
@@ -2829,7 +2849,7 @@ out_page:
int btrfs_writepage_cow_fixup(struct page *page)
{
struct inode *inode = page->mapping->host;
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_writepage_fixup *fixup;
/* This page has ordered extent covering it already */
@@ -3127,8 +3147,13 @@ int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent)
ordered_extent->disk_num_bytes);
}
}
- unpin_extent_cache(inode, ordered_extent->file_offset,
- ordered_extent->num_bytes, trans->transid);
+ if (ret < 0) {
+ btrfs_abort_transaction(trans, ret);
+ goto out;
+ }
+
+ ret = unpin_extent_cache(inode, ordered_extent->file_offset,
+ ordered_extent->num_bytes, trans->transid);
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
goto out;
@@ -3254,7 +3279,7 @@ out:
int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered)
{
- if (btrfs_is_zoned(btrfs_sb(ordered->inode->i_sb)) &&
+ if (btrfs_is_zoned(inode_to_fs_info(ordered->inode)) &&
!test_bit(BTRFS_ORDERED_IOERR, &ordered->flags) &&
list_empty(&ordered->bioc_list))
btrfs_finish_ordered_zoned(ordered);
@@ -3739,7 +3764,7 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
static int btrfs_read_locked_inode(struct inode *inode,
struct btrfs_path *in_path)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_path *path = in_path;
struct extent_buffer *leaf;
struct btrfs_inode_item *inode_item;
@@ -4383,7 +4408,14 @@ static noinline int may_destroy_subvol(struct btrfs_root *root)
ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
if (ret < 0)
goto out;
- BUG_ON(ret == 0);
+ if (ret == 0) {
+ /*
+ * Key with offset -1 found, there would have to exist a root
+ * with such id, but this is out of valid range.
+ */
+ ret = -EUCLEAN;
+ goto out;
+ }
ret = 0;
if (path->slots[0] > 0) {
@@ -4464,8 +4496,8 @@ again:
int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb);
struct btrfs_root *root = dir->root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
struct inode *inode = d_inode(dentry);
struct btrfs_root *dest = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
@@ -4695,7 +4727,7 @@ int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len,
u32 blocksize = fs_info->sectorsize;
pgoff_t index = from >> PAGE_SHIFT;
unsigned offset = from & (blocksize - 1);
- struct page *page;
+ struct folio *folio;
gfp_t mask = btrfs_alloc_write_mask(mapping);
size_t write_bytes = blocksize;
int ret = 0;
@@ -4727,8 +4759,9 @@ int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len,
goto out;
}
again:
- page = find_or_create_page(mapping, index, mask);
- if (!page) {
+ folio = __filemap_get_folio(mapping, index,
+ FGP_LOCK | FGP_ACCESSED | FGP_CREAT, mask);
+ if (IS_ERR(folio)) {
btrfs_delalloc_release_space(inode, data_reserved, block_start,
blocksize, true);
btrfs_delalloc_release_extents(inode, blocksize);
@@ -4736,15 +4769,15 @@ again:
goto out;
}
- if (!PageUptodate(page)) {
- ret = btrfs_read_folio(NULL, page_folio(page));
- lock_page(page);
- if (page->mapping != mapping) {
- unlock_page(page);
- put_page(page);
+ if (!folio_test_uptodate(folio)) {
+ ret = btrfs_read_folio(NULL, folio);
+ folio_lock(folio);
+ if (folio->mapping != mapping) {
+ folio_unlock(folio);
+ folio_put(folio);
goto again;
}
- if (!PageUptodate(page)) {
+ if (!folio_test_uptodate(folio)) {
ret = -EIO;
goto out_unlock;
}
@@ -4756,19 +4789,19 @@ again:
* folio private, but left the page in the mapping. Set the page mapped
* here to make sure it's properly set for the subpage stuff.
*/
- ret = set_page_extent_mapped(page);
+ ret = set_folio_extent_mapped(folio);
if (ret < 0)
goto out_unlock;
- wait_on_page_writeback(page);
+ folio_wait_writeback(folio);
lock_extent(io_tree, block_start, block_end, &cached_state);
ordered = btrfs_lookup_ordered_extent(inode, block_start);
if (ordered) {
unlock_extent(io_tree, block_start, block_end, &cached_state);
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
btrfs_start_ordered_extent(ordered);
btrfs_put_ordered_extent(ordered);
goto again;
@@ -4789,15 +4822,16 @@ again:
if (!len)
len = blocksize - offset;
if (front)
- memzero_page(page, (block_start - page_offset(page)),
- offset);
+ folio_zero_range(folio, block_start - folio_pos(folio),
+ offset);
else
- memzero_page(page, (block_start - page_offset(page)) + offset,
- len);
+ folio_zero_range(folio,
+ (block_start - folio_pos(folio)) + offset,
+ len);
}
- btrfs_folio_clear_checked(fs_info, page_folio(page), block_start,
+ btrfs_folio_clear_checked(fs_info, folio, block_start,
block_end + 1 - block_start);
- btrfs_folio_set_dirty(fs_info, page_folio(page), block_start,
+ btrfs_folio_set_dirty(fs_info, folio, block_start,
block_end + 1 - block_start);
unlock_extent(io_tree, block_start, block_end, &cached_state);
@@ -4814,8 +4848,8 @@ out_unlock:
block_start, blocksize, true);
}
btrfs_delalloc_release_extents(inode, blocksize);
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
out:
if (only_release_metadata)
btrfs_check_nocow_unlock(inode);
@@ -4907,8 +4941,7 @@ int btrfs_cont_expand(struct btrfs_inode *inode, loff_t oldsize, loff_t size)
&cached_state);
cur_offset = hole_start;
while (1) {
- em = btrfs_get_extent(inode, NULL, 0, cur_offset,
- block_end - cur_offset);
+ em = btrfs_get_extent(inode, NULL, cur_offset, block_end - cur_offset);
if (IS_ERR(em)) {
err = PTR_ERR(em);
em = NULL;
@@ -5019,7 +5052,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
btrfs_drew_write_unlock(&root->snapshot_lock);
btrfs_end_transaction(trans);
} else {
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
if (btrfs_is_zoned(fs_info)) {
ret = btrfs_wait_ordered_range(inode,
@@ -5222,7 +5255,7 @@ static struct btrfs_trans_handle *evict_refill_and_join(struct btrfs_root *root,
void btrfs_evict_inode(struct inode *inode)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info;
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_block_rsv *rsv = NULL;
@@ -5236,6 +5269,7 @@ void btrfs_evict_inode(struct inode *inode)
return;
}
+ fs_info = inode_to_fs_info(inode);
evict_inode_truncate_pages(inode);
if (inode->i_nlink &&
@@ -5533,7 +5567,6 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p)
BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
BTRFS_I(inode)->location.offset = 0;
BTRFS_I(inode)->root = btrfs_grab_root(args->root);
- BUG_ON(args->root && !BTRFS_I(inode)->root);
if (args->root && args->root == args->root->fs_info->tree_root &&
args->ino != BTRFS_BTREE_INODE_OBJECTID)
@@ -5661,7 +5694,7 @@ static inline u8 btrfs_inode_type(struct inode *inode)
struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(dir);
struct inode *inode;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_root *sub_root = root;
@@ -6200,7 +6233,7 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans,
struct inode *dir = args->dir;
struct inode *inode = args->inode;
const struct fscrypt_str *name = args->orphan ? NULL : &args->fname.disk_name;
- struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(dir);
struct btrfs_root *root;
struct btrfs_inode_item *inode_item;
struct btrfs_key *location;
@@ -6522,7 +6555,7 @@ fail_dir_item:
static int btrfs_create_common(struct inode *dir, struct dentry *dentry,
struct inode *inode)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(dir);
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_new_inode_args new_inode_args = {
.dir = dir,
@@ -6592,7 +6625,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
struct btrfs_trans_handle *trans = NULL;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct inode *inode = d_inode(old_dentry);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct fscrypt_name fname;
u64 index;
int err;
@@ -6756,7 +6789,6 @@ static int read_inline_extent(struct btrfs_inode *inode, struct btrfs_path *path
*
* @inode: file to search in
* @page: page to read extent data into if the extent is inline
- * @pg_offset: offset into @page to copy to
* @start: file offset
* @len: length of range starting at @start
*
@@ -6770,8 +6802,7 @@ static int read_inline_extent(struct btrfs_inode *inode, struct btrfs_path *path
* Return: ERR_PTR on error, non-NULL extent_map on success.
*/
struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
- struct page *page, size_t pg_offset,
- u64 start, u64 len)
+ struct page *page, u64 start, u64 len)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
int ret = 0;
@@ -6914,7 +6945,6 @@ next:
* ensured by tree-checker and inline extent creation path.
* Thus all members representing file offsets should be zero.
*/
- ASSERT(pg_offset == 0);
ASSERT(extent_start == 0);
ASSERT(em->start == 0);
@@ -7078,7 +7108,7 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
u64 *orig_start, u64 *orig_block_len,
u64 *ram_bytes, bool nowait, bool strict)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct can_nocow_file_extent_args nocow_args = { 0 };
struct btrfs_path *path;
int ret;
@@ -7317,7 +7347,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
unsigned int iomap_flags)
{
const bool nowait = (iomap_flags & IOMAP_NOWAIT);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct extent_map *em = *map;
int type;
u64 block_start, orig_start, orig_block_len, ram_bytes;
@@ -7457,7 +7487,7 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
struct iomap *srcmap)
{
struct iomap_iter *iter = container_of(iomap, struct iomap_iter, iomap);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct extent_map *em;
struct extent_state *cached_state = NULL;
struct btrfs_dio_data *dio_data = iter->private;
@@ -7555,7 +7585,7 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
if (ret < 0)
goto err;
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, start, len);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
goto unlock_err;
@@ -7903,7 +7933,7 @@ static void btrfs_readahead(struct readahead_control *rac)
*/
static void wait_subpage_spinlock(struct page *page)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
+ struct btrfs_fs_info *fs_info = page_to_fs_info(page);
struct folio *folio = page_folio(page);
struct btrfs_subpage *subpage;
@@ -7970,7 +8000,7 @@ static int btrfs_migrate_folio(struct address_space *mapping,
static void btrfs_invalidate_folio(struct folio *folio, size_t offset,
size_t length)
{
- struct btrfs_inode *inode = BTRFS_I(folio->mapping->host);
+ struct btrfs_inode *inode = folio_to_inode(folio);
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct extent_io_tree *tree = &inode->io_tree;
struct extent_state *cached_state = NULL;
@@ -8154,7 +8184,7 @@ vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf)
struct page *page = vmf->page;
struct folio *folio = page_folio(page);
struct inode *inode = file_inode(vmf->vma->vm_file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct btrfs_ordered_extent *ordered;
struct extent_state *cached_state = NULL;
@@ -8700,7 +8730,7 @@ int __init btrfs_init_cachep(void)
{
btrfs_inode_cachep = kmem_cache_create("btrfs_inode",
sizeof(struct btrfs_inode), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT,
+ SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT,
init_once);
if (!btrfs_inode_cachep)
goto fail;
@@ -8723,7 +8753,7 @@ static int btrfs_getattr(struct mnt_idmap *idmap,
u64 delalloc_bytes;
u64 inode_bytes;
struct inode *inode = d_inode(path->dentry);
- u32 blocksize = inode->i_sb->s_blocksize;
+ u32 blocksize = btrfs_sb(inode->i_sb)->sectorsize;
u32 bi_flags = BTRFS_I(inode)->flags;
u32 bi_ro_flags = BTRFS_I(inode)->ro_flags;
@@ -8763,7 +8793,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
struct inode *new_dir,
struct dentry *new_dentry)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(old_dir->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(old_dir);
struct btrfs_trans_handle *trans;
unsigned int trans_num_items;
struct btrfs_root *root = BTRFS_I(old_dir)->root;
@@ -9015,7 +9045,7 @@ static int btrfs_rename(struct mnt_idmap *idmap,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(old_dir->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(old_dir);
struct btrfs_new_inode_args whiteout_args = {
.dir = old_dir,
.dentry = old_dentry,
@@ -9457,7 +9487,7 @@ out:
static int btrfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *symname)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(dir);
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_path *path;
@@ -9638,7 +9668,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
loff_t actual_len, u64 *alloc_hint,
struct btrfs_trans_handle *trans)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct extent_map *em;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_key ins;
@@ -9790,7 +9820,7 @@ static int btrfs_permission(struct mnt_idmap *idmap,
static int btrfs_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
struct file *file, umode_t mode)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(dir);
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct inode *inode;
@@ -10164,7 +10194,7 @@ ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
cond_resched();
}
- em = btrfs_get_extent(inode, NULL, 0, start, lockend - start + 1);
+ em = btrfs_get_extent(inode, NULL, start, lockend - start + 1);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
goto out_unlock_extent;
@@ -10744,7 +10774,7 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
struct btrfs_block_group *bg;
u64 len = isize - start;
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, start, len);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
goto out;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 9876ee27f069..294e31edec9d 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -34,11 +34,9 @@
#include "export.h"
#include "transaction.h"
#include "btrfs_inode.h"
-#include "print-tree.h"
#include "volumes.h"
#include "locking.h"
#include "backref.h"
-#include "rcu-string.h"
#include "send.h"
#include "dev-replace.h"
#include "props.h"
@@ -47,9 +45,7 @@
#include "tree-log.h"
#include "compression.h"
#include "space-info.h"
-#include "delalloc-space.h"
#include "block-group.h"
-#include "subpage.h"
#include "fs.h"
#include "accessors.h"
#include "extent-tree.h"
@@ -231,6 +227,20 @@ static int check_fsflags_compatible(struct btrfs_fs_info *fs_info,
return 0;
}
+int btrfs_check_ioctl_vol_args_path(const struct btrfs_ioctl_vol_args *vol_args)
+{
+ if (memchr(vol_args->name, 0, sizeof(vol_args->name)) == NULL)
+ return -ENAMETOOLONG;
+ return 0;
+}
+
+static int btrfs_check_ioctl_vol_args2_subvol_name(const struct btrfs_ioctl_vol_args_v2 *vol_args2)
+{
+ if (memchr(vol_args2->name, 0, sizeof(vol_args2->name)) == NULL)
+ return -ENAMETOOLONG;
+ return 0;
+}
+
/*
* Set flags/xflags from the internal inode flags. The remaining items of
* fsxattr are zeroed.
@@ -247,7 +257,7 @@ int btrfs_fileattr_set(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa)
{
struct inode *inode = d_inode(dentry);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_inode *binode = BTRFS_I(inode);
struct btrfs_root *root = binode->root;
struct btrfs_trans_handle *trans;
@@ -528,7 +538,7 @@ static noinline int btrfs_ioctl_fitrim(struct btrfs_fs_info *fs_info,
* block group is in the logical address space, which can be any
* sectorsize aligned bytenr in the range [0, U64_MAX].
*/
- if (range.len < fs_info->sb->s_blocksize)
+ if (range.len < fs_info->sectorsize)
return -EINVAL;
range.minlen = max(range.minlen, minlen);
@@ -584,7 +594,7 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
struct inode *dir, struct dentry *dentry,
struct btrfs_qgroup_inherit *inherit)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(dir);
struct btrfs_trans_handle *trans;
struct btrfs_key key;
struct btrfs_root_item *root_item;
@@ -776,7 +786,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
struct dentry *dentry, bool readonly,
struct btrfs_qgroup_inherit *inherit)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(dir);
struct inode *inode;
struct btrfs_pending_snapshot *pending_snapshot;
unsigned int trans_num_items;
@@ -910,7 +920,9 @@ static int btrfs_may_delete(struct mnt_idmap *idmap,
if (d_really_is_negative(victim))
return -ENOENT;
- BUG_ON(d_inode(victim->d_parent) != dir);
+ /* The @victim is not inside @dir. */
+ if (d_inode(victim->d_parent) != dir)
+ return -EINVAL;
audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
error = inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC);
@@ -962,7 +974,7 @@ static noinline int btrfs_mksubvol(const struct path *parent,
struct btrfs_qgroup_inherit *inherit)
{
struct inode *dir = d_inode(parent->dentry);
- struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(dir);
struct dentry *dentry;
struct fscrypt_str name_str = FSTR_INIT((char *)name, namelen);
int error;
@@ -1097,7 +1109,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
{
BTRFS_DEV_LOOKUP_ARGS(args);
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
u64 new_size;
u64 old_size;
u64 devid = 1;
@@ -1128,7 +1140,10 @@ static noinline int btrfs_ioctl_resize(struct file *file,
ret = PTR_ERR(vol_args);
goto out_drop;
}
- vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
+ ret = btrfs_check_ioctl_vol_args_path(vol_args);
+ if (ret < 0)
+ goto out_free;
+
sizestr = vol_args->name;
cancel = (strcmp("cancel", sizestr) == 0);
ret = exclop_start_or_cancel_reloc(fs_info, BTRFS_EXCLOP_RESIZE, cancel);
@@ -1328,12 +1343,15 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args))
return PTR_ERR(vol_args);
- vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
+ ret = btrfs_check_ioctl_vol_args_path(vol_args);
+ if (ret < 0)
+ goto out;
ret = __btrfs_ioctl_snap_create(file, file_mnt_idmap(file),
vol_args->name, vol_args->fd, subvol,
false, NULL);
+out:
kfree(vol_args);
return ret;
}
@@ -1352,7 +1370,9 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args))
return PTR_ERR(vol_args);
- vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
+ ret = btrfs_check_ioctl_vol_args2_subvol_name(vol_args);
+ if (ret < 0)
+ goto free_args;
if (vol_args->flags & ~BTRFS_SUBVOL_CREATE_ARGS_MASK) {
ret = -EOPNOTSUPP;
@@ -1362,7 +1382,7 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
if (vol_args->flags & BTRFS_SUBVOL_RDONLY)
readonly = true;
if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) {
- u64 nums;
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(file_inode(file));
if (vol_args->size < sizeof(*inherit) ||
vol_args->size > PAGE_SIZE) {
@@ -1375,19 +1395,9 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
goto free_args;
}
- if (inherit->num_qgroups > PAGE_SIZE ||
- inherit->num_ref_copies > PAGE_SIZE ||
- inherit->num_excl_copies > PAGE_SIZE) {
- ret = -EINVAL;
- goto free_inherit;
- }
-
- nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
- 2 * inherit->num_excl_copies;
- if (vol_args->size != struct_size(inherit, qgroups, nums)) {
- ret = -EINVAL;
+ ret = btrfs_qgroup_check_inherit(fs_info, inherit, vol_args->size);
+ if (ret < 0)
goto free_inherit;
- }
}
ret = __btrfs_ioctl_snap_create(file, file_mnt_idmap(file),
@@ -1405,7 +1415,7 @@ free_args:
static noinline int btrfs_ioctl_subvol_getflags(struct inode *inode,
void __user *arg)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret = 0;
u64 flags = 0;
@@ -1428,7 +1438,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
void __user *arg)
{
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
u64 root_flags;
@@ -1675,7 +1685,7 @@ static noinline int search_ioctl(struct inode *inode,
u64 *buf_size,
char __user *ubuf)
{
- struct btrfs_fs_info *info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *info = inode_to_fs_info(inode);
struct btrfs_root *root;
struct btrfs_key key;
struct btrfs_path *path;
@@ -2346,9 +2356,9 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
bool destroy_v2)
{
struct dentry *parent = file->f_path.dentry;
- struct btrfs_fs_info *fs_info = btrfs_sb(parent->d_sb);
struct dentry *dentry;
struct inode *dir = d_inode(parent);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(dir);
struct inode *inode;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_root *dest = NULL;
@@ -2382,7 +2392,9 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
* name, same as v1 currently does.
*/
if (!(vol_args2->flags & BTRFS_SUBVOL_SPEC_BY_ID)) {
- vol_args2->name[BTRFS_SUBVOL_NAME_MAX] = 0;
+ err = btrfs_check_ioctl_vol_args2_subvol_name(vol_args2);
+ if (err < 0)
+ goto out;
subvol_name = vol_args2->name;
err = mnt_want_write_file(file);
@@ -2466,7 +2478,10 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
if (IS_ERR(vol_args))
return PTR_ERR(vol_args);
- vol_args->name[BTRFS_PATH_NAME_MAX] = 0;
+ err = btrfs_check_ioctl_vol_args_path(vol_args);
+ if (err < 0)
+ goto out;
+
subvol_name = vol_args->name;
err = mnt_want_write_file(file);
@@ -2677,12 +2692,16 @@ static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg)
goto out;
}
- vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
+ ret = btrfs_check_ioctl_vol_args_path(vol_args);
+ if (ret < 0)
+ goto out_free;
+
ret = btrfs_init_new_device(fs_info, vol_args->name);
if (!ret)
btrfs_info(fs_info, "disk added %s", vol_args->name);
+out_free:
kfree(vol_args);
out:
if (restore_op)
@@ -2696,7 +2715,7 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
{
BTRFS_DEV_LOOKUP_ARGS(args);
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_ioctl_vol_args_v2 *vol_args;
struct file *bdev_file = NULL;
int ret;
@@ -2714,7 +2733,10 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
goto out;
}
- vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
+ ret = btrfs_check_ioctl_vol_args2_subvol_name(vol_args);
+ if (ret < 0)
+ goto out;
+
if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID) {
args.devid = vol_args->devid;
} else if (!strcmp("cancel", vol_args->name)) {
@@ -2761,7 +2783,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
{
BTRFS_DEV_LOOKUP_ARGS(args);
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_ioctl_vol_args *vol_args;
struct file *bdev_file = NULL;
int ret;
@@ -2774,7 +2796,10 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
if (IS_ERR(vol_args))
return PTR_ERR(vol_args);
- vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
+ ret = btrfs_check_ioctl_vol_args_path(vol_args);
+ if (ret < 0)
+ goto out_free;
+
if (!strcmp("cancel", vol_args->name)) {
cancel = true;
} else {
@@ -2801,6 +2826,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
fput(bdev_file);
out:
btrfs_put_dev_args_from_path(&args);
+out_free:
kfree(vol_args);
return ret;
}
@@ -2904,7 +2930,7 @@ out:
static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
{
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_root *new_root;
struct btrfs_dir_item *di;
@@ -3178,7 +3204,7 @@ static noinline long btrfs_ioctl_wait_sync(struct btrfs_fs_info *fs_info,
static long btrfs_ioctl_scrub(struct file *file, void __user *arg)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(file_inode(file)->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(file_inode(file));
struct btrfs_ioctl_scrub_args *sa;
int ret;
@@ -3696,7 +3722,7 @@ out:
static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
{
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_ioctl_quota_ctl_args *sa;
int ret;
@@ -3738,7 +3764,7 @@ drop_write:
static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
{
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_ioctl_qgroup_assign_args *sa;
struct btrfs_trans_handle *trans;
@@ -3894,7 +3920,7 @@ drop_write:
static long btrfs_ioctl_quota_rescan(struct file *file, void __user *arg)
{
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_ioctl_quota_rescan_args *qsa;
int ret;
@@ -3958,7 +3984,7 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file,
struct btrfs_ioctl_received_subvol_args *sa)
{
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_root_item *root_item = &root->root_item;
struct btrfs_trans_handle *trans;
@@ -4146,7 +4172,7 @@ static int btrfs_ioctl_get_fslabel(struct btrfs_fs_info *fs_info,
static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg)
{
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_super_block *super_block = fs_info->super_copy;
struct btrfs_trans_handle *trans;
@@ -4289,7 +4315,7 @@ check_feature_bits(fs_info, FEAT_##mask_base, change_mask, flags, \
static int btrfs_ioctl_set_features(struct file *file, void __user *arg)
{
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_super_block *super_block = fs_info->super_copy;
struct btrfs_ioctl_feature_flags flags[2];
@@ -4580,7 +4606,7 @@ long btrfs_ioctl(struct file *file, unsigned int
cmd, unsigned long arg)
{
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_root *root = BTRFS_I(inode)->root;
void __user *argp = (void __user *)arg;
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index d51b9a2f2f6e..2c5dc25ec670 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -3,6 +3,15 @@
#ifndef BTRFS_IOCTL_H
#define BTRFS_IOCTL_H
+#include <linux/types.h>
+
+struct file;
+struct dentry;
+struct mnt_idmap;
+struct fileattr;
+struct btrfs_fs_info;
+struct btrfs_ioctl_balance_args;
+
long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
int btrfs_fileattr_get(struct dentry *dentry, struct fileattr *fa);
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 74d8e2003f58..99ccab86bb86 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -13,7 +13,6 @@
#include "ctree.h"
#include "extent_io.h"
#include "locking.h"
-#include "accessors.h"
/*
* Lockdep class keys for extent_buffer->lock's in this root. For a given
@@ -85,7 +84,7 @@ void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, int
{
struct btrfs_lockdep_keyset *ks;
- BUG_ON(level >= ARRAY_SIZE(ks->keys));
+ ASSERT(level < ARRAY_SIZE(ks->keys));
/* Find the matching keyset, id 0 is the default entry */
for (ks = btrfs_lockdep_keysets; ks->id; ks++)
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h
index 7d6ee1e609bf..9576f485a300 100644
--- a/fs/btrfs/locking.h
+++ b/fs/btrfs/locking.h
@@ -8,8 +8,14 @@
#include <linux/atomic.h>
#include <linux/wait.h>
+#include <linux/lockdep.h>
#include <linux/percpu_counter.h>
#include "extent_io.h"
+#include "locking.h"
+
+struct extent_buffer;
+struct btrfs_path;
+struct btrfs_root;
#define BTRFS_WRITE_LOCK 1
#define BTRFS_READ_LOCK 2
@@ -157,8 +163,6 @@ enum btrfs_lockdep_trans_states {
static_assert(BTRFS_NESTING_MAX <= MAX_LOCKDEP_SUBCLASSES,
"too many lock subclasses defined");
-struct btrfs_path;
-
void __btrfs_tree_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest);
void btrfs_tree_lock(struct extent_buffer *eb);
void btrfs_tree_unlock(struct extent_buffer *eb);
diff --git a/fs/btrfs/lru_cache.h b/fs/btrfs/lru_cache.h
index 00328c856be6..e32906ab6faa 100644
--- a/fs/btrfs/lru_cache.h
+++ b/fs/btrfs/lru_cache.h
@@ -3,8 +3,10 @@
#ifndef BTRFS_LRU_CACHE_H
#define BTRFS_LRU_CACHE_H
+#include <linux/types.h>
#include <linux/maple_tree.h>
#include <linux/list.h>
+#include "lru_cache.h"
/*
* A cache entry. This is meant to be embedded in a structure of a user of
@@ -50,11 +52,6 @@ struct btrfs_lru_cache {
#define btrfs_lru_cache_for_each_entry_safe(cache, entry, tmp) \
list_for_each_entry_safe_reverse((entry), (tmp), &(cache)->lru_list, lru_list)
-static inline unsigned int btrfs_lru_cache_size(const struct btrfs_lru_cache *cache)
-{
- return cache->size;
-}
-
static inline struct btrfs_lru_cache_entry *btrfs_lru_cache_lru_entry(
struct btrfs_lru_cache *cache)
{
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index e43bc0fdc74e..3e5d3b7028e8 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -214,7 +214,7 @@ int lzo_compress_pages(struct list_head *ws, struct address_space *mapping,
unsigned long *total_in, unsigned long *total_out)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
- const u32 sectorsize = btrfs_sb(mapping->host->i_sb)->sectorsize;
+ const u32 sectorsize = inode_to_fs_info(mapping->host)->sectorsize;
struct page *page_in = NULL;
char *sizes_ptr;
const unsigned long max_nr_page = *out_pages;
@@ -429,7 +429,7 @@ int lzo_decompress(struct list_head *ws, const u8 *data_in,
size_t destlen)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
- struct btrfs_fs_info *fs_info = btrfs_sb(dest_page->mapping->host->i_sb);
+ struct btrfs_fs_info *fs_info = page_to_fs_info(dest_page);
const u32 sectorsize = fs_info->sectorsize;
size_t in_len;
size_t out_len;
diff --git a/fs/btrfs/messages.c b/fs/btrfs/messages.c
index cdada4865837..c96dd66fd0f7 100644
--- a/fs/btrfs/messages.c
+++ b/fs/btrfs/messages.c
@@ -3,8 +3,6 @@
#include "fs.h"
#include "messages.h"
#include "discard.h"
-#include "transaction.h"
-#include "space-info.h"
#include "super.h"
#ifdef CONFIG_PRINTK
diff --git a/fs/btrfs/misc.h b/fs/btrfs/misc.h
index 40f2d9f1a17a..dde4904aead9 100644
--- a/fs/btrfs/misc.h
+++ b/fs/btrfs/misc.h
@@ -3,6 +3,8 @@
#ifndef BTRFS_MISC_H
#define BTRFS_MISC_H
+#include <linux/types.h>
+#include <linux/bitmap.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/math64.h>
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 59850dc17b22..b749ba45da2b 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -19,7 +19,6 @@
#include "qgroup.h"
#include "subpage.h"
#include "file.h"
-#include "super.h"
static struct kmem_cache *btrfs_ordered_extent_cache;
@@ -1236,10 +1235,7 @@ struct btrfs_ordered_extent *btrfs_split_ordered_extent(
int __init ordered_data_init(void)
{
- btrfs_ordered_extent_cache = kmem_cache_create("btrfs_ordered_extent",
- sizeof(struct btrfs_ordered_extent), 0,
- SLAB_MEM_SPREAD,
- NULL);
+ btrfs_ordered_extent_cache = KMEM_CACHE(btrfs_ordered_extent, 0);
if (!btrfs_ordered_extent_cache)
return -ENOMEM;
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 127ef8bf0ffd..34413fc5b4bd 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -6,6 +6,21 @@
#ifndef BTRFS_ORDERED_DATA_H
#define BTRFS_ORDERED_DATA_H
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/refcount.h>
+#include <linux/completion.h>
+#include <linux/rbtree.h>
+#include <linux/wait.h>
+#include "async-thread.h"
+
+struct inode;
+struct page;
+struct extent_state;
+struct btrfs_inode;
+struct btrfs_root;
+struct btrfs_fs_info;
+
struct btrfs_ordered_sum {
/*
* Logical start address and length for of the blocks covered by
diff --git a/fs/btrfs/orphan.c b/fs/btrfs/orphan.c
index 7a1b021b5669..6195a2215b8f 100644
--- a/fs/btrfs/orphan.c
+++ b/fs/btrfs/orphan.c
@@ -4,7 +4,6 @@
*/
#include "ctree.h"
-#include "disk-io.h"
#include "orphan.h"
int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/orphan.h b/fs/btrfs/orphan.h
index 3faab5cbb59a..aa54a88a60de 100644
--- a/fs/btrfs/orphan.h
+++ b/fs/btrfs/orphan.h
@@ -3,6 +3,11 @@
#ifndef BTRFS_ORPHAN_H
#define BTRFS_ORPHAN_H
+#include <linux/types.h>
+
+struct btrfs_trans_handle;
+struct btrfs_root;
+
int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 offset);
int btrfs_del_orphan_item(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h
index c42bc666d5ee..8504bf1702c7 100644
--- a/fs/btrfs/print-tree.h
+++ b/fs/btrfs/print-tree.h
@@ -9,6 +9,9 @@
/* Buffer size to contain tree name and possibly additional data (offset) */
#define BTRFS_ROOT_NAME_BUF_LEN 48
+struct extent_buffer;
+struct btrfs_key;
+
void btrfs_print_leaf(const struct extent_buffer *l);
void btrfs_print_tree(const struct extent_buffer *c, bool follow);
const char *btrfs_root_name(const struct btrfs_key *key, char *buf);
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c
index f9bf591a0718..2a9b7b029eeb 100644
--- a/fs/btrfs/props.c
+++ b/fs/btrfs/props.c
@@ -4,6 +4,7 @@
*/
#include <linux/hashtable.h>
+#include <linux/xattr.h>
#include "messages.h"
#include "props.h"
#include "btrfs_inode.h"
@@ -302,7 +303,7 @@ static int prop_compression_validate(const struct btrfs_inode *inode,
static int prop_compression_apply(struct inode *inode, const char *value,
size_t len)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
int type;
/* Reset to defaults */
diff --git a/fs/btrfs/props.h b/fs/btrfs/props.h
index 6e283196e38a..f60cd89feb29 100644
--- a/fs/btrfs/props.h
+++ b/fs/btrfs/props.h
@@ -6,7 +6,12 @@
#ifndef BTRFS_PROPS_H
#define BTRFS_PROPS_H
-#include "ctree.h"
+#include <linux/compiler_types.h>
+
+struct inode;
+struct btrfs_inode;
+struct btrfs_path;
+struct btrfs_trans_handle;
int __init btrfs_props_init(void);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 5470e1cdf10c..5f90f0605b12 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1324,7 +1324,7 @@ static int flush_reservations(struct btrfs_fs_info *fs_info)
trans = btrfs_join_transaction(fs_info->tree_root);
if (IS_ERR(trans))
return PTR_ERR(trans);
- btrfs_commit_transaction(trans);
+ ret = btrfs_commit_transaction(trans);
return ret;
}
@@ -2505,8 +2505,8 @@ int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
struct extent_buffer *eb = root_eb;
struct btrfs_path *path = NULL;
- BUG_ON(root_level < 0 || root_level >= BTRFS_MAX_LEVEL);
- BUG_ON(root_eb == NULL);
+ ASSERT(0 <= root_level && root_level < BTRFS_MAX_LEVEL);
+ ASSERT(root_eb != NULL);
if (!btrfs_qgroup_full_accounting(fs_info))
return 0;
@@ -2861,8 +2861,6 @@ int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr,
if (nr_old_roots == 0 && nr_new_roots == 0)
goto out_free;
- BUG_ON(!fs_info->quota_root);
-
trace_btrfs_qgroup_account_extent(fs_info, trans->transid, bytenr,
num_bytes, nr_old_roots, nr_new_roots);
@@ -2959,11 +2957,6 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
ctx.roots = NULL;
}
- /* Free the reserved data space */
- btrfs_qgroup_free_refroot(fs_info,
- record->data_rsv_refroot,
- record->data_rsv,
- BTRFS_QGROUP_RSV_DATA);
/*
* Use BTRFS_SEQ_LAST as time_seq to do special search,
* which doesn't lock tree or delayed_refs and search
@@ -2987,6 +2980,11 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
record->old_roots = NULL;
new_roots = NULL;
}
+ /* Free the reserved data space */
+ btrfs_qgroup_free_refroot(fs_info,
+ record->data_rsv_refroot,
+ record->data_rsv,
+ BTRFS_QGROUP_RSV_DATA);
cleanup:
ulist_free(record->old_roots);
ulist_free(new_roots);
@@ -3048,6 +3046,57 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans)
return ret;
}
+int btrfs_qgroup_check_inherit(struct btrfs_fs_info *fs_info,
+ struct btrfs_qgroup_inherit *inherit,
+ size_t size)
+{
+ if (inherit->flags & ~BTRFS_QGROUP_INHERIT_FLAGS_SUPP)
+ return -EOPNOTSUPP;
+ if (size < sizeof(*inherit) || size > PAGE_SIZE)
+ return -EINVAL;
+
+ /*
+ * In the past we allowed btrfs_qgroup_inherit to specify to copy
+ * rfer/excl numbers directly from other qgroups. This behavior has
+ * been disabled in userspace for a very long time, but here we should
+ * also disable it in kernel, as this behavior is known to mark qgroup
+ * inconsistent, and a rescan would wipe out the changes anyway.
+ *
+ * Reject any btrfs_qgroup_inherit with num_ref_copies or num_excl_copies.
+ */
+ if (inherit->num_ref_copies > 0 || inherit->num_excl_copies > 0)
+ return -EINVAL;
+
+ if (inherit->num_qgroups > PAGE_SIZE)
+ return -EINVAL;
+
+ if (size != struct_size(inherit, qgroups, inherit->num_qgroups))
+ return -EINVAL;
+
+ /*
+ * Now check all the remaining qgroups, they should all:
+ *
+ * - Exist
+ * - Be higher level qgroups.
+ */
+ for (int i = 0; i < inherit->num_qgroups; i++) {
+ struct btrfs_qgroup *qgroup;
+ u64 qgroupid = inherit->qgroups[i];
+
+ if (btrfs_qgroup_level(qgroupid) == 0)
+ return -EINVAL;
+
+ spin_lock(&fs_info->qgroup_lock);
+ qgroup = find_qgroup_rb(fs_info, qgroupid);
+ if (!qgroup) {
+ spin_unlock(&fs_info->qgroup_lock);
+ return -ENOENT;
+ }
+ spin_unlock(&fs_info->qgroup_lock);
+ }
+ return 0;
+}
+
static int qgroup_auto_inherit(struct btrfs_fs_info *fs_info,
u64 inode_rootid,
struct btrfs_qgroup_inherit **inherit)
@@ -3090,6 +3139,62 @@ static int qgroup_auto_inherit(struct btrfs_fs_info *fs_info,
}
/*
+ * Check if we can skip rescan when inheriting qgroups. If @src has a single
+ * @parent, and that @parent is owning all its bytes exclusively, we can skip
+ * the full rescan, by just adding nodesize to the @parent's excl/rfer.
+ *
+ * Return <0 for fatal errors (like srcid/parentid has no qgroup).
+ * Return 0 if a quick inherit is done.
+ * Return >0 if a quick inherit is not possible, and a full rescan is needed.
+ */
+static int qgroup_snapshot_quick_inherit(struct btrfs_fs_info *fs_info,
+ u64 srcid, u64 parentid)
+{
+ struct btrfs_qgroup *src;
+ struct btrfs_qgroup *parent;
+ struct btrfs_qgroup_list *list;
+ int nr_parents = 0;
+
+ src = find_qgroup_rb(fs_info, srcid);
+ if (!src)
+ return -ENOENT;
+ parent = find_qgroup_rb(fs_info, parentid);
+ if (!parent)
+ return -ENOENT;
+
+ /*
+ * Source has no parent qgroup, but our new qgroup would have one.
+ * Qgroup numbers would become inconsistent.
+ */
+ if (list_empty(&src->groups))
+ return 1;
+
+ list_for_each_entry(list, &src->groups, next_group) {
+ /* The parent is not the same, quick update is not possible. */
+ if (list->group->qgroupid != parentid)
+ return 1;
+ nr_parents++;
+ /*
+ * More than one parent qgroup, we can't be sure about accounting
+ * consistency.
+ */
+ if (nr_parents > 1)
+ return 1;
+ }
+
+ /*
+ * The parent is not exclusively owning all its bytes. We're not sure
+ * if the source has any bytes not fully owned by the parent.
+ */
+ if (parent->excl != parent->rfer)
+ return 1;
+
+ parent->excl += fs_info->nodesize;
+ parent->rfer += fs_info->nodesize;
+ return 0;
+}
+
+/*
* Copy the accounting information between qgroups. This is necessary
* when a snapshot or a subvolume is created. Throwing an error will
* cause a transaction abort so we take extra care here to only error
@@ -3257,6 +3362,13 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
qgroup_dirty(fs_info, dstgroup);
qgroup_dirty(fs_info, srcgroup);
+
+ /*
+ * If the source qgroup has parent but the new one doesn't,
+ * we need a full rescan.
+ */
+ if (!inherit && !list_empty(&srcgroup->groups))
+ need_rescan = true;
}
if (!inherit)
@@ -3271,14 +3383,16 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
if (ret)
goto unlock;
}
+ if (srcid) {
+ /* Check if we can do a quick inherit. */
+ ret = qgroup_snapshot_quick_inherit(fs_info, srcid, *i_qgroups);
+ if (ret < 0)
+ goto unlock;
+ if (ret > 0)
+ need_rescan = true;
+ ret = 0;
+ }
++i_qgroups;
-
- /*
- * If we're doing a snapshot, and adding the snapshot to a new
- * qgroup, the numbers are guaranteed to be incorrect.
- */
- if (srcid)
- need_rescan = true;
}
for (i = 0; i < inherit->num_ref_copies; ++i, i_qgroups += 2) {
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index be18c862e64e..706640be0ec2 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -6,12 +6,22 @@
#ifndef BTRFS_QGROUP_H
#define BTRFS_QGROUP_H
+#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/rbtree.h>
#include <linux/kobject.h>
-#include "ulist.h"
-#include "delayed-ref.h"
-#include "misc.h"
+#include <linux/list.h>
+#include <uapi/linux/btrfs_tree.h>
+
+struct extent_buffer;
+struct extent_changeset;
+struct btrfs_delayed_extent_op;
+struct btrfs_fs_info;
+struct btrfs_root;
+struct btrfs_ioctl_quota_ctl_args;
+struct btrfs_trans_handle;
+struct btrfs_delayed_ref_root;
+struct btrfs_inode;
/*
* Btrfs qgroup overview
@@ -321,7 +331,6 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid,
struct btrfs_qgroup_limit *limit);
int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
-struct btrfs_delayed_extent_op;
int btrfs_qgroup_trace_extent_nolock(
struct btrfs_fs_info *fs_info,
@@ -341,6 +350,9 @@ int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr,
struct ulist *new_roots);
int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans);
int btrfs_run_qgroups(struct btrfs_trans_handle *trans);
+int btrfs_qgroup_check_inherit(struct btrfs_fs_info *fs_info,
+ struct btrfs_qgroup_inherit *inherit,
+ size_t size);
int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
u64 objectid, u64 inode_rootid,
struct btrfs_qgroup_inherit *inherit);
diff --git a/fs/btrfs/raid-stripe-tree.c b/fs/btrfs/raid-stripe-tree.c
index 9589362acfbf..6af6b4b9a32e 100644
--- a/fs/btrfs/raid-stripe-tree.c
+++ b/fs/btrfs/raid-stripe-tree.c
@@ -11,7 +11,6 @@
#include "disk-io.h"
#include "raid-stripe-tree.h"
#include "volumes.h"
-#include "misc.h"
#include "print-tree.h"
int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 length)
diff --git a/fs/btrfs/raid-stripe-tree.h b/fs/btrfs/raid-stripe-tree.h
index cdb58b38fcb5..c9c258f84903 100644
--- a/fs/btrfs/raid-stripe-tree.h
+++ b/fs/btrfs/raid-stripe-tree.h
@@ -6,6 +6,10 @@
#ifndef BTRFS_RAID_STRIPE_TREE_H
#define BTRFS_RAID_STRIPE_TREE_H
+#include <linux/types.h>
+#include <uapi/linux/btrfs_tree.h>
+#include "fs.h"
+
#define BTRFS_RST_SUPP_BLOCK_GROUP_MASK (BTRFS_BLOCK_GROUP_DUP | \
BTRFS_BLOCK_GROUP_RAID1_MASK | \
BTRFS_BLOCK_GROUP_RAID0 | \
@@ -13,6 +17,7 @@
struct btrfs_io_context;
struct btrfs_io_stripe;
+struct btrfs_fs_info;
struct btrfs_ordered_extent;
struct btrfs_trans_handle;
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 792c8e17c31d..6f4a9cfeea44 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -14,7 +14,6 @@
#include <linux/raid/xor.h>
#include <linux/mm.h>
#include "messages.h"
-#include "misc.h"
#include "ctree.h"
#include "disk-io.h"
#include "volumes.h"
@@ -918,6 +917,13 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
*/
ASSERT(stripe_nsectors <= BITS_PER_LONG);
+ /*
+ * Real stripes must be between 2 (2 disks RAID5, aka RAID1) and 256
+ * (limited by u8).
+ */
+ ASSERT(real_stripes >= 2);
+ ASSERT(real_stripes <= U8_MAX);
+
rbio = kzalloc(sizeof(*rbio), GFP_NOFS);
if (!rbio)
return ERR_PTR(-ENOMEM);
@@ -955,6 +961,7 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
ASSERT(btrfs_nr_parity_stripes(bioc->map_type));
rbio->nr_data = real_stripes - btrfs_nr_parity_stripes(bioc->map_type);
+ ASSERT(rbio->nr_data > 0);
return rbio;
}
@@ -1181,6 +1188,26 @@ static inline void bio_list_put(struct bio_list *bio_list)
bio_put(bio);
}
+static void assert_rbio(struct btrfs_raid_bio *rbio)
+{
+ if (!IS_ENABLED(CONFIG_BTRFS_DEBUG) ||
+ !IS_ENABLED(CONFIG_BTRFS_ASSERT))
+ return;
+
+ /*
+ * At least two stripes (2 disks RAID5), and since real_stripes is U8,
+ * we won't go beyond 256 disks anyway.
+ */
+ ASSERT(rbio->real_stripes >= 2);
+ ASSERT(rbio->nr_data > 0);
+
+ /*
+ * This is another check to make sure nr data stripes is smaller
+ * than total stripes.
+ */
+ ASSERT(rbio->nr_data < rbio->real_stripes);
+}
+
/* Generate PQ for one vertical stripe. */
static void generate_pq_vertical(struct btrfs_raid_bio *rbio, int sectornr)
{
@@ -1212,6 +1239,7 @@ static void generate_pq_vertical(struct btrfs_raid_bio *rbio, int sectornr)
pointers[stripe++] = kmap_local_page(sector->page) +
sector->pgoff;
+ assert_rbio(rbio);
raid6_call.gen_syndrome(rbio->real_stripes, sectorsize,
pointers);
} else {
@@ -2473,6 +2501,7 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio)
}
if (has_qstripe) {
+ assert_rbio(rbio);
/* RAID6, call the library function to fill in our P/Q */
raid6_call.gen_syndrome(rbio->real_stripes, sectorsize,
pointers);
diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h
index 470213688872..0d7b4c2fb6ae 100644
--- a/fs/btrfs/raid56.h
+++ b/fs/btrfs/raid56.h
@@ -7,9 +7,18 @@
#ifndef BTRFS_RAID56_H
#define BTRFS_RAID56_H
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/bio.h>
+#include <linux/refcount.h>
#include <linux/workqueue.h>
#include "volumes.h"
+struct page;
+struct sector_ptr;
+struct btrfs_fs_info;
+
enum btrfs_rbio_ops {
BTRFS_RBIO_WRITE,
BTRFS_RBIO_READ_REBUILD,
diff --git a/fs/btrfs/rcu-string.h b/fs/btrfs/rcu-string.h
index 5c2b66d155ef..1c2d7cb1fe6f 100644
--- a/fs/btrfs/rcu-string.h
+++ b/fs/btrfs/rcu-string.h
@@ -6,6 +6,12 @@
#ifndef BTRFS_RCU_STRING_H
#define BTRFS_RCU_STRING_H
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/rcupdate.h>
+#include <linux/printk.h>
+
struct rcu_string {
struct rcu_head rcu;
char str[];
diff --git a/fs/btrfs/ref-verify.h b/fs/btrfs/ref-verify.h
index 855de37719b5..3511e1a5c96b 100644
--- a/fs/btrfs/ref-verify.h
+++ b/fs/btrfs/ref-verify.h
@@ -6,7 +6,16 @@
#ifndef BTRFS_REF_VERIFY_H
#define BTRFS_REF_VERIFY_H
+#include <linux/types.h>
+#include <linux/rbtree_types.h>
+
+struct btrfs_fs_info;
+struct btrfs_ref;
+
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
+
+#include <linux/spinlock.h>
+
int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info);
void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info);
int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c
index ae90894dc7dc..08d0fb46ceec 100644
--- a/fs/btrfs/reflink.c
+++ b/fs/btrfs/reflink.c
@@ -174,7 +174,7 @@ static int clone_copy_inline_extent(struct inode *dst,
char *inline_data,
struct btrfs_trans_handle **trans_out)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(dst->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(dst);
struct btrfs_root *root = BTRFS_I(dst)->root;
const u64 aligned_end = ALIGN(new_key->offset + datal,
fs_info->sectorsize);
@@ -337,7 +337,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
const u64 off, const u64 olen, const u64 olen_aligned,
const u64 destoff, int no_time_update)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_path *path = NULL;
struct extent_buffer *leaf;
struct btrfs_trans_handle *trans;
@@ -663,7 +663,7 @@ static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 len,
struct inode *dst, u64 dst_loff)
{
struct btrfs_fs_info *fs_info = BTRFS_I(src)->root->fs_info;
- const u64 bs = fs_info->sb->s_blocksize;
+ const u64 bs = fs_info->sectorsize;
int ret;
/*
@@ -726,11 +726,11 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
{
struct inode *inode = file_inode(file);
struct inode *src = file_inode(file_src);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
int ret;
int wb_ret;
u64 len = olen;
- u64 bs = fs_info->sb->s_blocksize;
+ u64 bs = fs_info->sectorsize;
/*
* VFS's generic_remap_file_range_prep() protects us from cloning the
@@ -796,7 +796,7 @@ static int btrfs_remap_file_range_prep(struct file *file_in, loff_t pos_in,
{
struct inode *inode_in = file_inode(file_in);
struct inode *inode_out = file_inode(file_out);
- u64 bs = BTRFS_I(inode_out)->root->fs_info->sb->s_blocksize;
+ u64 bs = BTRFS_I(inode_out)->root->fs_info->sectorsize;
u64 wb_len;
int ret;
diff --git a/fs/btrfs/reflink.h b/fs/btrfs/reflink.h
index ecb309b4dad0..1e291f7d85c4 100644
--- a/fs/btrfs/reflink.h
+++ b/fs/btrfs/reflink.h
@@ -3,7 +3,9 @@
#ifndef BTRFS_REFLINK_H
#define BTRFS_REFLINK_H
-#include <linux/fs.h>
+#include <linux/types.h>
+
+struct file;
loff_t btrfs_remap_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index abe594f77f99..f96f267fb4aa 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -523,7 +523,8 @@ static noinline_for_stack struct btrfs_backref_node *build_backref_tree(
if (handle_useless_nodes(rc, node))
node = NULL;
out:
- btrfs_backref_iter_free(iter);
+ btrfs_free_path(iter->path);
+ kfree(iter);
btrfs_free_path(path);
if (err) {
btrfs_backref_error_cleanup(cache, node);
@@ -2987,7 +2988,7 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra,
const struct file_extent_cluster *cluster,
int *cluster_nr, unsigned long page_index)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
u64 offset = BTRFS_I(inode)->index_cnt;
const unsigned long last_index = (cluster->end - offset) >> PAGE_SHIFT;
gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
diff --git a/fs/btrfs/relocation.h b/fs/btrfs/relocation.h
index 5fb60f2deb53..788c86d8633a 100644
--- a/fs/btrfs/relocation.h
+++ b/fs/btrfs/relocation.h
@@ -3,6 +3,15 @@
#ifndef BTRFS_RELOCATION_H
#define BTRFS_RELOCATION_H
+#include <linux/types.h>
+
+struct extent_buffer;
+struct btrfs_fs_info;
+struct btrfs_root;
+struct btrfs_trans_handle;
+struct btrfs_ordered_extent;
+struct btrfs_pending_snapshot;
+
int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start);
int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, struct btrfs_root *root);
int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 603ad1459368..4bb538a372ce 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -10,7 +10,6 @@
#include "messages.h"
#include "transaction.h"
#include "disk-io.h"
-#include "print-tree.h"
#include "qgroup.h"
#include "space-info.h"
#include "accessors.h"
@@ -82,7 +81,14 @@ int btrfs_find_root(struct btrfs_root *root, const struct btrfs_key *search_key,
if (ret > 0)
goto out;
} else {
- BUG_ON(ret == 0); /* Logical error */
+ /*
+ * Key with offset -1 found, there would have to exist a root
+ * with such id, but this is out of the valid range.
+ */
+ if (ret == 0) {
+ ret = -EUCLEAN;
+ goto out;
+ }
if (path->slots[0] == 0)
goto out;
path->slots[0]--;
@@ -323,8 +329,11 @@ int btrfs_del_root(struct btrfs_trans_handle *trans,
ret = btrfs_search_slot(trans, root, key, path, -1, 1);
if (ret < 0)
goto out;
-
- BUG_ON(ret != 0);
+ if (ret != 0) {
+ /* The root must exist but we did not find it by the key. */
+ ret = -EUCLEAN;
+ goto out;
+ }
ret = btrfs_del_item(trans, root, path);
out:
diff --git a/fs/btrfs/root-tree.h b/fs/btrfs/root-tree.h
index 8b2c3859e464..6f929cf3bd49 100644
--- a/fs/btrfs/root-tree.h
+++ b/fs/btrfs/root-tree.h
@@ -3,7 +3,17 @@
#ifndef BTRFS_ROOT_TREE_H
#define BTRFS_ROOT_TREE_H
+#include <linux/types.h>
+
struct fscrypt_str;
+struct extent_buffer;
+struct btrfs_key;
+struct btrfs_root;
+struct btrfs_root_item;
+struct btrfs_path;
+struct btrfs_fs_info;
+struct btrfs_block_rsv;
+struct btrfs_trans_handle;
int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
struct btrfs_block_rsv *rsv,
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 0123d2728923..c4bd0e60db59 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1390,8 +1390,15 @@ static int find_first_extent_item(struct btrfs_root *extent_root,
ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
if (ret < 0)
return ret;
+ if (ret == 0) {
+ /*
+ * Key with offset -1 found, there would have to exist an extent
+ * item with such offset, but this is out of the valid range.
+ */
+ btrfs_release_path(path);
+ return -EUCLEAN;
+ }
- ASSERT(ret > 0);
/*
* Here we intentionally pass 0 as @min_objectid, as there could be
* an extent item starting before @search_start.
diff --git a/fs/btrfs/scrub.h b/fs/btrfs/scrub.h
index 7639103ebf9d..f0df597b75c7 100644
--- a/fs/btrfs/scrub.h
+++ b/fs/btrfs/scrub.h
@@ -3,6 +3,12 @@
#ifndef BTRFS_SCRUB_H
#define BTRFS_SCRUB_H
+#include <linux/types.h>
+
+struct btrfs_fs_info;
+struct btrfs_device;
+struct btrfs_scrub_progress;
+
int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
u64 end, struct btrfs_scrub_progress *progress,
int readonly, int is_dev_replace);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index e48a063ef085..50b4a76ac88e 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -25,7 +25,6 @@
#include "btrfs_inode.h"
#include "transaction.h"
#include "compression.h"
-#include "xattr.h"
#include "print-tree.h"
#include "accessors.h"
#include "dir-item.h"
@@ -777,7 +776,12 @@ static int begin_cmd(struct send_ctx *sctx, int cmd)
if (WARN_ON(!sctx->send_buf))
return -EINVAL;
- BUG_ON(sctx->send_size);
+ if (unlikely(sctx->send_size != 0)) {
+ btrfs_err(sctx->send_root->fs_info,
+ "send: command header buffer not empty cmd %d offset %llu",
+ cmd, sctx->send_off);
+ return -EINVAL;
+ }
sctx->send_size += sizeof(*hdr);
hdr = (struct btrfs_cmd_header *)sctx->send_buf;
@@ -1070,7 +1074,15 @@ static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path,
ret = PTR_ERR(start);
goto out;
}
- BUG_ON(start < p->buf);
+ if (unlikely(start < p->buf)) {
+ btrfs_err(root->fs_info,
+ "send: path ref buffer underflow for key (%llu %u %llu)",
+ found_key->objectid,
+ found_key->type,
+ found_key->offset);
+ ret = -EINVAL;
+ goto out;
+ }
}
p->start = start;
} else {
@@ -1406,7 +1418,7 @@ static bool lookup_backref_cache(u64 leaf_bytenr, void *ctx,
struct btrfs_lru_cache_entry *raw_entry;
struct backref_cache_entry *entry;
- if (btrfs_lru_cache_size(&sctx->backref_cache) == 0)
+ if (sctx->backref_cache.size == 0)
return false;
/*
@@ -1504,7 +1516,7 @@ static void store_backref_cache(u64 leaf_bytenr, const struct ulist *root_ids,
* transaction handle or holding fs_info->commit_root_sem, so no need
* to take any lock here.
*/
- if (btrfs_lru_cache_size(&sctx->backref_cache) == 1)
+ if (sctx->backref_cache.size == 1)
sctx->backref_cache_last_reloc_trans = fs_info->last_reloc_trans;
}
@@ -2809,8 +2821,7 @@ static int cache_dir_utimes(struct send_ctx *sctx, u64 dir, u64 gen)
static int trim_dir_utimes_cache(struct send_ctx *sctx)
{
- while (btrfs_lru_cache_size(&sctx->dir_utimes_cache) >
- SEND_MAX_DIR_UTIMES_CACHE_SIZE) {
+ while (sctx->dir_utimes_cache.size > SEND_MAX_DIR_UTIMES_CACHE_SIZE) {
struct btrfs_lru_cache_entry *lru;
int ret;
@@ -4182,7 +4193,13 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
* This should never happen as the root dir always has the same ref
* which is always '..'
*/
- BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID);
+ if (unlikely(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID)) {
+ btrfs_err(fs_info,
+ "send: unexpected inode %llu in process_recorded_refs()",
+ sctx->cur_ino);
+ ret = -EINVAL;
+ goto out;
+ }
valid_path = fs_path_alloc();
if (!valid_path) {
@@ -6140,7 +6157,7 @@ static int send_write_or_clone(struct send_ctx *sctx,
int ret = 0;
u64 offset = key->offset;
u64 end;
- u64 bs = sctx->send_root->fs_info->sb->s_blocksize;
+ u64 bs = sctx->send_root->fs_info->sectorsize;
end = min_t(u64, btrfs_file_extent_end(path), sctx->cur_inode_size);
if (offset >= end)
@@ -6458,21 +6475,18 @@ static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path,
if (sctx->cur_ino != key->objectid || !need_send_hole(sctx))
return 0;
- if (sctx->cur_inode_last_extent == (u64)-1) {
- ret = get_last_extent(sctx, key->offset - 1);
- if (ret)
- return ret;
- }
-
- if (path->slots[0] == 0 &&
- sctx->cur_inode_last_extent < key->offset) {
- /*
- * We might have skipped entire leafs that contained only
- * file extent items for our current inode. These leafs have
- * a generation number smaller (older) than the one in the
- * current leaf and the leaf our last extent came from, and
- * are located between these 2 leafs.
- */
+ /*
+ * Get last extent's end offset (exclusive) if we haven't determined it
+ * yet (we're processing the first file extent item that is new), or if
+ * we're at the first slot of a leaf and the last extent's end is less
+ * than the current extent's offset, because we might have skipped
+ * entire leaves that contained only file extent items for our current
+ * inode. These leaves have a generation number smaller (older) than the
+ * one in the current leaf and the leaf our last extent came from, and
+ * are located between these 2 leaves.
+ */
+ if ((sctx->cur_inode_last_extent == (u64)-1) ||
+ (path->slots[0] == 0 && sctx->cur_inode_last_extent < key->offset)) {
ret = get_last_extent(sctx, key->offset - 1);
if (ret)
return ret;
@@ -7429,8 +7443,8 @@ static int tree_move_down(struct btrfs_path *path, int *level, u64 reada_min_gen
u64 reada_done = 0;
lockdep_assert_held_read(&parent->fs_info->commit_root_sem);
+ ASSERT(*level != 0);
- BUG_ON(*level == 0);
eb = btrfs_read_node_slot(parent, slot);
if (IS_ERR(eb))
return PTR_ERR(eb);
diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h
index 4f5509cb1803..dd1c9f02b011 100644
--- a/fs/btrfs/send.h
+++ b/fs/btrfs/send.h
@@ -8,6 +8,11 @@
#define BTRFS_SEND_H
#include <linux/types.h>
+#include <linux/sizes.h>
+#include <linux/align.h>
+
+struct inode;
+struct btrfs_ioctl_send_args;
#define BTRFS_SEND_STREAM_MAGIC "btrfs-stream"
/* Conditional support for the upcoming protocol version. */
@@ -25,9 +30,6 @@
#define BTRFS_SEND_BUF_SIZE_V1 SZ_64K
#define BTRFS_SEND_BUF_SIZE_V2 ALIGN(SZ_16K + BTRFS_MAX_COMPRESSED, PAGE_SIZE)
-struct inode;
-struct btrfs_ioctl_send_args;
-
enum btrfs_tlv_type {
BTRFS_TLV_U8,
BTRFS_TLV_U16,
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 3b54eb583474..d620323d08ea 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -9,7 +9,6 @@
#include "ordered-data.h"
#include "transaction.h"
#include "block-group.h"
-#include "zoned.h"
#include "fs.h"
#include "accessors.h"
#include "extent-tree.h"
diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h
index 92c595fed1b0..a733458fd13b 100644
--- a/fs/btrfs/space-info.h
+++ b/fs/btrfs/space-info.h
@@ -4,8 +4,17 @@
#define BTRFS_SPACE_INFO_H
#include <trace/events/btrfs.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <linux/kobject.h>
+#include <linux/lockdep.h>
+#include <linux/wait.h>
+#include <linux/rwsem.h>
#include "volumes.h"
+struct btrfs_fs_info;
+struct btrfs_block_group;
+
/*
* Different levels for to flush space when doing space reservations.
*
diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c
index 0e49dab8dad2..54736f6238e6 100644
--- a/fs/btrfs/subpage.c
+++ b/fs/btrfs/subpage.c
@@ -111,6 +111,9 @@ void btrfs_init_subpage_info(struct btrfs_subpage_info *subpage_info, u32 sector
subpage_info->checked_offset = cur;
cur += nr_bits;
+ subpage_info->locked_offset = cur;
+ cur += nr_bits;
+
subpage_info->total_nr_bits = cur;
}
@@ -237,28 +240,58 @@ static void btrfs_subpage_assert(const struct btrfs_fs_info *fs_info,
start + len <= folio_pos(folio) + PAGE_SIZE);
}
+#define subpage_calc_start_bit(fs_info, folio, name, start, len) \
+({ \
+ unsigned int start_bit; \
+ \
+ btrfs_subpage_assert(fs_info, folio, start, len); \
+ start_bit = offset_in_page(start) >> fs_info->sectorsize_bits; \
+ start_bit += fs_info->subpage_info->name##_offset; \
+ start_bit; \
+})
+
void btrfs_subpage_start_reader(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len)
{
struct btrfs_subpage *subpage = folio_get_private(folio);
+ const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len);
const int nbits = len >> fs_info->sectorsize_bits;
+ unsigned long flags;
+
btrfs_subpage_assert(fs_info, folio, start, len);
+ spin_lock_irqsave(&subpage->lock, flags);
+ /*
+ * Even though it's just for reading the page, no one should have
+ * locked the subpage range.
+ */
+ ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits));
+ bitmap_set(subpage->bitmaps, start_bit, nbits);
atomic_add(nbits, &subpage->readers);
+ spin_unlock_irqrestore(&subpage->lock, flags);
}
void btrfs_subpage_end_reader(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len)
{
struct btrfs_subpage *subpage = folio_get_private(folio);
+ const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len);
const int nbits = len >> fs_info->sectorsize_bits;
+ unsigned long flags;
bool is_data;
bool last;
btrfs_subpage_assert(fs_info, folio, start, len);
is_data = is_data_inode(folio->mapping->host);
+
+ spin_lock_irqsave(&subpage->lock, flags);
+
+ /* The range should have already been locked. */
+ ASSERT(bitmap_test_range_all_set(subpage->bitmaps, start_bit, nbits));
ASSERT(atomic_read(&subpage->readers) >= nbits);
+
+ bitmap_clear(subpage->bitmaps, start_bit, nbits);
last = atomic_sub_and_test(nbits, &subpage->readers);
/*
@@ -270,6 +303,7 @@ void btrfs_subpage_end_reader(const struct btrfs_fs_info *fs_info,
*/
if (is_data && last)
folio_unlock(folio);
+ spin_unlock_irqrestore(&subpage->lock, flags);
}
static void btrfs_subpage_clamp_range(struct folio *folio, u64 *start, u32 *len)
@@ -290,28 +324,38 @@ static void btrfs_subpage_clamp_range(struct folio *folio, u64 *start, u32 *len)
orig_start + orig_len) - *start;
}
-void btrfs_subpage_start_writer(const struct btrfs_fs_info *fs_info,
- struct folio *folio, u64 start, u32 len)
+static void btrfs_subpage_start_writer(const struct btrfs_fs_info *fs_info,
+ struct folio *folio, u64 start, u32 len)
{
struct btrfs_subpage *subpage = folio_get_private(folio);
+ const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len);
const int nbits = (len >> fs_info->sectorsize_bits);
+ unsigned long flags;
int ret;
btrfs_subpage_assert(fs_info, folio, start, len);
+ spin_lock_irqsave(&subpage->lock, flags);
ASSERT(atomic_read(&subpage->readers) == 0);
+ ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits));
+ bitmap_set(subpage->bitmaps, start_bit, nbits);
ret = atomic_add_return(nbits, &subpage->writers);
ASSERT(ret == nbits);
+ spin_unlock_irqrestore(&subpage->lock, flags);
}
-bool btrfs_subpage_end_and_test_writer(const struct btrfs_fs_info *fs_info,
- struct folio *folio, u64 start, u32 len)
+static bool btrfs_subpage_end_and_test_writer(const struct btrfs_fs_info *fs_info,
+ struct folio *folio, u64 start, u32 len)
{
struct btrfs_subpage *subpage = folio_get_private(folio);
+ const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len);
const int nbits = (len >> fs_info->sectorsize_bits);
+ unsigned long flags;
+ bool last;
btrfs_subpage_assert(fs_info, folio, start, len);
+ spin_lock_irqsave(&subpage->lock, flags);
/*
* We have call sites passing @lock_page into
* extent_clear_unlock_delalloc() for compression path.
@@ -319,11 +363,18 @@ bool btrfs_subpage_end_and_test_writer(const struct btrfs_fs_info *fs_info,
* This @locked_page is locked by plain lock_page(), thus its
* subpage::writers is 0. Handle them in a special way.
*/
- if (atomic_read(&subpage->writers) == 0)
+ if (atomic_read(&subpage->writers) == 0) {
+ spin_unlock_irqrestore(&subpage->lock, flags);
return true;
+ }
ASSERT(atomic_read(&subpage->writers) >= nbits);
- return atomic_sub_and_test(nbits, &subpage->writers);
+ /* The target range should have been locked. */
+ ASSERT(bitmap_test_range_all_set(subpage->bitmaps, start_bit, nbits));
+ bitmap_clear(subpage->bitmaps, start_bit, nbits);
+ last = atomic_sub_and_test(nbits, &subpage->writers);
+ spin_unlock_irqrestore(&subpage->lock, flags);
+ return last;
}
/*
@@ -365,16 +416,6 @@ void btrfs_folio_end_writer_lock(const struct btrfs_fs_info *fs_info,
folio_unlock(folio);
}
-#define subpage_calc_start_bit(fs_info, folio, name, start, len) \
-({ \
- unsigned int start_bit; \
- \
- btrfs_subpage_assert(fs_info, folio, start, len); \
- start_bit = offset_in_page(start) >> fs_info->sectorsize_bits; \
- start_bit += fs_info->subpage_info->name##_offset; \
- start_bit; \
-})
-
#define subpage_test_bitmap_all_set(fs_info, subpage, name) \
bitmap_test_range_all_set(subpage->bitmaps, \
fs_info->subpage_info->name##_offset, \
@@ -751,6 +792,7 @@ void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info,
GET_SUBPAGE_BITMAP(subpage, subpage_info, writeback, &writeback_bitmap);
GET_SUBPAGE_BITMAP(subpage, subpage_info, ordered, &ordered_bitmap);
GET_SUBPAGE_BITMAP(subpage, subpage_info, checked, &checked_bitmap);
+ GET_SUBPAGE_BITMAP(subpage, subpage_info, locked, &checked_bitmap);
spin_unlock_irqrestore(&subpage->lock, flags);
dump_page(folio_page(folio, 0), "btrfs subpage dump");
diff --git a/fs/btrfs/subpage.h b/fs/btrfs/subpage.h
index 793c2b314a58..b6dc013b0fdc 100644
--- a/fs/btrfs/subpage.h
+++ b/fs/btrfs/subpage.h
@@ -4,6 +4,11 @@
#define BTRFS_SUBPAGE_H
#include <linux/spinlock.h>
+#include <linux/atomic.h>
+
+struct address_space;
+struct folio;
+struct btrfs_fs_info;
/*
* Extra info for subpapge bitmap.
@@ -28,7 +33,7 @@ struct btrfs_subpage_info {
unsigned int total_nr_bits;
/*
- * *_start indicates where the bitmap starts, the length is always
+ * *_offset indicates where the bitmap starts, the length is always
* @bitmap_size, which is calculated from PAGE_SIZE / sectorsize.
*/
unsigned int uptodate_offset;
@@ -36,6 +41,16 @@ struct btrfs_subpage_info {
unsigned int writeback_offset;
unsigned int ordered_offset;
unsigned int checked_offset;
+
+ /*
+ * For locked bitmaps, normally it's subpage representation for folio
+ * Locked flag, but metadata is different:
+ *
+ * - Metadata doesn't really lock the folio
+ * It's just to prevent page::private get cleared before the last
+ * end_page_read().
+ */
+ unsigned int locked_offset;
};
/*
@@ -93,10 +108,6 @@ void btrfs_subpage_start_reader(const struct btrfs_fs_info *fs_info,
void btrfs_subpage_end_reader(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len);
-void btrfs_subpage_start_writer(const struct btrfs_fs_info *fs_info,
- struct folio *folio, u64 start, u32 len);
-bool btrfs_subpage_end_and_test_writer(const struct btrfs_fs_info *fs_info,
- struct folio *folio, u64 start, u32 len);
int btrfs_folio_start_writer_lock(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len);
void btrfs_folio_end_writer_lock(const struct btrfs_fs_info *fs_info,
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 101f786963d4..7e44ccaf348f 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -34,13 +34,11 @@
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
-#include "print-tree.h"
#include "props.h"
#include "xattr.h"
#include "bio.h"
#include "export.h"
#include "compression.h"
-#include "rcu-string.h"
#include "dev-replace.h"
#include "free-space-cache.h"
#include "backref.h"
@@ -1767,7 +1765,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_bavail = 0;
buf->f_type = BTRFS_SUPER_MAGIC;
- buf->f_bsize = dentry->d_sb->s_blocksize;
+ buf->f_bsize = fs_info->sectorsize;
buf->f_namelen = BTRFS_NAME_LEN;
/* We treat it as constant endianness (it doesn't matter _which_)
@@ -2203,7 +2201,9 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
vol = memdup_user((void __user *)arg, sizeof(*vol));
if (IS_ERR(vol))
return PTR_ERR(vol);
- vol->name[BTRFS_PATH_NAME_MAX] = '\0';
+ ret = btrfs_check_ioctl_vol_args_path(vol);
+ if (ret < 0)
+ goto out;
switch (cmd) {
case BTRFS_IOC_SCAN_DEV:
@@ -2245,6 +2245,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
break;
}
+out:
kfree(vol);
return ret;
}
diff --git a/fs/btrfs/super.h b/fs/btrfs/super.h
index f18253ca280d..cbcab434b5ec 100644
--- a/fs/btrfs/super.h
+++ b/fs/btrfs/super.h
@@ -3,6 +3,13 @@
#ifndef BTRFS_SUPER_H
#define BTRFS_SUPER_H
+#include <linux/types.h>
+#include <linux/fs.h>
+#include "fs.h"
+
+struct super_block;
+struct btrfs_fs_info;
+
bool btrfs_check_options(struct btrfs_fs_info *info, unsigned long *mount_opt,
unsigned long flags);
int btrfs_sync_fs(struct super_block *sb, int wait);
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 84c05246ffd8..c6387a8ddb94 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -421,7 +421,7 @@ BTRFS_ATTR(static_feature, supported_sectorsizes,
static ssize_t acl_show(struct kobject *kobj, struct kobj_attribute *a, char *buf)
{
- return sysfs_emit(buf, "%d\n", !!IS_ENABLED(CONFIG_BTRFS_FS_POSIX_ACL));
+ return sysfs_emit(buf, "%d\n", IS_ENABLED(CONFIG_BTRFS_FS_POSIX_ACL));
}
BTRFS_ATTR(static_feature, acl, acl_show);
@@ -1228,11 +1228,12 @@ static ssize_t btrfs_read_policy_show(struct kobject *kobj,
struct kobj_attribute *a, char *buf)
{
struct btrfs_fs_devices *fs_devices = to_fs_devs(kobj);
+ const enum btrfs_read_policy policy = READ_ONCE(fs_devices->read_policy);
ssize_t ret = 0;
int i;
for (i = 0; i < BTRFS_NR_READ_POLICY; i++) {
- if (fs_devices->read_policy == i)
+ if (policy == i)
ret += sysfs_emit_at(buf, ret, "%s[%s]",
(ret == 0 ? "" : " "),
btrfs_read_policy_name[i]);
@@ -1256,8 +1257,8 @@ static ssize_t btrfs_read_policy_store(struct kobject *kobj,
for (i = 0; i < BTRFS_NR_READ_POLICY; i++) {
if (sysfs_streq(buf, btrfs_read_policy_name[i])) {
- if (i != fs_devices->read_policy) {
- fs_devices->read_policy = i;
+ if (i != READ_ONCE(fs_devices->read_policy)) {
+ WRITE_ONCE(fs_devices->read_policy, i);
btrfs_info(fs_devices->fs_info,
"read policy set to '%s'",
btrfs_read_policy_name[i]);
@@ -1306,6 +1307,47 @@ static ssize_t btrfs_bg_reclaim_threshold_store(struct kobject *kobj,
BTRFS_ATTR_RW(, bg_reclaim_threshold, btrfs_bg_reclaim_threshold_show,
btrfs_bg_reclaim_threshold_store);
+#ifdef CONFIG_BTRFS_DEBUG
+static ssize_t btrfs_offload_csum_show(struct kobject *kobj,
+ struct kobj_attribute *a, char *buf)
+{
+ struct btrfs_fs_devices *fs_devices = to_fs_devs(kobj);
+
+ switch (READ_ONCE(fs_devices->offload_csum_mode)) {
+ case BTRFS_OFFLOAD_CSUM_AUTO:
+ return sysfs_emit(buf, "auto\n");
+ case BTRFS_OFFLOAD_CSUM_FORCE_ON:
+ return sysfs_emit(buf, "1\n");
+ case BTRFS_OFFLOAD_CSUM_FORCE_OFF:
+ return sysfs_emit(buf, "0\n");
+ default:
+ WARN_ON(1);
+ return -EINVAL;
+ }
+}
+
+static ssize_t btrfs_offload_csum_store(struct kobject *kobj,
+ struct kobj_attribute *a, const char *buf,
+ size_t len)
+{
+ struct btrfs_fs_devices *fs_devices = to_fs_devs(kobj);
+ int ret;
+ bool val;
+
+ ret = kstrtobool(buf, &val);
+ if (ret == 0)
+ WRITE_ONCE(fs_devices->offload_csum_mode,
+ val ? BTRFS_OFFLOAD_CSUM_FORCE_ON : BTRFS_OFFLOAD_CSUM_FORCE_OFF);
+ else if (ret == -EINVAL && sysfs_streq(buf, "auto"))
+ WRITE_ONCE(fs_devices->offload_csum_mode, BTRFS_OFFLOAD_CSUM_AUTO);
+ else
+ return -EINVAL;
+
+ return len;
+}
+BTRFS_ATTR_RW(, offload_csum, btrfs_offload_csum_show, btrfs_offload_csum_store);
+#endif
+
/*
* Per-filesystem information and stats.
*
@@ -1325,6 +1367,9 @@ static const struct attribute *btrfs_attrs[] = {
BTRFS_ATTR_PTR(, bg_reclaim_threshold),
BTRFS_ATTR_PTR(, commit_stats),
BTRFS_ATTR_PTR(, temp_fsid),
+#ifdef CONFIG_BTRFS_DEBUG
+ BTRFS_ATTR_PTR(, offload_csum),
+#endif
NULL,
};
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h
index 86c7eef12873..e6a284c59809 100644
--- a/fs/btrfs/sysfs.h
+++ b/fs/btrfs/sysfs.h
@@ -3,8 +3,17 @@
#ifndef BTRFS_SYSFS_H
#define BTRFS_SYSFS_H
+#include <linux/types.h>
+#include <linux/compiler_types.h>
#include <linux/kobject.h>
+struct btrfs_fs_info;
+struct btrfs_device;
+struct btrfs_fs_devices;
+struct btrfs_block_group;
+struct btrfs_space_info;
+struct btrfs_qgroup;
+
enum btrfs_feature_set {
FEAT_COMPAT,
FEAT_COMPAT_RO,
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index 25b3349595e0..865d4af4b303 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -11,6 +11,7 @@
#include "btrfs-tests.h"
#include "../ctree.h"
#include "../extent_io.h"
+#include "../disk-io.h"
#include "../btrfs_inode.h"
#define PROCESS_UNLOCK (1 << 0)
@@ -105,9 +106,11 @@ static void dump_extent_io_tree(const struct extent_io_tree *tree)
}
}
-static int test_find_delalloc(u32 sectorsize)
+static int test_find_delalloc(u32 sectorsize, u32 nodesize)
{
- struct inode *inode;
+ struct btrfs_fs_info *fs_info;
+ struct btrfs_root *root = NULL;
+ struct inode *inode = NULL;
struct extent_io_tree *tmp;
struct page *page;
struct page *locked_page = NULL;
@@ -121,12 +124,27 @@ static int test_find_delalloc(u32 sectorsize)
test_msg("running find delalloc tests");
+ fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
+ if (!fs_info) {
+ test_std_err(TEST_ALLOC_FS_INFO);
+ return -ENOMEM;
+ }
+
+ root = btrfs_alloc_dummy_root(fs_info);
+ if (IS_ERR(root)) {
+ test_std_err(TEST_ALLOC_ROOT);
+ ret = PTR_ERR(root);
+ goto out;
+ }
+
inode = btrfs_new_test_inode();
if (!inode) {
test_std_err(TEST_ALLOC_INODE);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out;
}
tmp = &BTRFS_I(inode)->io_tree;
+ BTRFS_I(inode)->root = root;
/*
* Passing NULL as we don't have fs_info but tracepoints are not used
@@ -316,6 +334,8 @@ out:
process_page_range(inode, 0, total_dirty - 1,
PROCESS_UNLOCK | PROCESS_RELEASE);
iput(inode);
+ btrfs_free_dummy_root(root);
+ btrfs_free_dummy_fs_info(fs_info);
return ret;
}
@@ -794,7 +814,7 @@ int btrfs_test_extent_io(u32 sectorsize, u32 nodesize)
test_msg("running extent I/O tests");
- ret = test_find_delalloc(sectorsize);
+ ret = test_find_delalloc(sectorsize, nodesize);
if (ret)
goto out;
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index 9957de9f7806..99da9d34b77a 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -258,7 +258,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
/* First with no extents */
BTRFS_I(inode)->root = root;
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, sectorsize);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, sectorsize);
if (IS_ERR(em)) {
em = NULL;
test_err("got an error when we shouldn't have");
@@ -278,7 +278,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
*/
setup_file_extents(root, sectorsize);
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, (u64)-1);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, (u64)-1);
if (IS_ERR(em)) {
test_err("got an error when we shouldn't have");
goto out;
@@ -316,7 +316,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
if (IS_ERR(em)) {
test_err("got an error when we shouldn't have");
goto out;
@@ -339,7 +339,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
free_extent_map(em);
/* Regular extent */
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
if (IS_ERR(em)) {
test_err("got an error when we shouldn't have");
goto out;
@@ -367,7 +367,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
free_extent_map(em);
/* The next 3 are split extents */
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
if (IS_ERR(em)) {
test_err("got an error when we shouldn't have");
goto out;
@@ -396,7 +396,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
if (IS_ERR(em)) {
test_err("got an error when we shouldn't have");
goto out;
@@ -418,7 +418,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
if (IS_ERR(em)) {
test_err("got an error when we shouldn't have");
goto out;
@@ -452,7 +452,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
free_extent_map(em);
/* Prealloc extent */
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
if (IS_ERR(em)) {
test_err("got an error when we shouldn't have");
goto out;
@@ -481,7 +481,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
free_extent_map(em);
/* The next 3 are a half written prealloc extent */
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
if (IS_ERR(em)) {
test_err("got an error when we shouldn't have");
goto out;
@@ -511,7 +511,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
if (IS_ERR(em)) {
test_err("got an error when we shouldn't have");
goto out;
@@ -544,7 +544,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
if (IS_ERR(em)) {
test_err("got an error when we shouldn't have");
goto out;
@@ -579,7 +579,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
free_extent_map(em);
/* Now for the compressed extent */
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
if (IS_ERR(em)) {
test_err("got an error when we shouldn't have");
goto out;
@@ -613,7 +613,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
free_extent_map(em);
/* Split compressed extent */
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
if (IS_ERR(em)) {
test_err("got an error when we shouldn't have");
goto out;
@@ -648,7 +648,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
if (IS_ERR(em)) {
test_err("got an error when we shouldn't have");
goto out;
@@ -675,7 +675,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
if (IS_ERR(em)) {
test_err("got an error when we shouldn't have");
goto out;
@@ -710,7 +710,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
free_extent_map(em);
/* A hole between regular extents but no hole extent */
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset + 6, sectorsize);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, offset + 6, sectorsize);
if (IS_ERR(em)) {
test_err("got an error when we shouldn't have");
goto out;
@@ -737,7 +737,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, SZ_4M);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, SZ_4M);
if (IS_ERR(em)) {
test_err("got an error when we shouldn't have");
goto out;
@@ -770,7 +770,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, offset, sectorsize);
if (IS_ERR(em)) {
test_err("got an error when we shouldn't have");
goto out;
@@ -850,7 +850,7 @@ static int test_hole_first(u32 sectorsize, u32 nodesize)
insert_inode_item_key(root);
insert_extent(root, sectorsize, sectorsize, sectorsize, 0, sectorsize,
sectorsize, BTRFS_FILE_EXTENT_REG, 0, 1);
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, 2 * sectorsize);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 2 * sectorsize);
if (IS_ERR(em)) {
test_err("got an error when we shouldn't have");
goto out;
@@ -872,7 +872,7 @@ static int test_hole_first(u32 sectorsize, u32 nodesize)
}
free_extent_map(em);
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, sectorsize, 2 * sectorsize);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, sectorsize, 2 * sectorsize);
if (IS_ERR(em)) {
test_err("got an error when we shouldn't have");
goto out;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index bf8e64c766b6..46e8426adf4f 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -23,12 +23,10 @@
#include "qgroup.h"
#include "block-group.h"
#include "space-info.h"
-#include "zoned.h"
#include "fs.h"
#include "accessors.h"
#include "extent-tree.h"
#include "root-tree.h"
-#include "defrag.h"
#include "dir-item.h"
#include "uuid-tree.h"
#include "ioctl.h"
@@ -1959,19 +1957,6 @@ static void update_super_roots(struct btrfs_fs_info *fs_info)
super->uuid_tree_generation = root_item->generation;
}
-int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
-{
- struct btrfs_transaction *trans;
- int ret = 0;
-
- spin_lock(&info->trans_lock);
- trans = info->running_transaction;
- if (trans)
- ret = (trans->state >= TRANS_STATE_COMMIT_START);
- spin_unlock(&info->trans_lock);
- return ret;
-}
-
int btrfs_transaction_blocked(struct btrfs_fs_info *info)
{
struct btrfs_transaction *trans;
@@ -2686,9 +2671,7 @@ void __cold __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
int __init btrfs_transaction_init(void)
{
- btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle",
- sizeof(struct btrfs_trans_handle), 0,
- SLAB_TEMPORARY | SLAB_MEM_SPREAD, NULL);
+ btrfs_trans_handle_cachep = KMEM_CACHE(btrfs_trans_handle, SLAB_TEMPORARY);
if (!btrfs_trans_handle_cachep)
return -ENOMEM;
return 0;
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 2bf8bbdfd0b3..4e451ab173b1 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -6,12 +6,27 @@
#ifndef BTRFS_TRANSACTION_H
#define BTRFS_TRANSACTION_H
+#include <linux/atomic.h>
#include <linux/refcount.h>
+#include <linux/list.h>
+#include <linux/time64.h>
+#include <linux/mutex.h>
+#include <linux/wait.h>
#include "btrfs_inode.h"
#include "delayed-ref.h"
-#include "ctree.h"
+#include "extent-io-tree.h"
+#include "block-rsv.h"
+#include "messages.h"
#include "misc.h"
+struct dentry;
+struct inode;
+struct btrfs_pending_snapshot;
+struct btrfs_fs_info;
+struct btrfs_root_item;
+struct btrfs_root;
+struct btrfs_path;
+
/* Radix-tree tag for roots that are part of the trasaction. */
#define BTRFS_ROOT_TRANS_TAG 0
@@ -262,7 +277,6 @@ int btrfs_write_marked_extents(struct btrfs_fs_info *fs_info,
struct extent_io_tree *dirty_pages, int mark);
int btrfs_wait_tree_log_extents(struct btrfs_root *root, int mark);
int btrfs_transaction_blocked(struct btrfs_fs_info *info);
-int btrfs_transaction_in_commit(struct btrfs_fs_info *info);
void btrfs_put_transaction(struct btrfs_transaction *transaction);
void btrfs_add_dropped_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index 6eccf8496486..c8fbcae4e88e 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -21,7 +21,6 @@
#include "messages.h"
#include "ctree.h"
#include "tree-checker.h"
-#include "disk-io.h"
#include "compression.h"
#include "volumes.h"
#include "misc.h"
@@ -30,7 +29,6 @@
#include "file-item.h"
#include "inode-item.h"
#include "dir-item.h"
-#include "raid-stripe-tree.h"
#include "extent-tree.h"
/*
@@ -67,6 +65,7 @@ static void generic_err(const struct extent_buffer *eb, int slot,
vaf.fmt = fmt;
vaf.va = &args;
+ dump_page(folio_page(eb->folios[0], 0), "eb page dump");
btrfs_crit(fs_info,
"corrupt %s: root=%llu block=%llu slot=%d, %pV",
btrfs_header_level(eb) == 0 ? "leaf" : "node",
@@ -94,6 +93,7 @@ static void file_extent_err(const struct extent_buffer *eb, int slot,
vaf.fmt = fmt;
vaf.va = &args;
+ dump_page(folio_page(eb->folios[0], 0), "eb page dump");
btrfs_crit(fs_info,
"corrupt %s: root=%llu block=%llu slot=%d ino=%llu file_offset=%llu, %pV",
btrfs_header_level(eb) == 0 ? "leaf" : "node",
@@ -154,6 +154,7 @@ static void dir_item_err(const struct extent_buffer *eb, int slot,
vaf.fmt = fmt;
vaf.va = &args;
+ dump_page(folio_page(eb->folios[0], 0), "eb page dump");
btrfs_crit(fs_info,
"corrupt %s: root=%llu block=%llu slot=%d ino=%llu, %pV",
btrfs_header_level(eb) == 0 ? "leaf" : "node",
@@ -649,6 +650,7 @@ static void block_group_err(const struct extent_buffer *eb, int slot,
vaf.fmt = fmt;
vaf.va = &args;
+ dump_page(folio_page(eb->folios[0], 0), "eb page dump");
btrfs_crit(fs_info,
"corrupt %s: root=%llu block=%llu slot=%d bg_start=%llu bg_len=%llu, %pV",
btrfs_header_level(eb) == 0 ? "leaf" : "node",
@@ -1005,6 +1007,7 @@ static void dev_item_err(const struct extent_buffer *eb, int slot,
vaf.fmt = fmt;
vaf.va = &args;
+ dump_page(folio_page(eb->folios[0], 0), "eb page dump");
btrfs_crit(eb->fs_info,
"corrupt %s: root=%llu block=%llu slot=%d devid=%llu %pV",
btrfs_header_level(eb) == 0 ? "leaf" : "node",
@@ -1260,6 +1263,7 @@ static void extent_err(const struct extent_buffer *eb, int slot,
vaf.fmt = fmt;
vaf.va = &args;
+ dump_page(folio_page(eb->folios[0], 0), "eb page dump");
btrfs_crit(eb->fs_info,
"corrupt %s: block=%llu slot=%d extent bytenr=%llu len=%llu %pV",
btrfs_header_level(eb) == 0 ? "leaf" : "node",
diff --git a/fs/btrfs/tree-checker.h b/fs/btrfs/tree-checker.h
index 14b9fbe82da4..5c809b50b2d0 100644
--- a/fs/btrfs/tree-checker.h
+++ b/fs/btrfs/tree-checker.h
@@ -6,10 +6,12 @@
#ifndef BTRFS_TREE_CHECKER_H
#define BTRFS_TREE_CHECKER_H
+#include <linux/types.h>
#include <uapi/linux/btrfs_tree.h>
struct extent_buffer;
struct btrfs_chunk;
+struct btrfs_key;
/* All the extra info needed to verify the parentness of a tree block. */
struct btrfs_tree_parent_check {
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 331fc7429952..472918a5bc73 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -13,13 +13,11 @@
#include "tree-log.h"
#include "disk-io.h"
#include "locking.h"
-#include "print-tree.h"
#include "backref.h"
#include "compression.h"
#include "qgroup.h"
#include "block-group.h"
#include "space-info.h"
-#include "zoned.h"
#include "inode-item.h"
#include "fs.h"
#include "accessors.h"
@@ -2820,6 +2818,52 @@ static void wait_for_writer(struct btrfs_root *root)
finish_wait(&root->log_writer_wait, &wait);
}
+void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx, struct inode *inode)
+{
+ ctx->log_ret = 0;
+ ctx->log_transid = 0;
+ ctx->log_new_dentries = false;
+ ctx->logging_new_name = false;
+ ctx->logging_new_delayed_dentries = false;
+ ctx->logged_before = false;
+ ctx->inode = inode;
+ INIT_LIST_HEAD(&ctx->list);
+ INIT_LIST_HEAD(&ctx->ordered_extents);
+ INIT_LIST_HEAD(&ctx->conflict_inodes);
+ ctx->num_conflict_inodes = 0;
+ ctx->logging_conflict_inodes = false;
+ ctx->scratch_eb = NULL;
+}
+
+void btrfs_init_log_ctx_scratch_eb(struct btrfs_log_ctx *ctx)
+{
+ struct btrfs_inode *inode = BTRFS_I(ctx->inode);
+
+ if (!test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags) &&
+ !test_bit(BTRFS_INODE_COPY_EVERYTHING, &inode->runtime_flags))
+ return;
+
+ /*
+ * Don't care about allocation failure. This is just for optimization,
+ * if we fail to allocate here, we will try again later if needed.
+ */
+ ctx->scratch_eb = alloc_dummy_extent_buffer(inode->root->fs_info, 0);
+}
+
+void btrfs_release_log_ctx_extents(struct btrfs_log_ctx *ctx)
+{
+ struct btrfs_ordered_extent *ordered;
+ struct btrfs_ordered_extent *tmp;
+
+ ASSERT(inode_is_locked(ctx->inode));
+
+ list_for_each_entry_safe(ordered, tmp, &ctx->ordered_extents, log_list) {
+ list_del_init(&ordered->log_list);
+ btrfs_put_ordered_extent(ordered);
+ }
+}
+
+
static inline void btrfs_remove_log_ctx(struct btrfs_root *root,
struct btrfs_log_ctx *ctx)
{
@@ -3619,6 +3663,30 @@ out:
return ret;
}
+static int clone_leaf(struct btrfs_path *path, struct btrfs_log_ctx *ctx)
+{
+ const int slot = path->slots[0];
+
+ if (ctx->scratch_eb) {
+ copy_extent_buffer_full(ctx->scratch_eb, path->nodes[0]);
+ } else {
+ ctx->scratch_eb = btrfs_clone_extent_buffer(path->nodes[0]);
+ if (!ctx->scratch_eb)
+ return -ENOMEM;
+ }
+
+ btrfs_release_path(path);
+ path->nodes[0] = ctx->scratch_eb;
+ path->slots[0] = slot;
+ /*
+ * Add extra ref to scratch eb so that it is not freed when callers
+ * release the path, so we can reuse it later if needed.
+ */
+ atomic_inc(&ctx->scratch_eb->refs);
+
+ return 0;
+}
+
static int process_dir_items_leaf(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode,
struct btrfs_path *path,
@@ -3633,23 +3701,20 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans,
bool last_found = false;
int batch_start = 0;
int batch_size = 0;
- int i;
+ int ret;
/*
* We need to clone the leaf, release the read lock on it, and use the
* clone before modifying the log tree. See the comment at copy_items()
* about why we need to do this.
*/
- src = btrfs_clone_extent_buffer(path->nodes[0]);
- if (!src)
- return -ENOMEM;
+ ret = clone_leaf(path, ctx);
+ if (ret < 0)
+ return ret;
- i = path->slots[0];
- btrfs_release_path(path);
- path->nodes[0] = src;
- path->slots[0] = i;
+ src = path->nodes[0];
- for (; i < nritems; i++) {
+ for (int i = path->slots[0]; i < nritems; i++) {
struct btrfs_dir_item *di;
struct btrfs_key key;
int ret;
@@ -4259,17 +4324,16 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
struct btrfs_path *dst_path,
struct btrfs_path *src_path,
int start_slot, int nr, int inode_only,
- u64 logged_isize)
+ u64 logged_isize, struct btrfs_log_ctx *ctx)
{
struct btrfs_root *log = inode->root->log_root;
struct btrfs_file_extent_item *extent;
struct extent_buffer *src;
- int ret = 0;
+ int ret;
struct btrfs_key *ins_keys;
u32 *ins_sizes;
struct btrfs_item_batch batch;
char *ins_data;
- int i;
int dst_index;
const bool skip_csum = (inode->flags & BTRFS_INODE_NODATASUM);
const u64 i_size = i_size_read(&inode->vfs_inode);
@@ -4302,14 +4366,11 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
* while the other is holding the delayed node's mutex and wants to
* write lock the same subvolume leaf for flushing delayed items.
*/
- src = btrfs_clone_extent_buffer(src_path->nodes[0]);
- if (!src)
- return -ENOMEM;
+ ret = clone_leaf(src_path, ctx);
+ if (ret < 0)
+ return ret;
- i = src_path->slots[0];
- btrfs_release_path(src_path);
- src_path->nodes[0] = src;
- src_path->slots[0] = i;
+ src = src_path->nodes[0];
ins_data = kmalloc(nr * sizeof(struct btrfs_key) +
nr * sizeof(u32), GFP_NOFS);
@@ -4324,7 +4385,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
batch.nr = 0;
dst_index = 0;
- for (i = 0; i < nr; i++) {
+ for (int i = 0; i < nr; i++) {
const int src_slot = start_slot + i;
struct btrfs_root *csum_root;
struct btrfs_ordered_sum *sums;
@@ -4431,7 +4492,7 @@ add_to_batch:
goto out;
dst_index = 0;
- for (i = 0; i < nr; i++) {
+ for (int i = 0; i < nr; i++) {
const int src_slot = start_slot + i;
const int dst_slot = dst_path->slots[0] + dst_index;
struct btrfs_key key;
@@ -4704,7 +4765,8 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
*/
static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode,
- struct btrfs_path *path)
+ struct btrfs_path *path,
+ struct btrfs_log_ctx *ctx)
{
struct btrfs_root *root = inode->root;
struct btrfs_key key;
@@ -4770,7 +4832,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
if (slot >= btrfs_header_nritems(leaf)) {
if (ins_nr > 0) {
ret = copy_items(trans, inode, dst_path, path,
- start_slot, ins_nr, 1, 0);
+ start_slot, ins_nr, 1, 0, ctx);
if (ret < 0)
goto out;
ins_nr = 0;
@@ -4820,7 +4882,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
}
if (ins_nr > 0)
ret = copy_items(trans, inode, dst_path, path,
- start_slot, ins_nr, 1, 0);
+ start_slot, ins_nr, 1, 0, ctx);
out:
btrfs_release_path(path);
btrfs_free_path(dst_path);
@@ -4899,7 +4961,7 @@ process:
write_unlock(&tree->lock);
if (!ret)
- ret = btrfs_log_prealloc_extents(trans, inode, path);
+ ret = btrfs_log_prealloc_extents(trans, inode, path, ctx);
if (ret)
return ret;
@@ -4980,7 +5042,8 @@ static int logged_inode_size(struct btrfs_root *log, struct btrfs_inode *inode,
static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode,
struct btrfs_path *path,
- struct btrfs_path *dst_path)
+ struct btrfs_path *dst_path,
+ struct btrfs_log_ctx *ctx)
{
struct btrfs_root *root = inode->root;
int ret;
@@ -5009,7 +5072,7 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
if (slot >= nritems) {
if (ins_nr > 0) {
ret = copy_items(trans, inode, dst_path, path,
- start_slot, ins_nr, 1, 0);
+ start_slot, ins_nr, 1, 0, ctx);
if (ret < 0)
return ret;
ins_nr = 0;
@@ -5035,7 +5098,7 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
}
if (ins_nr > 0) {
ret = copy_items(trans, inode, dst_path, path,
- start_slot, ins_nr, 1, 0);
+ start_slot, ins_nr, 1, 0, ctx);
if (ret < 0)
return ret;
}
@@ -5847,7 +5910,7 @@ again:
}
ret = copy_items(trans, inode, dst_path, path,
ins_start_slot, ins_nr,
- inode_only, logged_isize);
+ inode_only, logged_isize, ctx);
if (ret < 0)
return ret;
ins_nr = 0;
@@ -5866,7 +5929,7 @@ again:
goto next_slot;
ret = copy_items(trans, inode, dst_path, path,
ins_start_slot,
- ins_nr, inode_only, logged_isize);
+ ins_nr, inode_only, logged_isize, ctx);
if (ret < 0)
return ret;
ins_nr = 0;
@@ -5883,7 +5946,7 @@ again:
}
ret = copy_items(trans, inode, dst_path, path, ins_start_slot,
- ins_nr, inode_only, logged_isize);
+ ins_nr, inode_only, logged_isize, ctx);
if (ret < 0)
return ret;
ins_nr = 1;
@@ -5898,7 +5961,7 @@ next_slot:
if (ins_nr) {
ret = copy_items(trans, inode, dst_path, path,
ins_start_slot, ins_nr, inode_only,
- logged_isize);
+ logged_isize, ctx);
if (ret < 0)
return ret;
ins_nr = 0;
@@ -5923,7 +5986,7 @@ next_key:
}
if (ins_nr) {
ret = copy_items(trans, inode, dst_path, path, ins_start_slot,
- ins_nr, inode_only, logged_isize);
+ ins_nr, inode_only, logged_isize, ctx);
if (ret)
return ret;
}
@@ -5934,7 +5997,7 @@ next_key:
* lock the same leaf with btrfs_log_prealloc_extents() below.
*/
btrfs_release_path(path);
- ret = btrfs_log_prealloc_extents(trans, inode, dst_path);
+ ret = btrfs_log_prealloc_extents(trans, inode, dst_path, ctx);
}
return ret;
@@ -6526,7 +6589,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
btrfs_release_path(dst_path);
- ret = btrfs_log_all_xattrs(trans, inode, path, dst_path);
+ ret = btrfs_log_all_xattrs(trans, inode, path, dst_path, ctx);
if (ret)
goto out_unlock;
xattrs_logged = true;
@@ -6553,7 +6616,7 @@ log_extents:
* BTRFS_INODE_COPY_EVERYTHING set.
*/
if (!xattrs_logged && inode->logged_trans < trans->transid) {
- ret = btrfs_log_all_xattrs(trans, inode, path, dst_path);
+ ret = btrfs_log_all_xattrs(trans, inode, path, dst_path, ctx);
if (ret)
goto out_unlock;
btrfs_release_path(path);
@@ -7502,6 +7565,7 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
btrfs_init_log_ctx(&ctx, &inode->vfs_inode);
ctx.logging_new_name = true;
+ btrfs_init_log_ctx_scratch_eb(&ctx);
/*
* We don't care about the return value. If we fail to log the new name
* then we know the next attempt to sync the log will fallback to a full
@@ -7510,6 +7574,7 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
* inconsistent state after a rename operation.
*/
btrfs_log_inode_parent(trans, inode, parent, LOG_INODE_EXISTS, &ctx);
+ free_extent_buffer(ctx.scratch_eb);
ASSERT(list_empty(&ctx.conflict_inodes));
out:
/*
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index a550a8a375cd..22e9cbc81577 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -6,10 +6,18 @@
#ifndef BTRFS_TREE_LOG_H
#define BTRFS_TREE_LOG_H
+#include <linux/list.h>
+#include <linux/fs.h>
#include "messages.h"
#include "ctree.h"
#include "transaction.h"
+struct inode;
+struct dentry;
+struct btrfs_ordered_extent;
+struct btrfs_root;
+struct btrfs_trans_handle;
+
/* return value for btrfs_log_dentry_safe that means we don't need to log it at all */
#define BTRFS_NO_LOG_SYNC 256
@@ -36,37 +44,20 @@ struct btrfs_log_ctx {
struct list_head conflict_inodes;
int num_conflict_inodes;
bool logging_conflict_inodes;
+ /*
+ * Used for fsyncs that need to copy items from the subvolume tree to
+ * the log tree (full sync flag set or copy everything flag set) to
+ * avoid allocating a temporary extent buffer while holding a lock on
+ * an extent buffer of the subvolume tree and under the log transaction.
+ * Also helps to avoid allocating and freeing a temporary extent buffer
+ * in case we need to process multiple leaves from the subvolume tree.
+ */
+ struct extent_buffer *scratch_eb;
};
-static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx,
- struct inode *inode)
-{
- ctx->log_ret = 0;
- ctx->log_transid = 0;
- ctx->log_new_dentries = false;
- ctx->logging_new_name = false;
- ctx->logging_new_delayed_dentries = false;
- ctx->logged_before = false;
- ctx->inode = inode;
- INIT_LIST_HEAD(&ctx->list);
- INIT_LIST_HEAD(&ctx->ordered_extents);
- INIT_LIST_HEAD(&ctx->conflict_inodes);
- ctx->num_conflict_inodes = 0;
- ctx->logging_conflict_inodes = false;
-}
-
-static inline void btrfs_release_log_ctx_extents(struct btrfs_log_ctx *ctx)
-{
- struct btrfs_ordered_extent *ordered;
- struct btrfs_ordered_extent *tmp;
-
- ASSERT(inode_is_locked(ctx->inode));
-
- list_for_each_entry_safe(ordered, tmp, &ctx->ordered_extents, log_list) {
- list_del_init(&ordered->log_list);
- btrfs_put_ordered_extent(ordered);
- }
-}
+void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx, struct inode *inode);
+void btrfs_init_log_ctx_scratch_eb(struct btrfs_log_ctx *ctx);
+void btrfs_release_log_ctx_extents(struct btrfs_log_ctx *ctx);
static inline void btrfs_set_log_full_commit(struct btrfs_trans_handle *trans)
{
diff --git a/fs/btrfs/tree-mod-log.c b/fs/btrfs/tree-mod-log.c
index 3df6153d5d5a..43b3accbed7a 100644
--- a/fs/btrfs/tree-mod-log.c
+++ b/fs/btrfs/tree-mod-log.c
@@ -44,7 +44,7 @@ struct tree_mod_elem {
/*
* Pull a new tree mod seq number for our operation.
*/
-static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info)
+static u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info)
{
return atomic64_inc_return(&fs_info->tree_mod_seq);
}
@@ -170,8 +170,7 @@ static noinline int tree_mod_log_insert(struct btrfs_fs_info *fs_info,
* this until all tree mod log insertions are recorded in the rb tree and then
* write unlock fs_info::tree_mod_log_lock.
*/
-static inline bool tree_mod_dont_log(struct btrfs_fs_info *fs_info,
- struct extent_buffer *eb)
+static bool tree_mod_dont_log(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
{
if (!test_bit(BTRFS_FS_TREE_MOD_LOG_USERS, &fs_info->flags))
return true;
@@ -188,7 +187,7 @@ static inline bool tree_mod_dont_log(struct btrfs_fs_info *fs_info,
}
/* Similar to tree_mod_dont_log, but doesn't acquire any locks. */
-static inline bool tree_mod_need_log(const struct btrfs_fs_info *fs_info,
+static bool tree_mod_need_log(const struct btrfs_fs_info *fs_info,
struct extent_buffer *eb)
{
if (!test_bit(BTRFS_FS_TREE_MOD_LOG_USERS, &fs_info->flags))
@@ -367,9 +366,9 @@ free_tms:
return ret;
}
-static inline int tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
- struct tree_mod_elem **tm_list,
- int nritems)
+static int tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
+ struct tree_mod_elem **tm_list,
+ int nritems)
{
int i, j;
int ret;
diff --git a/fs/btrfs/tree-mod-log.h b/fs/btrfs/tree-mod-log.h
index 94f10afeee97..ff00c8e8a393 100644
--- a/fs/btrfs/tree-mod-log.h
+++ b/fs/btrfs/tree-mod-log.h
@@ -3,7 +3,13 @@
#ifndef BTRFS_TREE_MOD_LOG_H
#define BTRFS_TREE_MOD_LOG_H
-#include "ctree.h"
+#include <linux/list.h>
+
+struct extent_buffer;
+struct btrfs_fs_info;
+struct btrfs_path;
+struct btrfs_root;
+struct btrfs_seq_list;
/* Represents a tree mod log user. */
struct btrfs_seq_list {
diff --git a/fs/btrfs/ulist.c b/fs/btrfs/ulist.c
index b4ac2b0cd235..183863f4bfa4 100644
--- a/fs/btrfs/ulist.c
+++ b/fs/btrfs/ulist.c
@@ -7,7 +7,6 @@
#include <linux/slab.h>
#include "messages.h"
#include "ulist.h"
-#include "ctree.h"
/*
* ulist is a generic data structure to hold a collection of unique u64
diff --git a/fs/btrfs/ulist.h b/fs/btrfs/ulist.h
index b2cef187ea8e..8e200fe1a2dd 100644
--- a/fs/btrfs/ulist.h
+++ b/fs/btrfs/ulist.h
@@ -7,6 +7,7 @@
#ifndef BTRFS_ULIST_H
#define BTRFS_ULIST_H
+#include <linux/types.h>
#include <linux/list.h>
#include <linux/rbtree.h>
diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c
index 5be74f9e47eb..b0aff297d67d 100644
--- a/fs/btrfs/uuid-tree.c
+++ b/fs/btrfs/uuid-tree.c
@@ -9,7 +9,6 @@
#include "ctree.h"
#include "transaction.h"
#include "disk-io.h"
-#include "print-tree.h"
#include "fs.h"
#include "accessors.h"
#include "uuid-tree.h"
@@ -114,7 +113,7 @@ int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, u8 *uuid, u8 type,
ret = btrfs_insert_empty_item(trans, uuid_root, path, &key,
sizeof(subid_le));
- if (ret >= 0) {
+ if (ret == 0) {
/* Add an item for the type for the first time */
eb = path->nodes[0];
slot = path->slots[0];
diff --git a/fs/btrfs/uuid-tree.h b/fs/btrfs/uuid-tree.h
index 5350c87fe2ca..080ede0227ae 100644
--- a/fs/btrfs/uuid-tree.h
+++ b/fs/btrfs/uuid-tree.h
@@ -3,6 +3,11 @@
#ifndef BTRFS_UUID_TREE_H
#define BTRFS_UUID_TREE_H
+#include <linux/types.h>
+
+struct btrfs_trans_handle;
+struct btrfs_fs_info;
+
int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, u8 *uuid, u8 type,
u64 subid);
int btrfs_uuid_tree_remove(struct btrfs_trans_handle *trans, u8 *uuid, u8 type,
diff --git a/fs/btrfs/verity.c b/fs/btrfs/verity.c
index 66e2270b0dae..4042dd6437ae 100644
--- a/fs/btrfs/verity.c
+++ b/fs/btrfs/verity.c
@@ -14,7 +14,6 @@
#include "ctree.h"
#include "btrfs_inode.h"
#include "transaction.h"
-#include "disk-io.h"
#include "locking.h"
#include "fs.h"
#include "accessors.h"
diff --git a/fs/btrfs/verity.h b/fs/btrfs/verity.h
index 91c10f7d0a46..d696659e43e4 100644
--- a/fs/btrfs/verity.h
+++ b/fs/btrfs/verity.h
@@ -3,8 +3,13 @@
#ifndef BTRFS_VERITY_H
#define BTRFS_VERITY_H
+struct inode;
+struct btrfs_inode;
+
#ifdef CONFIG_FS_VERITY
+#include <linux/fsverity.h>
+
extern const struct fsverity_operations btrfs_verityops;
int btrfs_drop_verity_items(struct btrfs_inode *inode);
@@ -12,6 +17,8 @@ int btrfs_get_verity_descriptor(struct inode *inode, void *buf, size_t buf_size)
#else
+#include <linux/errno.h>
+
static inline int btrfs_drop_verity_items(struct btrfs_inode *inode)
{
return 0;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e180da4cc227..a2d07fa3cfdf 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -14,10 +14,8 @@
#include <linux/namei.h>
#include "misc.h"
#include "ctree.h"
-#include "extent_map.h"
#include "disk-io.h"
#include "transaction.h"
-#include "print-tree.h"
#include "volumes.h"
#include "raid56.h"
#include "rcu-string.h"
@@ -769,8 +767,9 @@ static noinline struct btrfs_device *device_list_add(const char *path,
if (same_fsid_diff_dev) {
generate_random_uuid(fs_devices->fsid);
fs_devices->temp_fsid = true;
- pr_info("BTRFS: device %s using temp-fsid %pU\n",
- path, fs_devices->fsid);
+ pr_info("BTRFS: device %s (%d:%d) using temp-fsid %pU\n",
+ path, MAJOR(path_devt), MINOR(path_devt),
+ fs_devices->fsid);
}
mutex_lock(&fs_devices->device_list_mutex);
@@ -799,8 +798,9 @@ static noinline struct btrfs_device *device_list_add(const char *path,
if (fs_devices->opened) {
btrfs_err(NULL,
-"device %s belongs to fsid %pU, and the fs is already mounted, scanned by %s (%d)",
- path, fs_devices->fsid, current->comm,
+"device %s (%d:%d) belongs to fsid %pU, and the fs is already mounted, scanned by %s (%d)",
+ path, MAJOR(path_devt), MINOR(path_devt),
+ fs_devices->fsid, current->comm,
task_pid_nr(current));
mutex_unlock(&fs_devices->device_list_mutex);
return ERR_PTR(-EBUSY);
@@ -826,13 +826,15 @@ static noinline struct btrfs_device *device_list_add(const char *path,
if (disk_super->label[0])
pr_info(
- "BTRFS: device label %s devid %llu transid %llu %s scanned by %s (%d)\n",
+"BTRFS: device label %s devid %llu transid %llu %s (%d:%d) scanned by %s (%d)\n",
disk_super->label, devid, found_transid, path,
+ MAJOR(path_devt), MINOR(path_devt),
current->comm, task_pid_nr(current));
else
pr_info(
- "BTRFS: device fsid %pU devid %llu transid %llu %s scanned by %s (%d)\n",
+"BTRFS: device fsid %pU devid %llu transid %llu %s (%d:%d) scanned by %s (%d)\n",
disk_super->fsid, devid, found_transid, path,
+ MAJOR(path_devt), MINOR(path_devt),
current->comm, task_pid_nr(current));
} else if (!device->name || strcmp(device->name->str, path)) {
@@ -1368,7 +1370,8 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
else
btrfs_free_stale_devices(devt, NULL);
- pr_debug("BTRFS: skip registering single non-seed device %s\n", path);
+ pr_debug("BTRFS: skip registering single non-seed device %s (%d:%d)\n",
+ path, MAJOR(devt), MINOR(devt));
device = NULL;
goto free_disk_super;
}
@@ -1403,7 +1406,7 @@ static bool contains_pending_extent(struct btrfs_device *device, u64 *start,
if (in_range(physical_start, *start, len) ||
in_range(*start, physical_start,
- physical_end - physical_start)) {
+ physical_end + 1 - physical_start)) {
*start = physical_end + 1;
return true;
}
@@ -2032,11 +2035,10 @@ static void btrfs_scratch_superblock(struct btrfs_fs_info *fs_info,
copy_num, ret);
}
-void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
- struct block_device *bdev,
- const char *device_path)
+void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, struct btrfs_device *device)
{
int copy_num;
+ struct block_device *bdev = device->bdev;
if (!bdev)
return;
@@ -2052,7 +2054,7 @@ void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
/* Update ctime/mtime for device path for libblkid */
- update_dev_time(device_path);
+ update_dev_time(device->name->str);
}
int btrfs_rm_device(struct btrfs_fs_info *fs_info,
@@ -2187,8 +2189,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
* just flush the device and let the caller do the final bdev_release.
*/
if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
- btrfs_scratch_superblocks(fs_info, device->bdev,
- device->name->str);
+ btrfs_scratch_superblocks(fs_info, device);
if (device->bdev) {
sync_blockdev(device->bdev);
invalidate_bdev(device->bdev);
@@ -2301,8 +2302,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev)
mutex_unlock(&fs_devices->device_list_mutex);
- btrfs_scratch_superblocks(tgtdev->fs_info, tgtdev->bdev,
- tgtdev->name->str);
+ btrfs_scratch_superblocks(tgtdev->fs_info, tgtdev);
btrfs_close_bdev(tgtdev);
synchronize_rcu();
@@ -3393,7 +3393,17 @@ again:
mutex_unlock(&fs_info->reclaim_bgs_lock);
goto error;
}
- BUG_ON(ret == 0); /* Corruption */
+ if (ret == 0) {
+ /*
+ * On the first search we would find chunk tree with
+ * offset -1, which is not possible. On subsequent
+ * loops this would find an existing item on an invalid
+ * offset (one less than the previous one, wrong
+ * alignment and size).
+ */
+ ret = -EUCLEAN;
+ goto error;
+ }
ret = btrfs_previous_item(chunk_root, path, key.objectid,
key.type);
@@ -3480,6 +3490,44 @@ static int btrfs_may_alloc_data_chunk(struct btrfs_fs_info *fs_info,
return 0;
}
+static void btrfs_disk_balance_args_to_cpu(struct btrfs_balance_args *cpu,
+ const struct btrfs_disk_balance_args *disk)
+{
+ memset(cpu, 0, sizeof(*cpu));
+
+ cpu->profiles = le64_to_cpu(disk->profiles);
+ cpu->usage = le64_to_cpu(disk->usage);
+ cpu->devid = le64_to_cpu(disk->devid);
+ cpu->pstart = le64_to_cpu(disk->pstart);
+ cpu->pend = le64_to_cpu(disk->pend);
+ cpu->vstart = le64_to_cpu(disk->vstart);
+ cpu->vend = le64_to_cpu(disk->vend);
+ cpu->target = le64_to_cpu(disk->target);
+ cpu->flags = le64_to_cpu(disk->flags);
+ cpu->limit = le64_to_cpu(disk->limit);
+ cpu->stripes_min = le32_to_cpu(disk->stripes_min);
+ cpu->stripes_max = le32_to_cpu(disk->stripes_max);
+}
+
+static void btrfs_cpu_balance_args_to_disk(struct btrfs_disk_balance_args *disk,
+ const struct btrfs_balance_args *cpu)
+{
+ memset(disk, 0, sizeof(*disk));
+
+ disk->profiles = cpu_to_le64(cpu->profiles);
+ disk->usage = cpu_to_le64(cpu->usage);
+ disk->devid = cpu_to_le64(cpu->devid);
+ disk->pstart = cpu_to_le64(cpu->pstart);
+ disk->pend = cpu_to_le64(cpu->pend);
+ disk->vstart = cpu_to_le64(cpu->vstart);
+ disk->vend = cpu_to_le64(cpu->vend);
+ disk->target = cpu_to_le64(cpu->target);
+ disk->flags = cpu_to_le64(cpu->flags);
+ disk->limit = cpu_to_le64(cpu->limit);
+ disk->stripes_min = cpu_to_le32(cpu->stripes_min);
+ disk->stripes_max = cpu_to_le32(cpu->stripes_max);
+}
+
static int insert_balance_item(struct btrfs_fs_info *fs_info,
struct btrfs_balance_control *bctl)
{
@@ -3624,7 +3672,7 @@ static void reset_balance_state(struct btrfs_fs_info *fs_info)
struct btrfs_balance_control *bctl = fs_info->balance_ctl;
int ret;
- BUG_ON(!fs_info->balance_ctl);
+ ASSERT(fs_info->balance_ctl);
spin_lock(&fs_info->balance_lock);
fs_info->balance_ctl = NULL;
@@ -5944,6 +5992,7 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info,
struct btrfs_chunk_map *map, int first,
int dev_replace_is_ongoing)
{
+ const enum btrfs_read_policy policy = READ_ONCE(fs_info->fs_devices->read_policy);
int i;
int num_stripes;
int preferred_mirror;
@@ -5958,13 +6007,12 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info,
else
num_stripes = map->num_stripes;
- switch (fs_info->fs_devices->read_policy) {
+ switch (policy) {
default:
/* Shouldn't happen, just warn and use pid instead of failing */
- btrfs_warn_rl(fs_info,
- "unknown read_policy type %u, reset to pid",
- fs_info->fs_devices->read_policy);
- fs_info->fs_devices->read_policy = BTRFS_READ_POLICY_PID;
+ btrfs_warn_rl(fs_info, "unknown read_policy type %u, reset to pid",
+ policy);
+ WRITE_ONCE(fs_info->fs_devices->read_policy, BTRFS_READ_POLICY_PID);
fallthrough;
case BTRFS_READ_POLICY_PID:
preferred_mirror = first + (current->pid % num_stripes);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index a11854912d53..93854609a4d5 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -6,13 +6,28 @@
#ifndef BTRFS_VOLUMES_H
#define BTRFS_VOLUMES_H
+#include <linux/blk_types.h>
+#include <linux/sizes.h>
+#include <linux/atomic.h>
#include <linux/sort.h>
-#include <linux/btrfs.h>
-#include "async-thread.h"
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/log2.h>
+#include <linux/kobject.h>
+#include <linux/refcount.h>
+#include <linux/completion.h>
+#include <linux/rbtree.h>
+#include <uapi/linux/btrfs.h>
#include "messages.h"
-#include "tree-checker.h"
#include "rcu-string.h"
+struct block_device;
+struct bdev_handle;
+struct btrfs_fs_info;
+struct btrfs_block_group;
+struct btrfs_trans_handle;
+struct btrfs_zoned_device_info;
+
#define BTRFS_MAX_DATA_CHUNK_SIZE (10ULL * SZ_1G)
extern struct mutex uuid_mutex;
@@ -77,7 +92,7 @@ enum btrfs_raid_types {
#define BTRFS_DEV_STATE_FLUSH_SENT (4)
#define BTRFS_DEV_STATE_NO_READA (5)
-struct btrfs_zoned_device_info;
+struct btrfs_fs_devices;
struct btrfs_device {
struct list_head dev_list; /* device_list_mutex */
@@ -276,6 +291,25 @@ enum btrfs_read_policy {
BTRFS_NR_READ_POLICY,
};
+#ifdef CONFIG_BTRFS_DEBUG
+/*
+ * Checksum mode - offload it to workqueues or do it synchronously in
+ * btrfs_submit_chunk().
+ */
+enum btrfs_offload_csum_mode {
+ /*
+ * Choose offloading checksum or do it synchronously automatically.
+ * Do it synchronously if the checksum is fast, or offload to workqueues
+ * otherwise.
+ */
+ BTRFS_OFFLOAD_CSUM_AUTO,
+ /* Always offload checksum to workqueues. */
+ BTRFS_OFFLOAD_CSUM_FORCE_ON,
+ /* Never offload checksum to workqueues. */
+ BTRFS_OFFLOAD_CSUM_FORCE_OFF,
+};
+#endif
+
struct btrfs_fs_devices {
u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
@@ -380,6 +414,11 @@ struct btrfs_fs_devices {
/* Policy used to read the mirrored stripes. */
enum btrfs_read_policy read_policy;
+
+#ifdef CONFIG_BTRFS_DEBUG
+ /* Checksum mode - offload it or do it synchronously. */
+ enum btrfs_offload_csum_mode offload_csum_mode;
+#endif
};
#define BTRFS_MAX_DEVS(info) ((BTRFS_MAX_ITEM_SIZE(info) \
@@ -557,8 +596,6 @@ static inline void btrfs_free_chunk_map(struct btrfs_chunk_map *map)
}
}
-struct btrfs_balance_args;
-struct btrfs_balance_progress;
struct btrfs_balance_control {
struct btrfs_balance_args data;
struct btrfs_balance_args meta;
@@ -780,9 +817,7 @@ void btrfs_commit_device_sizes(struct btrfs_transaction *trans);
struct list_head * __attribute_const__ btrfs_get_fs_uuids(void);
bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info,
struct btrfs_device *failing_dev);
-void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
- struct block_device *bdev,
- const char *device_path);
+void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, struct btrfs_device *device);
enum btrfs_raid_types __attribute_const__ btrfs_bg_flags_to_raid_index(u64 flags);
int btrfs_bg_type_to_factor(u64 flags);
diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h
index 118118ca3e1d..b9376ea258ff 100644
--- a/fs/btrfs/xattr.h
+++ b/fs/btrfs/xattr.h
@@ -6,7 +6,11 @@
#ifndef BTRFS_XATTR_H
#define BTRFS_XATTR_H
-#include <linux/xattr.h>
+struct dentry;
+struct inode;
+struct qstr;
+struct xattr_handler;
+struct btrfs_trans_handle;
extern const struct xattr_handler * const btrfs_xattr_handlers[];
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index 8da66ea699e8..e5b3f2003896 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -398,7 +398,7 @@ int zlib_decompress(struct list_head *ws, const u8 *data_in,
out:
if (unlikely(to_copy != destlen)) {
- pr_warn_ratelimited("BTRFS: infalte failed, decompressed=%lu expected=%zu\n",
+ pr_warn_ratelimited("BTRFS: inflate failed, decompressed=%lu expected=%zu\n",
to_copy, destlen);
ret = -EIO;
} else {
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index aea51fd850cd..5a3d5ec75c5a 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -12,10 +12,8 @@
#include "rcu-string.h"
#include "disk-io.h"
#include "block-group.h"
-#include "transaction.h"
#include "dev-replace.h"
#include "space-info.h"
-#include "super.h"
#include "fs.h"
#include "accessors.h"
#include "bio.h"
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index f573bda496fb..77c4321e331f 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -4,12 +4,27 @@
#define BTRFS_ZONED_H
#include <linux/types.h>
+#include <linux/atomic.h>
#include <linux/blkdev.h>
+#include <linux/blkzoned.h>
+#include <linux/errno.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
#include "messages.h"
#include "volumes.h"
#include "disk-io.h"
#include "block-group.h"
#include "btrfs_inode.h"
+#include "fs.h"
+
+struct block_device;
+struct extent_buffer;
+struct btrfs_bio;
+struct btrfs_ordered_extent;
+struct btrfs_fs_info;
+struct btrfs_space_info;
+struct btrfs_eb_write_context;
+struct btrfs_fs_devices;
#define BTRFS_DEFAULT_RECLAIM_THRESH (75)
diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c
index 0d66db8bc1d4..92b3744b819b 100644
--- a/fs/btrfs/zstd.c
+++ b/fs/btrfs/zstd.c
@@ -18,8 +18,9 @@
#include <linux/slab.h>
#include <linux/zstd.h>
#include "misc.h"
+#include "fs.h"
#include "compression.h"
-#include "ctree.h"
+#include "super.h"
#define ZSTD_BTRFS_MAX_WINDOWLOG 17
#define ZSTD_BTRFS_MAX_INPUT (1 << ZSTD_BTRFS_MAX_WINDOWLOG)
@@ -618,80 +619,48 @@ done:
}
int zstd_decompress(struct list_head *ws, const u8 *data_in,
- struct page *dest_page, unsigned long start_byte, size_t srclen,
+ struct page *dest_page, unsigned long dest_pgoff, size_t srclen,
size_t destlen)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
+ struct btrfs_fs_info *fs_info = btrfs_sb(dest_page->mapping->host->i_sb);
+ const u32 sectorsize = fs_info->sectorsize;
zstd_dstream *stream;
int ret = 0;
- size_t ret2;
- unsigned long total_out = 0;
- unsigned long pg_offset = 0;
+ unsigned long to_copy = 0;
stream = zstd_init_dstream(
ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
if (!stream) {
pr_warn("BTRFS: zstd_init_dstream failed\n");
- ret = -EIO;
goto finish;
}
- destlen = min_t(size_t, destlen, PAGE_SIZE);
-
workspace->in_buf.src = data_in;
workspace->in_buf.pos = 0;
workspace->in_buf.size = srclen;
workspace->out_buf.dst = workspace->buf;
workspace->out_buf.pos = 0;
- workspace->out_buf.size = PAGE_SIZE;
-
- ret2 = 1;
- while (pg_offset < destlen
- && workspace->in_buf.pos < workspace->in_buf.size) {
- unsigned long buf_start;
- unsigned long buf_offset;
- unsigned long bytes;
-
- /* Check if the frame is over and we still need more input */
- if (ret2 == 0) {
- pr_debug("BTRFS: zstd_decompress_stream ended early\n");
- ret = -EIO;
- goto finish;
- }
- ret2 = zstd_decompress_stream(stream, &workspace->out_buf,
- &workspace->in_buf);
- if (zstd_is_error(ret2)) {
- pr_debug("BTRFS: zstd_decompress_stream returned %d\n",
- zstd_get_error_code(ret2));
- ret = -EIO;
- goto finish;
- }
-
- buf_start = total_out;
- total_out += workspace->out_buf.pos;
- workspace->out_buf.pos = 0;
-
- if (total_out <= start_byte)
- continue;
-
- if (total_out > start_byte && buf_start < start_byte)
- buf_offset = start_byte - buf_start;
- else
- buf_offset = 0;
-
- bytes = min_t(unsigned long, destlen - pg_offset,
- workspace->out_buf.size - buf_offset);
-
- memcpy_to_page(dest_page, pg_offset,
- workspace->out_buf.dst + buf_offset, bytes);
-
- pg_offset += bytes;
+ workspace->out_buf.size = sectorsize;
+
+ /*
+ * Since both input and output buffers should not exceed one sector,
+ * one call should end the decompression.
+ */
+ ret = zstd_decompress_stream(stream, &workspace->out_buf, &workspace->in_buf);
+ if (zstd_is_error(ret)) {
+ pr_warn_ratelimited("BTRFS: zstd_decompress_stream return %d\n",
+ zstd_get_error_code(ret));
+ goto finish;
}
- ret = 0;
+ to_copy = workspace->out_buf.pos;
+ memcpy_to_page(dest_page, dest_pgoff, workspace->out_buf.dst, to_copy);
finish:
- if (pg_offset < destlen) {
- memzero_page(dest_page, pg_offset, destlen - pg_offset);
+ /* Error or early end. */
+ if (unlikely(to_copy < destlen)) {
+ ret = -EIO;
+ memzero_page(dest_page, dest_pgoff + to_copy, destlen - to_copy);
}
return ret;
}