summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-02-21 10:00:39 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2021-02-21 10:00:39 -0800
commit6f3952cbe00b74739f540981d1afe84cd4dac879 (patch)
tree6dacecd7ea8dc06d985c7ea417391c1ae2aec574 /include
parentf9d58de23152f2c16f326d7e014cfa2933b00304 (diff)
parent9d294a685fbcb256ce8c5f7fd88a7596d0f52a8a (diff)
Merge tag 'for-5.12-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba: "This brings updates of space handling, performance improvements or bug fixes. The subpage block size and zoned mode features have reached state where they're usable but with limitations. Performance or related: - do not block on deleted block group mutex in the cleaner, avoids some long stalls - improved flushing: make it work better with ticket space reservations and avoid excessive transaction commits in some scenarios, slightly improves throughput for random write load - preemptive background flushing: separate the logic from ticket reservations, improve the accounting and decisions when to flush in low space conditions - less lock contention related to running delayed refs, let just one thread do the flushing when there are many inside transaction commit - dbench workload improvements: avoid unnecessary work when logging inodes, fewer fallbacks to transaction commit and thus less waiting for it (+7% throughput, -20% latency) Core: - subpage block size - currently read-only support - refactor and generalize code where sectorsize is assumed to be page size, add the subpage handling everywhere - the read-write support is on the way, page sizes are still limited to 4K or 64K - zoned mode, first working version but with limitations - SMR/ZBC/ZNS friendly allocation mode, utilizing the "no fixed location for structures" and chunked allocation - superblock as the only fixed data structure needs special handling, uses 2 consecutive zones as a ring buffer - tree-log support with a dedicated block group to avoid unordered writes - emulated zones on non-zoned devices - not yet working - all non-single block group profiles, requires more zone write pointer synchronization between the multiple block groups - fitrim due to dependency on space cache, can be implemented Fixes: - ref-verify: proper tree owner and node level tracking - fix pinned byte accounting, causing some early ENOSPC now more likely due to other changes in delayed refs Other: - error handling fixes and improvements - more error injection points - more function documentation - more and updated tracepoints - subset of W=1 checked by default - update comments to allow more automatic kdoc parameter checks" * tag 'for-5.12-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (144 commits) btrfs: zoned: enable to mount ZONED incompat flag btrfs: zoned: deal with holes writing out tree-log pages btrfs: zoned: reorder log node allocation on zoned filesystem btrfs: zoned: serialize log transaction on zoned filesystems btrfs: zoned: extend zoned allocator to use dedicated tree-log block group btrfs: split alloc_log_tree() btrfs: zoned: relocate block group to repair IO failure in zoned filesystems btrfs: zoned: enable relocation on a zoned filesystem btrfs: zoned: support dev-replace in zoned filesystems btrfs: zoned: implement copying for zoned device-replace btrfs: zoned: implement cloning for zoned device-replace btrfs: zoned: mark block groups to copy for device-replace btrfs: zoned: do not use async metadata checksum on zoned filesystems btrfs: zoned: wait for existing extents before truncating btrfs: zoned: serialize metadata IO btrfs: zoned: introduce dedicated data write path for zoned filesystems btrfs: zoned: enable zone append writing for direct IO btrfs: zoned: use ZONE_APPEND write for zoned mode btrfs: save irq flags when looking up an ordered extent btrfs: zoned: cache if block group is on a sequential zone ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/bio.h2
-rw-r--r--include/linux/iomap.h1
-rw-r--r--include/linux/zstd.h8
-rw-r--r--include/trace/events/btrfs.h111
4 files changed, 110 insertions, 12 deletions
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 1edda614f7ce..de62911473bb 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -455,6 +455,8 @@ void bio_chain(struct bio *, struct bio *);
extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int);
extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
unsigned int, unsigned int);
+int bio_add_zone_append_page(struct bio *bio, struct page *page,
+ unsigned int len, unsigned int offset);
bool __bio_try_merge_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int off, bool *same_page);
void __bio_add_page(struct bio *bio, struct page *page,
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 5bd3cac4df9c..8ebb1fa6f3b7 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -55,6 +55,7 @@ struct vm_fault;
#define IOMAP_F_SHARED 0x04
#define IOMAP_F_MERGED 0x08
#define IOMAP_F_BUFFER_HEAD 0x10
+#define IOMAP_F_ZONE_APPEND 0x20
/*
* Flags set by the core iomap code during operations:
diff --git a/include/linux/zstd.h b/include/linux/zstd.h
index 249575e2485f..e87f78c9b19c 100644
--- a/include/linux/zstd.h
+++ b/include/linux/zstd.h
@@ -791,11 +791,11 @@ size_t ZSTD_DStreamOutSize(void);
/* for static allocation */
#define ZSTD_FRAMEHEADERSIZE_MAX 18
#define ZSTD_FRAMEHEADERSIZE_MIN 6
-static const size_t ZSTD_frameHeaderSize_prefix = 5;
-static const size_t ZSTD_frameHeaderSize_min = ZSTD_FRAMEHEADERSIZE_MIN;
-static const size_t ZSTD_frameHeaderSize_max = ZSTD_FRAMEHEADERSIZE_MAX;
+#define ZSTD_frameHeaderSize_prefix 5
+#define ZSTD_frameHeaderSize_min ZSTD_FRAMEHEADERSIZE_MIN
+#define ZSTD_frameHeaderSize_max ZSTD_FRAMEHEADERSIZE_MAX
/* magic number + skippable frame length */
-static const size_t ZSTD_skippableHeaderSize = 8;
+#define ZSTD_skippableHeaderSize 8
/*-*************************************
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index ecd24c719de4..0551ea65374f 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -99,7 +99,8 @@ struct btrfs_space_info;
EM( ALLOC_CHUNK, "ALLOC_CHUNK") \
EM( ALLOC_CHUNK_FORCE, "ALLOC_CHUNK_FORCE") \
EM( RUN_DELAYED_IPUTS, "RUN_DELAYED_IPUTS") \
- EMe(COMMIT_TRANS, "COMMIT_TRANS")
+ EM( COMMIT_TRANS, "COMMIT_TRANS") \
+ EMe(FORCE_COMMIT_TRANS, "FORCE_COMMIT_TRANS")
/*
* First define the enums in the above macros to be exported to userspace via
@@ -499,12 +500,13 @@ DEFINE_EVENT(
#define show_ordered_flags(flags) \
__print_flags(flags, "|", \
- { (1 << BTRFS_ORDERED_IO_DONE), "IO_DONE" }, \
- { (1 << BTRFS_ORDERED_COMPLETE), "COMPLETE" }, \
+ { (1 << BTRFS_ORDERED_REGULAR), "REGULAR" }, \
{ (1 << BTRFS_ORDERED_NOCOW), "NOCOW" }, \
- { (1 << BTRFS_ORDERED_COMPRESSED), "COMPRESSED" }, \
{ (1 << BTRFS_ORDERED_PREALLOC), "PREALLOC" }, \
+ { (1 << BTRFS_ORDERED_COMPRESSED), "COMPRESSED" }, \
{ (1 << BTRFS_ORDERED_DIRECT), "DIRECT" }, \
+ { (1 << BTRFS_ORDERED_IO_DONE), "IO_DONE" }, \
+ { (1 << BTRFS_ORDERED_COMPLETE), "COMPLETE" }, \
{ (1 << BTRFS_ORDERED_IOERR), "IOERR" }, \
{ (1 << BTRFS_ORDERED_TRUNCATED), "TRUNCATED" })
@@ -1111,15 +1113,16 @@ TRACE_EVENT(btrfs_trigger_flush,
TRACE_EVENT(btrfs_flush_space,
TP_PROTO(const struct btrfs_fs_info *fs_info, u64 flags, u64 num_bytes,
- int state, int ret),
+ int state, int ret, bool for_preempt),
- TP_ARGS(fs_info, flags, num_bytes, state, ret),
+ TP_ARGS(fs_info, flags, num_bytes, state, ret, for_preempt),
TP_STRUCT__entry_btrfs(
__field( u64, flags )
__field( u64, num_bytes )
__field( int, state )
__field( int, ret )
+ __field( bool, for_preempt )
),
TP_fast_assign_btrfs(fs_info,
@@ -1127,15 +1130,16 @@ TRACE_EVENT(btrfs_flush_space,
__entry->num_bytes = num_bytes;
__entry->state = state;
__entry->ret = ret;
+ __entry->for_preempt = for_preempt;
),
- TP_printk_btrfs("state=%d(%s) flags=%llu(%s) num_bytes=%llu ret=%d",
+ TP_printk_btrfs("state=%d(%s) flags=%llu(%s) num_bytes=%llu ret=%d for_preempt=%d",
__entry->state,
__print_symbolic(__entry->state, FLUSH_STATES),
__entry->flags,
__print_flags((unsigned long)__entry->flags, "|",
BTRFS_GROUP_FLAGS),
- __entry->num_bytes, __entry->ret)
+ __entry->num_bytes, __entry->ret, __entry->for_preempt)
);
DECLARE_EVENT_CLASS(btrfs__reserved_extent,
@@ -2025,6 +2029,97 @@ TRACE_EVENT(btrfs_convert_extent_bit,
__print_flags(__entry->clear_bits, "|", EXTENT_FLAGS))
);
+DECLARE_EVENT_CLASS(btrfs_dump_space_info,
+ TP_PROTO(const struct btrfs_fs_info *fs_info,
+ const struct btrfs_space_info *sinfo),
+
+ TP_ARGS(fs_info, sinfo),
+
+ TP_STRUCT__entry_btrfs(
+ __field( u64, flags )
+ __field( u64, total_bytes )
+ __field( u64, bytes_used )
+ __field( u64, bytes_pinned )
+ __field( u64, bytes_reserved )
+ __field( u64, bytes_may_use )
+ __field( u64, bytes_readonly )
+ __field( u64, reclaim_size )
+ __field( int, clamp )
+ __field( u64, global_reserved )
+ __field( u64, trans_reserved )
+ __field( u64, delayed_refs_reserved )
+ __field( u64, delayed_reserved )
+ __field( u64, free_chunk_space )
+ ),
+
+ TP_fast_assign_btrfs(fs_info,
+ __entry->flags = sinfo->flags;
+ __entry->total_bytes = sinfo->total_bytes;
+ __entry->bytes_used = sinfo->bytes_used;
+ __entry->bytes_pinned = sinfo->bytes_pinned;
+ __entry->bytes_reserved = sinfo->bytes_reserved;
+ __entry->bytes_may_use = sinfo->bytes_may_use;
+ __entry->bytes_readonly = sinfo->bytes_readonly;
+ __entry->reclaim_size = sinfo->reclaim_size;
+ __entry->clamp = sinfo->clamp;
+ __entry->global_reserved = fs_info->global_block_rsv.reserved;
+ __entry->trans_reserved = fs_info->trans_block_rsv.reserved;
+ __entry->delayed_refs_reserved = fs_info->delayed_refs_rsv.reserved;
+ __entry->delayed_reserved = fs_info->delayed_block_rsv.reserved;
+ __entry->free_chunk_space = atomic64_read(&fs_info->free_chunk_space);
+ ),
+
+ TP_printk_btrfs("flags=%s total_bytes=%llu bytes_used=%llu "
+ "bytes_pinned=%llu bytes_reserved=%llu "
+ "bytes_may_use=%llu bytes_readonly=%llu "
+ "reclaim_size=%llu clamp=%d global_reserved=%llu "
+ "trans_reserved=%llu delayed_refs_reserved=%llu "
+ "delayed_reserved=%llu chunk_free_space=%llu",
+ __print_flags(__entry->flags, "|", BTRFS_GROUP_FLAGS),
+ __entry->total_bytes, __entry->bytes_used,
+ __entry->bytes_pinned, __entry->bytes_reserved,
+ __entry->bytes_may_use, __entry->bytes_readonly,
+ __entry->reclaim_size, __entry->clamp,
+ __entry->global_reserved, __entry->trans_reserved,
+ __entry->delayed_refs_reserved,
+ __entry->delayed_reserved, __entry->free_chunk_space)
+);
+
+DEFINE_EVENT(btrfs_dump_space_info, btrfs_done_preemptive_reclaim,
+ TP_PROTO(const struct btrfs_fs_info *fs_info,
+ const struct btrfs_space_info *sinfo),
+ TP_ARGS(fs_info, sinfo)
+);
+
+TRACE_EVENT(btrfs_reserve_ticket,
+ TP_PROTO(const struct btrfs_fs_info *fs_info, u64 flags, u64 bytes,
+ u64 start_ns, int flush, int error),
+
+ TP_ARGS(fs_info, flags, bytes, start_ns, flush, error),
+
+ TP_STRUCT__entry_btrfs(
+ __field( u64, flags )
+ __field( u64, bytes )
+ __field( u64, start_ns )
+ __field( int, flush )
+ __field( int, error )
+ ),
+
+ TP_fast_assign_btrfs(fs_info,
+ __entry->flags = flags;
+ __entry->bytes = bytes;
+ __entry->start_ns = start_ns;
+ __entry->flush = flush;
+ __entry->error = error;
+ ),
+
+ TP_printk_btrfs("flags=%s bytes=%llu start_ns=%llu flush=%s error=%d",
+ __print_flags(__entry->flags, "|", BTRFS_GROUP_FLAGS),
+ __entry->bytes, __entry->start_ns,
+ __print_symbolic(__entry->flush, FLUSH_ACTIONS),
+ __entry->error)
+);
+
DECLARE_EVENT_CLASS(btrfs_sleep_tree_lock,
TP_PROTO(const struct extent_buffer *eb, u64 start_ns),