From 2263639f96f24a121ec9f037981b81daf5a8d60a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 23 Jan 2024 15:24:46 -0700 Subject: iov_iter: streamline iovec/bvec alignment iteration Rewrite the alignment checking iterators for iovec and bvec to be easier to read, and also significantly more compact in terms of generated code. This saves 270 bytes of text on x86-64 for me (with clang-18) and 224 bytes on arm64 (with gcc-13). In profiles, also saves a bit of time as well for the same workload: 0.81% -0.18% [kernel.vmlinux] [k] iov_iter_aligned_bvec 0.48% -0.09% [kernel.vmlinux] [k] iov_iter_is_aligned which is a nice side benefit as well. Signed-off-by: Jens Axboe Link: https://lore.kernel.org/r/544b31f7-6d4b-42f5-a544-1420501f081f@kernel.dk Reviewed-by: Keith Busch Signed-off-by: Christian Brauner v2: do the other half of the iterators too, as suggested by Keith. This further saves some text. --- lib/iov_iter.c | 55 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 27 deletions(-) (limited to 'lib') diff --git a/lib/iov_iter.c b/lib/iov_iter.c index e0aa6b440ca5..15f5040709c3 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -714,12 +714,11 @@ EXPORT_SYMBOL(iov_iter_discard); static bool iov_iter_aligned_iovec(const struct iov_iter *i, unsigned addr_mask, unsigned len_mask) { + const struct iovec *iov = iter_iov(i); size_t size = i->count; size_t skip = i->iov_offset; - unsigned k; - for (k = 0; k < i->nr_segs; k++, skip = 0) { - const struct iovec *iov = iter_iov(i) + k; + do { size_t len = iov->iov_len - skip; if (len > size) @@ -729,34 +728,36 @@ static bool iov_iter_aligned_iovec(const struct iov_iter *i, unsigned addr_mask, if ((unsigned long)(iov->iov_base + skip) & addr_mask) return false; + iov++; size -= len; - if (!size) - break; - } + skip = 0; + } while (size); + return true; } static bool iov_iter_aligned_bvec(const struct iov_iter *i, unsigned addr_mask, unsigned len_mask) { - size_t size = i->count; + const struct bio_vec *bvec = i->bvec; unsigned skip = i->iov_offset; - unsigned k; + size_t size = i->count; - for (k = 0; k < i->nr_segs; k++, skip = 0) { - size_t len = i->bvec[k].bv_len - skip; + do { + size_t len = bvec->bv_len; if (len > size) len = size; if (len & len_mask) return false; - if ((unsigned long)(i->bvec[k].bv_offset + skip) & addr_mask) + if ((unsigned long)(bvec->bv_offset + skip) & addr_mask) return false; + bvec++; size -= len; - if (!size) - break; - } + skip = 0; + } while (size); + return true; } @@ -800,13 +801,12 @@ EXPORT_SYMBOL_GPL(iov_iter_is_aligned); static unsigned long iov_iter_alignment_iovec(const struct iov_iter *i) { + const struct iovec *iov = iter_iov(i); unsigned long res = 0; size_t size = i->count; size_t skip = i->iov_offset; - unsigned k; - for (k = 0; k < i->nr_segs; k++, skip = 0) { - const struct iovec *iov = iter_iov(i) + k; + do { size_t len = iov->iov_len - skip; if (len) { res |= (unsigned long)iov->iov_base + skip; @@ -814,30 +814,31 @@ static unsigned long iov_iter_alignment_iovec(const struct iov_iter *i) len = size; res |= len; size -= len; - if (!size) - break; } - } + iov++; + skip = 0; + } while (size); return res; } static unsigned long iov_iter_alignment_bvec(const struct iov_iter *i) { + const struct bio_vec *bvec = i->bvec; unsigned res = 0; size_t size = i->count; unsigned skip = i->iov_offset; - unsigned k; - for (k = 0; k < i->nr_segs; k++, skip = 0) { - size_t len = i->bvec[k].bv_len - skip; - res |= (unsigned long)i->bvec[k].bv_offset + skip; + do { + size_t len = bvec->bv_len - skip; + res |= (unsigned long)bvec->bv_offset + skip; if (len > size) len = size; res |= len; + bvec++; size -= len; - if (!size) - break; - } + skip = 0; + } while (size); + return res; } -- cgit v1.2.3-70-g09d2 From bd8c239c0502e70c92cf9496846bcbec7cd5702f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 29 Jan 2024 10:37:29 -0800 Subject: iov_iter: Avoid wrap-around instrumentation in copy_compat_iovec_from_user() The loop counter "i" in copy_compat_iovec_from_user() is an int, but because the nr_segs argument is unsigned long, the signed overflow sanitizer got worried "i" could wrap around. Instead of making "i" an unsigned long (which may enlarge the type size), switch both nr_segs and i to u32. There is no truncation with nr_segs since it is never larger than UIO_MAXIOV anyway. This keeps sanitizer instrumentation[1] out of a UACCESS path: vmlinux.o: warning: objtool: copy_compat_iovec_from_user+0xa9: call to __ubsan_handle_add_overflow() with UACCESS enabled Link: https://github.com/KSPP/linux/issues/26 [1] Cc: Christian Brauner Cc: Alexander Viro Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20240129183729.work.991-kees@kernel.org Signed-off-by: Christian Brauner --- lib/iov_iter.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 15f5040709c3..73715d10c812 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1167,11 +1167,12 @@ const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags) EXPORT_SYMBOL(dup_iter); static __noclone int copy_compat_iovec_from_user(struct iovec *iov, - const struct iovec __user *uvec, unsigned long nr_segs) + const struct iovec __user *uvec, u32 nr_segs) { const struct compat_iovec __user *uiov = (const struct compat_iovec __user *)uvec; - int ret = -EFAULT, i; + int ret = -EFAULT; + u32 i; if (!user_access_begin(uiov, nr_segs * sizeof(*uiov))) return -EFAULT; -- cgit v1.2.3-70-g09d2 From 9b6713cc75229f25552c643083cbdbfb771e5bca Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 17 Feb 2024 15:24:01 -0500 Subject: maple_tree: Add mtree_alloc_cyclic() I need a cyclic allocator for the simple_offset implementation in fs/libfs.c. Signed-off-by: Chuck Lever Link: https://lore.kernel.org/r/170820144179.6328.12838600511394432325.stgit@91.116.238.104.host.secureserver.net Signed-off-by: Christian Brauner --- include/linux/maple_tree.h | 7 ++++ lib/maple_tree.c | 93 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+) (limited to 'lib') diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index b3d63123b945..a53ad4dabd7e 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -171,6 +171,7 @@ enum maple_type { #define MT_FLAGS_LOCK_IRQ 0x100 #define MT_FLAGS_LOCK_BH 0x200 #define MT_FLAGS_LOCK_EXTERN 0x300 +#define MT_FLAGS_ALLOC_WRAPPED 0x0800 #define MAPLE_HEIGHT_MAX 31 @@ -319,6 +320,9 @@ int mtree_insert_range(struct maple_tree *mt, unsigned long first, int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp, void *entry, unsigned long size, unsigned long min, unsigned long max, gfp_t gfp); +int mtree_alloc_cyclic(struct maple_tree *mt, unsigned long *startp, + void *entry, unsigned long range_lo, unsigned long range_hi, + unsigned long *next, gfp_t gfp); int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp, void *entry, unsigned long size, unsigned long min, unsigned long max, gfp_t gfp); @@ -499,6 +503,9 @@ void *mas_find_range(struct ma_state *mas, unsigned long max); void *mas_find_rev(struct ma_state *mas, unsigned long min); void *mas_find_range_rev(struct ma_state *mas, unsigned long max); int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp); +int mas_alloc_cyclic(struct ma_state *mas, unsigned long *startp, + void *entry, unsigned long range_lo, unsigned long range_hi, + unsigned long *next, gfp_t gfp); bool mas_nomem(struct ma_state *mas, gfp_t gfp); void mas_pause(struct ma_state *mas); diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 6f241bb38799..af0970288727 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -4290,6 +4290,56 @@ exists: } +/** + * mas_alloc_cyclic() - Internal call to find somewhere to store an entry + * @mas: The maple state. + * @startp: Pointer to ID. + * @range_lo: Lower bound of range to search. + * @range_hi: Upper bound of range to search. + * @entry: The entry to store. + * @next: Pointer to next ID to allocate. + * @gfp: The GFP_FLAGS to use for allocations. + * + * Return: 0 if the allocation succeeded without wrapping, 1 if the + * allocation succeeded after wrapping, or -EBUSY if there are no + * free entries. + */ +int mas_alloc_cyclic(struct ma_state *mas, unsigned long *startp, + void *entry, unsigned long range_lo, unsigned long range_hi, + unsigned long *next, gfp_t gfp) +{ + unsigned long min = range_lo; + int ret = 0; + + range_lo = max(min, *next); + ret = mas_empty_area(mas, range_lo, range_hi, 1); + if ((mas->tree->ma_flags & MT_FLAGS_ALLOC_WRAPPED) && ret == 0) { + mas->tree->ma_flags &= ~MT_FLAGS_ALLOC_WRAPPED; + ret = 1; + } + if (ret < 0 && range_lo > min) { + ret = mas_empty_area(mas, min, range_hi, 1); + if (ret == 0) + ret = 1; + } + if (ret < 0) + return ret; + + do { + mas_insert(mas, entry); + } while (mas_nomem(mas, gfp)); + if (mas_is_err(mas)) + return xa_err(mas->node); + + *startp = mas->index; + *next = *startp + 1; + if (*next == 0) + mas->tree->ma_flags |= MT_FLAGS_ALLOC_WRAPPED; + + return ret; +} +EXPORT_SYMBOL(mas_alloc_cyclic); + static __always_inline void mas_rewalk(struct ma_state *mas, unsigned long index) { retry: @@ -6443,6 +6493,49 @@ unlock: } EXPORT_SYMBOL(mtree_alloc_range); +/** + * mtree_alloc_cyclic() - Find somewhere to store this entry in the tree. + * @mt: The maple tree. + * @startp: Pointer to ID. + * @range_lo: Lower bound of range to search. + * @range_hi: Upper bound of range to search. + * @entry: The entry to store. + * @next: Pointer to next ID to allocate. + * @gfp: The GFP_FLAGS to use for allocations. + * + * Finds an empty entry in @mt after @next, stores the new index into + * the @id pointer, stores the entry at that index, then updates @next. + * + * @mt must be initialized with the MT_FLAGS_ALLOC_RANGE flag. + * + * Context: Any context. Takes and releases the mt.lock. May sleep if + * the @gfp flags permit. + * + * Return: 0 if the allocation succeeded without wrapping, 1 if the + * allocation succeeded after wrapping, -ENOMEM if memory could not be + * allocated, -EINVAL if @mt cannot be used, or -EBUSY if there are no + * free entries. + */ +int mtree_alloc_cyclic(struct maple_tree *mt, unsigned long *startp, + void *entry, unsigned long range_lo, unsigned long range_hi, + unsigned long *next, gfp_t gfp) +{ + int ret; + + MA_STATE(mas, mt, 0, 0); + + if (!mt_is_alloc(mt)) + return -EINVAL; + if (WARN_ON_ONCE(mt_is_reserved(entry))) + return -EINVAL; + mtree_lock(mt); + ret = mas_alloc_cyclic(&mas, startp, entry, range_lo, range_hi, + next, gfp); + mtree_unlock(mt); + return ret; +} +EXPORT_SYMBOL(mtree_alloc_cyclic); + int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp, void *entry, unsigned long size, unsigned long min, unsigned long max, gfp_t gfp) -- cgit v1.2.3-70-g09d2 From f92e1a829d64dd66fa173c6934f03817d9e68d43 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Sat, 17 Feb 2024 15:24:08 -0500 Subject: test_maple_tree: testing the cyclic allocation This tests the interactions of the cyclic allocations, the maple state index and last, and overflow. Signed-off-by: Liam R. Howlett Signed-off-by: Chuck Lever Link: https://lore.kernel.org/r/170820144894.6328.13052830860966450674.stgit@91.116.238.104.host.secureserver.net Signed-off-by: Christian Brauner --- lib/test_maple_tree.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'lib') diff --git a/lib/test_maple_tree.c b/lib/test_maple_tree.c index 29185ac5c727..399380db449c 100644 --- a/lib/test_maple_tree.c +++ b/lib/test_maple_tree.c @@ -3599,6 +3599,45 @@ static noinline void __init check_state_handling(struct maple_tree *mt) mas_unlock(&mas); } +static noinline void __init alloc_cyclic_testing(struct maple_tree *mt) +{ + unsigned long location; + unsigned long next; + int ret = 0; + MA_STATE(mas, mt, 0, 0); + + next = 0; + mtree_lock(mt); + for (int i = 0; i < 100; i++) { + mas_alloc_cyclic(&mas, &location, mt, 2, ULONG_MAX, &next, GFP_KERNEL); + MAS_BUG_ON(&mas, i != location - 2); + MAS_BUG_ON(&mas, mas.index != location); + MAS_BUG_ON(&mas, mas.last != location); + MAS_BUG_ON(&mas, i != next - 3); + } + + mtree_unlock(mt); + mtree_destroy(mt); + next = 0; + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + for (int i = 0; i < 100; i++) { + mtree_alloc_cyclic(mt, &location, mt, 2, ULONG_MAX, &next, GFP_KERNEL); + MT_BUG_ON(mt, i != location - 2); + MT_BUG_ON(mt, i != next - 3); + MT_BUG_ON(mt, mtree_load(mt, location) != mt); + } + + mtree_destroy(mt); + /* Overflow test */ + next = ULONG_MAX - 1; + ret = mtree_alloc_cyclic(mt, &location, mt, 2, ULONG_MAX, &next, GFP_KERNEL); + MT_BUG_ON(mt, ret != 0); + ret = mtree_alloc_cyclic(mt, &location, mt, 2, ULONG_MAX, &next, GFP_KERNEL); + MT_BUG_ON(mt, ret != 0); + ret = mtree_alloc_cyclic(mt, &location, mt, 2, ULONG_MAX, &next, GFP_KERNEL); + MT_BUG_ON(mt, ret != 1); +} + static DEFINE_MTREE(tree); static int __init maple_tree_seed(void) { @@ -3880,6 +3919,11 @@ static int __init maple_tree_seed(void) check_state_handling(&tree); mtree_destroy(&tree); + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); + alloc_cyclic_testing(&tree); + mtree_destroy(&tree); + + #if defined(BENCH) skip: #endif -- cgit v1.2.3-70-g09d2