From 4cc0473d7754d387680bdf0728eb29f0ec8834bf Mon Sep 17 00:00:00 2001
From: Yafang Shao
Date: Mon, 7 Oct 2024 22:49:05 +0800
Subject: get rid of __get_task_comm()

Patch series "Improve the copy of task comm", v8.

Using {memcpy,strncpy,strcpy,kstrdup} to copy the task comm relies on the
length of the task comm.  Changes in the task comm could result in an
overflowed destination string.  Therefore, we should explicitly ensure the
destination string is always NUL-terminated, regardless of the task comm.
This approach will facilitate future extensions to the task comm.

As suggested by Linus [0], we can identify all relevant code with the
following git grep commands:

  git grep 'memcpy.*->comm\>'
  git grep 'kstrdup.*->comm\>'
  git grep 'strncpy.*->comm\>'
  git grep 'strcpy.*->comm\>'

PATCH #2~#4: memcpy
PATCH #5~#6: kstrdup
PATCH #7: strcpy

Please note that strncpy() is not included in this series as it is being
tracked by another effort. [1]

This patch (of 7):

We want to eliminate the use of __get_task_comm() for the following
reasons:

- The task_lock() is unnecessary

  Quoted from Linus [0]:
  : Since user space can randomly change their names anyway, using locking
  : was always wrong for readers (for writers it probably does make sense
  : to have some lock - although practically speaking nobody cares there
  : either, but at least for a writer some kind of race could have
  : long-term mixed results

Link: https://lkml.kernel.org/r/20241007144911.27693-1-laoar.shao@gmail.com
Link: https://lkml.kernel.org/r/20241007144911.27693-2-laoar.shao@gmail.com
Link: https://lore.kernel.org/all/CAHk-=wivfrF0_zvf+oj6==Sh=-npJooP8chLPEfaFV0oNYTTBA@mail.gmail.com [0]
Link: https://lore.kernel.org/all/CAHk-=whWtUC-AjmGJveAETKOMeMFSTwKwu99v7+b6AyHMmaDFA@mail.gmail.com/
Link: https://lore.kernel.org/all/CAHk-=wjAmmHUg6vho1KjzQi2=psR30+CogFd4aXrThr2gsiS4g@mail.gmail.com/ [0]
Link: https://github.com/KSPP/linux/issues/90 [1]
Signed-off-by: Yafang Shao
Suggested-by: Linus Torvalds
Cc: Alexander Viro
Cc: Christian Brauner
Cc: Jan Kara
Cc: Eric Biederman
Cc: Kees Cook
Cc: Alexei Starovoitov
Cc: Matus Jokay
Cc: Alejandro Colomar
Cc: "Serge E. Hallyn"
Cc: Catalin Marinas
Cc: Justin Stitt
Cc: Steven Rostedt (Google)
Cc: Tetsuo Handa
Cc: Andy Shevchenko
Cc: Daniel Vetter
Cc: David Airlie
Cc: Eric Paris
Cc: James Morris
Cc: Maarten Lankhorst
Cc: Matthew Wilcox
Cc: Maxime Ripard
Cc: Ondrej Mosnacek
Cc: Paul Moore
Cc: Quentin Monnet
Cc: Simon Horman
Cc: Stephen Smalley
Cc: Thomas Zimmermann
Signed-off-by: Andrew Morton
---
 include/linux/sched.h | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index bb343136ddd0..67718d5591dd 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1121,9 +1121,12 @@ struct task_struct {
 	/*
 	 * executable name, excluding path.
 	 *
-	 * - normally initialized setup_new_exec()
-	 * - access it with [gs]et_task_comm()
-	 * - lock it with task_lock()
+	 * - normally initialized begin_new_exec()
+	 * - set it with set_task_comm()
+	 *   - strscpy_pad() to ensure it is always NUL-terminated and
+	 *     zero-padded
+	 *   - task_lock() to ensure the operation is atomic and the name is
+	 *     fully updated.
 	 */
 	char comm[TASK_COMM_LEN];

@@ -1938,10 +1941,23 @@ static inline void set_task_comm(struct task_struct *tsk, const char *from)
 	__set_task_comm(tsk, from, false);
 }

-extern char *__get_task_comm(char *to, size_t len, struct task_struct *tsk);
+/*
+ * - Why not use task_lock()?
+ * User space can randomly change their names anyway, so locking for readers + * doesn't make sense. For writers, locking is probably necessary, as a race + * condition could lead to long-term mixed results. + * The strscpy_pad() in __set_task_comm() can ensure that the task comm is + * always NUL-terminated and zero-padded. Therefore the race condition between + * reader and writer is not an issue. + * + * - BUILD_BUG_ON() can help prevent the buf from being truncated. + * Since the callers don't perform any return value checks, this safeguard is + * necessary. + */ #define get_task_comm(buf, tsk) ({ \ - BUILD_BUG_ON(sizeof(buf) != TASK_COMM_LEN); \ - __get_task_comm(buf, sizeof(buf), tsk); \ + BUILD_BUG_ON(sizeof(buf) < TASK_COMM_LEN); \ + strscpy_pad(buf, (tsk)->comm); \ + buf; \ }) #ifdef CONFIG_SMP -- cgit v1.2.3-70-g09d2 From f2fa0fd4e7db8326a77618962714924b64f5f889 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sat, 12 Oct 2024 19:52:53 +0200 Subject: reboot: move reboot_notifier_list to kernel/reboot.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All the functions related to the reboot notifier list are in kernel/reboot.c. Move the list itself, too. As there are no direct users anymore, make the declaration static. Link: https://lkml.kernel.org/r/20241012-reboot_notifier_list-v1-1-6093bb9455ce@weissschuh.net Signed-off-by: Thomas Weißschuh Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton --- include/linux/notifier.h | 2 -- kernel/notifier.c | 8 -------- kernel/reboot.c | 7 +++++++ 3 files changed, 7 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/notifier.h b/include/linux/notifier.h index 45702bdcbceb..b42e64734968 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -237,7 +237,5 @@ static inline int notifier_to_errno(int ret) #define KBD_KEYSYM 0x0004 /* Keyboard keysym */ #define KBD_POST_KEYSYM 0x0005 /* Called after keyboard keysym interpretation */ -extern struct blocking_notifier_head reboot_notifier_list; - #endif /* __KERNEL__ */ #endif /* _LINUX_NOTIFIER_H */ diff --git a/kernel/notifier.c b/kernel/notifier.c index b3ce28f39eb6..2f9fe7c30287 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -5,18 +5,10 @@ #include #include #include -#include #define CREATE_TRACE_POINTS #include -/* - * Notifier list for kernel code which wants to be called - * at shutdown. This is used to stop any idling DMA operations - * and the like. - */ -BLOCKING_NOTIFIER_HEAD(reboot_notifier_list); - /* * Notifier chain core routines. The exported routines below * are layered on top of these, with appropriate locking added. diff --git a/kernel/reboot.c b/kernel/reboot.c index f05dbde2c93f..ffdf86b717ab 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -72,6 +72,13 @@ static bool poweroff_fallback_to_halt; */ void __weak (*pm_power_off)(void); +/* + * Notifier list for kernel code which wants to be called + * at shutdown. This is used to stop any idling DMA operations + * and the like. + */ +static BLOCKING_NOTIFIER_HEAD(reboot_notifier_list); + /** * emergency_restart - reboot the system * -- cgit v1.2.3-70-g09d2 From a9d38bcd7337f051912174ebfc500e1cef73982e Mon Sep 17 00:00:00 2001 From: Sui Jingfeng Date: Sat, 12 Oct 2024 18:08:17 +0800 Subject: scatterlist: fix a typo Replace the 'One' with 'On'. 
Link: https://lkml.kernel.org/r/20241012100817.323007-1-sui.jingfeng@linux.dev
Fixes: af2880ec4402 ("scatterlist: add dedicated config for DMA flags")
Signed-off-by: Sui Jingfeng
Reviewed-by: Petr Tesarik
Cc: Catalin Marinas
Cc: Michael Kelley
Cc: Robin Murphy
Signed-off-by: Andrew Morton
---
 include/linux/scatterlist.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index e61d164622db..c5e2239b550e 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -273,7 +273,7 @@ static inline void sg_unmark_end(struct scatterlist *sg)
 }

 /*
- * One 64-bit architectures there is a 4-byte padding in struct scatterlist
+ * On 64-bit architectures there is a 4-byte padding in struct scatterlist
  * (assuming also CONFIG_NEED_SG_DMA_LENGTH is set). Use this padding for DMA
  * flags bits to indicate when a specific dma address is a bus address or the
  * buffer may have been bounced via SWIOTLB.
--
cgit v1.2.3-70-g09d2


From 74ef070e325465a1b364db6a5c6859785537f835 Mon Sep 17 00:00:00 2001
From: Uros Bizjak
Date: Mon, 21 Oct 2024 10:07:36 +0200
Subject: percpu: merge VERIFY_PERCPU_PTR() into its only user

Merge VERIFY_PERCPU_PTR() into the non-CONFIG_SMP per_cpu_ptr() to make
the macro similar to the CONFIG_SMP per_cpu_ptr().  This will allow a
follow-up patch to refactor common code into a macro.

No functional changes; the non-CONFIG_SMP per_cpu_ptr() was the only user
of VERIFY_PERCPU_PTR().

Link: https://lkml.kernel.org/r/20241021080856.48746-1-ubizjak@gmail.com
Signed-off-by: Uros Bizjak
Acked-by: Christoph Lameter
Cc: Dennis Zhou
Cc: Tejun Heo
Signed-off-by: Andrew Morton
---
 include/linux/percpu-defs.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
index 8efce7414fad..7fa88c5f4b26 100644
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -254,13 +254,13 @@ do { \
 #else	/* CONFIG_SMP */

-#define VERIFY_PERCPU_PTR(__p) \
+#define per_cpu_ptr(ptr, cpu) \
 ({ \
-	__verify_pcpu_ptr(__p); \
-	(typeof(*(__p)) __kernel __force *)(__p); \
+	(void)(cpu); \
+	__verify_pcpu_ptr(ptr); \
+	(typeof(*(ptr)) __kernel __force *)(ptr); \
 })

-#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR(ptr); })
 #define raw_cpu_ptr(ptr) per_cpu_ptr(ptr, 0)
 #define this_cpu_ptr(ptr) raw_cpu_ptr(ptr)
--
cgit v1.2.3-70-g09d2


From 001217defda86d0d6a5a9e6cf77a6b813857e7e3 Mon Sep 17 00:00:00 2001
From: Uros Bizjak
Date: Mon, 21 Oct 2024 10:07:37 +0200
Subject: percpu: introduce PERCPU_PTR() macro

Introduce the PERCPU_PTR() macro to cast a percpu pointer from the percpu
address space to a generic (kernel) address space.  Use it in
per_cpu_ptr() and the related SHIFT_PERCPU_PTR() macro.

Also remove common knowledge from the SHIFT_PERCPU_PTR() comment; the
"weird cast" is just a standard way to inform sparse of a cast from the
percpu address space to a generic address space.
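For illustration, here is a hypothetical sketch (not part of the patch;
callers keep going through per_cpu_ptr() rather than using PERCPU_PTR()
directly, and 'demo_counter' is an invented name) of what that cast buys
us:

	/* 'demo_counter' lives in the __percpu address space */
	DEFINE_PER_CPU(int, demo_counter);

	void demo_bump(int cpu)
	{
		/*
		 * per_cpu_ptr() applies the (typeof(*(ptr)) __kernel __force *)
		 * cast internally, so 'p' is an ordinary kernel pointer that
		 * sparse will happily let us dereference.
		 */
		int *p = per_cpu_ptr(&demo_counter, cpu);

		(*p)++;
	}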
Link: https://lkml.kernel.org/r/20241021080856.48746-2-ubizjak@gmail.com
Signed-off-by: Uros Bizjak
Acked-by: Christoph Lameter
Cc: Dennis Zhou
Cc: Tejun Heo
Signed-off-by: Andrew Morton
---
 include/linux/percpu-defs.h | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
index 7fa88c5f4b26..e1cf7982424f 100644
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -220,15 +220,17 @@ do { \
 	(void)__vpp_verify; \
 } while (0)

+#define PERCPU_PTR(__p) \
+	(typeof(*(__p)) __force __kernel *)(__p);
+
 #ifdef CONFIG_SMP

 /*
- * Add an offset to a pointer but keep the pointer as-is. Use RELOC_HIDE()
- * to prevent the compiler from making incorrect assumptions about the
- * pointer value. The weird cast keeps both GCC and sparse happy.
+ * Add an offset to a pointer. Use RELOC_HIDE() to prevent the compiler
+ * from making incorrect assumptions about the pointer value.
  */
 #define SHIFT_PERCPU_PTR(__p, __offset) \
-	RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset))
+	RELOC_HIDE(PERCPU_PTR(__p), (__offset))

 #define per_cpu_ptr(ptr, cpu) \
 ({ \
@@ -258,7 +260,7 @@ do { \
 ({ \
 	(void)(cpu); \
 	__verify_pcpu_ptr(ptr); \
-	(typeof(*(ptr)) __kernel __force *)(ptr); \
+	PERCPU_PTR(ptr); \
 })

 #define raw_cpu_ptr(ptr) per_cpu_ptr(ptr, 0)
--
cgit v1.2.3-70-g09d2


From dabddd687c9e1a06241d6b4d1f66b9f2b60b3ad1 Mon Sep 17 00:00:00 2001
From: Uros Bizjak
Date: Mon, 21 Oct 2024 10:07:38 +0200
Subject: percpu: cast percpu pointer in PERCPU_PTR() via unsigned long

Cast the pointer from the percpu address space to the generic (kernel)
address space in the PERCPU_PTR() macro via an unsigned long intermediate
cast [1].  This intermediate cast is also required to avoid a build
failure when GCC's strict named address space checks for x86 targets [2]
are enabled.

Found by GCC's named address space checks.

[1] https://sparse.docs.kernel.org/en/latest/annotations.html#address-space-name
[2] https://gcc.gnu.org/onlinedocs/gcc/Named-Address-Spaces.html#x86-Named-Address-Spaces

Link: https://lkml.kernel.org/r/20241021080856.48746-3-ubizjak@gmail.com
Signed-off-by: Uros Bizjak
Acked-by: Christoph Lameter
Cc: Dennis Zhou
Cc: Tejun Heo
Signed-off-by: Andrew Morton
---
 include/linux/percpu-defs.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
index e1cf7982424f..35842d1e3879 100644
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -221,7 +221,10 @@ do { \
 } while (0)

 #define PERCPU_PTR(__p) \
-	(typeof(*(__p)) __force __kernel *)(__p);
+({ \
+	unsigned long __pcpu_ptr = (__force unsigned long)(__p); \
+	(typeof(*(__p)) __force __kernel *)(__pcpu_ptr); \
+})

 #ifdef CONFIG_SMP
--
cgit v1.2.3-70-g09d2


From 92a8b224b833e82d286d2100432adbac8cf8a2a1 Mon Sep 17 00:00:00 2001
From: Kuan-Wei Chiu
Date: Sun, 20 Oct 2024 12:01:51 +0800
Subject: lib/min_heap: introduce non-inline versions of min heap API functions

Patch series "Enhance min heap API with non-inline functions and
optimizations", v2.

Add non-inline versions of the min heap API functions in lib/min_heap.c
and update all users outside of kernel/events/core.c to use these
non-inline versions.  To mitigate the performance impact of indirect
function calls caused by the non-inline versions of the swap and compare
functions, a builtin swap has been introduced that swaps elements based
on their size.
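As a hypothetical usage sketch of the resulting split (the 'demo_heap'
type, the 'cb' callbacks and the sizes below are invented for
illustration), callers pick the flavor per call site:

	MIN_HEAP_PREALLOCATED(int, demo_heap, 8);	/* declares 'struct demo_heap' */

	void demo(const struct min_heap_callbacks *cb)
	{
		struct demo_heap heap;
		int val = 42;

		/* NULL buffer: fall back to the preallocated storage */
		min_heap_init(&heap, NULL, 8);

		/* out-of-line versions from lib/min_heap.c keep text size down */
		min_heap_push(&heap, &val, cb, NULL);

		/* _inline variants keep the always-inlined bodies for hot paths */
		min_heap_pop_inline(&heap, cb, NULL);
	}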
Additionally, it micro-optimizes the efficiency of the min heap by
pre-scaling the counter, following the same approach as in lib/sort.c.
Documentation for the min heap API has also been added to the core-api
section.

This patch (of 10):

All current min heap API functions are marked with '__always_inline'.
However, as the number of users increases, inlining these functions
everywhere leads to an increase in kernel size.  In performance-critical
paths, such as when perf events are enabled and min heap functions are
called on every context switch, it is important to retain the inline
versions for optimal performance.  To balance this, the original inline
functions are kept, and additional non-inline versions of the functions
have been added in lib/min_heap.c.

Link: https://lkml.kernel.org/r/20241020040200.939973-1-visitorckw@gmail.com
Link: https://lore.kernel.org/20240522161048.8d8bbc7b153b4ecd92c50666@linux-foundation.org
Link: https://lkml.kernel.org/r/20241020040200.939973-2-visitorckw@gmail.com
Signed-off-by: Kuan-Wei Chiu
Suggested-by: Andrew Morton
Cc: Adrian Hunter
Cc: Arnaldo Carvalho de Melo
Cc: Ching-Chun (Jim) Huang
Cc: Coly Li
Cc: Ian Rogers
Cc: Ingo Molnar
Cc: Jiri Olsa
Cc: Jonathan Corbet
Cc: Kent Overstreet
Cc: Kuan-Wei Chiu
Cc: "Liang, Kan"
Cc: Mark Rutland
Cc: Matthew Sakai
Cc: Matthew Wilcox (Oracle)
Cc: Namhyung Kim
Cc: Peter Zijlstra
Signed-off-by: Andrew Morton
---
 drivers/md/bcache/Kconfig |   1 +
 drivers/md/dm-vdo/Kconfig |   1 +
 fs/bcachefs/Kconfig       |   1 +
 include/linux/min_heap.h  | 129 ++++++++++++++++++++++++++++++----------------
 kernel/events/core.c      |   6 +--
 lib/Kconfig               |   3 ++
 lib/Kconfig.debug         |   1 +
 lib/Makefile              |   1 +
 lib/min_heap.c            |  70 +++++++++++++++++++++++++
 9 files changed, 167 insertions(+), 46 deletions(-)
 create mode 100644 lib/min_heap.c

(limited to 'include/linux')

diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig
index b2d10063d35f..d4697e79d5a3 100644
--- a/drivers/md/bcache/Kconfig
+++ b/drivers/md/bcache/Kconfig
@@ -5,6 +5,7 @@ config BCACHE
 	select BLOCK_HOLDER_DEPRECATED if SYSFS
 	select CRC64
 	select CLOSURES
+	select MIN_HEAP
 	help
 	Allows a block device to be used as cache for other devices;
 	uses a btree for indexing and the layout is optimized for SSDs.
diff --git a/drivers/md/dm-vdo/Kconfig b/drivers/md/dm-vdo/Kconfig
index 111ecd2c2a24..2400b2bc4bc7 100644
--- a/drivers/md/dm-vdo/Kconfig
+++ b/drivers/md/dm-vdo/Kconfig
@@ -7,6 +7,7 @@ config DM_VDO
 	select DM_BUFIO
 	select LZ4_COMPRESS
 	select LZ4_DECOMPRESS
+	select MIN_HEAP
 	help
 	This device mapper target presents a block device with
 	deduplication, compression and thin-provisioning.
diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig
index 5bac803ea367..ab6c95b895b3 100644
--- a/fs/bcachefs/Kconfig
+++ b/fs/bcachefs/Kconfig
@@ -24,6 +24,7 @@ config BCACHEFS_FS
 	select XXHASH
 	select SRCU
 	select SYMBOLIC_ERRNAME
+	select MIN_HEAP
 	help
 	The bcachefs filesystem - a modern, copy on write filesystem,
 	with support for multiple devices, compression, checksumming, etc.
diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h
index 43a7b9dcf15e..0abb21173979 100644
--- a/include/linux/min_heap.h
+++ b/include/linux/min_heap.h
@@ -40,7 +40,7 @@ struct min_heap_callbacks {

 /* Initialize a min-heap.
*/ static __always_inline -void __min_heap_init(min_heap_char *heap, void *data, int size) +void __min_heap_init_inline(min_heap_char *heap, void *data, int size) { heap->nr = 0; heap->size = size; @@ -50,33 +50,33 @@ void __min_heap_init(min_heap_char *heap, void *data, int size) heap->data = heap->preallocated; } -#define min_heap_init(_heap, _data, _size) \ - __min_heap_init((min_heap_char *)_heap, _data, _size) +#define min_heap_init_inline(_heap, _data, _size) \ + __min_heap_init_inline((min_heap_char *)_heap, _data, _size) /* Get the minimum element from the heap. */ static __always_inline -void *__min_heap_peek(struct min_heap_char *heap) +void *__min_heap_peek_inline(struct min_heap_char *heap) { return heap->nr ? heap->data : NULL; } -#define min_heap_peek(_heap) \ - (__minheap_cast(_heap) __min_heap_peek((min_heap_char *)_heap)) +#define min_heap_peek_inline(_heap) \ + (__minheap_cast(_heap) __min_heap_peek_inline((min_heap_char *)_heap)) /* Check if the heap is full. */ static __always_inline -bool __min_heap_full(min_heap_char *heap) +bool __min_heap_full_inline(min_heap_char *heap) { return heap->nr == heap->size; } -#define min_heap_full(_heap) \ - __min_heap_full((min_heap_char *)_heap) +#define min_heap_full_inline(_heap) \ + __min_heap_full_inline((min_heap_char *)_heap) /* Sift the element at pos down the heap. */ static __always_inline -void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size, - const struct min_heap_callbacks *func, void *args) +void __min_heap_sift_down_inline(min_heap_char *heap, int pos, size_t elem_size, + const struct min_heap_callbacks *func, void *args) { void *left, *right; void *data = heap->data; @@ -108,13 +108,14 @@ void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size, } } -#define min_heap_sift_down(_heap, _pos, _func, _args) \ - __min_heap_sift_down((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), _func, _args) +#define min_heap_sift_down_inline(_heap, _pos, _func, _args) \ + __min_heap_sift_down_inline((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), \ + _func, _args) /* Sift up ith element from the heap, O(log2(nr)). */ static __always_inline -void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx, - const struct min_heap_callbacks *func, void *args) +void __min_heap_sift_up_inline(min_heap_char *heap, size_t elem_size, size_t idx, + const struct min_heap_callbacks *func, void *args) { void *data = heap->data; size_t parent; @@ -128,27 +129,28 @@ void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx, } } -#define min_heap_sift_up(_heap, _idx, _func, _args) \ - __min_heap_sift_up((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, _func, _args) +#define min_heap_sift_up_inline(_heap, _idx, _func, _args) \ + __min_heap_sift_up_inline((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, \ + _func, _args) /* Floyd's approach to heapification that is O(nr). 
*/ static __always_inline -void __min_heapify_all(min_heap_char *heap, size_t elem_size, - const struct min_heap_callbacks *func, void *args) +void __min_heapify_all_inline(min_heap_char *heap, size_t elem_size, + const struct min_heap_callbacks *func, void *args) { int i; for (i = heap->nr / 2 - 1; i >= 0; i--) - __min_heap_sift_down(heap, i, elem_size, func, args); + __min_heap_sift_down_inline(heap, i, elem_size, func, args); } -#define min_heapify_all(_heap, _func, _args) \ - __min_heapify_all((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args) +#define min_heapify_all_inline(_heap, _func, _args) \ + __min_heapify_all_inline((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args) /* Remove minimum element from the heap, O(log2(nr)). */ static __always_inline -bool __min_heap_pop(min_heap_char *heap, size_t elem_size, - const struct min_heap_callbacks *func, void *args) +bool __min_heap_pop_inline(min_heap_char *heap, size_t elem_size, + const struct min_heap_callbacks *func, void *args) { void *data = heap->data; @@ -158,13 +160,13 @@ bool __min_heap_pop(min_heap_char *heap, size_t elem_size, /* Place last element at the root (position 0) and then sift down. */ heap->nr--; memcpy(data, data + (heap->nr * elem_size), elem_size); - __min_heap_sift_down(heap, 0, elem_size, func, args); + __min_heap_sift_down_inline(heap, 0, elem_size, func, args); return true; } -#define min_heap_pop(_heap, _func, _args) \ - __min_heap_pop((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args) +#define min_heap_pop_inline(_heap, _func, _args) \ + __min_heap_pop_inline((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args) /* * Remove the minimum element and then push the given element. The @@ -172,22 +174,21 @@ bool __min_heap_pop(min_heap_char *heap, size_t elem_size, * efficient than a pop followed by a push that does 2. */ static __always_inline -void __min_heap_pop_push(min_heap_char *heap, - const void *element, size_t elem_size, - const struct min_heap_callbacks *func, - void *args) +void __min_heap_pop_push_inline(min_heap_char *heap, const void *element, size_t elem_size, + const struct min_heap_callbacks *func, void *args) { memcpy(heap->data, element, elem_size); - __min_heap_sift_down(heap, 0, elem_size, func, args); + __min_heap_sift_down_inline(heap, 0, elem_size, func, args); } -#define min_heap_pop_push(_heap, _element, _func, _args) \ - __min_heap_pop_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func, _args) +#define min_heap_pop_push_inline(_heap, _element, _func, _args) \ + __min_heap_pop_push_inline((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), \ + _func, _args) /* Push an element on to the heap, O(log2(nr)). */ static __always_inline -bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size, - const struct min_heap_callbacks *func, void *args) +bool __min_heap_push_inline(min_heap_char *heap, const void *element, size_t elem_size, + const struct min_heap_callbacks *func, void *args) { void *data = heap->data; int pos; @@ -201,18 +202,19 @@ bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size, heap->nr++; /* Sift child at pos up. 
*/ - __min_heap_sift_up(heap, elem_size, pos, func, args); + __min_heap_sift_up_inline(heap, elem_size, pos, func, args); return true; } -#define min_heap_push(_heap, _element, _func, _args) \ - __min_heap_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func, _args) +#define min_heap_push_inline(_heap, _element, _func, _args) \ + __min_heap_push_inline((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), \ + _func, _args) /* Remove ith element from the heap, O(log2(nr)). */ static __always_inline -bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx, - const struct min_heap_callbacks *func, void *args) +bool __min_heap_del_inline(min_heap_char *heap, size_t elem_size, size_t idx, + const struct min_heap_callbacks *func, void *args) { void *data = heap->data; @@ -224,12 +226,53 @@ bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx, if (idx == heap->nr) return true; func->swp(data + (idx * elem_size), data + (heap->nr * elem_size), args); - __min_heap_sift_up(heap, elem_size, idx, func, args); - __min_heap_sift_down(heap, idx, elem_size, func, args); + __min_heap_sift_up_inline(heap, elem_size, idx, func, args); + __min_heap_sift_down_inline(heap, idx, elem_size, func, args); return true; } +#define min_heap_del_inline(_heap, _idx, _func, _args) \ + __min_heap_del_inline((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, \ + _func, _args) + +void __min_heap_init(min_heap_char *heap, void *data, int size); +void *__min_heap_peek(struct min_heap_char *heap); +bool __min_heap_full(min_heap_char *heap); +void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size, + const struct min_heap_callbacks *func, void *args); +void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx, + const struct min_heap_callbacks *func, void *args); +void __min_heapify_all(min_heap_char *heap, size_t elem_size, + const struct min_heap_callbacks *func, void *args); +bool __min_heap_pop(min_heap_char *heap, size_t elem_size, + const struct min_heap_callbacks *func, void *args); +void __min_heap_pop_push(min_heap_char *heap, const void *element, size_t elem_size, + const struct min_heap_callbacks *func, void *args); +bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size, + const struct min_heap_callbacks *func, void *args); +bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx, + const struct min_heap_callbacks *func, void *args); + +#define min_heap_init(_heap, _data, _size) \ + __min_heap_init((min_heap_char *)_heap, _data, _size) +#define min_heap_peek(_heap) \ + (__minheap_cast(_heap) __min_heap_peek((min_heap_char *)_heap)) +#define min_heap_full(_heap) \ + __min_heap_full((min_heap_char *)_heap) +#define min_heap_sift_down(_heap, _pos, _func, _args) \ + __min_heap_sift_down((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), _func, _args) +#define min_heap_sift_up(_heap, _idx, _func, _args) \ + __min_heap_sift_up((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, _func, _args) +#define min_heapify_all(_heap, _func, _args) \ + __min_heapify_all((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args) +#define min_heap_pop(_heap, _func, _args) \ + __min_heap_pop((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args) +#define min_heap_pop_push(_heap, _element, _func, _args) \ + __min_heap_pop_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), \ + _func, _args) +#define min_heap_push(_heap, _element, _func, _args) \ + 
__min_heap_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func, _args) #define min_heap_del(_heap, _idx, _func, _args) \ __min_heap_del((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, _func, _args) diff --git a/kernel/events/core.c b/kernel/events/core.c index df27d08a7232..1b3c1198b2af 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3870,7 +3870,7 @@ static noinline int visit_groups_merge(struct perf_event_context *ctx, perf_assert_pmu_disabled((*evt)->pmu_ctx->pmu); } - min_heapify_all(&event_heap, &perf_min_heap, NULL); + min_heapify_all_inline(&event_heap, &perf_min_heap, NULL); while (event_heap.nr) { ret = func(*evt, data); @@ -3879,9 +3879,9 @@ static noinline int visit_groups_merge(struct perf_event_context *ctx, *evt = perf_event_groups_next(*evt, pmu); if (*evt) - min_heap_sift_down(&event_heap, 0, &perf_min_heap, NULL); + min_heap_sift_down_inline(&event_heap, 0, &perf_min_heap, NULL); else - min_heap_pop(&event_heap, &perf_min_heap, NULL); + min_heap_pop_inline(&event_heap, &perf_min_heap, NULL); } return 0; diff --git a/lib/Kconfig b/lib/Kconfig index cf303bd91dda..f5a2781669ea 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -780,3 +780,6 @@ config FIRMWARE_TABLE config UNION_FIND bool + +config MIN_HEAP + bool diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index eda319e9d569..2549b64b2280 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2279,6 +2279,7 @@ config TEST_LIST_SORT config TEST_MIN_HEAP tristate "Min heap test" depends on DEBUG_KERNEL || m + select MIN_HEAP help Enable this to turn on min heap function tests. This test is executed only once during system boot (so affects only boot time), diff --git a/lib/Makefile b/lib/Makefile index feebed74fc7a..1eb89962daef 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -40,6 +40,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ lib-$(CONFIG_UNION_FIND) += union_find.o lib-$(CONFIG_PRINTK) += dump_stack.o lib-$(CONFIG_SMP) += cpumask.o +lib-$(CONFIG_MIN_HEAP) += min_heap.o lib-y += kobject.o klist.o obj-y += lockref.o diff --git a/lib/min_heap.c b/lib/min_heap.c new file mode 100644 index 000000000000..4485372ff3b1 --- /dev/null +++ b/lib/min_heap.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +void __min_heap_init(min_heap_char *heap, void *data, int size) +{ + __min_heap_init_inline(heap, data, size); +} +EXPORT_SYMBOL(__min_heap_init); + +void *__min_heap_peek(struct min_heap_char *heap) +{ + return __min_heap_peek_inline(heap); +} +EXPORT_SYMBOL(__min_heap_peek); + +bool __min_heap_full(min_heap_char *heap) +{ + return __min_heap_full_inline(heap); +} +EXPORT_SYMBOL(__min_heap_full); + +void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size, + const struct min_heap_callbacks *func, void *args) +{ + __min_heap_sift_down_inline(heap, pos, elem_size, func, args); +} +EXPORT_SYMBOL(__min_heap_sift_down); + +void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx, + const struct min_heap_callbacks *func, void *args) +{ + __min_heap_sift_up_inline(heap, elem_size, idx, func, args); +} +EXPORT_SYMBOL(__min_heap_sift_up); + +void __min_heapify_all(min_heap_char *heap, size_t elem_size, + const struct min_heap_callbacks *func, void *args) +{ + __min_heapify_all_inline(heap, elem_size, func, args); +} +EXPORT_SYMBOL(__min_heapify_all); + +bool __min_heap_pop(min_heap_char *heap, size_t elem_size, + const struct min_heap_callbacks *func, void *args) +{ + return __min_heap_pop_inline(heap, elem_size, func, 
args); +} +EXPORT_SYMBOL(__min_heap_pop); + +void __min_heap_pop_push(min_heap_char *heap, const void *element, size_t elem_size, + const struct min_heap_callbacks *func, void *args) +{ + __min_heap_pop_push_inline(heap, element, elem_size, func, args); +} +EXPORT_SYMBOL(__min_heap_pop_push); + +bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size, + const struct min_heap_callbacks *func, void *args) +{ + return __min_heap_push_inline(heap, element, elem_size, func, args); +} +EXPORT_SYMBOL(__min_heap_push); + +bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx, + const struct min_heap_callbacks *func, void *args) +{ + return __min_heap_del_inline(heap, elem_size, idx, func, args); +} +EXPORT_SYMBOL(__min_heap_del); -- cgit v1.2.3-70-g09d2 From aa5888afc2347ebb394c2c4b694fa3026775009e Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Sun, 20 Oct 2024 12:01:52 +0800 Subject: lib min_heap: optimize min heap by prescaling counters for better performance Improve the efficiency of the min heap by prescaling counters, eliminating the need to repeatedly compute 'index * element_size' when accessing elements. By doing so, we avoid the overhead associated with recalculating the byte offset for each heap operation. However, with prescaling, the calculation for the parent element's location is no longer as simple as '(i - 1) / 2'. To address this, we copy the parent function from 'lib/sort.c', which calculates the parent offset in a branchless manner without using any division instructions. This optimization should result in a more efficient heap implementation by reducing the computational overhead of finding parent and child offsets. Link: https://lkml.kernel.org/r/20241020040200.939973-3-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Cc: Adrian Hunter Cc: Arnaldo Carvalho de Melo Cc: Ching-Chun (Jim) Huang Cc: Coly Li Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Jonathan Corbet Cc: Kent Overstreet Cc: "Liang, Kan" Cc: Mark Rutland Cc: Matthew Sakai Cc: Matthew Wilcox (Oracle) Cc: Namhyung Kim Cc: Peter Zijlstra Signed-off-by: Andrew Morton --- include/linux/min_heap.h | 73 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 49 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h index 0abb21173979..41b092a14238 100644 --- a/include/linux/min_heap.h +++ b/include/linux/min_heap.h @@ -38,6 +38,32 @@ struct min_heap_callbacks { void (*swp)(void *lhs, void *rhs, void *args); }; +/** + * parent - given the offset of the child, find the offset of the parent. + * @i: the offset of the heap element whose parent is sought. Non-zero. + * @lsbit: a precomputed 1-bit mask, equal to "size & -size" + * @size: size of each element + * + * In terms of array indexes, the parent of element j = @i/@size is simply + * (j-1)/2. But when working in byte offsets, we can't use implicit + * truncation of integer divides. + * + * Fortunately, we only need one bit of the quotient, not the full divide. + * @size has a least significant bit. That bit will be clear if @i is + * an even multiple of @size, and set if it's an odd multiple. + * + * Logically, we're doing "if (i & lsbit) i -= size;", but since the + * branch is unpredictable, it's done with a bit of clever branch-free + * code instead. 
+ */ +__attribute_const__ __always_inline +static size_t parent(size_t i, unsigned int lsbit, size_t size) +{ + i -= size; + i -= size & -(i & lsbit); + return i / 2; +} + /* Initialize a min-heap. */ static __always_inline void __min_heap_init_inline(min_heap_char *heap, void *data, int size) @@ -78,33 +104,30 @@ static __always_inline void __min_heap_sift_down_inline(min_heap_char *heap, int pos, size_t elem_size, const struct min_heap_callbacks *func, void *args) { - void *left, *right; + const unsigned long lsbit = elem_size & -elem_size; void *data = heap->data; - void *root = data + pos * elem_size; - int i = pos, j; + /* pre-scale counters for performance */ + size_t a = pos * elem_size; + size_t b, c, d; + size_t n = heap->nr * elem_size; /* Find the sift-down path all the way to the leaves. */ - for (;;) { - if (i * 2 + 2 >= heap->nr) - break; - left = data + (i * 2 + 1) * elem_size; - right = data + (i * 2 + 2) * elem_size; - i = func->less(left, right, args) ? i * 2 + 1 : i * 2 + 2; - } + for (b = a; c = 2 * b + elem_size, (d = c + elem_size) < n;) + b = func->less(data + c, data + d, args) ? c : d; /* Special case for the last leaf with no sibling. */ - if (i * 2 + 2 == heap->nr) - i = i * 2 + 1; + if (d == n) + b = c; /* Backtrack to the correct location. */ - while (i != pos && func->less(root, data + i * elem_size, args)) - i = (i - 1) / 2; + while (b != a && func->less(data + a, data + b, args)) + b = parent(b, lsbit, elem_size); /* Shift the element into its correct place. */ - j = i; - while (i != pos) { - i = (i - 1) / 2; - func->swp(data + i * elem_size, data + j * elem_size, args); + c = b; + while (b != a) { + b = parent(b, lsbit, elem_size); + func->swp(data + b, data + c, args); } } @@ -117,15 +140,17 @@ static __always_inline void __min_heap_sift_up_inline(min_heap_char *heap, size_t elem_size, size_t idx, const struct min_heap_callbacks *func, void *args) { + const unsigned long lsbit = elem_size & -elem_size; void *data = heap->data; - size_t parent; + /* pre-scale counters for performance */ + size_t a = idx * elem_size, b; - while (idx) { - parent = (idx - 1) / 2; - if (func->less(data + parent * elem_size, data + idx * elem_size, args)) + while (a) { + b = parent(a, lsbit, elem_size); + if (func->less(data + b, data + a, args)) break; - func->swp(data + parent * elem_size, data + idx * elem_size, args); - idx = parent; + func->swp(data + a, data + b, args); + a = b; } } -- cgit v1.2.3-70-g09d2 From 03ec56d084611b5a4dc06ffa74db0928616e4d7f Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Sun, 20 Oct 2024 12:01:53 +0800 Subject: lib min_heap: avoid indirect function call by providing default swap The non-inline min heap API can result in an indirect function call to the custom swap function. This becomes particularly costly when CONFIG_MITIGATION_RETPOLINE is enabled, as indirect function calls are expensive in this case. To address this, copy the code from lib/sort.c and provide a default builtin swap implementation that performs element swaps based on the element size. This change allows most users to avoid the overhead of indirect function calls, improving efficiency. 
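As an illustrative sketch (a hypothetical caller, not taken from an
in-tree user), a heap of plain ints can now simply leave the swap
callback NULL and get the size-based builtin swap:

	static bool int_less(const void *lhs, const void *rhs, void *args)
	{
		return *(const int *)lhs < *(const int *)rhs;
	}

	static const struct min_heap_callbacks int_heap_cb = {
		.less	= int_less,
		.swp	= NULL,	/* 4-byte elements: builtin swap_words_32() is used */
	};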
Link: https://lkml.kernel.org/r/20241020040200.939973-4-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Cc: Adrian Hunter Cc: Arnaldo Carvalho de Melo Cc: Ching-Chun (Jim) Huang Cc: Coly Li Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Jonathan Corbet Cc: Kent Overstreet Cc: "Liang, Kan" Cc: Mark Rutland Cc: Matthew Sakai Cc: Matthew Wilcox (Oracle) Cc: Namhyung Kim Cc: Peter Zijlstra Signed-off-by: Andrew Morton --- include/linux/min_heap.h | 159 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 156 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h index 41b092a14238..e781727c8916 100644 --- a/include/linux/min_heap.h +++ b/include/linux/min_heap.h @@ -38,6 +38,147 @@ struct min_heap_callbacks { void (*swp)(void *lhs, void *rhs, void *args); }; +/** + * is_aligned - is this pointer & size okay for word-wide copying? + * @base: pointer to data + * @size: size of each element + * @align: required alignment (typically 4 or 8) + * + * Returns true if elements can be copied using word loads and stores. + * The size must be a multiple of the alignment, and the base address must + * be if we do not have CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS. + * + * For some reason, gcc doesn't know to optimize "if (a & mask || b & mask)" + * to "if ((a | b) & mask)", so we do that by hand. + */ +__attribute_const__ __always_inline +static bool is_aligned(const void *base, size_t size, unsigned char align) +{ + unsigned char lsbits = (unsigned char)size; + + (void)base; +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + lsbits |= (unsigned char)(uintptr_t)base; +#endif + return (lsbits & (align - 1)) == 0; +} + +/** + * swap_words_32 - swap two elements in 32-bit chunks + * @a: pointer to the first element to swap + * @b: pointer to the second element to swap + * @n: element size (must be a multiple of 4) + * + * Exchange the two objects in memory. This exploits base+index addressing, + * which basically all CPUs have, to minimize loop overhead computations. + * + * For some reason, on x86 gcc 7.3.0 adds a redundant test of n at the + * bottom of the loop, even though the zero flag is still valid from the + * subtract (since the intervening mov instructions don't alter the flags). + * Gcc 8.1.0 doesn't have that problem. + */ +static __always_inline +void swap_words_32(void *a, void *b, size_t n) +{ + do { + u32 t = *(u32 *)(a + (n -= 4)); + *(u32 *)(a + n) = *(u32 *)(b + n); + *(u32 *)(b + n) = t; + } while (n); +} + +/** + * swap_words_64 - swap two elements in 64-bit chunks + * @a: pointer to the first element to swap + * @b: pointer to the second element to swap + * @n: element size (must be a multiple of 8) + * + * Exchange the two objects in memory. This exploits base+index + * addressing, which basically all CPUs have, to minimize loop overhead + * computations. + * + * We'd like to use 64-bit loads if possible. If they're not, emulating + * one requires base+index+4 addressing which x86 has but most other + * processors do not. If CONFIG_64BIT, we definitely have 64-bit loads, + * but it's possible to have 64-bit loads without 64-bit pointers (e.g. + * x32 ABI). Are there any cases the kernel needs to worry about? 
+ */ +static __always_inline +void swap_words_64(void *a, void *b, size_t n) +{ + do { +#ifdef CONFIG_64BIT + u64 t = *(u64 *)(a + (n -= 8)); + *(u64 *)(a + n) = *(u64 *)(b + n); + *(u64 *)(b + n) = t; +#else + /* Use two 32-bit transfers to avoid base+index+4 addressing */ + u32 t = *(u32 *)(a + (n -= 4)); + *(u32 *)(a + n) = *(u32 *)(b + n); + *(u32 *)(b + n) = t; + + t = *(u32 *)(a + (n -= 4)); + *(u32 *)(a + n) = *(u32 *)(b + n); + *(u32 *)(b + n) = t; +#endif + } while (n); +} + +/** + * swap_bytes - swap two elements a byte at a time + * @a: pointer to the first element to swap + * @b: pointer to the second element to swap + * @n: element size + * + * This is the fallback if alignment doesn't allow using larger chunks. + */ +static __always_inline +void swap_bytes(void *a, void *b, size_t n) +{ + do { + char t = ((char *)a)[--n]; + ((char *)a)[n] = ((char *)b)[n]; + ((char *)b)[n] = t; + } while (n); +} + +/* + * The values are arbitrary as long as they can't be confused with + * a pointer, but small integers make for the smallest compare + * instructions. + */ +#define SWAP_WORDS_64 ((void (*)(void *, void *, void *))0) +#define SWAP_WORDS_32 ((void (*)(void *, void *, void *))1) +#define SWAP_BYTES ((void (*)(void *, void *, void *))2) + +/* + * Selects the appropriate swap function based on the element size. + */ +static __always_inline +void *select_swap_func(const void *base, size_t size) +{ + if (is_aligned(base, size, 8)) + return SWAP_WORDS_64; + else if (is_aligned(base, size, 4)) + return SWAP_WORDS_32; + else + return SWAP_BYTES; +} + +static __always_inline +void do_swap(void *a, void *b, size_t size, void (*swap_func)(void *lhs, void *rhs, void *args), + void *priv) +{ + if (swap_func == SWAP_WORDS_64) + swap_words_64(a, b, size); + else if (swap_func == SWAP_WORDS_32) + swap_words_32(a, b, size); + else if (swap_func == SWAP_BYTES) + swap_bytes(a, b, size); + else + swap_func(a, b, priv); +} + /** * parent - given the offset of the child, find the offset of the parent. * @i: the offset of the heap element whose parent is sought. Non-zero. @@ -106,11 +247,15 @@ void __min_heap_sift_down_inline(min_heap_char *heap, int pos, size_t elem_size, { const unsigned long lsbit = elem_size & -elem_size; void *data = heap->data; + void (*swp)(void *lhs, void *rhs, void *args) = func->swp; /* pre-scale counters for performance */ size_t a = pos * elem_size; size_t b, c, d; size_t n = heap->nr * elem_size; + if (!swp) + swp = select_swap_func(data, elem_size); + /* Find the sift-down path all the way to the leaves. */ for (b = a; c = 2 * b + elem_size, (d = c + elem_size) < n;) b = func->less(data + c, data + d, args) ? 
c : d; @@ -127,7 +272,7 @@ void __min_heap_sift_down_inline(min_heap_char *heap, int pos, size_t elem_size, c = b; while (b != a) { b = parent(b, lsbit, elem_size); - func->swp(data + b, data + c, args); + do_swap(data + b, data + c, elem_size, swp, args); } } @@ -142,14 +287,18 @@ void __min_heap_sift_up_inline(min_heap_char *heap, size_t elem_size, size_t idx { const unsigned long lsbit = elem_size & -elem_size; void *data = heap->data; + void (*swp)(void *lhs, void *rhs, void *args) = func->swp; /* pre-scale counters for performance */ size_t a = idx * elem_size, b; + if (!swp) + swp = select_swap_func(data, elem_size); + while (a) { b = parent(a, lsbit, elem_size); if (func->less(data + b, data + a, args)) break; - func->swp(data + a, data + b, args); + do_swap(data + a, data + b, elem_size, swp, args); a = b; } } @@ -242,15 +391,19 @@ bool __min_heap_del_inline(min_heap_char *heap, size_t elem_size, size_t idx, const struct min_heap_callbacks *func, void *args) { void *data = heap->data; + void (*swp)(void *lhs, void *rhs, void *args) = func->swp; if (WARN_ONCE(heap->nr <= 0, "Popping an empty heap")) return false; + if (!swp) + swp = select_swap_func(data, elem_size); + /* Place last element at the root (position 0) and then sift down. */ heap->nr--; if (idx == heap->nr) return true; - func->swp(data + (idx * elem_size), data + (heap->nr * elem_size), args); + do_swap(data + (idx * elem_size), data + (heap->nr * elem_size), elem_size, swp, args); __min_heap_sift_up_inline(heap, elem_size, idx, func, args); __min_heap_sift_down_inline(heap, idx, elem_size, func, args); -- cgit v1.2.3-70-g09d2 From a7306f3c283bfe03611229bb6280987aae2af8f9 Mon Sep 17 00:00:00 2001 From: Nataniel Farzan Date: Mon, 4 Nov 2024 19:22:32 -0800 Subject: Improve consistency of '#error' directive messages Remove the use of contractions and use proper punctuation in #error directive messages that discourage the direct inclusion of header files. Link: https://lkml.kernel.org/r/20241105032231.28833-1-natanielfarzan@gmail.com Signed-off-by: Nataniel Farzan Signed-off-by: Andrew Morton --- arch/alpha/include/asm/spinlock_types.h | 2 +- arch/arm/include/asm/spinlock_types.h | 2 +- arch/arm64/include/asm/spinlock_types.h | 2 +- arch/hexagon/include/asm/spinlock_types.h | 2 +- arch/powerpc/include/asm/simple_spinlock_types.h | 2 +- arch/powerpc/include/asm/spinlock_types.h | 2 +- arch/s390/include/asm/spinlock_types.h | 2 +- arch/sh/include/asm/spinlock_types.h | 2 +- arch/xtensa/include/asm/spinlock_types.h | 2 +- include/acpi/platform/aclinux.h | 2 +- include/linux/compiler-clang.h | 2 +- include/linux/compiler-gcc.h | 2 +- include/linux/pm_wakeup.h | 2 +- include/linux/rwlock.h | 2 +- include/linux/rwlock_api_smp.h | 2 +- include/linux/spinlock_api_smp.h | 2 +- include/linux/spinlock_types_up.h | 2 +- include/linux/spinlock_up.h | 2 +- tools/include/linux/compiler-gcc.h | 2 +- 19 files changed, 19 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/include/asm/spinlock_types.h b/arch/alpha/include/asm/spinlock_types.h index 2526fd3be5fd..05a444d77c53 100644 --- a/arch/alpha/include/asm/spinlock_types.h +++ b/arch/alpha/include/asm/spinlock_types.h @@ -3,7 +3,7 @@ #define _ALPHA_SPINLOCK_TYPES_H #ifndef __LINUX_SPINLOCK_TYPES_RAW_H -# error "please don't include this file directly" +# error "Please do not include this file directly." 
#endif typedef struct { diff --git a/arch/arm/include/asm/spinlock_types.h b/arch/arm/include/asm/spinlock_types.h index 0c14b36ef101..5404a2a96bf3 100644 --- a/arch/arm/include/asm/spinlock_types.h +++ b/arch/arm/include/asm/spinlock_types.h @@ -3,7 +3,7 @@ #define __ASM_SPINLOCK_TYPES_H #ifndef __LINUX_SPINLOCK_TYPES_RAW_H -# error "please don't include this file directly" +# error "Please do not include this file directly." #endif #define TICKET_SHIFT 16 diff --git a/arch/arm64/include/asm/spinlock_types.h b/arch/arm64/include/asm/spinlock_types.h index 11ab1c077697..7cde3d8bd0ad 100644 --- a/arch/arm64/include/asm/spinlock_types.h +++ b/arch/arm64/include/asm/spinlock_types.h @@ -6,7 +6,7 @@ #define __ASM_SPINLOCK_TYPES_H #if !defined(__LINUX_SPINLOCK_TYPES_RAW_H) && !defined(__ASM_SPINLOCK_H) -# error "please don't include this file directly" +# error "Please do not include this file directly." #endif #include diff --git a/arch/hexagon/include/asm/spinlock_types.h b/arch/hexagon/include/asm/spinlock_types.h index d5f66495b670..63add2d863e8 100644 --- a/arch/hexagon/include/asm/spinlock_types.h +++ b/arch/hexagon/include/asm/spinlock_types.h @@ -9,7 +9,7 @@ #define _ASM_SPINLOCK_TYPES_H #ifndef __LINUX_SPINLOCK_TYPES_RAW_H -# error "please don't include this file directly" +# error "Please do not include this file directly." #endif typedef struct { diff --git a/arch/powerpc/include/asm/simple_spinlock_types.h b/arch/powerpc/include/asm/simple_spinlock_types.h index 08243338069d..391fc19f7272 100644 --- a/arch/powerpc/include/asm/simple_spinlock_types.h +++ b/arch/powerpc/include/asm/simple_spinlock_types.h @@ -3,7 +3,7 @@ #define _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H #ifndef __LINUX_SPINLOCK_TYPES_RAW_H -# error "please don't include this file directly" +# error "Please do not include this file directly." #endif typedef struct { diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h index 40b01446cf75..569765fa16bc 100644 --- a/arch/powerpc/include/asm/spinlock_types.h +++ b/arch/powerpc/include/asm/spinlock_types.h @@ -3,7 +3,7 @@ #define _ASM_POWERPC_SPINLOCK_TYPES_H #ifndef __LINUX_SPINLOCK_TYPES_RAW_H -# error "please don't include this file directly" +# error "Please do not include this file directly." #endif #ifdef CONFIG_PPC_QUEUED_SPINLOCKS diff --git a/arch/s390/include/asm/spinlock_types.h b/arch/s390/include/asm/spinlock_types.h index b69695e39957..3653ff57d6d9 100644 --- a/arch/s390/include/asm/spinlock_types.h +++ b/arch/s390/include/asm/spinlock_types.h @@ -3,7 +3,7 @@ #define __ASM_SPINLOCK_TYPES_H #ifndef __LINUX_SPINLOCK_TYPES_RAW_H -# error "please don't include this file directly" +# error "Please do not include this file directly." #endif typedef struct { diff --git a/arch/sh/include/asm/spinlock_types.h b/arch/sh/include/asm/spinlock_types.h index 907bda4b1619..7cb50e68448f 100644 --- a/arch/sh/include/asm/spinlock_types.h +++ b/arch/sh/include/asm/spinlock_types.h @@ -3,7 +3,7 @@ #define __ASM_SH_SPINLOCK_TYPES_H #ifndef __LINUX_SPINLOCK_TYPES_RAW_H -# error "please don't include this file directly" +# error "Please do not include this file directly." 
#endif typedef struct { diff --git a/arch/xtensa/include/asm/spinlock_types.h b/arch/xtensa/include/asm/spinlock_types.h index 797aed7df3dd..6baaeac6248b 100644 --- a/arch/xtensa/include/asm/spinlock_types.h +++ b/arch/xtensa/include/asm/spinlock_types.h @@ -3,7 +3,7 @@ #define __ASM_SPINLOCK_TYPES_H #if !defined(__LINUX_SPINLOCK_TYPES_RAW_H) && !defined(__ASM_SPINLOCK_H) -# error "please don't include this file directly" +# error "Please do not include this file directly." #endif #include diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h index 565341c826e3..f3249b7df5cb 100644 --- a/include/acpi/platform/aclinux.h +++ b/include/acpi/platform/aclinux.h @@ -15,7 +15,7 @@ /* ACPICA external files should not include ACPICA headers directly. */ #if !defined(BUILDING_ACPICA) && !defined(_LINUX_ACPI_H) -#error "Please don't include directly, include instead." +#error "Please do not include directly, include instead." #endif #endif diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 4c1a39dcb624..2e7c2c282f3a 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_COMPILER_TYPES_H -#error "Please don't include directly, include instead." +#error "Please do not include directly, include instead." #endif /* Compiler specific definitions for Clang compiler */ diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index cd6f9aae311f..d0ed9583743f 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_COMPILER_TYPES_H -#error "Please don't include directly, include instead." +#error "Please do not include directly, include instead." #endif /* diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index 76cd1f9f1365..222f7530806c 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -10,7 +10,7 @@ #define _LINUX_PM_WAKEUP_H #ifndef _DEVICE_H_ -# error "please don't include this file directly" +# error "Please do not include this file directly." #endif #include diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h index c0ef596f340b..5b87c6f4a243 100644 --- a/include/linux/rwlock.h +++ b/include/linux/rwlock.h @@ -2,7 +2,7 @@ #define __LINUX_RWLOCK_H #ifndef __LINUX_INSIDE_SPINLOCK_H -# error "please don't include this file directly" +# error "Please do not include this file directly." #endif /* diff --git a/include/linux/rwlock_api_smp.h b/include/linux/rwlock_api_smp.h index dceb0a59b692..31d3d1116323 100644 --- a/include/linux/rwlock_api_smp.h +++ b/include/linux/rwlock_api_smp.h @@ -2,7 +2,7 @@ #define __LINUX_RWLOCK_API_SMP_H #ifndef __LINUX_SPINLOCK_API_SMP_H -# error "please don't include this file directly" +# error "Please do not include this file directly." #endif /* diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h index 89eb6f4c659c..9ecb0ab504e3 100644 --- a/include/linux/spinlock_api_smp.h +++ b/include/linux/spinlock_api_smp.h @@ -2,7 +2,7 @@ #define __LINUX_SPINLOCK_API_SMP_H #ifndef __LINUX_INSIDE_SPINLOCK_H -# error "please don't include this file directly" +# error "Please do not include this file directly." 
 #endif

 /*
diff --git a/include/linux/spinlock_types_up.h b/include/linux/spinlock_types_up.h
index 7f86a2016ac5..fc4e2d017c20 100644
--- a/include/linux/spinlock_types_up.h
+++ b/include/linux/spinlock_types_up.h
@@ -2,7 +2,7 @@
 #define __LINUX_SPINLOCK_TYPES_UP_H

 #ifndef __LINUX_SPINLOCK_TYPES_RAW_H
-# error "please don't include this file directly"
+# error "Please do not include this file directly."
 #endif

 /*
diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h
index c87204247592..1e84e71ca495 100644
--- a/include/linux/spinlock_up.h
+++ b/include/linux/spinlock_up.h
@@ -2,7 +2,7 @@
 #define __LINUX_SPINLOCK_UP_H

 #ifndef __LINUX_INSIDE_SPINLOCK_H
-# error "please don't include this file directly"
+# error "Please do not include this file directly."
 #endif

 #include  /* for cpu_relax() */
diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h
index 62e7c901ac28..e20f98e14e81 100644
--- a/tools/include/linux/compiler-gcc.h
+++ b/tools/include/linux/compiler-gcc.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _TOOLS_LINUX_COMPILER_H_
-#error "Please don't include directly, include instead."
+#error "Please do not include directly, include instead."
 #endif

 /*
--
cgit v1.2.3-70-g09d2


From bc73b4186736341ab5cd2c199da82db6e1134e13 Mon Sep 17 00:00:00 2001
From: Alexandru Ardelean
Date: Tue, 5 Nov 2024 16:54:05 +0200
Subject: util_macros.h: fix/rework find_closest() macros

A bug was found in find_closest() (testing showed that
find_closest_descending() is affected as well), where for certain values
with small progressions the rounding (done by averaging two values)
causes an incorrect index to be returned.  The rounding issues occur for
progressions of 1, 2 and 3; they go away when the progression/interval
between two values is 4 or larger.

It's particularly bad for progressions of 1.  For example, if there's an
array of 'a = { 1, 2, 3 }', using 'find_closest(2, a, ...)' would return
0 (the index of '1'), rather than returning 1 (the index of '2').  This
means that for exact values (with a progression of 1), find_closest()
will misbehave and return the index of the value smaller than the one
we're searching for.

For progressions of 2 and 3, the exact values are obtained correctly, but
other values aren't approximated correctly (as one would expect).
Starting with progressions of 4, all seems to be good (one gets what one
would expect).

While one could argue that 'find_closest()' should not be used for arrays
with progressions of 1 (i.e. '{1, 2, 3, ...}'), the macro should still
behave correctly.

The bug was found while testing 'drivers/iio/adc/ad7606.c', specifically
its oversampling feature.  For reference, the oversampling values are
listed as:

  static const unsigned int ad7606_oversampling_avail[7] = {
  	1, 2, 4, 8, 16, 32, 64,
  };

When doing:

 1. $ echo 1 > /sys/bus/iio/devices/iio\:device0/oversampling_ratio
    $ cat /sys/bus/iio/devices/iio\:device0/oversampling_ratio
    1   # this is fine

 2. $ echo 2 > /sys/bus/iio/devices/iio\:device0/oversampling_ratio
    $ cat /sys/bus/iio/devices/iio\:device0/oversampling_ratio
    1   # this is wrong; 2 should be returned here

 3. $ echo 3 > /sys/bus/iio/devices/iio\:device0/oversampling_ratio
    $ cat /sys/bus/iio/devices/iio\:device0/oversampling_ratio
    2   # this is fine

 4. $ echo 4 > /sys/bus/iio/devices/iio\:device0/oversampling_ratio
    $ cat /sys/bus/iio/devices/iio\:device0/oversampling_ratio
    4   # this is fine

And from here on, the values are correct (one gets what one would
expect).
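Condensed into a minimal snippet (hypothetical, not part of the patch),
the progression-of-1 failure above is:

	static const int a[] = { 1, 2, 3 };

	/*
	 * Old macro: DIV_ROUND_CLOSEST(a[0] + a[1], 2) == 2, so the
	 * 'x <= mid' test already breaks out at index 0 and the index of
	 * '1' is returned, even though x == 2 is present in the array.
	 */
	int idx = find_closest(2, a, ARRAY_SIZE(a));	/* returns 0, expected 1 */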
While writing a kunit test for this bug, a peculiar issue was found for
the array in the 'drivers/hwmon/ina2xx.c' & 'drivers/iio/adc/ina2xx-adc.c'
drivers.  While running the kunit test (for 'ina226_avg_tab' from these
drivers):

 * idx = find_closest([-1 to 2], ina226_avg_tab, ARRAY_SIZE(ina226_avg_tab));
   This returns idx == 0, so value 1.

 * idx = find_closest(3, ina226_avg_tab, ARRAY_SIZE(ina226_avg_tab));
   This returns idx == 0, value 1; and now one could argue whether 3 is
   closer to 4 or to 1.  This quirk only appears for value '3' in this
   array, but it seems to be another rounding issue.

 * And from 4 onwards 'find_closest()' works fine (one gets what one
   would expect).

This change reworks the find_closest() macros to also check the distances
from 'x' to the elements on its left and right.  If the distance to the
right is smaller than the distance to the left, the index is incremented
by 1.  This also makes the DIV_ROUND_CLOSEST() macro redundant.

To accommodate any mix of negative and positive values, the internal
variables '__fc_x', '__fc_mid_x', '__fc_left' & '__fc_right' are forced
to 'long' type.  This also addresses any potential bugs/issues with 'x'
being of an unsigned type.  In those situations any comparison between
signed & unsigned would be promoted to a comparison between two unsigned
numbers; this is especially annoying when '__fc_left' & '__fc_right'
underflow.

The find_closest_descending() macro was also reworked; it duplicates
find_closest() but iterates over the array in reverse.  The main reason
for this is to get the same indices as 'find_closest()' (but in reverse).
The comparison for '__fc_right < __fc_left' favors walking the array in
ascending order.  For example, for array '{ 1024, 512, 256, 128, 64, 16,
4, 1 }' and x = 3, we get:

  __fc_mid_x = 2
  __fc_left  = -1
  __fc_right = -2

Then '__fc_right < __fc_left' evaluates to true and '__fc_i++' becomes 7,
which is not quite incorrect, but 3 is closer to 4 than to 1.

This change has been validated with the kunit from the next patch.

Link: https://lkml.kernel.org/r/20241105145406.554365-1-aardelean@baylibre.com
Fixes: 95d119528b0b ("util_macros.h: add find_closest() macro")
Signed-off-by: Alexandru Ardelean
Cc: Bartosz Golaszewski
Cc: Greg Kroah-Hartman
Cc:
Signed-off-by: Andrew Morton
---
 include/linux/util_macros.h | 56 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 40 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/util_macros.h b/include/linux/util_macros.h
index 6bb460c3e818..825487fb66fa 100644
--- a/include/linux/util_macros.h
+++ b/include/linux/util_macros.h
@@ -4,19 +4,6 @@

 #include

-#define __find_closest(x, a, as, op) \
-({ \
-	typeof(as) __fc_i, __fc_as = (as) - 1; \
-	typeof(x) __fc_x = (x); \
-	typeof(*a) const *__fc_a = (a); \
-	for (__fc_i = 0; __fc_i < __fc_as; __fc_i++) { \
-		if (__fc_x op DIV_ROUND_CLOSEST(__fc_a[__fc_i] + \
-						__fc_a[__fc_i + 1], 2)) \
-			break; \
-	} \
-	(__fc_i); \
-})
-
 /**
  * find_closest - locate the closest element in a sorted array
  * @x: The reference value.
@@ -25,8 +12,27 @@
  * @as: Size of 'a'.
  *
  * Returns the index of the element closest to 'x'.
+ * Note: If using an array of negative numbers (or mixed positive numbers),
+ * then be sure that 'x' is of a signed-type to get good results.
*/ -#define find_closest(x, a, as) __find_closest(x, a, as, <=) +#define find_closest(x, a, as) \ +({ \ + typeof(as) __fc_i, __fc_as = (as) - 1; \ + long __fc_mid_x, __fc_x = (x); \ + long __fc_left, __fc_right; \ + typeof(*a) const *__fc_a = (a); \ + for (__fc_i = 0; __fc_i < __fc_as; __fc_i++) { \ + __fc_mid_x = (__fc_a[__fc_i] + __fc_a[__fc_i + 1]) / 2; \ + if (__fc_x <= __fc_mid_x) { \ + __fc_left = __fc_x - __fc_a[__fc_i]; \ + __fc_right = __fc_a[__fc_i + 1] - __fc_x; \ + if (__fc_right < __fc_left) \ + __fc_i++; \ + break; \ + } \ + } \ + (__fc_i); \ +}) /** * find_closest_descending - locate the closest element in a sorted array @@ -36,9 +42,27 @@ * @as: Size of 'a'. * * Similar to find_closest() but 'a' is expected to be sorted in descending - * order. + * order. The iteration is done in reverse order, so that the comparison + * of '__fc_right' & '__fc_left' also works for unsigned numbers. */ -#define find_closest_descending(x, a, as) __find_closest(x, a, as, >=) +#define find_closest_descending(x, a, as) \ +({ \ + typeof(as) __fc_i, __fc_as = (as) - 1; \ + long __fc_mid_x, __fc_x = (x); \ + long __fc_left, __fc_right; \ + typeof(*a) const *__fc_a = (a); \ + for (__fc_i = __fc_as; __fc_i >= 1; __fc_i--) { \ + __fc_mid_x = (__fc_a[__fc_i] + __fc_a[__fc_i - 1]) / 2; \ + if (__fc_x <= __fc_mid_x) { \ + __fc_left = __fc_x - __fc_a[__fc_i]; \ + __fc_right = __fc_a[__fc_i - 1] - __fc_x; \ + if (__fc_right < __fc_left) \ + __fc_i--; \ + break; \ + } \ + } \ + (__fc_i); \ +}) /** * is_insidevar - check if the @ptr points inside the @var memory range. -- cgit v1.2.3-70-g09d2