From 630fdd592912614a72d00026fdadad72d9ef62eb Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 26 Jul 2023 14:59:57 -0700 Subject: seq_file: seq_show_option_n() is used for precise sizes When seq_show_option_n() is used, it is for non-string memory that happens to be printable bytes. As such, we must use memcpy() to copy the bytes and then explicitly NUL-terminate the result. Cc: Andy Shevchenko Cc: Andrew Morton Cc: Al Viro Cc: Muchun Song Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230726215957.never.619-kees@kernel.org Signed-off-by: Kees Cook --- include/linux/seq_file.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index bd023dd38ae6..386ab580b839 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -249,18 +249,19 @@ static inline void seq_show_option(struct seq_file *m, const char *name, /** * seq_show_option_n - display mount options with appropriate escapes - * where @value must be a specific length. + * where @value must be a specific length (i.e. + * not NUL-terminated). * @m: the seq_file handle * @name: the mount option name * @value: the mount option name's value, cannot be NULL - * @length: the length of @value to display + * @length: the exact length of @value to display, must be constant expression * * This is a macro since this uses "length" to define the size of the * stack buffer. */ #define seq_show_option_n(m, name, value, length) { \ char val_buf[length + 1]; \ - strncpy(val_buf, value, length); \ + memcpy(val_buf, value, length); \ val_buf[length] = '\0'; \ seq_show_option(m, name, val_buf); \ } -- cgit v1.3.1 From 7a0fd5e1678505534573b3c14c6ff69ed8592596 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 11 Aug 2023 17:18:38 +0200 Subject: compiler_types: Introduce the Clang __preserve_most function attribute [1]: "On X86-64 and AArch64 targets, this attribute changes the calling convention of a function. The preserve_most calling convention attempts to make the code in the caller as unintrusive as possible. This convention behaves identically to the C calling convention on how arguments and return values are passed, but it uses a different set of caller/callee-saved registers. This alleviates the burden of saving and recovering a large register set before and after the call in the caller. If the arguments are passed in callee-saved registers, then they will be preserved by the callee across the call. This doesn't apply for values returned in callee-saved registers. * On X86-64 the callee preserves all general purpose registers, except for R11. R11 can be used as a scratch register. Floating-point registers (XMMs/YMMs) are not preserved and need to be saved by the caller. * On AArch64 the callee preserve all general purpose registers, except x0-X8 and X16-X18." [1] https://clang.llvm.org/docs/AttributeReference.html#preserve-most Introduce the attribute to compiler_types.h as __preserve_most. Use of this attribute results in better code generation for calls to very rarely called functions, such as error-reporting functions, or rarely executed slow paths. Beware that the attribute conflicts with instrumentation calls inserted on function entry which do not use __preserve_most themselves. Notably, function tracing which assumes the normal C calling convention for the given architecture. Where the attribute is supported, __preserve_most will imply notrace. It is recommended to restrict use of the attribute to functions that should or already disable tracing. Note: The additional preprocessor check against architecture should not be necessary if __has_attribute() only returns true where supported; also see https://github.com/ClangBuiltLinux/linux/issues/1908. But until __has_attribute() does the right thing, we also guard by known-supported architectures to avoid build warnings on other architectures. The attribute may be supported by a future GCC version (see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110899). Signed-off-by: Marco Elver Reviewed-by: Miguel Ojeda Reviewed-by: Nick Desaulniers Acked-by: "Steven Rostedt (Google)" Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20230811151847.1594958-1-elver@google.com Signed-off-by: Kees Cook --- include/linux/compiler_types.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 547ea1ff806e..c523c6683789 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -106,6 +106,34 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { } #define __cold #endif +/* + * On x86-64 and arm64 targets, __preserve_most changes the calling convention + * of a function to make the code in the caller as unintrusive as possible. This + * convention behaves identically to the C calling convention on how arguments + * and return values are passed, but uses a different set of caller- and callee- + * saved registers. + * + * The purpose is to alleviates the burden of saving and recovering a large + * register set before and after the call in the caller. This is beneficial for + * rarely taken slow paths, such as error-reporting functions that may be called + * from hot paths. + * + * Note: This may conflict with instrumentation inserted on function entry which + * does not use __preserve_most or equivalent convention (if in assembly). Since + * function tracing assumes the normal C calling convention, where the attribute + * is supported, __preserve_most implies notrace. It is recommended to restrict + * use of the attribute to functions that should or already disable tracing. + * + * Optional: not supported by gcc. + * + * clang: https://clang.llvm.org/docs/AttributeReference.html#preserve-most + */ +#if __has_attribute(__preserve_most__) && (defined(CONFIG_X86_64) || defined(CONFIG_ARM64)) +# define __preserve_most notrace __attribute__((__preserve_most__)) +#else +# define __preserve_most +#endif + /* Builtins */ /* -- cgit v1.3.1 From b16c42c8fde808b4f047d94f1f2aeda93487670d Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 11 Aug 2023 17:18:39 +0200 Subject: list_debug: Introduce inline wrappers for debug checks Turn the list debug checking functions __list_*_valid() into inline functions that wrap the out-of-line functions. Care is taken to ensure the inline wrappers are always inlined, so that additional compiler instrumentation (such as sanitizers) does not result in redundant outlining. This change is preparation for performing checks in the inline wrappers. No functional change intended. Signed-off-by: Marco Elver Link: https://lore.kernel.org/r/20230811151847.1594958-2-elver@google.com Signed-off-by: Kees Cook --- arch/arm64/kvm/hyp/nvhe/list_debug.c | 6 +++--- include/linux/list.h | 37 ++++++++++++++++++++++++++++++++---- lib/list_debug.c | 11 +++++------ 3 files changed, 41 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/kvm/hyp/nvhe/list_debug.c b/arch/arm64/kvm/hyp/nvhe/list_debug.c index d68abd7ea124..16266a939a4c 100644 --- a/arch/arm64/kvm/hyp/nvhe/list_debug.c +++ b/arch/arm64/kvm/hyp/nvhe/list_debug.c @@ -26,8 +26,8 @@ static inline __must_check bool nvhe_check_data_corruption(bool v) /* The predicates checked here are taken from lib/list_debug.c. */ -bool __list_add_valid(struct list_head *new, struct list_head *prev, - struct list_head *next) +bool __list_add_valid_or_report(struct list_head *new, struct list_head *prev, + struct list_head *next) { if (NVHE_CHECK_DATA_CORRUPTION(next->prev != prev) || NVHE_CHECK_DATA_CORRUPTION(prev->next != next) || @@ -37,7 +37,7 @@ bool __list_add_valid(struct list_head *new, struct list_head *prev, return true; } -bool __list_del_entry_valid(struct list_head *entry) +bool __list_del_entry_valid_or_report(struct list_head *entry) { struct list_head *prev, *next; diff --git a/include/linux/list.h b/include/linux/list.h index f10344dbad4d..130c6a1bb45c 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -39,10 +39,39 @@ static inline void INIT_LIST_HEAD(struct list_head *list) } #ifdef CONFIG_DEBUG_LIST -extern bool __list_add_valid(struct list_head *new, - struct list_head *prev, - struct list_head *next); -extern bool __list_del_entry_valid(struct list_head *entry); +/* + * Performs the full set of list corruption checks before __list_add(). + * On list corruption reports a warning, and returns false. + */ +extern bool __list_add_valid_or_report(struct list_head *new, + struct list_head *prev, + struct list_head *next); + +/* + * Performs list corruption checks before __list_add(). Returns false if a + * corruption is detected, true otherwise. + */ +static __always_inline bool __list_add_valid(struct list_head *new, + struct list_head *prev, + struct list_head *next) +{ + return __list_add_valid_or_report(new, prev, next); +} + +/* + * Performs the full set of list corruption checks before __list_del_entry(). + * On list corruption reports a warning, and returns false. + */ +extern bool __list_del_entry_valid_or_report(struct list_head *entry); + +/* + * Performs list corruption checks before __list_del_entry(). Returns false if a + * corruption is detected, true otherwise. + */ +static __always_inline bool __list_del_entry_valid(struct list_head *entry) +{ + return __list_del_entry_valid_or_report(entry); +} #else static inline bool __list_add_valid(struct list_head *new, struct list_head *prev, diff --git a/lib/list_debug.c b/lib/list_debug.c index d98d43f80958..2def33b1491f 100644 --- a/lib/list_debug.c +++ b/lib/list_debug.c @@ -17,8 +17,8 @@ * attempt). */ -bool __list_add_valid(struct list_head *new, struct list_head *prev, - struct list_head *next) +bool __list_add_valid_or_report(struct list_head *new, struct list_head *prev, + struct list_head *next) { if (CHECK_DATA_CORRUPTION(prev == NULL, "list_add corruption. prev is NULL.\n") || @@ -37,9 +37,9 @@ bool __list_add_valid(struct list_head *new, struct list_head *prev, return true; } -EXPORT_SYMBOL(__list_add_valid); +EXPORT_SYMBOL(__list_add_valid_or_report); -bool __list_del_entry_valid(struct list_head *entry) +bool __list_del_entry_valid_or_report(struct list_head *entry) { struct list_head *prev, *next; @@ -65,6 +65,5 @@ bool __list_del_entry_valid(struct list_head *entry) return false; return true; - } -EXPORT_SYMBOL(__list_del_entry_valid); +EXPORT_SYMBOL(__list_del_entry_valid_or_report); -- cgit v1.3.1 From aebc7b0d8d91bbc69e976909963046bc48bca4fd Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 11 Aug 2023 17:18:40 +0200 Subject: list: Introduce CONFIG_LIST_HARDENED Numerous production kernel configs (see [1, 2]) are choosing to enable CONFIG_DEBUG_LIST, which is also being recommended by KSPP for hardened configs [3]. The motivation behind this is that the option can be used as a security hardening feature (e.g. CVE-2019-2215 and CVE-2019-2025 are mitigated by the option [4]). The feature has never been designed with performance in mind, yet common list manipulation is happening across hot paths all over the kernel. Introduce CONFIG_LIST_HARDENED, which performs list pointer checking inline, and only upon list corruption calls the reporting slow path. To generate optimal machine code with CONFIG_LIST_HARDENED: 1. Elide checking for pointer values which upon dereference would result in an immediate access fault (i.e. minimal hardening checks). The trade-off is lower-quality error reports. 2. Use the __preserve_most function attribute (available with Clang, but not yet with GCC) to minimize the code footprint for calling the reporting slow path. As a result, function size of callers is reduced by avoiding saving registers before calling the rarely called reporting slow path. Note that all TUs in lib/Makefile already disable function tracing, including list_debug.c, and __preserve_most's implied notrace has no effect in this case. 3. Because the inline checks are a subset of the full set of checks in __list_*_valid_or_report(), always return false if the inline checks failed. This avoids redundant compare and conditional branch right after return from the slow path. As a side-effect of the checks being inline, if the compiler can prove some condition to always be true, it can completely elide some checks. Since DEBUG_LIST is functionally a superset of LIST_HARDENED, the Kconfig variables are changed to reflect that: DEBUG_LIST selects LIST_HARDENED, whereas LIST_HARDENED itself has no dependency on DEBUG_LIST. Running netperf with CONFIG_LIST_HARDENED (using a Clang compiler with "preserve_most") shows throughput improvements, in my case of ~7% on average (up to 20-30% on some test cases). Link: https://r.android.com/1266735 [1] Link: https://gitlab.archlinux.org/archlinux/packaging/packages/linux/-/blob/main/config [2] Link: https://kernsec.org/wiki/index.php/Kernel_Self_Protection_Project/Recommended_Settings [3] Link: https://googleprojectzero.blogspot.com/2019/11/bad-binder-android-in-wild-exploit.html [4] Signed-off-by: Marco Elver Link: https://lore.kernel.org/r/20230811151847.1594958-3-elver@google.com Signed-off-by: Kees Cook --- arch/arm64/kvm/hyp/nvhe/Makefile | 2 +- arch/arm64/kvm/hyp/nvhe/list_debug.c | 2 ++ drivers/misc/lkdtm/bugs.c | 4 +-- include/linux/list.h | 64 ++++++++++++++++++++++++++++++++---- lib/Kconfig.debug | 9 +++-- lib/Makefile | 2 +- lib/list_debug.c | 5 ++- security/Kconfig.hardening | 13 ++++++++ 8 files changed, 88 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index 9ddc025e4b86..2250253a6429 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -25,7 +25,7 @@ hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o ffa.o hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o -hyp-obj-$(CONFIG_DEBUG_LIST) += list_debug.o +hyp-obj-$(CONFIG_LIST_HARDENED) += list_debug.o hyp-obj-y += $(lib-objs) ## diff --git a/arch/arm64/kvm/hyp/nvhe/list_debug.c b/arch/arm64/kvm/hyp/nvhe/list_debug.c index 16266a939a4c..46a2d4f2b3c6 100644 --- a/arch/arm64/kvm/hyp/nvhe/list_debug.c +++ b/arch/arm64/kvm/hyp/nvhe/list_debug.c @@ -26,6 +26,7 @@ static inline __must_check bool nvhe_check_data_corruption(bool v) /* The predicates checked here are taken from lib/list_debug.c. */ +__list_valid_slowpath bool __list_add_valid_or_report(struct list_head *new, struct list_head *prev, struct list_head *next) { @@ -37,6 +38,7 @@ bool __list_add_valid_or_report(struct list_head *new, struct list_head *prev, return true; } +__list_valid_slowpath bool __list_del_entry_valid_or_report(struct list_head *entry) { struct list_head *prev, *next; diff --git a/drivers/misc/lkdtm/bugs.c b/drivers/misc/lkdtm/bugs.c index 3c95600ab2f7..963b4dee6a7d 100644 --- a/drivers/misc/lkdtm/bugs.c +++ b/drivers/misc/lkdtm/bugs.c @@ -393,7 +393,7 @@ static void lkdtm_CORRUPT_LIST_ADD(void) pr_err("Overwrite did not happen, but no BUG?!\n"); else { pr_err("list_add() corruption not detected!\n"); - pr_expected_config(CONFIG_DEBUG_LIST); + pr_expected_config(CONFIG_LIST_HARDENED); } } @@ -420,7 +420,7 @@ static void lkdtm_CORRUPT_LIST_DEL(void) pr_err("Overwrite did not happen, but no BUG?!\n"); else { pr_err("list_del() corruption not detected!\n"); - pr_expected_config(CONFIG_DEBUG_LIST); + pr_expected_config(CONFIG_LIST_HARDENED); } } diff --git a/include/linux/list.h b/include/linux/list.h index 130c6a1bb45c..164b4d0e9d2a 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -38,39 +38,91 @@ static inline void INIT_LIST_HEAD(struct list_head *list) WRITE_ONCE(list->prev, list); } +#ifdef CONFIG_LIST_HARDENED + #ifdef CONFIG_DEBUG_LIST +# define __list_valid_slowpath +#else +# define __list_valid_slowpath __cold __preserve_most +#endif + /* * Performs the full set of list corruption checks before __list_add(). * On list corruption reports a warning, and returns false. */ -extern bool __list_add_valid_or_report(struct list_head *new, - struct list_head *prev, - struct list_head *next); +extern bool __list_valid_slowpath __list_add_valid_or_report(struct list_head *new, + struct list_head *prev, + struct list_head *next); /* * Performs list corruption checks before __list_add(). Returns false if a * corruption is detected, true otherwise. + * + * With CONFIG_LIST_HARDENED only, performs minimal list integrity checking + * inline to catch non-faulting corruptions, and only if a corruption is + * detected calls the reporting function __list_add_valid_or_report(). */ static __always_inline bool __list_add_valid(struct list_head *new, struct list_head *prev, struct list_head *next) { - return __list_add_valid_or_report(new, prev, next); + bool ret = true; + + if (!IS_ENABLED(CONFIG_DEBUG_LIST)) { + /* + * With the hardening version, elide checking if next and prev + * are NULL, since the immediate dereference of them below would + * result in a fault if NULL. + * + * With the reduced set of checks, we can afford to inline the + * checks, which also gives the compiler a chance to elide some + * of them completely if they can be proven at compile-time. If + * one of the pre-conditions does not hold, the slow-path will + * show a report which pre-condition failed. + */ + if (likely(next->prev == prev && prev->next == next && new != prev && new != next)) + return true; + ret = false; + } + + ret &= __list_add_valid_or_report(new, prev, next); + return ret; } /* * Performs the full set of list corruption checks before __list_del_entry(). * On list corruption reports a warning, and returns false. */ -extern bool __list_del_entry_valid_or_report(struct list_head *entry); +extern bool __list_valid_slowpath __list_del_entry_valid_or_report(struct list_head *entry); /* * Performs list corruption checks before __list_del_entry(). Returns false if a * corruption is detected, true otherwise. + * + * With CONFIG_LIST_HARDENED only, performs minimal list integrity checking + * inline to catch non-faulting corruptions, and only if a corruption is + * detected calls the reporting function __list_del_entry_valid_or_report(). */ static __always_inline bool __list_del_entry_valid(struct list_head *entry) { - return __list_del_entry_valid_or_report(entry); + bool ret = true; + + if (!IS_ENABLED(CONFIG_DEBUG_LIST)) { + struct list_head *prev = entry->prev; + struct list_head *next = entry->next; + + /* + * With the hardening version, elide checking if next and prev + * are NULL, LIST_POISON1 or LIST_POISON2, since the immediate + * dereference of them below would result in a fault. + */ + if (likely(prev->next == entry && next->prev == entry)) + return true; + ret = false; + } + + ret &= __list_del_entry_valid_or_report(entry); + return ret; } #else static inline bool __list_add_valid(struct list_head *new, diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index fbc89baf7de6..c38745ad46eb 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1674,9 +1674,14 @@ menu "Debug kernel data structures" config DEBUG_LIST bool "Debug linked list manipulation" depends on DEBUG_KERNEL || BUG_ON_DATA_CORRUPTION + select LIST_HARDENED help - Enable this to turn on extended checks in the linked-list - walking routines. + Enable this to turn on extended checks in the linked-list walking + routines. + + This option trades better quality error reports for performance, and + is more suitable for kernel debugging. If you care about performance, + you should only enable CONFIG_LIST_HARDENED instead. If unsure, say N. diff --git a/lib/Makefile b/lib/Makefile index 42d307ade225..110936c9a68b 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -161,7 +161,7 @@ obj-$(CONFIG_BTREE) += btree.o obj-$(CONFIG_INTERVAL_TREE) += interval_tree.o obj-$(CONFIG_ASSOCIATIVE_ARRAY) += assoc_array.o obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o -obj-$(CONFIG_DEBUG_LIST) += list_debug.o +obj-$(CONFIG_LIST_HARDENED) += list_debug.o obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o obj-$(CONFIG_BITREVERSE) += bitrev.o diff --git a/lib/list_debug.c b/lib/list_debug.c index 2def33b1491f..db602417febf 100644 --- a/lib/list_debug.c +++ b/lib/list_debug.c @@ -2,7 +2,8 @@ * Copyright 2006, Red Hat, Inc., Dave Jones * Released under the General Public License (GPL). * - * This file contains the linked list validation for DEBUG_LIST. + * This file contains the linked list validation and error reporting for + * LIST_HARDENED and DEBUG_LIST. */ #include @@ -17,6 +18,7 @@ * attempt). */ +__list_valid_slowpath bool __list_add_valid_or_report(struct list_head *new, struct list_head *prev, struct list_head *next) { @@ -39,6 +41,7 @@ bool __list_add_valid_or_report(struct list_head *new, struct list_head *prev, } EXPORT_SYMBOL(__list_add_valid_or_report); +__list_valid_slowpath bool __list_del_entry_valid_or_report(struct list_head *entry) { struct list_head *prev, *next; diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening index 0f295961e773..ffc3c702b461 100644 --- a/security/Kconfig.hardening +++ b/security/Kconfig.hardening @@ -279,6 +279,19 @@ config ZERO_CALL_USED_REGS endmenu +menu "Hardening of kernel data structures" + +config LIST_HARDENED + bool "Check integrity of linked list manipulation" + help + Minimal integrity checking in the linked-list manipulation routines + to catch memory corruptions that are not guaranteed to result in an + immediate access fault. + + If unsure, say N. + +endmenu + config CC_HAS_RANDSTRUCT def_bool $(cc-option,-frandomize-layout-seed-file=/dev/null) # Randstruct was first added in Clang 15, but it isn't safe to use until -- cgit v1.3.1 From c8248faf3ca276ebdf60f003b3e04bf764daba91 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 17 Aug 2023 13:06:03 -0700 Subject: Compiler Attributes: counted_by: Adjust name and identifier expansion GCC and Clang's current RFCs name this attribute "counted_by", and have moved away from using a string for the member name. Update the kernel's macros to match. Additionally provide a UAPI no-op macro for UAPI structs that will gain annotations. Cc: Miguel Ojeda Cc: Nick Desaulniers Fixes: dd06e72e68bc ("Compiler Attributes: Add __counted_by macro") Acked-by: Miguel Ojeda Reviewed-by: Nathan Chancellor Link: https://lore.kernel.org/r/20230817200558.never.077-kees@kernel.org Signed-off-by: Kees Cook --- include/linux/compiler_attributes.h | 26 +++++++++++++------------- include/uapi/linux/stddef.h | 4 ++++ 2 files changed, 17 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index 00efa35c350f..28566624f008 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -94,6 +94,19 @@ # define __copy(symbol) #endif +/* + * Optional: only supported since gcc >= 14 + * Optional: only supported since clang >= 18 + * + * gcc: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108896 + * clang: https://reviews.llvm.org/D148381 + */ +#if __has_attribute(__counted_by__) +# define __counted_by(member) __attribute__((__counted_by__(member))) +#else +# define __counted_by(member) +#endif + /* * Optional: not supported by gcc * Optional: only supported since clang >= 14.0 @@ -129,19 +142,6 @@ # define __designated_init #endif -/* - * Optional: only supported since gcc >= 14 - * Optional: only supported since clang >= 17 - * - * gcc: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108896 - * clang: https://reviews.llvm.org/D148381 - */ -#if __has_attribute(__element_count__) -# define __counted_by(member) __attribute__((__element_count__(#member))) -#else -# define __counted_by(member) -#endif - /* * Optional: only supported since clang >= 14.0 * diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h index 7837ba4fe728..7c3fc3980881 100644 --- a/include/uapi/linux/stddef.h +++ b/include/uapi/linux/stddef.h @@ -45,3 +45,7 @@ TYPE NAME[]; \ } #endif + +#ifndef __counted_by +#define __counted_by(m) +#endif -- cgit v1.3.1 From 2ddd3cac1fa988323684cee567356e970e6750bd Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Thu, 17 Aug 2023 21:13:32 -0700 Subject: nsproxy: Convert nsproxy.count to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable nsproxy.count is used as pure reference counter. Convert it to refcount_t and fix up the operations. **Important note for maintainers: Some functions from refcount_t API defined in refcount.h have different memory ordering guarantees than their atomic counterparts. Please check Documentation/core-api/refcount-vs-atomic.rst for more information. Normally the differences should not matter since refcount_t provides enough guarantees to satisfy the refcounting use cases, but in some rare cases it might matter. Please double check that you don't have some undocumented memory guarantees for this variable usage. For the nsproxy.count it might make a difference in following places: - put_nsproxy() and switch_task_namespaces(): decrement in refcount_dec_and_test() only provides RELEASE ordering and ACQUIRE ordering on success vs. fully ordered atomic counterpart Suggested-by: Kees Cook Signed-off-by: Elena Reshetova Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Reviewed-by: Christian Brauner Link: https://lore.kernel.org/r/20230818041327.gonna.210-kees@kernel.org Signed-off-by: Kees Cook --- include/linux/nsproxy.h | 7 +++---- kernel/nsproxy.c | 4 ++-- 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index fee881cded01..771cb0285872 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -29,7 +29,7 @@ struct fs_struct; * nsproxy is copied. */ struct nsproxy { - atomic_t count; + refcount_t count; struct uts_namespace *uts_ns; struct ipc_namespace *ipc_ns; struct mnt_namespace *mnt_ns; @@ -102,14 +102,13 @@ int __init nsproxy_cache_init(void); static inline void put_nsproxy(struct nsproxy *ns) { - if (atomic_dec_and_test(&ns->count)) { + if (refcount_dec_and_test(&ns->count)) free_nsproxy(ns); - } } static inline void get_nsproxy(struct nsproxy *ns) { - atomic_inc(&ns->count); + refcount_inc(&ns->count); } #endif diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 80d9c6d77a45..15781acaac1c 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -30,7 +30,7 @@ static struct kmem_cache *nsproxy_cachep; struct nsproxy init_nsproxy = { - .count = ATOMIC_INIT(1), + .count = REFCOUNT_INIT(1), .uts_ns = &init_uts_ns, #if defined(CONFIG_POSIX_MQUEUE) || defined(CONFIG_SYSVIPC) .ipc_ns = &init_ipc_ns, @@ -55,7 +55,7 @@ static inline struct nsproxy *create_nsproxy(void) nsproxy = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL); if (nsproxy) - atomic_set(&nsproxy->count, 1); + refcount_set(&nsproxy->count, 1); return nsproxy; } -- cgit v1.3.1 From 5f536ac6a5a7b67351e4e5ae4f9e1e57d31268e6 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 17 Aug 2023 16:59:56 -0700 Subject: LoadPin: Annotate struct dm_verity_loadpin_trusted_root_digest with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct dm_verity_loadpin_trusted_root_digest. Additionally, since the element count member must be set before accessing the annotated flexible array member, move its initialization earlier. [1] https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci Cc: Alasdair Kergon Cc: Mike Snitzer Cc: dm-devel@redhat.com Cc: Paul Moore Cc: James Morris Cc: "Serge E. Hallyn" Cc: linux-security-module@vger.kernel.org Link: https://lore.kernel.org/r/20230817235955.never.762-kees@kernel.org Signed-off-by: Kees Cook --- include/linux/dm-verity-loadpin.h | 2 +- security/loadpin/loadpin.c | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dm-verity-loadpin.h b/include/linux/dm-verity-loadpin.h index 552b817ab102..3ac6dbaeaa37 100644 --- a/include/linux/dm-verity-loadpin.h +++ b/include/linux/dm-verity-loadpin.h @@ -12,7 +12,7 @@ extern struct list_head dm_verity_loadpin_trusted_root_digests; struct dm_verity_loadpin_trusted_root_digest { struct list_head node; unsigned int len; - u8 data[]; + u8 data[] __counted_by(len); }; #if IS_ENABLED(CONFIG_SECURITY_LOADPIN_VERITY) diff --git a/security/loadpin/loadpin.c b/security/loadpin/loadpin.c index ebae964f7cc9..a9d40456a064 100644 --- a/security/loadpin/loadpin.c +++ b/security/loadpin/loadpin.c @@ -336,6 +336,7 @@ static int read_trusted_verity_root_digests(unsigned int fd) rc = -ENOMEM; goto err; } + trd->len = len; if (hex2bin(trd->data, d, len)) { kfree(trd); @@ -343,8 +344,6 @@ static int read_trusted_verity_root_digests(unsigned int fd) goto err; } - trd->len = len; - list_add_tail(&trd->node, &dm_verity_loadpin_trusted_root_digests); } -- cgit v1.3.1