diff options
Diffstat (limited to 'mm/slub.c')
-rw-r--r-- | mm/slub.c | 553 |
1 files changed, 376 insertions, 177 deletions
diff --git a/mm/slub.c b/mm/slub.c index 157527d7101b..891df05a4d45 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -187,6 +187,12 @@ do { \ #define USE_LOCKLESS_FAST_PATH() (false) #endif +#ifndef CONFIG_SLUB_TINY +#define __fastpath_inline __always_inline +#else +#define __fastpath_inline +#endif + #ifdef CONFIG_SLUB_DEBUG #ifdef CONFIG_SLUB_DEBUG_ON DEFINE_STATIC_KEY_TRUE(slub_debug_enabled); @@ -241,6 +247,7 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s) /* Enable to log cmpxchg failures */ #undef SLUB_DEBUG_CMPXCHG +#ifndef CONFIG_SLUB_TINY /* * Minimum number of partial slabs. These will be left on the partial * lists even if they are empty. kmem_cache_shrink may reclaim them. @@ -253,6 +260,10 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s) * sort the partial list by the number of objects in use. */ #define MAX_PARTIAL 10 +#else +#define MIN_PARTIAL 0 +#define MAX_PARTIAL 0 +#endif #define DEBUG_DEFAULT_FLAGS (SLAB_CONSISTENCY_CHECKS | SLAB_RED_ZONE | \ SLAB_POISON | SLAB_STORE_USER) @@ -298,7 +309,7 @@ struct track { enum track_item { TRACK_ALLOC, TRACK_FREE }; -#ifdef CONFIG_SYSFS +#ifdef SLAB_SUPPORTS_SYSFS static int sysfs_slab_add(struct kmem_cache *); static int sysfs_slab_alias(struct kmem_cache *, const char *); #else @@ -332,10 +343,12 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si) */ static nodemask_t slab_nodes; +#ifndef CONFIG_SLUB_TINY /* * Workqueue used for flush_cpu_slab(). */ static struct workqueue_struct *flushwq; +#endif /******************************************************************** * Core slab cache functions @@ -381,10 +394,12 @@ static inline void *get_freepointer(struct kmem_cache *s, void *object) return freelist_dereference(s, object + s->offset); } +#ifndef CONFIG_SLUB_TINY static void prefetch_freepointer(const struct kmem_cache *s, void *object) { prefetchw(object + s->offset); } +#endif /* * When running under KMSAN, get_freepointer_safe() may return an uninitialized @@ -829,6 +844,17 @@ static inline void set_orig_size(struct kmem_cache *s, if (!slub_debug_orig_size(s)) return; +#ifdef CONFIG_KASAN_GENERIC + /* + * KASAN could save its free meta data in object's data area at + * offset 0, if the size is larger than 'orig_size', it will + * overlap the data redzone in [orig_size+1, object_size], and + * the check should be skipped. + */ + if (kasan_metadata_size(s, true) > orig_size) + orig_size = s->object_size; +#endif + p += get_info_end(s); p += sizeof(struct track) * 2; @@ -848,6 +874,11 @@ static inline unsigned int get_orig_size(struct kmem_cache *s, void *object) return *(unsigned int *)p; } +void skip_orig_size_check(struct kmem_cache *s, const void *object) +{ + set_orig_size(s, (void *)object, s->object_size); +} + static void slab_bug(struct kmem_cache *s, char *fmt, ...) { struct va_format vaf; @@ -910,7 +941,7 @@ static void print_trailer(struct kmem_cache *s, struct slab *slab, u8 *p) if (slub_debug_orig_size(s)) off += sizeof(unsigned int); - off += kasan_metadata_size(s); + off += kasan_metadata_size(s, false); if (off != size_from_object(s)) /* Beginning of the filler is the free pointer */ @@ -966,17 +997,28 @@ static __printf(3, 4) void slab_err(struct kmem_cache *s, struct slab *slab, static void init_object(struct kmem_cache *s, void *object, u8 val) { u8 *p = kasan_reset_tag(object); + unsigned int poison_size = s->object_size; - if (s->flags & SLAB_RED_ZONE) + if (s->flags & SLAB_RED_ZONE) { memset(p - s->red_left_pad, val, s->red_left_pad); + if (slub_debug_orig_size(s) && val == SLUB_RED_ACTIVE) { + /* + * Redzone the extra allocated space by kmalloc than + * requested, and the poison size will be limited to + * the original request size accordingly. + */ + poison_size = get_orig_size(s, object); + } + } + if (s->flags & __OBJECT_POISON) { - memset(p, POISON_FREE, s->object_size - 1); - p[s->object_size - 1] = POISON_END; + memset(p, POISON_FREE, poison_size - 1); + p[poison_size - 1] = POISON_END; } if (s->flags & SLAB_RED_ZONE) - memset(p + s->object_size, val, s->inuse - s->object_size); + memset(p + poison_size, val, s->inuse - poison_size); } static void restore_bytes(struct kmem_cache *s, char *message, u8 data, @@ -1070,7 +1112,7 @@ static int check_pad_bytes(struct kmem_cache *s, struct slab *slab, u8 *p) off += sizeof(unsigned int); } - off += kasan_metadata_size(s); + off += kasan_metadata_size(s, false); if (size_from_object(s) == off) return 1; @@ -1120,6 +1162,7 @@ static int check_object(struct kmem_cache *s, struct slab *slab, { u8 *p = object; u8 *endobject = object + s->object_size; + unsigned int orig_size; if (s->flags & SLAB_RED_ZONE) { if (!check_bytes_and_report(s, slab, object, "Left Redzone", @@ -1129,6 +1172,17 @@ static int check_object(struct kmem_cache *s, struct slab *slab, if (!check_bytes_and_report(s, slab, object, "Right Redzone", endobject, val, s->inuse - s->object_size)) return 0; + + if (slub_debug_orig_size(s) && val == SLUB_RED_ACTIVE) { + orig_size = get_orig_size(s, object); + + if (s->object_size > orig_size && + !check_bytes_and_report(s, slab, object, + "kmalloc Redzone", p + orig_size, + val, s->object_size - orig_size)) { + return 0; + } + } } else { if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) { check_bytes_and_report(s, slab, p, "Alignment padding", @@ -1363,7 +1417,7 @@ static inline int alloc_consistency_checks(struct kmem_cache *s, return 1; } -static noinline int alloc_debug_processing(struct kmem_cache *s, +static noinline bool alloc_debug_processing(struct kmem_cache *s, struct slab *slab, void *object, int orig_size) { if (s->flags & SLAB_CONSISTENCY_CHECKS) { @@ -1375,7 +1429,7 @@ static noinline int alloc_debug_processing(struct kmem_cache *s, trace(s, slab, object, 1); set_orig_size(s, object, orig_size); init_object(s, object, SLUB_RED_ACTIVE); - return 1; + return true; bad: if (folio_test_slab(slab_folio(slab))) { @@ -1388,7 +1442,7 @@ bad: slab->inuse = slab->objects; slab->freelist = NULL; } - return 0; + return false; } static inline int free_consistency_checks(struct kmem_cache *s, @@ -1641,17 +1695,17 @@ static inline void setup_object_debug(struct kmem_cache *s, void *object) {} static inline void setup_slab_debug(struct kmem_cache *s, struct slab *slab, void *addr) {} -static inline int alloc_debug_processing(struct kmem_cache *s, - struct slab *slab, void *object, int orig_size) { return 0; } +static inline bool alloc_debug_processing(struct kmem_cache *s, + struct slab *slab, void *object, int orig_size) { return true; } -static inline void free_debug_processing( - struct kmem_cache *s, struct slab *slab, - void *head, void *tail, int bulk_cnt, - unsigned long addr) {} +static inline bool free_debug_processing(struct kmem_cache *s, + struct slab *slab, void *head, void *tail, int *bulk_cnt, + unsigned long addr, depot_stack_handle_t handle) { return true; } static inline void slab_pad_check(struct kmem_cache *s, struct slab *slab) {} static inline int check_object(struct kmem_cache *s, struct slab *slab, void *object, u8 val) { return 1; } +static inline depot_stack_handle_t set_track_prepare(void) { return 0; } static inline void set_track(struct kmem_cache *s, void *object, enum track_item alloc, unsigned long addr) {} static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n, @@ -1676,11 +1730,13 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node, static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects) {} +#ifndef CONFIG_SLUB_TINY static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab, void **freelist, void *nextfree) { return false; } +#endif #endif /* CONFIG_SLUB_DEBUG */ /* @@ -1800,6 +1856,8 @@ static inline struct slab *alloc_slab_page(gfp_t flags, int node, slab = folio_slab(folio); __folio_set_slab(folio); + /* Make the flag visible before any changes to folio->mapping */ + smp_wmb(); if (page_is_pfmemalloc(folio_page(folio, 0))) slab_set_pfmemalloc(slab); @@ -1999,17 +2057,11 @@ static void __free_slab(struct kmem_cache *s, struct slab *slab) int order = folio_order(folio); int pages = 1 << order; - if (kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS)) { - void *p; - - slab_pad_check(s, slab); - for_each_object(p, s, slab_address(slab), slab->objects) - check_object(s, slab, p, SLUB_RED_INACTIVE); - } - __slab_clear_pfmemalloc(slab); - __folio_clear_slab(folio); folio->mapping = NULL; + /* Make the mapping reset visible before clearing the flag */ + smp_wmb(); + __folio_clear_slab(folio); if (current->reclaim_state) current->reclaim_state->reclaimed_slab += pages; unaccount_slab(slab, order, s); @@ -2025,9 +2077,17 @@ static void rcu_free_slab(struct rcu_head *h) static void free_slab(struct kmem_cache *s, struct slab *slab) { - if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) { + if (kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS)) { + void *p; + + slab_pad_check(s, slab); + for_each_object(p, s, slab_address(slab), slab->objects) + check_object(s, slab, p, SLUB_RED_INACTIVE); + } + + if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) call_rcu(&slab->rcu_head, rcu_free_slab); - } else + else __free_slab(s, slab); } @@ -2214,7 +2274,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, if (!pfmemalloc_match(slab, pc->flags)) continue; - if (kmem_cache_debug(s)) { + if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s)) { object = alloc_single_from_partial(s, n, slab, pc->orig_size); if (object) @@ -2329,6 +2389,8 @@ static void *get_partial(struct kmem_cache *s, int node, struct partial_context return get_any_partial(s, pc); } +#ifndef CONFIG_SLUB_TINY + #ifdef CONFIG_PREEMPTION /* * Calculate the next globally unique transaction for disambiguation @@ -2342,7 +2404,7 @@ static void *get_partial(struct kmem_cache *s, int node, struct partial_context * different cpus. */ #define TID_STEP 1 -#endif +#endif /* CONFIG_PREEMPTION */ static inline unsigned long next_tid(unsigned long tid) { @@ -2411,7 +2473,7 @@ static void init_kmem_cache_cpus(struct kmem_cache *s) static void deactivate_slab(struct kmem_cache *s, struct slab *slab, void *freelist) { - enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE, M_FULL_NOLIST }; + enum slab_modes { M_NONE, M_PARTIAL, M_FREE, M_FULL_NOLIST }; struct kmem_cache_node *n = get_node(s, slab_nid(slab)); int free_delta = 0; enum slab_modes mode = M_NONE; @@ -2487,14 +2549,6 @@ redo: * acquire_slab() will see a slab that is frozen */ spin_lock_irqsave(&n->list_lock, flags); - } else if (kmem_cache_debug_flags(s, SLAB_STORE_USER)) { - mode = M_FULL; - /* - * This also ensures that the scanning of full - * slabs from diagnostic functions will not see - * any frozen slabs. - */ - spin_lock_irqsave(&n->list_lock, flags); } else { mode = M_FULL_NOLIST; } @@ -2504,7 +2558,7 @@ redo: old.freelist, old.counters, new.freelist, new.counters, "unfreezing slab")) { - if (mode == M_PARTIAL || mode == M_FULL) + if (mode == M_PARTIAL) spin_unlock_irqrestore(&n->list_lock, flags); goto redo; } @@ -2518,10 +2572,6 @@ redo: stat(s, DEACTIVATE_EMPTY); discard_slab(s, slab); stat(s, FREE_SLAB); - } else if (mode == M_FULL) { - add_full(s, n, slab); - spin_unlock_irqrestore(&n->list_lock, flags); - stat(s, DEACTIVATE_FULL); } else if (mode == M_FULL_NOLIST) { stat(s, DEACTIVATE_FULL); } @@ -2803,6 +2853,13 @@ static int slub_cpu_dead(unsigned int cpu) return 0; } +#else /* CONFIG_SLUB_TINY */ +static inline void flush_all_cpus_locked(struct kmem_cache *s) { } +static inline void flush_all(struct kmem_cache *s) { } +static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) { } +static inline int slub_cpu_dead(unsigned int cpu) { return 0; } +#endif /* CONFIG_SLUB_TINY */ + /* * Check if the objects in a per cpu structure fit numa * locality expectations. @@ -2828,38 +2885,28 @@ static inline unsigned long node_nr_objs(struct kmem_cache_node *n) } /* Supports checking bulk free of a constructed freelist */ -static noinline void free_debug_processing( - struct kmem_cache *s, struct slab *slab, - void *head, void *tail, int bulk_cnt, - unsigned long addr) +static inline bool free_debug_processing(struct kmem_cache *s, + struct slab *slab, void *head, void *tail, int *bulk_cnt, + unsigned long addr, depot_stack_handle_t handle) { - struct kmem_cache_node *n = get_node(s, slab_nid(slab)); - struct slab *slab_free = NULL; + bool checks_ok = false; void *object = head; int cnt = 0; - unsigned long flags; - bool checks_ok = false; - depot_stack_handle_t handle = 0; - - if (s->flags & SLAB_STORE_USER) - handle = set_track_prepare(); - - spin_lock_irqsave(&n->list_lock, flags); if (s->flags & SLAB_CONSISTENCY_CHECKS) { if (!check_slab(s, slab)) goto out; } - if (slab->inuse < bulk_cnt) { + if (slab->inuse < *bulk_cnt) { slab_err(s, slab, "Slab has %d allocated objects but %d are to be freed\n", - slab->inuse, bulk_cnt); + slab->inuse, *bulk_cnt); goto out; } next_object: - if (++cnt > bulk_cnt) + if (++cnt > *bulk_cnt) goto out_cnt; if (s->flags & SLAB_CONSISTENCY_CHECKS) { @@ -2881,61 +2928,22 @@ next_object: checks_ok = true; out_cnt: - if (cnt != bulk_cnt) + if (cnt != *bulk_cnt) { slab_err(s, slab, "Bulk free expected %d objects but found %d\n", - bulk_cnt, cnt); - -out: - if (checks_ok) { - void *prior = slab->freelist; - - /* Perform the actual freeing while we still hold the locks */ - slab->inuse -= cnt; - set_freepointer(s, tail, prior); - slab->freelist = head; - - /* - * If the slab is empty, and node's partial list is full, - * it should be discarded anyway no matter it's on full or - * partial list. - */ - if (slab->inuse == 0 && n->nr_partial >= s->min_partial) - slab_free = slab; - - if (!prior) { - /* was on full list */ - remove_full(s, n, slab); - if (!slab_free) { - add_partial(n, slab, DEACTIVATE_TO_TAIL); - stat(s, FREE_ADD_PARTIAL); - } - } else if (slab_free) { - remove_partial(n, slab); - stat(s, FREE_REMOVE_PARTIAL); - } + *bulk_cnt, cnt); + *bulk_cnt = cnt; } - if (slab_free) { - /* - * Update the counters while still holding n->list_lock to - * prevent spurious validation warnings - */ - dec_slabs_node(s, slab_nid(slab_free), slab_free->objects); - } - - spin_unlock_irqrestore(&n->list_lock, flags); +out: if (!checks_ok) slab_fix(s, "Object at 0x%p not freed", object); - if (slab_free) { - stat(s, FREE_SLAB); - free_slab(s, slab_free); - } + return checks_ok; } #endif /* CONFIG_SLUB_DEBUG */ -#if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SYSFS) +#if defined(CONFIG_SLUB_DEBUG) || defined(SLAB_SUPPORTS_SYSFS) static unsigned long count_partial(struct kmem_cache_node *n, int (*get_count)(struct slab *)) { @@ -2949,12 +2957,12 @@ static unsigned long count_partial(struct kmem_cache_node *n, spin_unlock_irqrestore(&n->list_lock, flags); return x; } -#endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */ +#endif /* CONFIG_SLUB_DEBUG || SLAB_SUPPORTS_SYSFS */ +#ifdef CONFIG_SLUB_DEBUG static noinline void slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid) { -#ifdef CONFIG_SLUB_DEBUG static DEFINE_RATELIMIT_STATE(slub_oom_rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); int node; @@ -2985,8 +2993,11 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid) pr_warn(" node %d: slabs: %ld, objs: %ld, free: %ld\n", node, nr_slabs, nr_objs, nr_free); } -#endif } +#else /* CONFIG_SLUB_DEBUG */ +static inline void +slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid) { } +#endif static inline bool pfmemalloc_match(struct slab *slab, gfp_t gfpflags) { @@ -2996,6 +3007,7 @@ static inline bool pfmemalloc_match(struct slab *slab, gfp_t gfpflags) return true; } +#ifndef CONFIG_SLUB_TINY /* * Check the slab->freelist and either transfer the freelist to the * per cpu freelist or deactivate the slab. @@ -3283,45 +3295,13 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, return p; } -/* - * If the object has been wiped upon free, make sure it's fully initialized by - * zeroing out freelist pointer. - */ -static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s, - void *obj) -{ - if (unlikely(slab_want_init_on_free(s)) && obj) - memset((void *)((char *)kasan_reset_tag(obj) + s->offset), - 0, sizeof(void *)); -} - -/* - * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc) - * have the fastpath folded into their functions. So no function call - * overhead for requests that can be satisfied on the fastpath. - * - * The fastpath works by first checking if the lockless freelist can be used. - * If not then __slab_alloc is called for slow processing. - * - * Otherwise we can simply pick the next object from the lockless free list. - */ -static __always_inline void *slab_alloc_node(struct kmem_cache *s, struct list_lru *lru, +static __always_inline void *__slab_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node, unsigned long addr, size_t orig_size) { - void *object; struct kmem_cache_cpu *c; struct slab *slab; unsigned long tid; - struct obj_cgroup *objcg = NULL; - bool init = false; - - s = slab_pre_alloc_hook(s, lru, &objcg, 1, gfpflags); - if (!s) - return NULL; - - object = kfence_alloc(s, orig_size, gfpflags); - if (unlikely(object)) - goto out; + void *object; redo: /* @@ -3391,22 +3371,95 @@ redo: stat(s, ALLOC_FASTPATH); } + return object; +} +#else /* CONFIG_SLUB_TINY */ +static void *__slab_alloc_node(struct kmem_cache *s, + gfp_t gfpflags, int node, unsigned long addr, size_t orig_size) +{ + struct partial_context pc; + struct slab *slab; + void *object; + + pc.flags = gfpflags; + pc.slab = &slab; + pc.orig_size = orig_size; + object = get_partial(s, node, &pc); + + if (object) + return object; + + slab = new_slab(s, gfpflags, node); + if (unlikely(!slab)) { + slab_out_of_memory(s, gfpflags, node); + return NULL; + } + + object = alloc_single_from_new_slab(s, slab, orig_size); + + return object; +} +#endif /* CONFIG_SLUB_TINY */ + +/* + * If the object has been wiped upon free, make sure it's fully initialized by + * zeroing out freelist pointer. + */ +static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s, + void *obj) +{ + if (unlikely(slab_want_init_on_free(s)) && obj) + memset((void *)((char *)kasan_reset_tag(obj) + s->offset), + 0, sizeof(void *)); +} + +/* + * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc) + * have the fastpath folded into their functions. So no function call + * overhead for requests that can be satisfied on the fastpath. + * + * The fastpath works by first checking if the lockless freelist can be used. + * If not then __slab_alloc is called for slow processing. + * + * Otherwise we can simply pick the next object from the lockless free list. + */ +static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list_lru *lru, + gfp_t gfpflags, int node, unsigned long addr, size_t orig_size) +{ + void *object; + struct obj_cgroup *objcg = NULL; + bool init = false; + + s = slab_pre_alloc_hook(s, lru, &objcg, 1, gfpflags); + if (!s) + return NULL; + + object = kfence_alloc(s, orig_size, gfpflags); + if (unlikely(object)) + goto out; + + object = __slab_alloc_node(s, gfpflags, node, addr, orig_size); + maybe_wipe_obj_freeptr(s, object); init = slab_want_init_on_alloc(gfpflags, s); out: - slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init); + /* + * When init equals 'true', like for kzalloc() family, only + * @orig_size bytes might be zeroed instead of s->object_size + */ + slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init, orig_size); return object; } -static __always_inline void *slab_alloc(struct kmem_cache *s, struct list_lru *lru, +static __fastpath_inline void *slab_alloc(struct kmem_cache *s, struct list_lru *lru, gfp_t gfpflags, unsigned long addr, size_t orig_size) { return slab_alloc_node(s, lru, gfpflags, NUMA_NO_NODE, addr, orig_size); } -static __always_inline +static __fastpath_inline void *__kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru, gfp_t gfpflags) { @@ -3448,6 +3501,67 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) } EXPORT_SYMBOL(kmem_cache_alloc_node); +static noinline void free_to_partial_list( + struct kmem_cache *s, struct slab *slab, + void *head, void *tail, int bulk_cnt, + unsigned long addr) +{ + struct kmem_cache_node *n = get_node(s, slab_nid(slab)); + struct slab *slab_free = NULL; + int cnt = bulk_cnt; + unsigned long flags; + depot_stack_handle_t handle = 0; + + if (s->flags & SLAB_STORE_USER) + handle = set_track_prepare(); + + spin_lock_irqsave(&n->list_lock, flags); + + if (free_debug_processing(s, slab, head, tail, &cnt, addr, handle)) { + void *prior = slab->freelist; + + /* Perform the actual freeing while we still hold the locks */ + slab->inuse -= cnt; + set_freepointer(s, tail, prior); + slab->freelist = head; + + /* + * If the slab is empty, and node's partial list is full, + * it should be discarded anyway no matter it's on full or + * partial list. + */ + if (slab->inuse == 0 && n->nr_partial >= s->min_partial) + slab_free = slab; + + if (!prior) { + /* was on full list */ + remove_full(s, n, slab); + if (!slab_free) { + add_partial(n, slab, DEACTIVATE_TO_TAIL); + stat(s, FREE_ADD_PARTIAL); + } + } else if (slab_free) { + remove_partial(n, slab); + stat(s, FREE_REMOVE_PARTIAL); + } + } + + if (slab_free) { + /* + * Update the counters while still holding n->list_lock to + * prevent spurious validation warnings + */ + dec_slabs_node(s, slab_nid(slab_free), slab_free->objects); + } + + spin_unlock_irqrestore(&n->list_lock, flags); + + if (slab_free) { + stat(s, FREE_SLAB); + free_slab(s, slab_free); + } +} + /* * Slow path handling. This may still be called frequently since objects * have a longer lifetime than the cpu slabs in most processing loads. @@ -3473,8 +3587,8 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab, if (kfence_free(head)) return; - if (kmem_cache_debug(s)) { - free_debug_processing(s, slab, head, tail, cnt, addr); + if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s)) { + free_to_partial_list(s, slab, head, tail, cnt, addr); return; } @@ -3574,6 +3688,7 @@ slab_empty: discard_slab(s, slab); } +#ifndef CONFIG_SLUB_TINY /* * Fastpath with forced inlining to produce a kfree and kmem_cache_free that * can perform fastpath freeing without additional function calls. @@ -3648,8 +3763,18 @@ redo: } stat(s, FREE_FASTPATH); } +#else /* CONFIG_SLUB_TINY */ +static void do_slab_free(struct kmem_cache *s, + struct slab *slab, void *head, void *tail, + int cnt, unsigned long addr) +{ + void *tail_obj = tail ? : head; + + __slab_free(s, slab, head, tail_obj, cnt, addr); +} +#endif /* CONFIG_SLUB_TINY */ -static __always_inline void slab_free(struct kmem_cache *s, struct slab *slab, +static __fastpath_inline void slab_free(struct kmem_cache *s, struct slab *slab, void *head, void *tail, void **p, int cnt, unsigned long addr) { @@ -3782,18 +3907,13 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) } EXPORT_SYMBOL(kmem_cache_free_bulk); -/* Note that interrupts must be enabled when calling this function. */ -int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, - void **p) +#ifndef CONFIG_SLUB_TINY +static inline int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, + size_t size, void **p, struct obj_cgroup *objcg) { struct kmem_cache_cpu *c; int i; - struct obj_cgroup *objcg = NULL; - /* memcg and kmem_cache debug support */ - s = slab_pre_alloc_hook(s, NULL, &objcg, size, flags); - if (unlikely(!s)) - return false; /* * Drain objects in the per cpu slab, while disabling local * IRQs, which protects against PREEMPT and interrupts @@ -3847,19 +3967,72 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, local_unlock_irq(&s->cpu_slab->lock); slub_put_cpu_ptr(s->cpu_slab); - /* - * memcg and kmem_cache debug support and memory initialization. - * Done outside of the IRQ disabled fastpath loop. - */ - slab_post_alloc_hook(s, objcg, flags, size, p, - slab_want_init_on_alloc(flags, s)); return i; + error: slub_put_cpu_ptr(s->cpu_slab); - slab_post_alloc_hook(s, objcg, flags, i, p, false); + slab_post_alloc_hook(s, objcg, flags, i, p, false, s->object_size); + kmem_cache_free_bulk(s, i, p); + return 0; + +} +#else /* CONFIG_SLUB_TINY */ +static int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, + size_t size, void **p, struct obj_cgroup *objcg) +{ + int i; + + for (i = 0; i < size; i++) { + void *object = kfence_alloc(s, s->object_size, flags); + + if (unlikely(object)) { + p[i] = object; + continue; + } + + p[i] = __slab_alloc_node(s, flags, NUMA_NO_NODE, + _RET_IP_, s->object_size); + if (unlikely(!p[i])) + goto error; + + maybe_wipe_obj_freeptr(s, p[i]); + } + + return i; + +error: + slab_post_alloc_hook(s, objcg, flags, i, p, false, s->object_size); kmem_cache_free_bulk(s, i, p); return 0; } +#endif /* CONFIG_SLUB_TINY */ + +/* Note that interrupts must be enabled when calling this function. */ +int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, + void **p) +{ + int i; + struct obj_cgroup *objcg = NULL; + + if (!size) + return 0; + + /* memcg and kmem_cache debug support */ + s = slab_pre_alloc_hook(s, NULL, &objcg, size, flags); + if (unlikely(!s)) + return 0; + + i = __kmem_cache_alloc_bulk(s, flags, size, p, objcg); + + /* + * memcg and kmem_cache debug support and memory initialization. + * Done outside of the IRQ disabled fastpath loop. + */ + if (i != 0) + slab_post_alloc_hook(s, objcg, flags, size, p, + slab_want_init_on_alloc(flags, s), s->object_size); + return i; +} EXPORT_SYMBOL(kmem_cache_alloc_bulk); @@ -3883,7 +4056,8 @@ EXPORT_SYMBOL(kmem_cache_alloc_bulk); * take the list_lock. */ static unsigned int slub_min_order; -static unsigned int slub_max_order = PAGE_ALLOC_COSTLY_ORDER; +static unsigned int slub_max_order = + IS_ENABLED(CONFIG_SLUB_TINY) ? 1 : PAGE_ALLOC_COSTLY_ORDER; static unsigned int slub_min_objects; /* @@ -4014,10 +4188,12 @@ init_kmem_cache_node(struct kmem_cache_node *n) #endif } +#ifndef CONFIG_SLUB_TINY static inline int alloc_kmem_cache_cpus(struct kmem_cache *s) { BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE < - KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu)); + NR_KMALLOC_TYPES * KMALLOC_SHIFT_HIGH * + sizeof(struct kmem_cache_cpu)); /* * Must align to double word boundary for the double cmpxchg @@ -4033,6 +4209,12 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s) return 1; } +#else +static inline int alloc_kmem_cache_cpus(struct kmem_cache *s) +{ + return 1; +} +#endif /* CONFIG_SLUB_TINY */ static struct kmem_cache *kmem_cache_node; @@ -4095,7 +4277,9 @@ static void free_kmem_cache_nodes(struct kmem_cache *s) void __kmem_cache_release(struct kmem_cache *s) { cache_random_seq_destroy(s); +#ifndef CONFIG_SLUB_TINY free_percpu(s->cpu_slab); +#endif free_kmem_cache_nodes(s); } @@ -4202,7 +4386,8 @@ static int calculate_sizes(struct kmem_cache *s) */ s->inuse = size; - if ((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) || + if (slub_debug_orig_size(s) || + (flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) || ((flags & SLAB_RED_ZONE) && s->object_size < sizeof(void *)) || s->ctor) { /* @@ -4872,8 +5057,10 @@ void __init kmem_cache_init(void) void __init kmem_cache_init_late(void) { +#ifndef CONFIG_SLUB_TINY flushwq = alloc_workqueue("slub_flushwq", WQ_MEM_RECLAIM, 0); WARN_ON(!flushwq); +#endif } struct kmem_cache * @@ -4924,7 +5111,7 @@ int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags) return 0; } -#ifdef CONFIG_SYSFS +#ifdef SLAB_SUPPORTS_SYSFS static int count_inuse(struct slab *slab) { return slab->inuse; @@ -5182,7 +5369,7 @@ static void process_slab(struct loc_track *t, struct kmem_cache *s, #endif /* CONFIG_DEBUG_FS */ #endif /* CONFIG_SLUB_DEBUG */ -#ifdef CONFIG_SYSFS +#ifdef SLAB_SUPPORTS_SYSFS enum slab_stat_type { SL_ALL, /* All slabs */ SL_PARTIAL, /* Only partially allocated slabs */ @@ -5502,11 +5689,13 @@ static ssize_t cache_dma_show(struct kmem_cache *s, char *buf) SLAB_ATTR_RO(cache_dma); #endif +#ifdef CONFIG_HARDENED_USERCOPY static ssize_t usersize_show(struct kmem_cache *s, char *buf) { return sysfs_emit(buf, "%u\n", s->usersize); } SLAB_ATTR_RO(usersize); +#endif static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf) { @@ -5586,7 +5775,21 @@ static ssize_t failslab_show(struct kmem_cache *s, char *buf) { return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB)); } -SLAB_ATTR_RO(failslab); + +static ssize_t failslab_store(struct kmem_cache *s, const char *buf, + size_t length) +{ + if (s->refcount > 1) + return -EINVAL; + + if (buf[0] == '1') + WRITE_ONCE(s->flags, s->flags | SLAB_FAILSLAB); + else + WRITE_ONCE(s->flags, s->flags & ~SLAB_FAILSLAB); + + return length; +} +SLAB_ATTR(failslab); #endif static ssize_t shrink_show(struct kmem_cache *s, char *buf) @@ -5803,7 +6006,9 @@ static struct attribute *slab_attrs[] = { #ifdef CONFIG_FAILSLAB &failslab_attr.attr, #endif +#ifdef CONFIG_HARDENED_USERCOPY &usersize_attr.attr, +#endif #ifdef CONFIG_KFENCE &skip_kfence_attr.attr, #endif @@ -5920,11 +6125,6 @@ static int sysfs_slab_add(struct kmem_cache *s) struct kset *kset = cache_kset(s); int unmergeable = slab_unmergeable(s); - if (!kset) { - kobject_init(&s->kobj, &slab_ktype); - return 0; - } - if (!unmergeable && disable_higher_order_debug && (slub_debug & DEBUG_METADATA_FLAGS)) unmergeable = 1; @@ -6054,9 +6254,8 @@ static int __init slab_sysfs_init(void) mutex_unlock(&slab_mutex); return 0; } - -__initcall(slab_sysfs_init); -#endif /* CONFIG_SYSFS */ +late_initcall(slab_sysfs_init); +#endif /* SLAB_SUPPORTS_SYSFS */ #if defined(CONFIG_SLUB_DEBUG) && defined(CONFIG_DEBUG_FS) static int slab_debugfs_show(struct seq_file *seq, void *v) |