-rw-r--r--   Documentation/admin-guide/kernel-parameters.txt    10
-rw-r--r--   Documentation/mm/slub.rst                            9
-rw-r--r--   include/linux/slab.h                                60
-rw-r--r--   lib/slub_kunit.c                                    42
-rw-r--r--   mm/kasan/generic.c                                   7
-rw-r--r--   mm/slab.h                                           11
-rw-r--r--   mm/slab_common.c                                   103
-rw-r--r--   mm/slub.c                                          218
8 files changed, 324 insertions, 136 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a4736ee87b1a..062ea4adb51d 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -6158,6 +6158,16 @@
For more information see Documentation/mm/slub.rst.
(slub_nomerge legacy name also accepted for now)
+ slab_strict_numa [MM]
+ Support memory policies at a per-object level
+ in the slab allocator. The default is for memory
+ policies to be applied at the folio level when
+ a new folio is needed or a partial folio is
+ retrieved from the lists. Enabling this option
+ increases overhead in the slab fastpaths but gives
+ more accurate NUMA kernel object placement, which
+ helps with slow interconnects in NUMA systems.
+
slram= [HW,MTD]
smart2= [HW]
diff --git a/Documentation/mm/slub.rst b/Documentation/mm/slub.rst
index 60d350d08362..84ca1dc94e5e 100644
--- a/Documentation/mm/slub.rst
+++ b/Documentation/mm/slub.rst
@@ -175,6 +175,15 @@ can be influenced by kernel parameters:
``slab_max_order`` to 0, which causes the minimum possible order of
slab allocation.
+``slab_strict_numa``
+ Enables the application of memory policies on each
+ allocation. This results in more accurate placement of
+ objects, which may reduce accesses to remote nodes.
+ The default is to only apply memory policies at the
+ folio level when a new folio is acquired or a folio is
+ retrieved from the lists. Enabling this option reduces
+ the fastpath performance of the slab allocator.
+
SLUB Debug output
=================
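As a companion to the two documentation hunks above, here is a minimal, hypothetical userspace sketch of how the option is meant to be exercised: boot the kernel with slab_strict_numa, then give the task a memory policy. With the option enabled, slab objects allocated on the task's behalf should follow that policy per object rather than per folio. The chosen node, error handling, and program name are illustrative only.

/*
 * Illustrative only: assumes a multi-node machine booted with
 * "slab_strict_numa" and the libnuma syscall wrapper for
 * set_mempolicy() (link with -lnuma). Node 0 is just an example.
 */
#define _GNU_SOURCE
#include <numaif.h>
#include <stdio.h>

int main(void)
{
	unsigned long nodemask = 1UL << 0;	/* allow node 0 only */

	/* Bind this task's memory allocations to node 0. */
	if (set_mempolicy(MPOL_BIND, &nodemask, 8 * sizeof(nodemask)))
		perror("set_mempolicy");

	/*
	 * From here on, kernel objects allocated on behalf of this task
	 * are expected to be placed per object according to the policy
	 * instead of per folio, at some slab fastpath cost.
	 */
	return 0;
}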
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 0268ea7abf8b..10a971c2bde3 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -77,7 +77,17 @@ enum _slab_flag_bits {
#define SLAB_POISON __SLAB_FLAG_BIT(_SLAB_POISON)
/* Indicate a kmalloc slab */
#define SLAB_KMALLOC __SLAB_FLAG_BIT(_SLAB_KMALLOC)
-/* Align objs on cache lines */
+/**
+ * define SLAB_HWCACHE_ALIGN - Align objects on cache line boundaries.
+ *
+ * Sufficiently large objects are aligned on a cache line boundary. For objects
+ * smaller than half of the cache line size, the alignment is half of the cache
+ * line size. In general, if the object size is smaller than 1/2^n of the cache
+ * line size, the alignment is adjusted to 1/2^n.
+ *
+ * If an explicit alignment is also requested via the respective
+ * &struct kmem_cache_args field, the greater of the two alignments is applied.
+ */
#define SLAB_HWCACHE_ALIGN __SLAB_FLAG_BIT(_SLAB_HWCACHE_ALIGN)
/* Use GFP_DMA memory */
#define SLAB_CACHE_DMA __SLAB_FLAG_BIT(_SLAB_CACHE_DMA)
@@ -87,8 +97,8 @@ enum _slab_flag_bits {
#define SLAB_STORE_USER __SLAB_FLAG_BIT(_SLAB_STORE_USER)
/* Panic if kmem_cache_create() fails */
#define SLAB_PANIC __SLAB_FLAG_BIT(_SLAB_PANIC)
-/*
- * SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS!
+/**
+ * define SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS!
*
* This delays freeing the SLAB page by a grace period, it does _NOT_
* delay object freeing. This means that if you do kmem_cache_free()
@@ -99,20 +109,22 @@ enum _slab_flag_bits {
* stays valid, the trick to using this is relying on an independent
* object validation pass. Something like:
*
- * begin:
- *  rcu_read_lock();
- *  obj = lockless_lookup(key);
- *  if (obj) {
- *    if (!try_get_ref(obj)) // might fail for free objects
- *      rcu_read_unlock();
- *      goto begin;
+ * ::
+ *
+ *  begin:
+ *   rcu_read_lock();
+ *   obj = lockless_lookup(key);
+ *   if (obj) {
+ *     if (!try_get_ref(obj)) // might fail for free objects
+ *       rcu_read_unlock();
+ *       goto begin;
*
- *    if (obj->key != key) { // not the object we expected
- *      put_ref(obj);
- *      rcu_read_unlock();
- *      goto begin;
- *    }
- *  }
+ *     if (obj->key != key) { // not the object we expected
+ *       put_ref(obj);
+ *       rcu_read_unlock();
+ *       goto begin;
+ *     }
+ *   }
* rcu_read_unlock();
*
* This is useful if we need to approach a kernel structure obliquely,
@@ -137,7 +149,6 @@ enum _slab_flag_bits {
*
* Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU.
*/
-/* Defer freeing slabs to RCU */
#define SLAB_TYPESAFE_BY_RCU __SLAB_FLAG_BIT(_SLAB_TYPESAFE_BY_RCU)
/* Trace allocations and frees */
#define SLAB_TRACE __SLAB_FLAG_BIT(_SLAB_TRACE)
@@ -170,7 +181,12 @@ enum _slab_flag_bits {
#else
# define SLAB_FAILSLAB __SLAB_FLAG_UNUSED
#endif
-/* Account to memcg */
+/**
+ * define SLAB_ACCOUNT - Account allocations to memcg.
+ *
+ * All object allocations from this cache will be memcg accounted, regardless of
+ * whether __GFP_ACCOUNT is passed to individual allocations.
+ */
#ifdef CONFIG_MEMCG
# define SLAB_ACCOUNT __SLAB_FLAG_BIT(_SLAB_ACCOUNT)
#else
@@ -197,7 +213,13 @@ enum _slab_flag_bits {
#endif
/* The following flags affect the page allocator grouping pages by mobility */
-/* Objects are reclaimable */
+/**
+ * define SLAB_RECLAIM_ACCOUNT - Objects are reclaimable.
+ *
+ * Use this flag for caches that have an associated shrinker. As a result, slab
+ * pages are allocated with __GFP_RECLAIMABLE, which affects grouping pages by
+ * mobility, and are accounted in the SReclaimable counter in /proc/meminfo.
+ */
#ifndef CONFIG_SLUB_TINY
#define SLAB_RECLAIM_ACCOUNT __SLAB_FLAG_BIT(_SLAB_RECLAIM_ACCOUNT)
#else
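Tying the flag documentation above together, a brief hypothetical cache definition; the struct, names, and flag combination are invented for illustration and are not part of the patch:

/*
 * Hypothetical cache: objects are cache-line aligned, memcg accounted,
 * and the backing slab pages are freed only after an RCU grace period,
 * so lockless readers must revalidate objects as described above.
 */
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/refcount.h>
#include <linux/slab.h>

struct foo_entry {
	unsigned long	key;
	refcount_t	ref;
};

static struct kmem_cache *foo_cachep;

static int __init foo_cache_init(void)
{
	foo_cachep = kmem_cache_create("foo_entry", sizeof(struct foo_entry),
				       0, SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT |
				       SLAB_TYPESAFE_BY_RCU, NULL);
	return foo_cachep ? 0 : -ENOMEM;
}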
diff --git a/lib/slub_kunit.c b/lib/slub_kunit.c
index 33564f965958..f11691315c2f 100644
--- a/lib/slub_kunit.c
+++ b/lib/slub_kunit.c
@@ -192,6 +192,47 @@ static void test_leak_destroy(struct kunit *test)
KUNIT_EXPECT_EQ(test, 2, slab_errors);
}
+static void test_krealloc_redzone_zeroing(struct kunit *test)
+{
+	u8 *p;
+	int i;
+	struct kmem_cache *s = test_kmem_cache_create("TestSlub_krealloc", 64,
+				SLAB_KMALLOC|SLAB_STORE_USER|SLAB_RED_ZONE);
+
+	p = alloc_hooks(__kmalloc_cache_noprof(s, GFP_KERNEL, 48));
+	memset(p, 0xff, 48);
+
+	kasan_disable_current();
+	OPTIMIZER_HIDE_VAR(p);
+
+	/* Test shrink */
+	p = krealloc(p, 40, GFP_KERNEL | __GFP_ZERO);
+	for (i = 40; i < 64; i++)
+		KUNIT_EXPECT_EQ(test, p[i], SLUB_RED_ACTIVE);
+
+	/* Test grow within the same 64B kmalloc object */
+	p = krealloc(p, 56, GFP_KERNEL | __GFP_ZERO);
+	for (i = 40; i < 56; i++)
+		KUNIT_EXPECT_EQ(test, p[i], 0);
+	for (i = 56; i < 64; i++)
+		KUNIT_EXPECT_EQ(test, p[i], SLUB_RED_ACTIVE);
+
+	validate_slab_cache(s);
+	KUNIT_EXPECT_EQ(test, 0, slab_errors);
+
+	memset(p, 0xff, 56);
+	/* Test grow with allocating a bigger 128B object */
+	p = krealloc(p, 112, GFP_KERNEL | __GFP_ZERO);
+	for (i = 0; i < 56; i++)
+		KUNIT_EXPECT_EQ(test, p[i], 0xff);
+	for (i = 56; i < 112; i++)
+		KUNIT_EXPECT_EQ(test, p[i], 0);
+
+	kfree(p);
+	kasan_enable_current();
+	kmem_cache_destroy(s);
+}
+
static int test_init(struct kunit *test)
{
slab_errors = 0;
@@ -214,6 +255,7 @@ static struct kunit_case test_cases[] = {
KUNIT_CASE(test_kmalloc_redzone_access),
KUNIT_CASE(test_kfree_rcu),
KUNIT_CASE(test_leak_destroy),
+ KUNIT_CASE(test_krealloc_redzone_zeroing),
{}
};
diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c
index 6310a180278b..8b9e348113b1 100644
--- a/mm/kasan/generic.c
+++ b/mm/kasan/generic.c
@@ -392,9 +392,12 @@ void kasan_cache_create(struct kmem_cache *cache, unsigned int *size,
* 1. Object is SLAB_TYPESAFE_BY_RCU, which means that it can
* be touched after it was freed, or
* 2. Object has a constructor, which means it's expected to
- * retain its content until the next allocation.
+ * retain its content until the next allocation, or
+ * 3. Object is from a kmalloc cache which enables the debug option
+ * to store the original size.
*/
- if ((cache->flags & SLAB_TYPESAFE_BY_RCU) || cache->ctor) {
+ if ((cache->flags & SLAB_TYPESAFE_BY_RCU) || cache->ctor ||
+ slub_debug_orig_size(cache)) {
cache->kasan_info.free_meta_offset = *size;
*size += sizeof(struct kasan_free_meta);
goto free_meta_added;
diff --git a/mm/slab.h b/mm/slab.h
index 6c6fe6d630ce..632fedd71fea 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -73,6 +73,11 @@ struct slab {
struct {
unsigned inuse:16;
unsigned objects:15;
+ /*
+ * If slab debugging is enabled then the
+ * frozen bit can be reused to indicate
+ * that the slab was corrupted
+ */
unsigned frozen:1;
};
};
@@ -695,6 +700,12 @@ void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
void __check_heap_object(const void *ptr, unsigned long n,
const struct slab *slab, bool to_user);
+static inline bool slub_debug_orig_size(struct kmem_cache *s)
+{
+ return (kmem_cache_debug_flags(s, SLAB_STORE_USER) &&
+ (s->flags & SLAB_KMALLOC));
+}
+
#ifdef CONFIG_SLUB_DEBUG
void skip_orig_size_check(struct kmem_cache *s, const void *object);
#endif
diff --git a/mm/slab_common.c b/mm/slab_common.c
index a7174455db9f..a29457bef626 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -222,15 +222,12 @@ static struct kmem_cache *create_cache(const char *name,
struct kmem_cache *s;
int err;
- if (WARN_ON(args->useroffset + args->usersize > object_size))
- args->useroffset = args->usersize = 0;
-
/* If a custom freelist pointer is requested make sure it's sane. */
err = -EINVAL;
if (args->use_freeptr_offset &&
(args->freeptr_offset >= object_size ||
!(flags & SLAB_TYPESAFE_BY_RCU) ||
- !IS_ALIGNED(args->freeptr_offset, sizeof(freeptr_t))))
+ !IS_ALIGNED(args->freeptr_offset, __alignof__(freeptr_t))))
goto out;
err = -ENOMEM;
@@ -257,11 +254,23 @@ out:
* @object_size: The size of objects to be created in this cache.
* @args: Additional arguments for the cache creation (see
* &struct kmem_cache_args).
- * @flags: See %SLAB_* flags for an explanation of individual @flags.
+ * @flags: See the descriptions of individual flags. The common ones are listed
+ * in the description below.
*
* Not to be called directly, use the kmem_cache_create() wrapper with the same
* parameters.
*
+ * Commonly used @flags:
+ *
+ * &SLAB_ACCOUNT - Account allocations to memcg.
+ *
+ * &SLAB_HWCACHE_ALIGN - Align objects on cache line boundaries.
+ *
+ * &SLAB_RECLAIM_ACCOUNT - Objects are reclaimable.
+ *
+ * &SLAB_TYPESAFE_BY_RCU - Slab page (not individual objects) freeing delayed
+ * by a grace period - see the full description before using.
+ *
* Context: Cannot be called within a interrupt, but can be interrupted.
*
* Return: a pointer to the cache on success, NULL on failure.
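To make the relaxed freelist-pointer alignment check and the @flags wording above concrete, a hedged sketch of the args-based creation path (struct and names are hypothetical): the dedicated free pointer must lie within the object, only needs to be aligned to __alignof__(freeptr_t), and is honoured only together with SLAB_TYPESAFE_BY_RCU.

/*
 * Hypothetical: an RCU-typesafe cache that keeps the freelist pointer in
 * a dedicated field so that a freed object's payload is not overwritten
 * while lockless readers may still be inspecting it.
 */
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/stddef.h>

struct bar_entry {
	unsigned long	key;
	freeptr_t	free_ptr;	/* used only while the object is free */
};

static struct kmem_cache *bar_cachep;

static int __init bar_cache_init(void)
{
	struct kmem_cache_args args = {
		.use_freeptr_offset	= true,
		.freeptr_offset		= offsetof(struct bar_entry, free_ptr),
	};

	bar_cachep = kmem_cache_create("bar_entry", sizeof(struct bar_entry),
				       &args, SLAB_TYPESAFE_BY_RCU | SLAB_ACCOUNT);
	return bar_cachep ? 0 : -ENOMEM;
}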
@@ -1199,90 +1208,6 @@ module_init(slab_proc_init);
#endif /* CONFIG_SLUB_DEBUG */
-static __always_inline __realloc_size(2) void *
-__do_krealloc(const void *p, size_t new_size, gfp_t flags)
-{
- void *ret;
- size_t ks;
-
- /* Check for double-free before calling ksize. */
- if (likely(!ZERO_OR_NULL_PTR(p))) {
- if (!kasan_check_byte(p))
- return NULL;
- ks = ksize(p);
- } else
- ks = 0;
-
- /* If the object still fits, repoison it precisely. */
- if (ks >= new_size) {
- /* Zero out spare memory. */
- if (want_init_on_alloc(flags)) {
- kasan_disable_current();
- memset(kasan_reset_tag(p) + new_size, 0, ks - new_size);
- kasan_enable_current();
- }
-
- p = kasan_krealloc((void *)p, new_size, flags);
- return (void *)p;
- }
-
- ret = kmalloc_node_track_caller_noprof(new_size, flags, NUMA_NO_NODE, _RET_IP_);
- if (ret && p) {
- /* Disable KASAN checks as the object's redzone is accessed. */
- kasan_disable_current();
- memcpy(ret, kasan_reset_tag(p), ks);
- kasan_enable_current();
- }
-
- return ret;
-}
-
-/**
- * krealloc - reallocate memory. The contents will remain unchanged.
- * @p: object to reallocate memory for.
- * @new_size: how many bytes of memory are required.
- * @flags: the type of memory to allocate.
- *
- * If @p is %NULL, krealloc() behaves exactly like kmalloc(). If @new_size
- * is 0 and @p is not a %NULL pointer, the object pointed to is freed.
- *
- * If __GFP_ZERO logic is requested, callers must ensure that, starting with the
- * initial memory allocation, every subsequent call to this API for the same
- * memory allocation is flagged with __GFP_ZERO. Otherwise, it is possible that
- * __GFP_ZERO is not fully honored by this API.
- *
- * This is the case, since krealloc() only knows about the bucket size of an
- * allocation (but not the exact size it was allocated with) and hence
- * implements the following semantics for shrinking and growing buffers with
- * __GFP_ZERO.
- *
- *         new         bucket
- * 0       size         size
- * |--------|----------------|
- * |  keep  |      zero      |
- *
- * In any case, the contents of the object pointed to are preserved up to the
- * lesser of the new and old sizes.
- *
- * Return: pointer to the allocated memory or %NULL in case of error
- */
-void *krealloc_noprof(const void *p, size_t new_size, gfp_t flags)
-{
- void *ret;
-
- if (unlikely(!new_size)) {
- kfree(p);
- return ZERO_SIZE_PTR;
- }
-
- ret = __do_krealloc(p, new_size, flags);
- if (ret && kasan_reset_tag(p) != kasan_reset_tag(ret))
- kfree(p);
-
- return ret;
-}
-EXPORT_SYMBOL(krealloc_noprof);
-
/**
* kfree_sensitive - Clear sensitive information in memory before freeing
* @p: object to free memory of
diff --git a/mm/slub.c b/mm/slub.c
index 5b832512044e..19980419b176 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -218,6 +218,10 @@ DEFINE_STATIC_KEY_FALSE(slub_debug_enabled);
#endif
#endif /* CONFIG_SLUB_DEBUG */
+#ifdef CONFIG_NUMA
+static DEFINE_STATIC_KEY_FALSE(strict_numa);
+#endif
+
/* Structure holding parameters for get_partial() call chain */
struct partial_context {
gfp_t flags;
@@ -230,12 +234,6 @@ static inline bool kmem_cache_debug(struct kmem_cache *s)
return kmem_cache_debug_flags(s, SLAB_DEBUG_FLAGS);
}
-static inline bool slub_debug_orig_size(struct kmem_cache *s)
-{
- return (kmem_cache_debug_flags(s, SLAB_STORE_USER) &&
- (s->flags & SLAB_KMALLOC));
-}
-
void *fixup_red_left(struct kmem_cache *s, void *p)
{
if (kmem_cache_debug_flags(s, SLAB_RED_ZONE))
@@ -760,21 +758,10 @@ static inline void set_orig_size(struct kmem_cache *s,
void *object, unsigned int orig_size)
{
void *p = kasan_reset_tag(object);
- unsigned int kasan_meta_size;
if (!slub_debug_orig_size(s))
return;
- /*
- * KASAN can save its free meta data inside of the object at offset 0.
- * If this meta data size is larger than 'orig_size', it will overlap
- * the data redzone in [orig_size+1, object_size]. Thus, we adjust
- * 'orig_size' to be as at least as big as KASAN's meta data.
- */
- kasan_meta_size = kasan_metadata_size(s, true);
- if (kasan_meta_size > orig_size)
- orig_size = kasan_meta_size;
-
p += get_info_end(s);
p += sizeof(struct track) * 2;
@@ -785,6 +772,9 @@ static inline unsigned int get_orig_size(struct kmem_cache *s, void *object)
{
void *p = kasan_reset_tag(object);
+ if (is_kfence_address(object))
+ return kfence_ksize(object);
+
if (!slub_debug_orig_size(s))
return s->object_size;
@@ -1423,6 +1413,11 @@ static int check_slab(struct kmem_cache *s, struct slab *slab)
slab->inuse, slab->objects);
return 0;
}
+ if (slab->frozen) {
+ slab_err(s, slab, "Slab disabled since SLUB metadata consistency check failed");
+ return 0;
+ }
+
/* Slab_pad_check fixes things up after itself */
slab_pad_check(s, slab);
return 1;
@@ -1603,6 +1598,7 @@ bad:
slab_fix(s, "Marking all objects used");
slab->inuse = slab->objects;
slab->freelist = NULL;
+ slab->frozen = 1; /* mark consistency-failed slab as frozen */
}
return false;
}
@@ -2744,7 +2740,8 @@ static void *alloc_single_from_partial(struct kmem_cache *s,
slab->inuse++;
if (!alloc_debug_processing(s, slab, object, orig_size)) {
- remove_partial(n, slab);
+ if (folio_test_slab(slab_folio(slab)))
+ remove_partial(n, slab);
return NULL;
}
@@ -3956,6 +3953,28 @@ redo:
object = c->freelist;
slab = c->slab;
+#ifdef CONFIG_NUMA
+	if (static_branch_unlikely(&strict_numa) &&
+			node == NUMA_NO_NODE) {
+
+		struct mempolicy *mpol = current->mempolicy;
+
+		if (mpol) {
+			/*
+			 * Special BIND rule support. If the existing slab
+			 * is in the permitted set then do not redirect
+			 * to a particular node.
+			 * Otherwise we apply the memory policy to get
+			 * the node we need to allocate on.
+			 */
+			if (mpol->mode != MPOL_BIND || !slab ||
+					!node_isset(slab_nid(slab), mpol->nodes))
+
+				node = mempolicy_slab_node();
+		}
+	}
+#endif
+
if (!USE_LOCKLESS_FAST_PATH() ||
unlikely(!object || !slab || !node_match(slab, node))) {
object = __slab_alloc(s, gfpflags, node, addr, c, orig_size);
@@ -4728,6 +4747,126 @@ void kfree(const void *object)
}
EXPORT_SYMBOL(kfree);
+static __always_inline __realloc_size(2) void *
+__do_krealloc(const void *p, size_t new_size, gfp_t flags)
+{
+	void *ret;
+	size_t ks = 0;
+	int orig_size = 0;
+	struct kmem_cache *s = NULL;
+
+	if (unlikely(ZERO_OR_NULL_PTR(p)))
+		goto alloc_new;
+
+	/* Check for double-free. */
+	if (!kasan_check_byte(p))
+		return NULL;
+
+	if (is_kfence_address(p)) {
+		ks = orig_size = kfence_ksize(p);
+	} else {
+		struct folio *folio;
+
+		folio = virt_to_folio(p);
+		if (unlikely(!folio_test_slab(folio))) {
+			/* Big kmalloc object */
+			WARN_ON(folio_size(folio) <= KMALLOC_MAX_CACHE_SIZE);
+			WARN_ON(p != folio_address(folio));
+			ks = folio_size(folio);
+		} else {
+			s = folio_slab(folio)->slab_cache;
+			orig_size = get_orig_size(s, (void *)p);
+			ks = s->object_size;
+		}
+	}
+
+	/* If the old object doesn't fit, allocate a bigger one */
+	if (new_size > ks)
+		goto alloc_new;
+
+	/* Zero out spare memory. */
+	if (want_init_on_alloc(flags)) {
+		kasan_disable_current();
+		if (orig_size && orig_size < new_size)
+			memset(kasan_reset_tag(p) + orig_size, 0, new_size - orig_size);
+		else
+			memset(kasan_reset_tag(p) + new_size, 0, ks - new_size);
+		kasan_enable_current();
+	}
+
+	/* Setup kmalloc redzone when needed */
+	if (s && slub_debug_orig_size(s)) {
+		set_orig_size(s, (void *)p, new_size);
+		if (s->flags & SLAB_RED_ZONE && new_size < ks)
+			memset_no_sanitize_memory(kasan_reset_tag(p) + new_size,
+						SLUB_RED_ACTIVE, ks - new_size);
+	}
+
+	p = kasan_krealloc(p, new_size, flags);
+	return (void *)p;
+
+alloc_new:
+	ret = kmalloc_node_track_caller_noprof(new_size, flags, NUMA_NO_NODE, _RET_IP_);
+	if (ret && p) {
+		/* Disable KASAN checks as the object's redzone is accessed. */
+		kasan_disable_current();
+		memcpy(ret, kasan_reset_tag(p), orig_size ?: ks);
+		kasan_enable_current();
+	}
+
+	return ret;
+}
+
+/**
+ * krealloc - reallocate memory. The contents will remain unchanged.
+ * @p: object to reallocate memory for.
+ * @new_size: how many bytes of memory are required.
+ * @flags: the type of memory to allocate.
+ *
+ * If @p is %NULL, krealloc() behaves exactly like kmalloc(). If @new_size
+ * is 0 and @p is not a %NULL pointer, the object pointed to is freed.
+ *
+ * If __GFP_ZERO logic is requested, callers must ensure that, starting with the
+ * initial memory allocation, every subsequent call to this API for the same
+ * memory allocation is flagged with __GFP_ZERO. Otherwise, it is possible that
+ * __GFP_ZERO is not fully honored by this API.
+ *
+ * When slub_debug_orig_size() is off, krealloc() only knows about the bucket
+ * size of an allocation (but not the exact size it was allocated with) and
+ * hence implements the following semantics for shrinking and growing buffers
+ * with __GFP_ZERO.
+ *
+ *         new         bucket
+ * 0       size         size
+ * |--------|----------------|
+ * |  keep  |      zero      |
+ *
+ * Otherwise, the original allocation size 'orig_size' could be used to
+ * precisely clear the requested size, and the new size will also be stored
+ * as the new 'orig_size'.
+ *
+ * In any case, the contents of the object pointed to are preserved up to the
+ * lesser of the new and old sizes.
+ *
+ * Return: pointer to the allocated memory or %NULL in case of error
+ */
+void *krealloc_noprof(const void *p, size_t new_size, gfp_t flags)
+{
+	void *ret;
+
+	if (unlikely(!new_size)) {
+		kfree(p);
+		return ZERO_SIZE_PTR;
+	}
+
+	ret = __do_krealloc(p, new_size, flags);
+	if (ret && kasan_reset_tag(p) != kasan_reset_tag(ret))
+		kfree(p);
+
+	return ret;
+}
+EXPORT_SYMBOL(krealloc_noprof);
+
struct detached_freelist {
struct slab *slab;
void *tail;
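A short hypothetical caller illustrating the __GFP_ZERO contract documented above: provided the buffer starts life zeroed (e.g. kzalloc()) and every subsequent krealloc() passes __GFP_ZERO, the newly exposed bytes are expected to read as zero, cleared precisely from the stored orig_size when slub_debug_orig_size() is enabled, or preserved as zero from the original zeroed allocation otherwise. The helper name is illustrative.

/* Hypothetical helper honouring the krealloc() __GFP_ZERO rule. */
#include <linux/bug.h>
#include <linux/slab.h>
#include <linux/types.h>

static u8 *grow_zeroed(u8 *buf, size_t old_size, size_t new_size)
{
	u8 *tmp;

	/*
	 * The caller allocated buf with __GFP_ZERO and never wrote past
	 * old_size, so after this call the bytes in [old_size, new_size)
	 * are expected to read as zero.
	 */
	tmp = krealloc(buf, new_size, GFP_KERNEL | __GFP_ZERO);
	if (!tmp)
		return NULL;	/* buf is left untouched on failure */

	WARN_ON(new_size > old_size && tmp[old_size] != 0);
	return tmp;
}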
@@ -5602,6 +5741,23 @@ static int __init setup_slub_min_objects(char *str)
__setup("slab_min_objects=", setup_slub_min_objects);
__setup_param("slub_min_objects=", slub_min_objects, setup_slub_min_objects, 0);
+#ifdef CONFIG_NUMA
+static int __init setup_slab_strict_numa(char *str)
+{
+ if (nr_node_ids > 1) {
+ static_branch_enable(&strict_numa);
+ pr_info("SLUB: Strict NUMA enabled.\n");
+ } else {
+ pr_warn("slab_strict_numa parameter set on non NUMA system.\n");
+ }
+
+ return 1;
+}
+
+__setup("slab_strict_numa", setup_slab_strict_numa);
+#endif
+
+
#ifdef CONFIG_HARDENED_USERCOPY
/*
* Rejects incorrectly sized objects and objects that are to be copied
@@ -5960,7 +6116,8 @@ __kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
s = find_mergeable(size, align, flags, name, ctor);
if (s) {
if (sysfs_slab_alias(s, name))
- return NULL;
+ pr_err("SLUB: Unable to add cache alias %s to sysfs\n",
+ name);
s->refcount++;
@@ -6042,15 +6199,18 @@ int do_kmem_cache_create(struct kmem_cache *s, const char *name,
if (!alloc_kmem_cache_cpus(s))
goto out;
+ err = 0;
+
/* Mutex is not taken during early boot */
- if (slab_state <= UP) {
- err = 0;
+ if (slab_state <= UP)
goto out;
- }
- err = sysfs_slab_add(s);
- if (err)
- goto out;
+ /*
+ * Failing to create sysfs files is not critical to SLUB functionality.
+ * If it fails, proceed with cache creation without these files.
+ */
+ if (sysfs_slab_add(s))
+ pr_err("SLUB: Unable to add cache %s to sysfs\n", s->name);
if (s->flags & SLAB_STORE_USER)
debugfs_slab_add(s);
@@ -7120,7 +7280,8 @@ out_del_kobj:
void sysfs_slab_unlink(struct kmem_cache *s)
{
- kobject_del(&s->kobj);
+ if (s->kobj.state_in_sysfs)
+ kobject_del(&s->kobj);
}
void sysfs_slab_release(struct kmem_cache *s)
@@ -7149,6 +7310,11 @@ static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
* If we have a leftover link then remove it.
*/
sysfs_remove_link(&slab_kset->kobj, name);
+ /*
+ * The original cache may have failed to generate its sysfs file.
+ * In that case, sysfs_create_link() returns -ENOENT and
+ * symbolic link creation is skipped.
+ */
return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
}