summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/Kconfig18
-rw-r--r--mm/gup.c51
-rw-r--r--mm/kfence/kfence_test.c7
-rw-r--r--mm/memblock.c34
-rw-r--r--mm/mmap.c121
-rw-r--r--mm/mremap.c28
-rw-r--r--mm/nommu.c16
-rw-r--r--mm/slab.c37
-rw-r--r--mm/slab.h13
-rw-r--r--mm/slab_common.c33
-rw-r--r--mm/slub.c47
11 files changed, 210 insertions, 195 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index e00df648c91d..09130434e30d 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -234,18 +234,23 @@ choice
help
This option allows to select a slab allocator.
-config SLAB
- bool "SLAB"
+config SLAB_DEPRECATED
+ bool "SLAB (DEPRECATED)"
depends on !PREEMPT_RT
- select HAVE_HARDENED_USERCOPY_ALLOCATOR
help
+ Deprecated and scheduled for removal in a few cycles. Replaced by
+ SLUB.
+
+ If you cannot migrate to SLUB, please contact linux-mm@kvack.org
+ and the people listed in the SLAB ALLOCATOR section of MAINTAINERS
+ file, explaining why.
+
The regular slab allocator that is established and known to work
well in all environments. It organizes cache hot objects in
per cpu and per node queues.
config SLUB
bool "SLUB (Unqueued Allocator)"
- select HAVE_HARDENED_USERCOPY_ALLOCATOR
help
SLUB is a slab allocator that minimizes cache line usage
instead of managing queues of cached objects (SLAB approach).
@@ -256,6 +261,11 @@ config SLUB
endchoice
+config SLAB
+ bool
+ default y
+ depends on SLAB_DEPRECATED
+
config SLUB_TINY
bool "Configure SLUB for minimal memory footprint"
depends on SLUB && EXPERT
diff --git a/mm/gup.c b/mm/gup.c
index ef29641671c7..76d222ccc3ff 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1091,6 +1091,45 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
return 0;
}
+/*
+ * This is "vma_lookup()", but with a warning if we would have
+ * historically expanded the stack in the GUP code.
+ */
+static struct vm_area_struct *gup_vma_lookup(struct mm_struct *mm,
+ unsigned long addr)
+{
+#ifdef CONFIG_STACK_GROWSUP
+ return vma_lookup(mm, addr);
+#else
+ static volatile unsigned long next_warn;
+ struct vm_area_struct *vma;
+ unsigned long now, next;
+
+ vma = find_vma(mm, addr);
+ if (!vma || (addr >= vma->vm_start))
+ return vma;
+
+ /* Only warn for half-way relevant accesses */
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ return NULL;
+ if (vma->vm_start - addr > 65536)
+ return NULL;
+
+ /* Let's not warn more than once an hour.. */
+ now = jiffies; next = next_warn;
+ if (next && time_before(now, next))
+ return NULL;
+ next_warn = now + 60*60*HZ;
+
+ /* Let people know things may have changed. */
+ pr_warn("GUP no longer grows the stack in %s (%d): %lx-%lx (%lx)\n",
+ current->comm, task_pid_nr(current),
+ vma->vm_start, vma->vm_end, addr);
+ dump_stack();
+ return NULL;
+#endif
+}
+
/**
* __get_user_pages() - pin user pages in memory
* @mm: mm_struct of target mm
@@ -1168,11 +1207,7 @@ static long __get_user_pages(struct mm_struct *mm,
/* first iteration or cross vma bound */
if (!vma || start >= vma->vm_end) {
- vma = find_vma(mm, start);
- if (vma && (start < vma->vm_start)) {
- WARN_ON_ONCE(vma->vm_flags & VM_GROWSDOWN);
- vma = NULL;
- }
+ vma = gup_vma_lookup(mm, start);
if (!vma && in_gate_area(mm, start)) {
ret = get_gate_page(mm, start & PAGE_MASK,
gup_flags, &vma,
@@ -1337,13 +1372,9 @@ int fixup_user_fault(struct mm_struct *mm,
fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
retry:
- vma = find_vma(mm, address);
+ vma = gup_vma_lookup(mm, address);
if (!vma)
return -EFAULT;
- if (address < vma->vm_start ) {
- WARN_ON_ONCE(vma->vm_flags & VM_GROWSDOWN);
- return -EFAULT;
- }
if (!vma_permits_fault(vma, fault_flags))
return -EFAULT;
diff --git a/mm/kfence/kfence_test.c b/mm/kfence/kfence_test.c
index 6aee19a79236..9e008a336d9f 100644
--- a/mm/kfence/kfence_test.c
+++ b/mm/kfence/kfence_test.c
@@ -191,11 +191,10 @@ static size_t setup_test_cache(struct kunit *test, size_t size, slab_flags_t fla
kunit_info(test, "%s: size=%zu, ctor=%ps\n", __func__, size, ctor);
/*
- * Use SLAB_NOLEAKTRACE to prevent merging with existing caches. Any
- * other flag in SLAB_NEVER_MERGE also works. Use SLAB_ACCOUNT to
- * allocate via memcg, if enabled.
+ * Use SLAB_NO_MERGE to prevent merging with existing caches.
+ * Use SLAB_ACCOUNT to allocate via memcg, if enabled.
*/
- flags |= SLAB_NOLEAKTRACE | SLAB_ACCOUNT;
+ flags |= SLAB_NO_MERGE | SLAB_ACCOUNT;
test_cache = kmem_cache_create("test", size, 1, flags, ctor);
KUNIT_ASSERT_TRUE_MSG(test, test_cache, "could not create cache");
diff --git a/mm/memblock.c b/mm/memblock.c
index 388bc0c78998..f9e61e565a53 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -156,10 +156,10 @@ static __refdata struct memblock_type *memblock_memory = &memblock.memory;
} while (0)
static int memblock_debug __initdata_memblock;
-static bool system_has_some_mirror __initdata_memblock = false;
+static bool system_has_some_mirror __initdata_memblock;
static int memblock_can_resize __initdata_memblock;
-static int memblock_memory_in_slab __initdata_memblock = 0;
-static int memblock_reserved_in_slab __initdata_memblock = 0;
+static int memblock_memory_in_slab __initdata_memblock;
+static int memblock_reserved_in_slab __initdata_memblock;
static enum memblock_flags __init_memblock choose_memblock_flags(void)
{
@@ -2178,20 +2178,44 @@ void __init memblock_free_all(void)
}
#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_ARCH_KEEP_MEMBLOCK)
+static const char * const flagname[] = {
+ [ilog2(MEMBLOCK_HOTPLUG)] = "HOTPLUG",
+ [ilog2(MEMBLOCK_MIRROR)] = "MIRROR",
+ [ilog2(MEMBLOCK_NOMAP)] = "NOMAP",
+ [ilog2(MEMBLOCK_DRIVER_MANAGED)] = "DRV_MNG",
+};
static int memblock_debug_show(struct seq_file *m, void *private)
{
struct memblock_type *type = m->private;
struct memblock_region *reg;
- int i;
+ int i, j, nid;
+ unsigned int count = ARRAY_SIZE(flagname);
phys_addr_t end;
for (i = 0; i < type->cnt; i++) {
reg = &type->regions[i];
end = reg->base + reg->size - 1;
+ nid = memblock_get_region_node(reg);
seq_printf(m, "%4d: ", i);
- seq_printf(m, "%pa..%pa\n", &reg->base, &end);
+ seq_printf(m, "%pa..%pa ", &reg->base, &end);
+ if (nid != MAX_NUMNODES)
+ seq_printf(m, "%4d ", nid);
+ else
+ seq_printf(m, "%4c ", 'x');
+ if (reg->flags) {
+ for (j = 0; j < count; j++) {
+ if (reg->flags & (1U << j)) {
+ seq_printf(m, "%s\n", flagname[j]);
+ break;
+ }
+ }
+ if (j == count)
+ seq_printf(m, "%s\n", "UNKNOWN");
+ } else {
+ seq_printf(m, "%s\n", "NONE");
+ }
}
return 0;
}
diff --git a/mm/mmap.c b/mm/mmap.c
index 9b5188b65800..204ddcd52625 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -193,8 +193,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
struct mm_struct *mm = current->mm;
struct vm_area_struct *brkvma, *next = NULL;
unsigned long min_brk;
- bool populate;
- bool downgraded = false;
+ bool populate = false;
LIST_HEAD(uf);
struct vma_iterator vmi;
@@ -236,13 +235,8 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
goto success;
}
- /*
- * Always allow shrinking brk.
- * do_vma_munmap() may downgrade mmap_lock to read.
- */
+ /* Always allow shrinking brk. */
if (brk <= mm->brk) {
- int ret;
-
/* Search one past newbrk */
vma_iter_init(&vmi, mm, newbrk);
brkvma = vma_find(&vmi, oldbrk);
@@ -250,19 +244,14 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
goto out; /* mapping intersects with an existing non-brk vma. */
/*
* mm->brk must be protected by write mmap_lock.
- * do_vma_munmap() may downgrade the lock, so update it
+ * do_vma_munmap() will drop the lock on success, so update it
* before calling do_vma_munmap().
*/
mm->brk = brk;
- ret = do_vma_munmap(&vmi, brkvma, newbrk, oldbrk, &uf, true);
- if (ret == 1) {
- downgraded = true;
- goto success;
- } else if (!ret)
- goto success;
+ if (do_vma_munmap(&vmi, brkvma, newbrk, oldbrk, &uf, true))
+ goto out;
- mm->brk = origbrk;
- goto out;
+ goto success_unlocked;
}
if (check_brk_limits(oldbrk, newbrk - oldbrk))
@@ -283,19 +272,19 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
goto out;
mm->brk = brk;
+ if (mm->def_flags & VM_LOCKED)
+ populate = true;
success:
- populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0;
- if (downgraded)
- mmap_read_unlock(mm);
- else
- mmap_write_unlock(mm);
+ mmap_write_unlock(mm);
+success_unlocked:
userfaultfd_unmap_complete(mm, &uf);
if (populate)
mm_populate(oldbrk, newbrk - oldbrk);
return brk;
out:
+ mm->brk = origbrk;
mmap_write_unlock(mm);
return origbrk;
}
@@ -2428,14 +2417,16 @@ int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
* @start: The aligned start address to munmap.
* @end: The aligned end address to munmap.
* @uf: The userfaultfd list_head
- * @downgrade: Set to true to attempt a write downgrade of the mmap_lock
+ * @unlock: Set to true to drop the mmap_lock. unlocking only happens on
+ * success.
*
- * If @downgrade is true, check return code for potential release of the lock.
+ * Return: 0 on success and drops the lock if so directed, error and leaves the
+ * lock held otherwise.
*/
static int
do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
struct mm_struct *mm, unsigned long start,
- unsigned long end, struct list_head *uf, bool downgrade)
+ unsigned long end, struct list_head *uf, bool unlock)
{
struct vm_area_struct *prev, *next = NULL;
struct maple_tree mt_detach;
@@ -2489,7 +2480,8 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
}
vma_start_write(next);
mas_set_range(&mas_detach, next->vm_start, next->vm_end - 1);
- if (mas_store_gfp(&mas_detach, next, GFP_KERNEL))
+ error = mas_store_gfp(&mas_detach, next, GFP_KERNEL);
+ if (error)
goto munmap_gather_failed;
vma_mark_detached(next, true);
if (next->vm_flags & VM_LOCKED)
@@ -2542,41 +2534,31 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
BUG_ON(count != test_count);
}
#endif
- /* Point of no return */
- error = -ENOMEM;
vma_iter_set(vmi, start);
- if (vma_iter_clear_gfp(vmi, start, end, GFP_KERNEL))
+ error = vma_iter_clear_gfp(vmi, start, end, GFP_KERNEL);
+ if (error)
goto clear_tree_failed;
+ /* Point of no return */
mm->locked_vm -= locked_vm;
mm->map_count -= count;
- /*
- * Do not downgrade mmap_lock if we are next to VM_GROWSDOWN or
- * VM_GROWSUP VMA. Such VMAs can change their size under
- * down_read(mmap_lock) and collide with the VMA we are about to unmap.
- */
- if (downgrade) {
- if (next && (next->vm_flags & VM_GROWSDOWN))
- downgrade = false;
- else if (prev && (prev->vm_flags & VM_GROWSUP))
- downgrade = false;
- else
- mmap_write_downgrade(mm);
- }
+ if (unlock)
+ mmap_write_downgrade(mm);
/*
* We can free page tables without write-locking mmap_lock because VMAs
* were isolated before we downgraded mmap_lock.
*/
- unmap_region(mm, &mt_detach, vma, prev, next, start, end, !downgrade);
+ unmap_region(mm, &mt_detach, vma, prev, next, start, end, !unlock);
/* Statistics and freeing VMAs */
mas_set(&mas_detach, start);
remove_mt(mm, &mas_detach);
__mt_destroy(&mt_detach);
-
-
validate_mm(mm);
- return downgrade ? 1 : 0;
+ if (unlock)
+ mmap_read_unlock(mm);
+
+ return 0;
clear_tree_failed:
userfaultfd_error:
@@ -2589,6 +2571,7 @@ end_split_failed:
__mt_destroy(&mt_detach);
start_split_failed:
map_count_exceeded:
+ validate_mm(mm);
return error;
}
@@ -2599,18 +2582,18 @@ map_count_exceeded:
* @start: The start address to munmap
* @len: The length of the range to munmap
* @uf: The userfaultfd list_head
- * @downgrade: set to true if the user wants to attempt to write_downgrade the
- * mmap_lock
+ * @unlock: set to true if the user wants to drop the mmap_lock on success
*
* This function takes a @mas that is either pointing to the previous VMA or set
* to MA_START and sets it up to remove the mapping(s). The @len will be
* aligned and any arch_unmap work will be preformed.
*
- * Returns: -EINVAL on failure, 1 on success and unlock, 0 otherwise.
+ * Return: 0 on success and drops the lock if so directed, error and leaves the
+ * lock held otherwise.
*/
int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm,
unsigned long start, size_t len, struct list_head *uf,
- bool downgrade)
+ bool unlock)
{
unsigned long end;
struct vm_area_struct *vma;
@@ -2627,10 +2610,13 @@ int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm,
/* Find the first overlapping VMA */
vma = vma_find(vmi, end);
- if (!vma)
+ if (!vma) {
+ if (unlock)
+ mmap_write_unlock(mm);
return 0;
+ }
- return do_vmi_align_munmap(vmi, vma, mm, start, end, uf, downgrade);
+ return do_vmi_align_munmap(vmi, vma, mm, start, end, uf, unlock);
}
/* do_munmap() - Wrapper function for non-maple tree aware do_munmap() calls.
@@ -2638,6 +2624,8 @@ int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm,
* @start: The start address to munmap
* @len: The length to be munmapped.
* @uf: The userfaultfd list_head
+ *
+ * Return: 0 on success, error otherwise.
*/
int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
struct list_head *uf)
@@ -2898,7 +2886,7 @@ unacct_error:
return error;
}
-static int __vm_munmap(unsigned long start, size_t len, bool downgrade)
+static int __vm_munmap(unsigned long start, size_t len, bool unlock)
{
int ret;
struct mm_struct *mm = current->mm;
@@ -2908,16 +2896,8 @@ static int __vm_munmap(unsigned long start, size_t len, bool downgrade)
if (mmap_write_lock_killable(mm))
return -EINTR;
- ret = do_vmi_munmap(&vmi, mm, start, len, &uf, downgrade);
- /*
- * Returning 1 indicates mmap_lock is downgraded.
- * But 1 is not legal return value of vm_munmap() and munmap(), reset
- * it to 0 before return.
- */
- if (ret == 1) {
- mmap_read_unlock(mm);
- ret = 0;
- } else
+ ret = do_vmi_munmap(&vmi, mm, start, len, &uf, unlock);
+ if (ret || !unlock)
mmap_write_unlock(mm);
userfaultfd_unmap_complete(mm, &uf);
@@ -3027,23 +3007,22 @@ out:
* @start: the start of the address to unmap
* @end: The end of the address to unmap
* @uf: The userfaultfd list_head
- * @downgrade: Attempt to downgrade or not
+ * @unlock: Drop the lock on success
*
- * Returns: 0 on success and not downgraded, 1 on success and downgraded.
* unmaps a VMA mapping when the vma iterator is already in position.
* Does not handle alignment.
+ *
+ * Return: 0 on success drops the lock of so directed, error on failure and will
+ * still hold the lock.
*/
int do_vma_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
- unsigned long start, unsigned long end,
- struct list_head *uf, bool downgrade)
+ unsigned long start, unsigned long end, struct list_head *uf,
+ bool unlock)
{
struct mm_struct *mm = vma->vm_mm;
- int ret;
arch_unmap(mm, start, end);
- ret = do_vmi_align_munmap(vmi, vma, mm, start, end, uf, downgrade);
- validate_mm(mm);
- return ret;
+ return do_vmi_align_munmap(vmi, vma, mm, start, end, uf, unlock);
}
/*
diff --git a/mm/mremap.c b/mm/mremap.c
index fe6b722ae633..11e06e4ab33b 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -715,7 +715,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
}
vma_iter_init(&vmi, mm, old_addr);
- if (do_vmi_munmap(&vmi, mm, old_addr, old_len, uf_unmap, false) < 0) {
+ if (!do_vmi_munmap(&vmi, mm, old_addr, old_len, uf_unmap, false)) {
/* OOM: unable to split vma, just get accounts right */
if (vm_flags & VM_ACCOUNT && !(flags & MREMAP_DONTUNMAP))
vm_acct_memory(old_len >> PAGE_SHIFT);
@@ -913,7 +913,6 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
struct vm_area_struct *vma;
unsigned long ret = -EINVAL;
bool locked = false;
- bool downgraded = false;
struct vm_userfaultfd_ctx uf = NULL_VM_UFFD_CTX;
LIST_HEAD(uf_unmap_early);
LIST_HEAD(uf_unmap);
@@ -999,24 +998,23 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
* Always allow a shrinking remap: that just unmaps
* the unnecessary pages..
* do_vmi_munmap does all the needed commit accounting, and
- * downgrades mmap_lock to read if so directed.
+ * unlocks the mmap_lock if so directed.
*/
if (old_len >= new_len) {
- int retval;
VMA_ITERATOR(vmi, mm, addr + new_len);
- retval = do_vmi_munmap(&vmi, mm, addr + new_len,
- old_len - new_len, &uf_unmap, true);
- /* Returning 1 indicates mmap_lock is downgraded to read. */
- if (retval == 1) {
- downgraded = true;
- } else if (retval < 0 && old_len != new_len) {
- ret = retval;
+ if (old_len == new_len) {
+ ret = addr;
goto out;
}
+ ret = do_vmi_munmap(&vmi, mm, addr + new_len, old_len - new_len,
+ &uf_unmap, true);
+ if (ret)
+ goto out;
+
ret = addr;
- goto out;
+ goto out_unlocked;
}
/*
@@ -1101,12 +1099,10 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
out:
if (offset_in_page(ret))
locked = false;
- if (downgraded)
- mmap_read_unlock(current->mm);
- else
- mmap_write_unlock(current->mm);
+ mmap_write_unlock(current->mm);
if (locked && new_len > old_len)
mm_populate(new_addr + old_len, new_len - old_len);
+out_unlocked:
userfaultfd_unmap_complete(mm, &uf_unmap_early);
mremap_userfaultfd_complete(&uf, addr, ret, old_len);
userfaultfd_unmap_complete(mm, &uf_unmap);
diff --git a/mm/nommu.c b/mm/nommu.c
index 37d0b03143f1..c072a660ec2c 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -631,6 +631,22 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
EXPORT_SYMBOL(find_vma);
/*
+ * At least xtensa ends up having protection faults even with no
+ * MMU.. No stack expansion, at least.
+ */
+struct vm_area_struct *lock_mm_and_find_vma(struct mm_struct *mm,
+ unsigned long addr, struct pt_regs *regs)
+{
+ struct vm_area_struct *vma;
+
+ mmap_read_lock(mm);
+ vma = vma_lookup(mm, addr);
+ if (!vma)
+ mmap_read_unlock(mm);
+ return vma;
+}
+
+/*
* expand a stack to a given address
* - not supported under NOMMU conditions
*/
diff --git a/mm/slab.c b/mm/slab.c
index b7817dcba63e..88194391d553 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1883,14 +1883,12 @@ static bool set_on_slab_cache(struct kmem_cache *cachep,
return true;
}
-/**
+/*
* __kmem_cache_create - Create a cache.
* @cachep: cache management descriptor
* @flags: SLAB flags
*
- * Returns a ptr to the cache on success, NULL on failure.
- * Cannot be called within an int, but can be interrupted.
- * The @ctor is run when new pages are allocated by the cache.
+ * Returns zero on success, nonzero on failure.
*
* The flags are
*
@@ -1903,8 +1901,6 @@ static bool set_on_slab_cache(struct kmem_cache *cachep,
* %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
* cacheline. This can be beneficial if you're counting cycles as closely
* as davem.
- *
- * Return: a pointer to the created cache or %NULL in case of error
*/
int __kmem_cache_create(struct kmem_cache *cachep, slab_flags_t flags)
{
@@ -2355,44 +2351,34 @@ static void cache_init_objs_debug(struct kmem_cache *cachep, struct slab *slab)
#ifdef CONFIG_SLAB_FREELIST_RANDOM
/* Hold information during a freelist initialization */
-union freelist_init_state {
- struct {
- unsigned int pos;
- unsigned int *list;
- unsigned int count;
- };
- struct rnd_state rnd_state;
+struct freelist_init_state {
+ unsigned int pos;
+ unsigned int *list;
+ unsigned int count;
};
/*
* Initialize the state based on the randomization method available.
* return true if the pre-computed list is available, false otherwise.
*/
-static bool freelist_state_initialize(union freelist_init_state *state,
+static bool freelist_state_initialize(struct freelist_init_state *state,
struct kmem_cache *cachep,
unsigned int count)
{
bool ret;
- unsigned int rand;
-
- /* Use best entropy available to define a random shift */
- rand = get_random_u32();
-
- /* Use a random state if the pre-computed list is not available */
if (!cachep->random_seq) {
- prandom_seed_state(&state->rnd_state, rand);
ret = false;
} else {
state->list = cachep->random_seq;
state->count = count;
- state->pos = rand % count;
+ state->pos = get_random_u32_below(count);
ret = true;
}
return ret;
}
/* Get the next entry on the list and randomize it using a random shift */
-static freelist_idx_t next_random_slot(union freelist_init_state *state)
+static freelist_idx_t next_random_slot(struct freelist_init_state *state)
{
if (state->pos >= state->count)
state->pos = 0;
@@ -2413,7 +2399,7 @@ static void swap_free_obj(struct slab *slab, unsigned int a, unsigned int b)
static bool shuffle_freelist(struct kmem_cache *cachep, struct slab *slab)
{
unsigned int objfreelist = 0, i, rand, count = cachep->num;
- union freelist_init_state state;
+ struct freelist_init_state state;
bool precomputed;
if (count < 2)
@@ -2442,8 +2428,7 @@ static bool shuffle_freelist(struct kmem_cache *cachep, struct slab *slab)
/* Fisher-Yates shuffle */
for (i = count - 1; i > 0; i--) {
- rand = prandom_u32_state(&state.rnd_state);
- rand %= (i + 1);
+ rand = get_random_u32_below(i + 1);
swap_free_obj(slab, i, rand);
}
} else {
diff --git a/mm/slab.h b/mm/slab.h
index a59c8e5d2441..6a5633b25eb5 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -332,11 +332,11 @@ static inline bool is_kmalloc_cache(struct kmem_cache *s)
#if defined(CONFIG_SLAB)
#define SLAB_CACHE_FLAGS (SLAB_MEM_SPREAD | SLAB_NOLEAKTRACE | \
SLAB_RECLAIM_ACCOUNT | SLAB_TEMPORARY | \
- SLAB_ACCOUNT)
+ SLAB_ACCOUNT | SLAB_NO_MERGE)
#elif defined(CONFIG_SLUB)
#define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \
SLAB_TEMPORARY | SLAB_ACCOUNT | \
- SLAB_NO_USER_FLAGS | SLAB_KMALLOC)
+ SLAB_NO_USER_FLAGS | SLAB_KMALLOC | SLAB_NO_MERGE)
#else
#define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE)
#endif
@@ -357,6 +357,7 @@ static inline bool is_kmalloc_cache(struct kmem_cache *s)
SLAB_TEMPORARY | \
SLAB_ACCOUNT | \
SLAB_KMALLOC | \
+ SLAB_NO_MERGE | \
SLAB_NO_USER_FLAGS)
bool __kmem_cache_empty(struct kmem_cache *);
@@ -870,16 +871,8 @@ struct kmem_obj_info {
void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab);
#endif
-#ifdef CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR
void __check_heap_object(const void *ptr, unsigned long n,
const struct slab *slab, bool to_user);
-#else
-static inline
-void __check_heap_object(const void *ptr, unsigned long n,
- const struct slab *slab, bool to_user)
-{
-}
-#endif
#ifdef CONFIG_SLUB_DEBUG
void skip_orig_size_check(struct kmem_cache *s, const void *object);
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 43c008165f56..d1555ea2981a 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -49,7 +49,7 @@ static DECLARE_WORK(slab_caches_to_rcu_destroy_work,
*/
#define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
SLAB_TRACE | SLAB_TYPESAFE_BY_RCU | SLAB_NOLEAKTRACE | \
- SLAB_FAILSLAB | kasan_never_merge())
+ SLAB_FAILSLAB | SLAB_NO_MERGE | kasan_never_merge())
#define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
SLAB_CACHE_DMA32 | SLAB_ACCOUNT)
@@ -238,14 +238,12 @@ static struct kmem_cache *create_cache(const char *name,
s->refcount = 1;
list_add(&s->list, &slab_caches);
-out:
- if (err)
- return ERR_PTR(err);
return s;
out_free_cache:
kmem_cache_free(kmem_cache, s);
- goto out;
+out:
+ return ERR_PTR(err);
}
/**
@@ -892,6 +890,13 @@ new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
flags |= SLAB_CACHE_DMA;
}
+ /*
+ * If CONFIG_MEMCG_KMEM is enabled, disable cache merging for
+ * KMALLOC_NORMAL caches.
+ */
+ if (IS_ENABLED(CONFIG_MEMCG_KMEM) && (type == KMALLOC_NORMAL))
+ flags |= SLAB_NO_MERGE;
+
if (minalign > ARCH_KMALLOC_MINALIGN) {
aligned_size = ALIGN(aligned_size, minalign);
aligned_idx = __kmalloc_index(aligned_size, false);
@@ -903,13 +908,6 @@ new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
aligned_size, flags);
if (idx != aligned_idx)
kmalloc_caches[type][idx] = kmalloc_caches[type][aligned_idx];
-
- /*
- * If CONFIG_MEMCG_KMEM is enabled, disable cache merging for
- * KMALLOC_NORMAL caches.
- */
- if (IS_ENABLED(CONFIG_MEMCG_KMEM) && (type == KMALLOC_NORMAL))
- kmalloc_caches[type][idx]->refcount = -1;
}
/*
@@ -1162,7 +1160,7 @@ EXPORT_SYMBOL(kmalloc_large_node);
#ifdef CONFIG_SLAB_FREELIST_RANDOM
/* Randomize a generic freelist */
-static void freelist_randomize(struct rnd_state *state, unsigned int *list,
+static void freelist_randomize(unsigned int *list,
unsigned int count)
{
unsigned int rand;
@@ -1173,8 +1171,7 @@ static void freelist_randomize(struct rnd_state *state, unsigned int *list,
/* Fisher-Yates shuffle */
for (i = count - 1; i > 0; i--) {
- rand = prandom_u32_state(state);
- rand %= (i + 1);
+ rand = get_random_u32_below(i + 1);
swap(list[i], list[rand]);
}
}
@@ -1183,7 +1180,6 @@ static void freelist_randomize(struct rnd_state *state, unsigned int *list,
int cache_random_seq_create(struct kmem_cache *cachep, unsigned int count,
gfp_t gfp)
{
- struct rnd_state state;
if (count < 2 || cachep->random_seq)
return 0;
@@ -1192,10 +1188,7 @@ int cache_random_seq_create(struct kmem_cache *cachep, unsigned int count,
if (!cachep->random_seq)
return -ENOMEM;
- /* Get best entropy at this stage of boot */
- prandom_seed_state(&state, get_random_long());
-
- freelist_randomize(&state, cachep->random_seq, count);
+ freelist_randomize(cachep->random_seq, count);
return 0;
}
diff --git a/mm/slub.c b/mm/slub.c
index 7529626bbec2..e3b5d5c0eb3a 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1365,14 +1365,6 @@ static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct
list_del(&slab->slab_list);
}
-/* Tracking of the number of slabs for debugging purposes */
-static inline unsigned long slabs_node(struct kmem_cache *s, int node)
-{
- struct kmem_cache_node *n = get_node(s, node);
-
- return atomic_long_read(&n->nr_slabs);
-}
-
static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
{
return atomic_long_read(&n->nr_slabs);
@@ -1743,8 +1735,6 @@ slab_flags_t kmem_cache_flags(unsigned int object_size,
#define disable_higher_order_debug 0
-static inline unsigned long slabs_node(struct kmem_cache *s, int node)
- { return 0; }
static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
{ return 0; }
static inline void inc_slabs_node(struct kmem_cache *s, int node,
@@ -4623,7 +4613,7 @@ bool __kmem_cache_empty(struct kmem_cache *s)
struct kmem_cache_node *n;
for_each_kmem_cache_node(s, node, n)
- if (n->nr_partial || slabs_node(s, node))
+ if (n->nr_partial || node_nr_slabs(n))
return false;
return true;
}
@@ -4640,7 +4630,7 @@ int __kmem_cache_shutdown(struct kmem_cache *s)
/* Attempt to free all objects */
for_each_kmem_cache_node(s, node, n) {
free_partial(s, n);
- if (n->nr_partial || slabs_node(s, node))
+ if (n->nr_partial || node_nr_slabs(n))
return 1;
}
return 0;
@@ -4853,7 +4843,7 @@ static int __kmem_cache_do_shrink(struct kmem_cache *s)
list_for_each_entry_safe(slab, t, &discard, slab_list)
free_slab(s, slab);
- if (slabs_node(s, node))
+ if (node_nr_slabs(n))
ret = 1;
}
@@ -5191,9 +5181,9 @@ static int validate_slab_node(struct kmem_cache *s,
validate_slab(s, slab, obj_map);
count++;
}
- if (count != atomic_long_read(&n->nr_slabs)) {
+ if (count != node_nr_slabs(n)) {
pr_err("SLUB: %s %ld slabs counted but counter=%ld\n",
- s->name, count, atomic_long_read(&n->nr_slabs));
+ s->name, count, node_nr_slabs(n));
slab_add_kunit_errors();
}
@@ -5477,12 +5467,11 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
for_each_kmem_cache_node(s, node, n) {
if (flags & SO_TOTAL)
- x = atomic_long_read(&n->total_objects);
+ x = node_nr_objs(n);
else if (flags & SO_OBJECTS)
- x = atomic_long_read(&n->total_objects) -
- count_partial(n, count_free);
+ x = node_nr_objs(n) - count_partial(n, count_free);
else
- x = atomic_long_read(&n->nr_slabs);
+ x = node_nr_slabs(n);
total += x;
nodes[node] += x;
}
@@ -5637,12 +5626,6 @@ static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf)
}
SLAB_ATTR_RO(cpu_slabs);
-static ssize_t objects_show(struct kmem_cache *s, char *buf)
-{
- return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS);
-}
-SLAB_ATTR_RO(objects);
-
static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
{
return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS);
@@ -5671,7 +5654,7 @@ static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
objects = (slabs * oo_objects(s->oo)) / 2;
len += sysfs_emit_at(buf, len, "%d(%d)", objects, slabs);
-#if defined(CONFIG_SLUB_CPU_PARTIAL) && defined(CONFIG_SMP)
+#ifdef CONFIG_SLUB_CPU_PARTIAL
for_each_online_cpu(cpu) {
struct slab *slab;
@@ -5737,6 +5720,12 @@ static ssize_t total_objects_show(struct kmem_cache *s, char *buf)
}
SLAB_ATTR_RO(total_objects);
+static ssize_t objects_show(struct kmem_cache *s, char *buf)
+{
+ return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS);
+}
+SLAB_ATTR_RO(objects);
+
static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
{
return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_CONSISTENCY_CHECKS));
@@ -5968,7 +5957,6 @@ static struct attribute *slab_attrs[] = {
&order_attr.attr,
&min_partial_attr.attr,
&cpu_partial_attr.attr,
- &objects_attr.attr,
&objects_partial_attr.attr,
&partial_attr.attr,
&cpu_slabs_attr.attr,
@@ -5982,6 +5970,7 @@ static struct attribute *slab_attrs[] = {
&slabs_cpu_partial_attr.attr,
#ifdef CONFIG_SLUB_DEBUG
&total_objects_attr.attr,
+ &objects_attr.attr,
&slabs_attr.attr,
&sanity_checks_attr.attr,
&trace_attr.attr,
@@ -6249,7 +6238,7 @@ static int __init slab_sysfs_init(void)
if (!slab_kset) {
mutex_unlock(&slab_mutex);
pr_err("Cannot register slab subsystem.\n");
- return -ENOSYS;
+ return -ENOMEM;
}
slab_state = FULL;
@@ -6421,7 +6410,7 @@ static int slab_debug_trace_open(struct inode *inode, struct file *filep)
unsigned long flags;
struct slab *slab;
- if (!atomic_long_read(&n->nr_slabs))
+ if (!node_nr_slabs(n))
continue;
spin_lock_irqsave(&n->list_lock, flags);