From 8e7f37f2aaa56b723a24f6872817cf9c6410b613 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Mon, 7 Dec 2020 17:41:02 -0800
Subject: mm: Add mem_dump_obj() to print source of memory block

There are kernel facilities such as per-CPU reference counts that give
error messages in generic handlers or callbacks, whose messages are
unenlightening.  In the case of per-CPU reference-count underflow, this
is not a problem when creating a new use of this facility because in
that case the bug is almost certainly in the code implementing that new
use.  However, trouble arises when deploying across many systems, which
might exercise corner cases that were not seen during development and
testing.  Here, it would be really nice to get some kind of hint as to
which of several uses the underflow was caused by.

This commit therefore exposes a mem_dump_obj() function that takes a
pointer to memory (which must still be allocated if it has been
dynamically allocated) and prints available information on where that
memory came from.  This pointer can reference the middle of the block as
well as the beginning of the block, as needed by things like RCU callback
functions and timer handlers that might not know where the beginning of
the memory block is.  These functions and handlers can use mem_dump_obj()
to print out better hints as to where the problem might lie.

The information printed can depend on kernel configuration.  For example,
the allocation return address can be printed only for slab and slub,
and even then only when the necessary debug has been enabled.  For slab,
build with CONFIG_DEBUG_SLAB=y, and either use sizes with ample space
to the next power of two or use the SLAB_STORE_USER when creating the
kmem_cache structure.  For slub, build with CONFIG_SLUB_DEBUG=y and boot
with slub_debug=U, or pass SLAB_STORE_USER to kmem_cache_create() if more
focused use is desired.  Also for slub, use CONFIG_STACKTRACE to enable
printing of the allocation-time stack trace.

Cc: Christoph Lameter
Cc: Pekka Enberg
Cc: David Rientjes
Cc: Joonsoo Kim
Cc: Andrew Morton
Cc:
Reported-by: Andrii Nakryiko
[ paulmck: Convert to printing and change names per Joonsoo Kim. ]
[ paulmck: Move slab definition per Stephen Rothwell and kbuild test robot. ]
[ paulmck: Handle CONFIG_MMU=n case where vmalloc() is kmalloc(). ]
[ paulmck: Apply Vlastimil Babka feedback on slab.c kmem_provenance(). ]
[ paulmck: Extract more info from !SLUB_DEBUG per Joonsoo Kim. ]
[ paulmck: Explicitly check for small pointers per Naresh Kamboju. ]
Acked-by: Joonsoo Kim
Acked-by: Vlastimil Babka
Tested-by: Naresh Kamboju
Signed-off-by: Paul E. McKenney
---
 mm/slab.c        | 20 +++++++++++++++
 mm/slab.h        | 12 +++++++++
 mm/slab_common.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 mm/slob.c        |  6 +++++
 mm/slub.c        | 40 ++++++++++++++++++++++++++++++
 mm/util.c        | 24 ++++++++++++++++++
 6 files changed, 177 insertions(+)

(limited to 'mm')

diff --git a/mm/slab.c b/mm/slab.c
index d7c8da9319c7..dcc55e78f353 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3635,6 +3635,26 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
 EXPORT_SYMBOL(__kmalloc_node_track_caller);
 #endif /* CONFIG_NUMA */
 
+void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
+{
+	struct kmem_cache *cachep;
+	unsigned int objnr;
+	void *objp;
+
+	kpp->kp_ptr = object;
+	kpp->kp_page = page;
+	cachep = page->slab_cache;
+	kpp->kp_slab_cache = cachep;
+	objp = object - obj_offset(cachep);
+	kpp->kp_data_offset = obj_offset(cachep);
+	page = virt_to_head_page(objp);
+	objnr = obj_to_index(cachep, page, objp);
+	objp = index_to_obj(cachep, page, objnr);
+	kpp->kp_objp = objp;
+	if (DEBUG && cachep->flags & SLAB_STORE_USER)
+		kpp->kp_ret = *dbg_userword(cachep, objp);
+}
+
 /**
  * __do_kmalloc - allocate memory
  * @size: how many bytes of memory are required.
diff --git a/mm/slab.h b/mm/slab.h
index 1a756a359fa8..ecad9b57bc44 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -615,4 +615,16 @@ static inline bool slab_want_init_on_free(struct kmem_cache *c)
 	return false;
 }
 
+#define KS_ADDRS_COUNT 16
+struct kmem_obj_info {
+	void *kp_ptr;
+	struct page *kp_page;
+	void *kp_objp;
+	unsigned long kp_data_offset;
+	struct kmem_cache *kp_slab_cache;
+	void *kp_ret;
+	void *kp_stack[KS_ADDRS_COUNT];
+};
+void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page);
+
 #endif /* MM_SLAB_H */
diff --git a/mm/slab_common.c b/mm/slab_common.c
index e981c80d216c..adbace4256ef 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -537,6 +537,81 @@ bool slab_is_available(void)
 	return slab_state >= UP;
 }
 
+/**
+ * kmem_valid_obj - does the pointer reference a valid slab object?
+ * @object: pointer to query.
+ *
+ * Return: %true if the pointer is to a not-yet-freed object from
+ * kmalloc() or kmem_cache_alloc(), either %true or %false if the pointer
+ * is to an already-freed object, and %false otherwise.
+ */
+bool kmem_valid_obj(void *object)
+{
+	struct page *page;
+
+	/* Some arches consider ZERO_SIZE_PTR to be a valid address. */
+	if (object < (void *)PAGE_SIZE || !virt_addr_valid(object))
+		return false;
+	page = virt_to_head_page(object);
+	return PageSlab(page);
+}
+
+/**
+ * kmem_dump_obj - Print available slab provenance information
+ * @object: slab object for which to find provenance information.
+ *
+ * This function uses pr_cont(), so that the caller is expected to have
+ * printed out whatever preamble is appropriate.  The provenance information
+ * depends on the type of object and on how much debugging is enabled.
+ * For a slab-cache object, the fact that it is a slab object is printed,
+ * and, if available, the slab name, return address, and stack trace from
+ * the allocation of that object.
+ *
+ * This function will splat if passed a pointer to a non-slab object.
+ * If you are not sure what type of object you have, you should instead
+ * use mem_dump_obj().
+ */
+void kmem_dump_obj(void *object)
+{
+	char *cp = IS_ENABLED(CONFIG_MMU) ? "" : "/vmalloc";
+	int i;
+	struct page *page;
+	unsigned long ptroffset;
+	struct kmem_obj_info kp = { };
+
+	if (WARN_ON_ONCE(!virt_addr_valid(object)))
+		return;
+	page = virt_to_head_page(object);
+	if (WARN_ON_ONCE(!PageSlab(page))) {
+		pr_cont(" non-slab memory.\n");
+		return;
+	}
+	kmem_obj_info(&kp, object, page);
+	if (kp.kp_slab_cache)
+		pr_cont(" slab%s %s", cp, kp.kp_slab_cache->name);
+	else
+		pr_cont(" slab%s", cp);
+	if (kp.kp_objp)
+		pr_cont(" start %px", kp.kp_objp);
+	if (kp.kp_data_offset)
+		pr_cont(" data offset %lu", kp.kp_data_offset);
+	if (kp.kp_objp) {
+		ptroffset = ((char *)object - (char *)kp.kp_objp) - kp.kp_data_offset;
+		pr_cont(" pointer offset %lu", ptroffset);
+	}
+	if (kp.kp_slab_cache && kp.kp_slab_cache->usersize)
+		pr_cont(" size %u", kp.kp_slab_cache->usersize);
+	if (kp.kp_ret)
+		pr_cont(" allocated at %pS\n", kp.kp_ret);
+	else
+		pr_cont("\n");
+	for (i = 0; i < ARRAY_SIZE(kp.kp_stack); i++) {
+		if (!kp.kp_stack[i])
+			break;
+		pr_info(" %pS\n", kp.kp_stack[i]);
+	}
+}
+
 #ifndef CONFIG_SLOB
 /* Create a cache during boot when no slab services are available yet */
 void __init create_boot_cache(struct kmem_cache *s, const char *name,
diff --git a/mm/slob.c b/mm/slob.c
index 8d4bfa46247f..ef87ada8705d 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -461,6 +461,12 @@ out:
 	spin_unlock_irqrestore(&slob_lock, flags);
 }
 
+void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
+{
+	kpp->kp_ptr = object;
+	kpp->kp_page = page;
+}
+
 /*
  * End of slob allocator proper. Begin kmem_cache_alloc and kmalloc frontend.
  */
diff --git a/mm/slub.c b/mm/slub.c
index 0c8b43a5b3b0..3c1a84316fd7 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3919,6 +3919,46 @@ int __kmem_cache_shutdown(struct kmem_cache *s)
 	return 0;
 }
 
+void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
+{
+	void *base;
+	int __maybe_unused i;
+	unsigned int objnr;
+	void *objp;
+	void *objp0;
+	struct kmem_cache *s = page->slab_cache;
+	struct track __maybe_unused *trackp;
+
+	kpp->kp_ptr = object;
+	kpp->kp_page = page;
+	kpp->kp_slab_cache = s;
+	base = page_address(page);
+	objp0 = kasan_reset_tag(object);
+#ifdef CONFIG_SLUB_DEBUG
+	objp = restore_red_left(s, objp0);
+#else
+	objp = objp0;
+#endif
+	objnr = obj_to_index(s, page, objp);
+	kpp->kp_data_offset = (unsigned long)((char *)objp0 - (char *)objp);
+	objp = base + s->size * objnr;
+	kpp->kp_objp = objp;
+	if (WARN_ON_ONCE(objp < base || objp >= base + page->objects * s->size || (objp - base) % s->size) ||
+	    !(s->flags & SLAB_STORE_USER))
+		return;
+#ifdef CONFIG_SLUB_DEBUG
+	trackp = get_track(s, objp, TRACK_ALLOC);
+	kpp->kp_ret = (void *)trackp->addr;
+#ifdef CONFIG_STACKTRACE
+	for (i = 0; i < KS_ADDRS_COUNT && i < TRACK_ADDRS_COUNT; i++) {
+		kpp->kp_stack[i] = (void *)trackp->addrs[i];
+		if (!kpp->kp_stack[i])
+			break;
+	}
+#endif
+#endif
+}
+
 /********************************************************************
  *		Kmalloc subsystem
  *******************************************************************/
diff --git a/mm/util.c b/mm/util.c
index 8c9b7d1e7c49..da46f9d6c382 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -982,3 +982,27 @@ int __weak memcmp_pages(struct page *page1, struct page *page2)
 	kunmap_atomic(addr1);
 	return ret;
 }
+
+/**
+ * mem_dump_obj - Print available provenance information
+ * @object: object for which to find provenance information.
+ *
+ * This function uses pr_cont(), so that the caller is expected to have
+ * printed out whatever preamble is appropriate.  The provenance information
+ * depends on the type of object and on how much debugging is enabled.
+ * For example, for a slab-cache object, the slab name is printed, and,
+ * if available, the return address and stack trace from the allocation
+ * of that object.
+ */
+void mem_dump_obj(void *object)
+{
+	if (!virt_addr_valid(object)) {
+		pr_cont(" non-paged (local) memory.\n");
+		return;
+	}
+	if (kmem_valid_obj(object)) {
+		kmem_dump_obj(object);
+		return;
+	}
+	pr_cont(" non-slab memory.\n");
+}
--
cgit v1.2.3-70-g09d2


From b70fa3b12fc8d2b870d1ac7fd44da89271eb8705 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Tue, 8 Dec 2020 15:26:22 -0800
Subject: mm: Make mem_dump_obj() handle NULL and zero-sized pointers

This commit makes mem_dump_obj() call out NULL and zero-sized pointers
specially instead of classifying them as non-paged memory.

Cc: Christoph Lameter
Cc: Pekka Enberg
Cc: David Rientjes
Cc: Joonsoo Kim
Cc: Andrew Morton
Cc:
Reported-by: Andrii Nakryiko
Acked-by: Vlastimil Babka
Tested-by: Naresh Kamboju
Signed-off-by: Paul E. McKenney
---
 mm/util.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'mm')

diff --git a/mm/util.c b/mm/util.c
index da46f9d6c382..92f23d2c1277 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -997,7 +997,12 @@ int __weak memcmp_pages(struct page *page1, struct page *page2)
 void mem_dump_obj(void *object)
 {
 	if (!virt_addr_valid(object)) {
-		pr_cont(" non-paged (local) memory.\n");
+		if (object == NULL)
+			pr_cont(" NULL pointer.\n");
+		else if (object == ZERO_SIZE_PTR)
+			pr_cont(" zero-size pointer.\n");
+		else
+			pr_cont(" non-paged (local) memory.\n");
 		return;
 	}
 	if (kmem_valid_obj(object)) {
--
cgit v1.2.3-70-g09d2


From 98f180837a896ecedf8f7e12af22b57f271d43c9 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Tue, 8 Dec 2020 16:13:57 -0800
Subject: mm: Make mem_dump_obj() handle vmalloc() memory

This commit adds vmalloc() support to mem_dump_obj().  Note that the
vmalloc_dump_obj() function combines the checking and dumping, in
contrast with the split between kmem_valid_obj() and kmem_dump_obj().
The reason for the difference is that the checking in the vmalloc()
case involves acquiring a global lock, and redundant acquisitions of
global locks should be avoided, even on not-so-fast paths.

Note that this change causes on-stack variables to be reported as
vmalloc() storage from kernel_clone() or similar, depending on the
degree of inlining that your compiler does.  This is likely more helpful
than the earlier "non-paged (local) memory".

Cc: Andrew Morton
Cc: Joonsoo Kim
Cc:
Reported-by: Andrii Nakryiko
Acked-by: Vlastimil Babka
Tested-by: Naresh Kamboju
Signed-off-by: Paul E. McKenney
---
 include/linux/vmalloc.h |  6 ++++++
 mm/util.c               | 14 ++++++------
 mm/vmalloc.c            | 12 ++++++++++++
 3 files changed, 26 insertions(+), 6 deletions(-)

(limited to 'mm')

diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 80c0181c411d..c18f4751a704 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -246,4 +246,10 @@ pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
 int register_vmap_purge_notifier(struct notifier_block *nb);
 int unregister_vmap_purge_notifier(struct notifier_block *nb);
 
+#ifdef CONFIG_MMU
+bool vmalloc_dump_obj(void *object);
+#else
+static inline bool vmalloc_dump_obj(void *object) { return false; }
+#endif
+
 #endif /* _LINUX_VMALLOC_H */
diff --git a/mm/util.c b/mm/util.c
index 92f23d2c1277..54870226cea6 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -996,18 +996,20 @@ int __weak memcmp_pages(struct page *page1, struct page *page2)
  */
 void mem_dump_obj(void *object)
 {
+	if (kmem_valid_obj(object)) {
+		kmem_dump_obj(object);
+		return;
+	}
+	if (vmalloc_dump_obj(object))
+		return;
 	if (!virt_addr_valid(object)) {
 		if (object == NULL)
 			pr_cont(" NULL pointer.\n");
 		else if (object == ZERO_SIZE_PTR)
 			pr_cont(" zero-size pointer.\n");
 		else
-			pr_cont(" non-paged (local) memory.\n");
-		return;
-	}
-	if (kmem_valid_obj(object)) {
-		kmem_dump_obj(object);
+			pr_cont(" non-paged memory.\n");
 		return;
 	}
-	pr_cont(" non-slab memory.\n");
+	pr_cont(" non-slab/vmalloc memory.\n");
 }
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 4d88fe5a277a..c274ea4f14ea 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -3448,6 +3448,18 @@ void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
 }
 #endif /* CONFIG_SMP */
 
+bool vmalloc_dump_obj(void *object)
+{
+	struct vm_struct *vm;
+	void *objp = (void *)PAGE_ALIGN((unsigned long)object);
+
+	vm = find_vm_area(objp);
+	if (!vm)
+		return false;
+	pr_cont(" vmalloc allocated at %pS\n", vm->caller);
+	return true;
+}
+
 #ifdef CONFIG_PROC_FS
 static void *s_start(struct seq_file *m, loff_t *pos)
 	__acquires(&vmap_purge_lock)
--
cgit v1.2.3-70-g09d2


From bd34dcd4120d7e358baac9c22ef1321bd0c22079 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Wed, 9 Dec 2020 15:15:27 -0800
Subject: mm: Make mem_obj_dump() vmalloc() dumps include start and length

This commit adds the starting address and number of pages to the vmalloc()
information dumped by way of vmalloc_dump_obj().

Cc: Andrew Morton
Cc: Joonsoo Kim
Cc:
Reported-by: Andrii Nakryiko
Suggested-by: Vlastimil Babka
Acked-by: Vlastimil Babka
Tested-by: Naresh Kamboju
Signed-off-by: Paul E. McKenney
---
 mm/vmalloc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'mm')

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index c274ea4f14ea..e3229ff627ea 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -3456,7 +3456,8 @@ bool vmalloc_dump_obj(void *object)
 	vm = find_vm_area(objp);
 	if (!vm)
 		return false;
-	pr_cont(" vmalloc allocated at %pS\n", vm->caller);
+	pr_cont(" %u-page vmalloc region starting at %#lx allocated at %pS\n",
+		vm->nr_pages, (unsigned long)vm->addr, vm->caller);
 	return true;
 }
 
--
cgit v1.2.3-70-g09d2
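
The patches above leave the calling convention implicit: both kmem_dump_obj() and mem_dump_obj() print with pr_cont(), so a caller first emits its own preamble, without a trailing newline, and lets the dump complete the line. Below is a minimal sketch of such a caller; the report_suspect_object() helper and its message are illustrative only, not part of this series, and the declaration of mem_dump_obj() is assumed to be reachable through <linux/mm.h>, whose hunk falls outside the mm/-only diffs shown here.

#include <linux/mm.h>		/* mem_dump_obj(); declaration assumed to live here */
#include <linux/printk.h>

/* Hypothetical debug helper: hint at where a suspect object came from. */
static void report_suspect_object(void *obj)
{
	/*
	 * mem_dump_obj() continues the current console line via pr_cont(),
	 * so print the preamble without a trailing newline.
	 */
	pr_err("suspect object %px:", obj);
	mem_dump_obj(obj);
}

Depending on the pointer and on the debug options described in the first commit message, the continuation then identifies a slab object (with cache name, start address, offsets, and, when SLAB_STORE_USER data is available, the allocation return address and stack trace), a vmalloc() region, a NULL or zero-size pointer, or falls back to the generic messages added later in the series.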