diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Kconfig.debug | 1 | ||||
-rw-r--r-- | lib/alloc_tag.c | 515 | ||||
-rw-r--r-- | lib/codetag.c | 104 | ||||
-rw-r--r-- | lib/maple_tree.c | 220 | ||||
-rw-r--r-- | lib/percpu_test.c | 11 |
5 files changed, 695 insertions, 156 deletions
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 7312ae7c3cc5..6798bbbcbd32 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -993,6 +993,7 @@ config CODE_TAGGING config MEM_ALLOC_PROFILING bool "Enable memory allocation profiling" default n + depends on MMU depends on PROC_FS depends on !DEBUG_FORCE_WEAK_PER_CPU select CODE_TAGGING diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c index 81e5f9a70f22..2414a7ee7ec7 100644 --- a/lib/alloc_tag.c +++ b/lib/alloc_tag.c @@ -1,12 +1,26 @@ // SPDX-License-Identifier: GPL-2.0-only #include <linux/alloc_tag.h> +#include <linux/execmem.h> #include <linux/fs.h> #include <linux/gfp.h> +#include <linux/kallsyms.h> #include <linux/module.h> #include <linux/page_ext.h> #include <linux/proc_fs.h> #include <linux/seq_buf.h> #include <linux/seq_file.h> +#include <linux/vmalloc.h> + +#define ALLOCINFO_FILE_NAME "allocinfo" +#define MODULE_ALLOC_TAG_VMAP_SIZE (100000UL * sizeof(struct alloc_tag)) +#define SECTION_START(NAME) (CODETAG_SECTION_START_PREFIX NAME) +#define SECTION_STOP(NAME) (CODETAG_SECTION_STOP_PREFIX NAME) + +#ifdef CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT +static bool mem_profiling_support = true; +#else +static bool mem_profiling_support; +#endif static struct codetag_type *alloc_tag_cttype; @@ -15,6 +29,11 @@ EXPORT_SYMBOL(_shared_alloc_tag); DEFINE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT, mem_alloc_profiling_key); +DEFINE_STATIC_KEY_FALSE(mem_profiling_compressed); + +struct alloc_tag_kernel_section kernel_tags = { NULL, 0 }; +unsigned long alloc_tag_ref_mask; +int alloc_tag_ref_offs; struct allocinfo_private { struct codetag_iterator iter; @@ -144,44 +163,450 @@ size_t alloc_tag_top_users(struct codetag_bytes *tags, size_t count, bool can_sl return nr; } +void pgalloc_tag_split(struct folio *folio, int old_order, int new_order) +{ + int i; + struct alloc_tag *tag; + unsigned int nr_pages = 1 << new_order; + + if (!mem_alloc_profiling_enabled()) + return; + + tag = pgalloc_tag_get(&folio->page); + if (!tag) + return; + + for (i = nr_pages; i < (1 << old_order); i += nr_pages) { + union pgtag_ref_handle handle; + union codetag_ref ref; + + if (get_page_tag_ref(folio_page(folio, i), &ref, &handle)) { + /* Set new reference to point to the original tag */ + alloc_tag_ref_set(&ref, tag); + update_page_tag_ref(handle, &ref); + put_page_tag_ref(handle); + } + } +} + +void pgalloc_tag_copy(struct folio *new, struct folio *old) +{ + union pgtag_ref_handle handle; + union codetag_ref ref; + struct alloc_tag *tag; + + tag = pgalloc_tag_get(&old->page); + if (!tag) + return; + + if (!get_page_tag_ref(&new->page, &ref, &handle)) + return; + + /* Clear the old ref to the original allocation tag. */ + clear_page_tag_ref(&old->page); + /* Decrement the counters of the tag on get_new_folio. */ + alloc_tag_sub(&ref, folio_size(new)); + __alloc_tag_ref_set(&ref, tag); + update_page_tag_ref(handle, &ref); + put_page_tag_ref(handle); +} + +static void shutdown_mem_profiling(bool remove_file) +{ + if (mem_alloc_profiling_enabled()) + static_branch_disable(&mem_alloc_profiling_key); + + if (!mem_profiling_support) + return; + + if (remove_file) + remove_proc_entry(ALLOCINFO_FILE_NAME, NULL); + mem_profiling_support = false; +} + static void __init procfs_init(void) { - proc_create_seq("allocinfo", 0400, NULL, &allocinfo_seq_op); + if (!mem_profiling_support) + return; + + if (!proc_create_seq(ALLOCINFO_FILE_NAME, 0400, NULL, &allocinfo_seq_op)) { + pr_err("Failed to create %s file\n", ALLOCINFO_FILE_NAME); + shutdown_mem_profiling(false); + } +} + +void __init alloc_tag_sec_init(void) +{ + struct alloc_tag *last_codetag; + + if (!mem_profiling_support) + return; + + if (!static_key_enabled(&mem_profiling_compressed)) + return; + + kernel_tags.first_tag = (struct alloc_tag *)kallsyms_lookup_name( + SECTION_START(ALLOC_TAG_SECTION_NAME)); + last_codetag = (struct alloc_tag *)kallsyms_lookup_name( + SECTION_STOP(ALLOC_TAG_SECTION_NAME)); + kernel_tags.count = last_codetag - kernel_tags.first_tag; + + /* Check if kernel tags fit into page flags */ + if (kernel_tags.count > (1UL << NR_UNUSED_PAGEFLAG_BITS)) { + shutdown_mem_profiling(false); /* allocinfo file does not exist yet */ + pr_err("%lu allocation tags cannot be references using %d available page flag bits. Memory allocation profiling is disabled!\n", + kernel_tags.count, NR_UNUSED_PAGEFLAG_BITS); + return; + } + + alloc_tag_ref_offs = (LRU_REFS_PGOFF - NR_UNUSED_PAGEFLAG_BITS); + alloc_tag_ref_mask = ((1UL << NR_UNUSED_PAGEFLAG_BITS) - 1); + pr_debug("Memory allocation profiling compression is using %d page flag bits!\n", + NR_UNUSED_PAGEFLAG_BITS); +} + +#ifdef CONFIG_MODULES + +static struct maple_tree mod_area_mt = MTREE_INIT(mod_area_mt, MT_FLAGS_ALLOC_RANGE); +static struct vm_struct *vm_module_tags; +/* A dummy object used to indicate an unloaded module */ +static struct module unloaded_mod; +/* A dummy object used to indicate a module prepended area */ +static struct module prepend_mod; + +struct alloc_tag_module_section module_tags; + +static inline unsigned long alloc_tag_align(unsigned long val) +{ + if (!static_key_enabled(&mem_profiling_compressed)) { + /* No alignment requirements when we are not indexing the tags */ + return val; + } + + if (val % sizeof(struct alloc_tag) == 0) + return val; + return ((val / sizeof(struct alloc_tag)) + 1) * sizeof(struct alloc_tag); } -static bool alloc_tag_module_unload(struct codetag_type *cttype, - struct codetag_module *cmod) +static bool ensure_alignment(unsigned long align, unsigned int *prepend) { - struct codetag_iterator iter = codetag_get_ct_iter(cttype); - struct alloc_tag_counters counter; - bool module_unused = true; + if (!static_key_enabled(&mem_profiling_compressed)) { + /* No alignment requirements when we are not indexing the tags */ + return true; + } + + /* + * If alloc_tag size is not a multiple of required alignment, tag + * indexing does not work. + */ + if (!IS_ALIGNED(sizeof(struct alloc_tag), align)) + return false; + + /* Ensure prepend consumes multiple of alloc_tag-sized blocks */ + if (*prepend) + *prepend = alloc_tag_align(*prepend); + + return true; +} + +static inline bool tags_addressable(void) +{ + unsigned long tag_idx_count; + + if (!static_key_enabled(&mem_profiling_compressed)) + return true; /* with page_ext tags are always addressable */ + + tag_idx_count = CODETAG_ID_FIRST + kernel_tags.count + + module_tags.size / sizeof(struct alloc_tag); + + return tag_idx_count < (1UL << NR_UNUSED_PAGEFLAG_BITS); +} + +static bool needs_section_mem(struct module *mod, unsigned long size) +{ + if (!mem_profiling_support) + return false; + + return size >= sizeof(struct alloc_tag); +} + +static struct alloc_tag *find_used_tag(struct alloc_tag *from, struct alloc_tag *to) +{ + while (from <= to) { + struct alloc_tag_counters counter; + + counter = alloc_tag_read(from); + if (counter.bytes) + return from; + from++; + } + + return NULL; +} + +/* Called with mod_area_mt locked */ +static void clean_unused_module_areas_locked(void) +{ + MA_STATE(mas, &mod_area_mt, 0, module_tags.size); + struct module *val; + + mas_for_each(&mas, val, module_tags.size) { + if (val != &unloaded_mod) + continue; + + /* Release area if all tags are unused */ + if (!find_used_tag((struct alloc_tag *)(module_tags.start_addr + mas.index), + (struct alloc_tag *)(module_tags.start_addr + mas.last))) + mas_erase(&mas); + } +} + +/* Called with mod_area_mt locked */ +static bool find_aligned_area(struct ma_state *mas, unsigned long section_size, + unsigned long size, unsigned int prepend, unsigned long align) +{ + bool cleanup_done = false; + +repeat: + /* Try finding exact size and hope the start is aligned */ + if (!mas_empty_area(mas, 0, section_size - 1, prepend + size)) { + if (IS_ALIGNED(mas->index + prepend, align)) + return true; + + /* Try finding larger area to align later */ + mas_reset(mas); + if (!mas_empty_area(mas, 0, section_size - 1, + size + prepend + align - 1)) + return true; + } + + /* No free area, try cleanup stale data and repeat the search once */ + if (!cleanup_done) { + clean_unused_module_areas_locked(); + cleanup_done = true; + mas_reset(mas); + goto repeat; + } + + return false; +} + +static int vm_module_tags_populate(void) +{ + unsigned long phys_size = vm_module_tags->nr_pages << PAGE_SHIFT; + + if (phys_size < module_tags.size) { + struct page **next_page = vm_module_tags->pages + vm_module_tags->nr_pages; + unsigned long addr = module_tags.start_addr + phys_size; + unsigned long more_pages; + unsigned long nr; + + more_pages = ALIGN(module_tags.size - phys_size, PAGE_SIZE) >> PAGE_SHIFT; + nr = alloc_pages_bulk_array_node(GFP_KERNEL | __GFP_NOWARN, + NUMA_NO_NODE, more_pages, next_page); + if (nr < more_pages || + vmap_pages_range(addr, addr + (nr << PAGE_SHIFT), PAGE_KERNEL, + next_page, PAGE_SHIFT) < 0) { + /* Clean up and error out */ + for (int i = 0; i < nr; i++) + __free_page(next_page[i]); + return -ENOMEM; + } + vm_module_tags->nr_pages += nr; + } + + return 0; +} + +static void *reserve_module_tags(struct module *mod, unsigned long size, + unsigned int prepend, unsigned long align) +{ + unsigned long section_size = module_tags.end_addr - module_tags.start_addr; + MA_STATE(mas, &mod_area_mt, 0, section_size - 1); + unsigned long offset; + void *ret = NULL; + + /* If no tags return error */ + if (size < sizeof(struct alloc_tag)) + return ERR_PTR(-EINVAL); + + /* + * align is always power of 2, so we can use IS_ALIGNED and ALIGN. + * align 0 or 1 means no alignment, to simplify set to 1. + */ + if (!align) + align = 1; + + if (!ensure_alignment(align, &prepend)) { + shutdown_mem_profiling(true); + pr_err("%s: alignment %lu is incompatible with allocation tag indexing. Memory allocation profiling is disabled!\n", + mod->name, align); + return ERR_PTR(-EINVAL); + } + + mas_lock(&mas); + if (!find_aligned_area(&mas, section_size, size, prepend, align)) { + ret = ERR_PTR(-ENOMEM); + goto unlock; + } + + /* Mark found area as reserved */ + offset = mas.index; + offset += prepend; + offset = ALIGN(offset, align); + if (offset != mas.index) { + unsigned long pad_start = mas.index; + + mas.last = offset - 1; + mas_store(&mas, &prepend_mod); + if (mas_is_err(&mas)) { + ret = ERR_PTR(xa_err(mas.node)); + goto unlock; + } + mas.index = offset; + mas.last = offset + size - 1; + mas_store(&mas, mod); + if (mas_is_err(&mas)) { + mas.index = pad_start; + mas_erase(&mas); + ret = ERR_PTR(xa_err(mas.node)); + } + } else { + mas.last = offset + size - 1; + mas_store(&mas, mod); + if (mas_is_err(&mas)) + ret = ERR_PTR(xa_err(mas.node)); + } +unlock: + mas_unlock(&mas); + + if (IS_ERR(ret)) + return ret; + + if (module_tags.size < offset + size) { + int grow_res; + + module_tags.size = offset + size; + if (mem_alloc_profiling_enabled() && !tags_addressable()) { + shutdown_mem_profiling(true); + pr_warn("With module %s there are too many tags to fit in %d page flag bits. Memory allocation profiling is disabled!\n", + mod->name, NR_UNUSED_PAGEFLAG_BITS); + } + + grow_res = vm_module_tags_populate(); + if (grow_res) { + shutdown_mem_profiling(true); + pr_err("Failed to allocate memory for allocation tags in the module %s. Memory allocation profiling is disabled!\n", + mod->name); + return ERR_PTR(grow_res); + } + } + + return (struct alloc_tag *)(module_tags.start_addr + offset); +} + +static void release_module_tags(struct module *mod, bool used) +{ + MA_STATE(mas, &mod_area_mt, module_tags.size, module_tags.size); struct alloc_tag *tag; - struct codetag *ct; + struct module *val; + + mas_lock(&mas); + mas_for_each_rev(&mas, val, 0) + if (val == mod) + break; + + if (!val) /* module not found */ + goto out; + + if (!used) + goto release_area; + + /* Find out if the area is used */ + tag = find_used_tag((struct alloc_tag *)(module_tags.start_addr + mas.index), + (struct alloc_tag *)(module_tags.start_addr + mas.last)); + if (tag) { + struct alloc_tag_counters counter = alloc_tag_read(tag); + + pr_info("%s:%u module %s func:%s has %llu allocated at module unload\n", + tag->ct.filename, tag->ct.lineno, tag->ct.modname, + tag->ct.function, counter.bytes); + } else { + used = false; + } +release_area: + mas_store(&mas, used ? &unloaded_mod : NULL); + val = mas_prev_range(&mas, 0); + if (val == &prepend_mod) + mas_store(&mas, NULL); +out: + mas_unlock(&mas); +} - for (ct = codetag_next_ct(&iter); ct; ct = codetag_next_ct(&iter)) { - if (iter.cmod != cmod) +static void replace_module(struct module *mod, struct module *new_mod) +{ + MA_STATE(mas, &mod_area_mt, 0, module_tags.size); + struct module *val; + + mas_lock(&mas); + mas_for_each(&mas, val, module_tags.size) { + if (val != mod) continue; - tag = ct_to_alloc_tag(ct); - counter = alloc_tag_read(tag); + mas_store_gfp(&mas, new_mod, GFP_KERNEL); + break; + } + mas_unlock(&mas); +} - if (WARN(counter.bytes, - "%s:%u module %s func:%s has %llu allocated at module unload", - ct->filename, ct->lineno, ct->modname, ct->function, counter.bytes)) - module_unused = false; +static int __init alloc_mod_tags_mem(void) +{ + /* Map space to copy allocation tags */ + vm_module_tags = execmem_vmap(MODULE_ALLOC_TAG_VMAP_SIZE); + if (!vm_module_tags) { + pr_err("Failed to map %lu bytes for module allocation tags\n", + MODULE_ALLOC_TAG_VMAP_SIZE); + module_tags.start_addr = 0; + return -ENOMEM; } - return module_unused; + vm_module_tags->pages = kmalloc_array(get_vm_area_size(vm_module_tags) >> PAGE_SHIFT, + sizeof(struct page *), GFP_KERNEL | __GFP_ZERO); + if (!vm_module_tags->pages) { + free_vm_area(vm_module_tags); + return -ENOMEM; + } + + module_tags.start_addr = (unsigned long)vm_module_tags->addr; + module_tags.end_addr = module_tags.start_addr + MODULE_ALLOC_TAG_VMAP_SIZE; + /* Ensure the base is alloc_tag aligned when required for indexing */ + module_tags.start_addr = alloc_tag_align(module_tags.start_addr); + + return 0; +} + +static void __init free_mod_tags_mem(void) +{ + int i; + + module_tags.start_addr = 0; + for (i = 0; i < vm_module_tags->nr_pages; i++) + __free_page(vm_module_tags->pages[i]); + kfree(vm_module_tags->pages); + free_vm_area(vm_module_tags); } -#ifdef CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT -static bool mem_profiling_support __meminitdata = true; -#else -static bool mem_profiling_support __meminitdata; -#endif +#else /* CONFIG_MODULES */ + +static inline int alloc_mod_tags_mem(void) { return 0; } +static inline void free_mod_tags_mem(void) {} + +#endif /* CONFIG_MODULES */ +/* See: Documentation/mm/allocation-profiling.rst */ static int __init setup_early_mem_profiling(char *str) { + bool compressed = false; bool enable; if (!str || !str[0]) @@ -190,22 +615,37 @@ static int __init setup_early_mem_profiling(char *str) if (!strncmp(str, "never", 5)) { enable = false; mem_profiling_support = false; + pr_info("Memory allocation profiling is disabled!\n"); } else { - int res; + char *token = strsep(&str, ","); - res = kstrtobool(str, &enable); - if (res) - return res; + if (kstrtobool(token, &enable)) + return -EINVAL; + if (str) { + + if (strcmp(str, "compressed")) + return -EINVAL; + + compressed = true; + } mem_profiling_support = true; + pr_info("Memory allocation profiling is enabled %s compression and is turned %s!\n", + compressed ? "with" : "without", enable ? "on" : "off"); } - if (enable != static_key_enabled(&mem_alloc_profiling_key)) { + if (enable != mem_alloc_profiling_enabled()) { if (enable) static_branch_enable(&mem_alloc_profiling_key); else static_branch_disable(&mem_alloc_profiling_key); } + if (compressed != static_key_enabled(&mem_profiling_compressed)) { + if (compressed) + static_branch_enable(&mem_profiling_compressed); + else + static_branch_disable(&mem_profiling_compressed); + } return 0; } @@ -213,6 +653,9 @@ early_param("sysctl.vm.mem_profiling", setup_early_mem_profiling); static __init bool need_page_alloc_tagging(void) { + if (static_key_enabled(&mem_profiling_compressed)) + return false; + return mem_profiling_support; } @@ -255,14 +698,26 @@ static inline void sysctl_init(void) {} static int __init alloc_tag_init(void) { const struct codetag_type_desc desc = { - .section = "alloc_tags", - .tag_size = sizeof(struct alloc_tag), - .module_unload = alloc_tag_module_unload, + .section = ALLOC_TAG_SECTION_NAME, + .tag_size = sizeof(struct alloc_tag), +#ifdef CONFIG_MODULES + .needs_section_mem = needs_section_mem, + .alloc_section_mem = reserve_module_tags, + .free_section_mem = release_module_tags, + .module_replaced = replace_module, +#endif }; + int res; + + res = alloc_mod_tags_mem(); + if (res) + return res; alloc_tag_cttype = codetag_register_type(&desc); - if (IS_ERR(alloc_tag_cttype)) + if (IS_ERR(alloc_tag_cttype)) { + free_mod_tags_mem(); return PTR_ERR(alloc_tag_cttype); + } sysctl_init(); procfs_init(); diff --git a/lib/codetag.c b/lib/codetag.c index d1fbbb7c2ec3..42aadd6c1454 100644 --- a/lib/codetag.c +++ b/lib/codetag.c @@ -149,8 +149,8 @@ static struct codetag_range get_section_range(struct module *mod, const char *section) { return (struct codetag_range) { - get_symbol(mod, "__start_", section), - get_symbol(mod, "__stop_", section), + get_symbol(mod, CODETAG_SECTION_START_PREFIX, section), + get_symbol(mod, CODETAG_SECTION_STOP_PREFIX, section), }; } @@ -207,6 +207,94 @@ static int codetag_module_init(struct codetag_type *cttype, struct module *mod) } #ifdef CONFIG_MODULES +#define CODETAG_SECTION_PREFIX ".codetag." + +/* Some codetag types need a separate module section */ +bool codetag_needs_module_section(struct module *mod, const char *name, + unsigned long size) +{ + const char *type_name; + struct codetag_type *cttype; + bool ret = false; + + if (strncmp(name, CODETAG_SECTION_PREFIX, strlen(CODETAG_SECTION_PREFIX))) + return false; + + type_name = name + strlen(CODETAG_SECTION_PREFIX); + mutex_lock(&codetag_lock); + list_for_each_entry(cttype, &codetag_types, link) { + if (strcmp(type_name, cttype->desc.section) == 0) { + if (!cttype->desc.needs_section_mem) + break; + + down_write(&cttype->mod_lock); + ret = cttype->desc.needs_section_mem(mod, size); + up_write(&cttype->mod_lock); + break; + } + } + mutex_unlock(&codetag_lock); + + return ret; +} + +void *codetag_alloc_module_section(struct module *mod, const char *name, + unsigned long size, unsigned int prepend, + unsigned long align) +{ + const char *type_name = name + strlen(CODETAG_SECTION_PREFIX); + struct codetag_type *cttype; + void *ret = ERR_PTR(-EINVAL); + + mutex_lock(&codetag_lock); + list_for_each_entry(cttype, &codetag_types, link) { + if (strcmp(type_name, cttype->desc.section) == 0) { + if (WARN_ON(!cttype->desc.alloc_section_mem)) + break; + + down_write(&cttype->mod_lock); + ret = cttype->desc.alloc_section_mem(mod, size, prepend, align); + up_write(&cttype->mod_lock); + break; + } + } + mutex_unlock(&codetag_lock); + + return ret; +} + +void codetag_free_module_sections(struct module *mod) +{ + struct codetag_type *cttype; + + mutex_lock(&codetag_lock); + list_for_each_entry(cttype, &codetag_types, link) { + if (!cttype->desc.free_section_mem) + continue; + + down_write(&cttype->mod_lock); + cttype->desc.free_section_mem(mod, false); + up_write(&cttype->mod_lock); + } + mutex_unlock(&codetag_lock); +} + +void codetag_module_replaced(struct module *mod, struct module *new_mod) +{ + struct codetag_type *cttype; + + mutex_lock(&codetag_lock); + list_for_each_entry(cttype, &codetag_types, link) { + if (!cttype->desc.module_replaced) + continue; + + down_write(&cttype->mod_lock); + cttype->desc.module_replaced(mod, new_mod); + up_write(&cttype->mod_lock); + } + mutex_unlock(&codetag_lock); +} + void codetag_load_module(struct module *mod) { struct codetag_type *cttype; @@ -220,13 +308,12 @@ void codetag_load_module(struct module *mod) mutex_unlock(&codetag_lock); } -bool codetag_unload_module(struct module *mod) +void codetag_unload_module(struct module *mod) { struct codetag_type *cttype; - bool unload_ok = true; if (!mod) - return true; + return; /* await any module's kfree_rcu() operations to complete */ kvfree_rcu_barrier(); @@ -246,18 +333,17 @@ bool codetag_unload_module(struct module *mod) } if (found) { if (cttype->desc.module_unload) - if (!cttype->desc.module_unload(cttype, cmod)) - unload_ok = false; + cttype->desc.module_unload(cttype, cmod); cttype->count -= range_size(cttype, &cmod->range); idr_remove(&cttype->mod_idr, mod_id); kfree(cmod); } up_write(&cttype->mod_lock); + if (found && cttype->desc.free_section_mem) + cttype->desc.free_section_mem(mod, true); } mutex_unlock(&codetag_lock); - - return unload_ok; } #endif /* CONFIG_MODULES */ diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 3619301dda2e..38aa8abf8eb8 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -64,6 +64,21 @@ #define CREATE_TRACE_POINTS #include <trace/events/maple_tree.h> +/* + * Kernel pointer hashing renders much of the maple tree dump useless as tagged + * pointers get hashed to arbitrary values. + * + * If CONFIG_DEBUG_VM_MAPLE_TREE is set we are in a debug mode where it is + * permissible to bypass this. Otherwise remain cautious and retain the hashing. + * + * Userland doesn't know about %px so also use %p there. + */ +#if defined(__KERNEL__) && defined(CONFIG_DEBUG_VM_MAPLE_TREE) +#define PTR_FMT "%px" +#else +#define PTR_FMT "%p" +#endif + #define MA_ROOT_PARENT 1 /* @@ -120,7 +135,6 @@ static const unsigned char mt_min_slots[] = { #define MAPLE_BIG_NODE_GAPS (MAPLE_ARANGE64_SLOTS * 2 + 1) struct maple_big_node { - struct maple_pnode *parent; unsigned long pivot[MAPLE_BIG_NODE_SLOTS - 1]; union { struct maple_enode *slot[MAPLE_BIG_NODE_SLOTS]; @@ -1193,19 +1207,17 @@ static inline void mas_push_node(struct ma_state *mas, struct maple_node *used) reuse->request_count = 0; reuse->node_count = 0; - if (count && (head->node_count < MAPLE_ALLOC_SLOTS)) { - head->slot[head->node_count++] = reuse; - head->total++; - goto done; - } - - reuse->total = 1; - if ((head) && !((unsigned long)head & 0x1)) { + if (count) { + if (head->node_count < MAPLE_ALLOC_SLOTS) { + head->slot[head->node_count++] = reuse; + head->total++; + goto done; + } reuse->slot[0] = head; reuse->node_count = 1; - reuse->total += head->total; } + reuse->total = count + 1; mas->alloc = reuse; done: if (requested > 1) @@ -1251,11 +1263,11 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp) mas->alloc = node; node->total = ++allocated; + node->request_count = 0; requested--; } node = mas->alloc; - node->request_count = 0; while (requested) { max_req = MAPLE_ALLOC_SLOTS - node->node_count; slots = (void **)&node->slot[node->node_count]; @@ -1271,7 +1283,10 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp) node->node_count += count; allocated += count; - node = node->slot[0]; + /* find a non-full node*/ + do { + node = node->slot[0]; + } while (unlikely(node->node_count == MAPLE_ALLOC_SLOTS)); requested -= count; } mas->alloc->total = allocated; @@ -1280,10 +1295,9 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp) nomem_bulk: /* Clean up potential freed allocations on bulk failure */ memset(slots, 0, max_req * sizeof(unsigned long)); + mas->alloc->total = allocated; nomem_one: mas_set_alloc_req(mas, requested); - if (mas->alloc && !(((unsigned long)mas->alloc & 0x1))) - mas->alloc->total = allocated; mas_set_err(mas, -ENOMEM); } @@ -1943,14 +1957,13 @@ static inline void mas_mab_cp(struct ma_state *mas, unsigned char mas_start, for (; i < piv_end; i++, j++) { b_node->pivot[j] = pivots[i]; if (unlikely(!b_node->pivot[j])) - break; + goto complete; if (unlikely(mas->max == b_node->pivot[j])) goto complete; } - if (likely(i <= mas_end)) - b_node->pivot[j] = mas_safe_pivot(mas, pivots, i, mt); + b_node->pivot[j] = mas_safe_pivot(mas, pivots, i, mt); complete: b_node->b_end = ++j; @@ -2139,9 +2152,7 @@ static inline bool mas_prev_sibling(struct ma_state *mas) { unsigned int p_slot = mte_parent_slot(mas->node); - if (mte_is_root(mas->node)) - return false; - + /* For root node, p_slot is set to 0 by mte_parent_slot(). */ if (!p_slot) return false; @@ -3159,10 +3170,7 @@ static inline void mast_fill_bnode(struct maple_subtree_state *mast, bool cp = true; unsigned char split; - memset(mast->bn->gap, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->gap)); - memset(mast->bn->slot, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->slot)); - memset(mast->bn->pivot, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->pivot)); - mast->bn->b_end = 0; + memset(mast->bn, 0, sizeof(struct maple_big_node)); if (mte_is_root(mas->node)) { cp = false; @@ -3889,7 +3897,8 @@ static inline void mas_wr_slot_store(struct ma_wr_state *wr_mas) wr_mas->pivots[offset] = mas->index - 1; mas->offset++; /* Keep mas accurate. */ } - } else if (!mt_in_rcu(mas->tree)) { + } else { + WARN_ON_ONCE(mt_in_rcu(mas->tree)); /* * Expand the range, only partially overwriting the previous and * next ranges @@ -3899,8 +3908,6 @@ static inline void mas_wr_slot_store(struct ma_wr_state *wr_mas) wr_mas->pivots[offset] = mas->index - 1; wr_mas->pivots[offset + 1] = mas->last; mas->offset++; /* Keep mas accurate. */ - } else { - return; } trace_ma_write(__func__, mas, 0, wr_mas->entry); @@ -4181,75 +4188,53 @@ static inline int mas_prealloc_calc(struct ma_state *mas, void *entry) } /* - * mas_wr_store_type() - Set the store type for a given + * mas_wr_store_type() - Determine the store type for a given * store operation. * @wr_mas: The maple write state + * + * Return: the type of store needed for the operation */ -static inline void mas_wr_store_type(struct ma_wr_state *wr_mas) +static inline enum store_type mas_wr_store_type(struct ma_wr_state *wr_mas) { struct ma_state *mas = wr_mas->mas; unsigned char new_end; - if (unlikely(mas_is_none(mas) || mas_is_ptr(mas))) { - mas->store_type = wr_store_root; - return; - } + if (unlikely(mas_is_none(mas) || mas_is_ptr(mas))) + return wr_store_root; - if (unlikely(!mas_wr_walk(wr_mas))) { - mas->store_type = wr_spanning_store; - return; - } + if (unlikely(!mas_wr_walk(wr_mas))) + return wr_spanning_store; /* At this point, we are at the leaf node that needs to be altered. */ mas_wr_end_piv(wr_mas); if (!wr_mas->entry) mas_wr_extend_null(wr_mas); - new_end = mas_wr_new_end(wr_mas); - if ((wr_mas->r_min == mas->index) && (wr_mas->r_max == mas->last)) { - mas->store_type = wr_exact_fit; - return; - } + if ((wr_mas->r_min == mas->index) && (wr_mas->r_max == mas->last)) + return wr_exact_fit; - if (unlikely(!mas->index && mas->last == ULONG_MAX)) { - mas->store_type = wr_new_root; - return; - } + if (unlikely(!mas->index && mas->last == ULONG_MAX)) + return wr_new_root; + new_end = mas_wr_new_end(wr_mas); /* Potential spanning rebalance collapsing a node */ if (new_end < mt_min_slots[wr_mas->type]) { - if (!mte_is_root(mas->node) && !(mas->mas_flags & MA_STATE_BULK)) { - mas->store_type = wr_rebalance; - return; - } - mas->store_type = wr_node_store; - return; + if (!mte_is_root(mas->node) && !(mas->mas_flags & MA_STATE_BULK)) + return wr_rebalance; + return wr_node_store; } - if (new_end >= mt_slots[wr_mas->type]) { - mas->store_type = wr_split_store; - return; - } + if (new_end >= mt_slots[wr_mas->type]) + return wr_split_store; - if (!mt_in_rcu(mas->tree) && (mas->offset == mas->end)) { - mas->store_type = wr_append; - return; - } + if (!mt_in_rcu(mas->tree) && (mas->offset == mas->end)) + return wr_append; if ((new_end == mas->end) && (!mt_in_rcu(mas->tree) || - (wr_mas->offset_end - mas->offset == 1))) { - mas->store_type = wr_slot_store; - return; - } - - if (mte_is_root(mas->node) || (new_end >= mt_min_slots[wr_mas->type]) || - (mas->mas_flags & MA_STATE_BULK)) { - mas->store_type = wr_node_store; - return; - } + (wr_mas->offset_end - mas->offset == 1))) + return wr_slot_store; - mas->store_type = wr_invalid; - MAS_WARN_ON(mas, 1); + return wr_node_store; } /** @@ -4264,7 +4249,7 @@ static inline void mas_wr_preallocate(struct ma_wr_state *wr_mas, void *entry) int request; mas_wr_prealloc_setup(wr_mas); - mas_wr_store_type(wr_mas); + mas->store_type = mas_wr_store_type(wr_mas); request = mas_prealloc_calc(mas, entry); if (!request) return; @@ -5419,7 +5404,8 @@ void *mas_store(struct ma_state *mas, void *entry) trace_ma_write(__func__, mas, 0, entry); #ifdef CONFIG_DEBUG_MAPLE_TREE if (MAS_WARN_ON(mas, mas->index > mas->last)) - pr_err("Error %lX > %lX %p\n", mas->index, mas->last, entry); + pr_err("Error %lX > %lX " PTR_FMT "\n", mas->index, mas->last, + entry); if (mas->index > mas->last) { mas_set_err(mas, -EINVAL); @@ -5435,7 +5421,7 @@ void *mas_store(struct ma_state *mas, void *entry) * overwrite multiple entries within a self-balancing B-Tree. */ mas_wr_prealloc_setup(&wr_mas); - mas_wr_store_type(&wr_mas); + mas->store_type = mas_wr_store_type(&wr_mas); if (mas->mas_flags & MA_STATE_PREALLOC) { mas_wr_store_entry(&wr_mas); MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas)); @@ -5538,7 +5524,7 @@ int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp) int request; mas_wr_prealloc_setup(&wr_mas); - mas_wr_store_type(&wr_mas); + mas->store_type = mas_wr_store_type(&wr_mas); request = mas_prealloc_calc(mas, entry); if (!request) return ret; @@ -7124,14 +7110,14 @@ static void mt_dump_entry(void *entry, unsigned long min, unsigned long max, mt_dump_range(min, max, depth, format); if (xa_is_value(entry)) - pr_cont("value %ld (0x%lx) [%p]\n", xa_to_value(entry), - xa_to_value(entry), entry); + pr_cont("value %ld (0x%lx) [" PTR_FMT "]\n", xa_to_value(entry), + xa_to_value(entry), entry); else if (xa_is_zero(entry)) pr_cont("zero (%ld)\n", xa_to_internal(entry)); else if (mt_is_reserved(entry)) - pr_cont("UNKNOWN ENTRY (%p)\n", entry); + pr_cont("UNKNOWN ENTRY (" PTR_FMT ")\n", entry); else - pr_cont("%p\n", entry); + pr_cont(PTR_FMT "\n", entry); } static void mt_dump_range64(const struct maple_tree *mt, void *entry, @@ -7147,13 +7133,13 @@ static void mt_dump_range64(const struct maple_tree *mt, void *entry, for (i = 0; i < MAPLE_RANGE64_SLOTS - 1; i++) { switch(format) { case mt_dump_hex: - pr_cont("%p %lX ", node->slot[i], node->pivot[i]); + pr_cont(PTR_FMT " %lX ", node->slot[i], node->pivot[i]); break; case mt_dump_dec: - pr_cont("%p %lu ", node->slot[i], node->pivot[i]); + pr_cont(PTR_FMT " %lu ", node->slot[i], node->pivot[i]); } } - pr_cont("%p\n", node->slot[i]); + pr_cont(PTR_FMT "\n", node->slot[i]); for (i = 0; i < MAPLE_RANGE64_SLOTS; i++) { unsigned long last = max; @@ -7175,11 +7161,11 @@ static void mt_dump_range64(const struct maple_tree *mt, void *entry, if (last > max) { switch(format) { case mt_dump_hex: - pr_err("node %p last (%lx) > max (%lx) at pivot %d!\n", + pr_err("node " PTR_FMT " last (%lx) > max (%lx) at pivot %d!\n", node, last, max, i); break; case mt_dump_dec: - pr_err("node %p last (%lu) > max (%lu) at pivot %d!\n", + pr_err("node " PTR_FMT " last (%lu) > max (%lu) at pivot %d!\n", node, last, max, i); } } @@ -7209,13 +7195,13 @@ static void mt_dump_arange64(const struct maple_tree *mt, void *entry, for (i = 0; i < MAPLE_ARANGE64_SLOTS - 1; i++) { switch (format) { case mt_dump_hex: - pr_cont("%p %lX ", node->slot[i], node->pivot[i]); + pr_cont(PTR_FMT " %lX ", node->slot[i], node->pivot[i]); break; case mt_dump_dec: - pr_cont("%p %lu ", node->slot[i], node->pivot[i]); + pr_cont(PTR_FMT " %lu ", node->slot[i], node->pivot[i]); } } - pr_cont("%p\n", node->slot[i]); + pr_cont(PTR_FMT "\n", node->slot[i]); for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) { unsigned long last = max; @@ -7234,11 +7220,11 @@ static void mt_dump_arange64(const struct maple_tree *mt, void *entry, if (last > max) { switch(format) { case mt_dump_hex: - pr_err("node %p last (%lx) > max (%lx) at pivot %d!\n", + pr_err("node " PTR_FMT " last (%lx) > max (%lx) at pivot %d!\n", node, last, max, i); break; case mt_dump_dec: - pr_err("node %p last (%lu) > max (%lu) at pivot %d!\n", + pr_err("node " PTR_FMT " last (%lu) > max (%lu) at pivot %d!\n", node, last, max, i); } } @@ -7256,8 +7242,8 @@ static void mt_dump_node(const struct maple_tree *mt, void *entry, mt_dump_range(min, max, depth, format); - pr_cont("node %p depth %d type %d parent %p", node, depth, type, - node ? node->parent : NULL); + pr_cont("node " PTR_FMT " depth %d type %d parent " PTR_FMT, node, + depth, type, node ? node->parent : NULL); switch (type) { case maple_dense: pr_cont("\n"); @@ -7285,7 +7271,7 @@ void mt_dump(const struct maple_tree *mt, enum mt_dump_format format) { void *entry = rcu_dereference_check(mt->ma_root, mt_locked(mt)); - pr_info("maple_tree(%p) flags %X, height %u root %p\n", + pr_info("maple_tree(" PTR_FMT ") flags %X, height %u root " PTR_FMT "\n", mt, mt->ma_flags, mt_height(mt), entry); if (!xa_is_node(entry)) mt_dump_entry(entry, 0, 0, 0, format); @@ -7337,7 +7323,7 @@ static void mas_validate_gaps(struct ma_state *mas) MT_BUG_ON(mas->tree, !entry); if (gap > p_end - p_start + 1) { - pr_err("%p[%u] %lu >= %lu - %lu + 1 (%lu)\n", + pr_err(PTR_FMT "[%u] %lu >= %lu - %lu + 1 (%lu)\n", mas_mn(mas), i, gap, p_end, p_start, p_end - p_start + 1); MT_BUG_ON(mas->tree, gap > p_end - p_start + 1); @@ -7357,19 +7343,19 @@ counted: MT_BUG_ON(mas->tree, !gaps); offset = ma_meta_gap(node); if (offset > i) { - pr_err("gap offset %p[%u] is invalid\n", node, offset); + pr_err("gap offset " PTR_FMT "[%u] is invalid\n", node, offset); MT_BUG_ON(mas->tree, 1); } if (gaps[offset] != max_gap) { - pr_err("gap %p[%u] is not the largest gap %lu\n", + pr_err("gap " PTR_FMT "[%u] is not the largest gap %lu\n", node, offset, max_gap); MT_BUG_ON(mas->tree, 1); } for (i++ ; i < mt_slot_count(mte); i++) { if (gaps[i] != 0) { - pr_err("gap %p[%u] beyond node limit != 0\n", + pr_err("gap " PTR_FMT "[%u] beyond node limit != 0\n", node, i); MT_BUG_ON(mas->tree, 1); } @@ -7383,7 +7369,7 @@ counted: p_mn = mte_parent(mte); MT_BUG_ON(mas->tree, max_gap > mas->max); if (ma_gaps(p_mn, mas_parent_type(mas, mte))[p_slot] != max_gap) { - pr_err("gap %p[%u] != %lu\n", p_mn, p_slot, max_gap); + pr_err("gap " PTR_FMT "[%u] != %lu\n", p_mn, p_slot, max_gap); mt_dump(mas->tree, mt_dump_hex); MT_BUG_ON(mas->tree, 1); } @@ -7413,11 +7399,11 @@ static void mas_validate_parent_slot(struct ma_state *mas) node = mas_slot(mas, slots, i); if (i == p_slot) { if (node != mas->node) - pr_err("parent %p[%u] does not have %p\n", + pr_err("parent " PTR_FMT "[%u] does not have " PTR_FMT "\n", parent, i, mas_mn(mas)); MT_BUG_ON(mas->tree, node != mas->node); } else if (node == mas->node) { - pr_err("Invalid child %p at parent %p[%u] p_slot %u\n", + pr_err("Invalid child " PTR_FMT " at parent " PTR_FMT "[%u] p_slot %u\n", mas_mn(mas), parent, i, p_slot); MT_BUG_ON(mas->tree, node == mas->node); } @@ -7439,20 +7425,20 @@ static void mas_validate_child_slot(struct ma_state *mas) child = mas_slot(mas, slots, i); if (!child) { - pr_err("Non-leaf node lacks child at %p[%u]\n", + pr_err("Non-leaf node lacks child at " PTR_FMT "[%u]\n", mas_mn(mas), i); MT_BUG_ON(mas->tree, 1); } if (mte_parent_slot(child) != i) { - pr_err("Slot error at %p[%u]: child %p has pslot %u\n", + pr_err("Slot error at " PTR_FMT "[%u]: child " PTR_FMT " has pslot %u\n", mas_mn(mas), i, mte_to_node(child), mte_parent_slot(child)); MT_BUG_ON(mas->tree, 1); } if (mte_parent(child) != mte_to_node(mas->node)) { - pr_err("child %p has parent %p not %p\n", + pr_err("child " PTR_FMT " has parent " PTR_FMT " not " PTR_FMT "\n", mte_to_node(child), mte_parent(child), mte_to_node(mas->node)); MT_BUG_ON(mas->tree, 1); @@ -7482,24 +7468,24 @@ static void mas_validate_limits(struct ma_state *mas) piv = mas_safe_pivot(mas, pivots, i, type); if (!piv && (i != 0)) { - pr_err("Missing node limit pivot at %p[%u]", + pr_err("Missing node limit pivot at " PTR_FMT "[%u]", mas_mn(mas), i); MAS_WARN_ON(mas, 1); } if (prev_piv > piv) { - pr_err("%p[%u] piv %lu < prev_piv %lu\n", + pr_err(PTR_FMT "[%u] piv %lu < prev_piv %lu\n", mas_mn(mas), i, piv, prev_piv); MAS_WARN_ON(mas, piv < prev_piv); } if (piv < mas->min) { - pr_err("%p[%u] %lu < %lu\n", mas_mn(mas), i, + pr_err(PTR_FMT "[%u] %lu < %lu\n", mas_mn(mas), i, piv, mas->min); MAS_WARN_ON(mas, piv < mas->min); } if (piv > mas->max) { - pr_err("%p[%u] %lu > %lu\n", mas_mn(mas), i, + pr_err(PTR_FMT "[%u] %lu > %lu\n", mas_mn(mas), i, piv, mas->max); MAS_WARN_ON(mas, piv > mas->max); } @@ -7509,7 +7495,7 @@ static void mas_validate_limits(struct ma_state *mas) } if (mas_data_end(mas) != i) { - pr_err("node%p: data_end %u != the last slot offset %u\n", + pr_err("node" PTR_FMT ": data_end %u != the last slot offset %u\n", mas_mn(mas), mas_data_end(mas), i); MT_BUG_ON(mas->tree, 1); } @@ -7518,8 +7504,8 @@ static void mas_validate_limits(struct ma_state *mas) void *entry = mas_slot(mas, slots, i); if (entry && (i != mt_slots[type] - 1)) { - pr_err("%p[%u] should not have entry %p\n", mas_mn(mas), - i, entry); + pr_err(PTR_FMT "[%u] should not have entry " PTR_FMT "\n", + mas_mn(mas), i, entry); MT_BUG_ON(mas->tree, entry != NULL); } @@ -7529,7 +7515,7 @@ static void mas_validate_limits(struct ma_state *mas) if (!piv) continue; - pr_err("%p[%u] should not have piv %lu\n", + pr_err(PTR_FMT "[%u] should not have piv %lu\n", mas_mn(mas), i, piv); MAS_WARN_ON(mas, i < mt_pivots[type] - 1); } @@ -7554,7 +7540,7 @@ static void mt_validate_nulls(struct maple_tree *mt) do { entry = mas_slot(&mas, slots, offset); if (!last && !entry) { - pr_err("Sequential nulls end at %p[%u]\n", + pr_err("Sequential nulls end at " PTR_FMT "[%u]\n", mas_mn(&mas), offset); } MT_BUG_ON(mt, !last && !entry); @@ -7596,7 +7582,8 @@ void mt_validate(struct maple_tree *mt) end = mas_data_end(&mas); if (MAS_WARN_ON(&mas, (end < mt_min_slot_count(mas.node)) && (mas.max != ULONG_MAX))) { - pr_err("Invalid size %u of %p\n", end, mas_mn(&mas)); + pr_err("Invalid size %u of " PTR_FMT "\n", + end, mas_mn(&mas)); } mas_validate_parent_slot(&mas); @@ -7612,7 +7599,8 @@ EXPORT_SYMBOL_GPL(mt_validate); void mas_dump(const struct ma_state *mas) { - pr_err("MAS: tree=%p enode=%p ", mas->tree, mas->node); + pr_err("MAS: tree=" PTR_FMT " enode=" PTR_FMT " ", + mas->tree, mas->node); switch (mas->status) { case ma_active: pr_err("(ma_active)"); @@ -7676,7 +7664,7 @@ void mas_dump(const struct ma_state *mas) pr_err("[%u/%u] index=%lx last=%lx\n", mas->offset, mas->end, mas->index, mas->last); - pr_err(" min=%lx max=%lx alloc=%p, depth=%u, flags=%x\n", + pr_err(" min=%lx max=%lx alloc=" PTR_FMT ", depth=%u, flags=%x\n", mas->min, mas->max, mas->alloc, mas->depth, mas->mas_flags); if (mas->index > mas->last) pr_err("Check index & last\n"); @@ -7685,7 +7673,7 @@ EXPORT_SYMBOL_GPL(mas_dump); void mas_wr_dump(const struct ma_wr_state *wr_mas) { - pr_err("WR_MAS: node=%p r_min=%lx r_max=%lx\n", + pr_err("WR_MAS: node=" PTR_FMT " r_min=%lx r_max=%lx\n", wr_mas->node, wr_mas->r_min, wr_mas->r_max); pr_err(" type=%u off_end=%u, node_end=%u, end_piv=%lx\n", wr_mas->type, wr_mas->offset_end, wr_mas->mas->end, diff --git a/lib/percpu_test.c b/lib/percpu_test.c index 4a3d70bbc1a0..ce7124b16dab 100644 --- a/lib/percpu_test.c +++ b/lib/percpu_test.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only +#include <linux/limits.h> #include <linux/module.h> /* validate @native and @pcp counter values match @expected */ @@ -24,8 +25,9 @@ static int __init percpu_test_init(void) * +ul_one/-ul_one below would replace with inc/dec instructions. */ volatile unsigned int ui_one = 1; - long l = 0; + unsigned long long ull = 0; unsigned long ul = 0; + long l = 0; pr_info("percpu test start\n"); @@ -112,6 +114,13 @@ static int __init percpu_test_init(void) CHECK(ul, ulong_counter, -1); CHECK(ul, ulong_counter, ULONG_MAX); + ul = ull = 0; + __this_cpu_write(ulong_counter, 0); + + ul = ull += UINT_MAX; + __this_cpu_add(ulong_counter, ull); + CHECK(ul, ulong_counter, UINT_MAX); + ul = 3; __this_cpu_write(ulong_counter, 3); |