From dd3b8353bae79395b12a178de057b183ff0122eb Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Mon, 23 Sep 2019 15:36:36 -0700 Subject: mm/vmalloc: do not keep unpurged areas in the busy tree The busy tree can be quite big, even though the area is freed or unmapped it still stays there until "purge" logic removes it. 1) Optimize and reduce the size of "busy" tree by removing a node from it right away as soon as user triggers free paths. It is possible to do so, because the allocation is done using another augmented tree. The vmalloc test driver shows the difference, for example the "fix_size_alloc_test" is ~11% better comparing with default configuration: sudo ./test_vmalloc.sh performance Summary: fix_size_alloc_test loops: 1000000 avg: 993985 usec Summary: full_fit_alloc_test loops: 1000000 avg: 973554 usec Summary: long_busy_list_alloc_test loops: 1000000 avg: 12617652 usec Summary: fix_size_alloc_test loops: 1000000 avg: 882263 usec Summary: full_fit_alloc_test loops: 1000000 avg: 973407 usec Summary: long_busy_list_alloc_test loops: 1000000 avg: 12593929 usec 2) Since the busy tree now contains allocated areas only and does not interfere with lazily free nodes, introduce the new function show_purge_info() that dumps "unpurged" areas that is propagated through "/proc/vmallocinfo". 3) Eliminate VM_LAZY_FREE flag. Link: http://lkml.kernel.org/r/20190716152656.12255-2-lpf.vector@gmail.com Signed-off-by: Uladzislau Rezki (Sony) Signed-off-by: Pengfei Li Cc: Roman Gushchin Cc: Uladzislau Rezki Cc: Hillf Danton Cc: Michal Hocko Cc: Matthew Wilcox Cc: Oleksiy Avramchenko Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 52 ++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 44 insertions(+), 8 deletions(-) (limited to 'mm/vmalloc.c') diff --git a/mm/vmalloc.c b/mm/vmalloc.c index c1246d77cf75..d535ef125bda 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -329,7 +329,6 @@ EXPORT_SYMBOL(vmalloc_to_pfn); #define DEBUG_AUGMENT_PROPAGATE_CHECK 0 #define DEBUG_AUGMENT_LOWEST_MATCH_CHECK 0 -#define VM_LAZY_FREE 0x02 #define VM_VM_AREA 0x04 static DEFINE_SPINLOCK(vmap_area_lock); @@ -1282,7 +1281,14 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end) llist_for_each_entry_safe(va, n_va, valist, purge_list) { unsigned long nr = (va->va_end - va->va_start) >> PAGE_SHIFT; - __free_vmap_area(va); + /* + * Finally insert or merge lazily-freed area. It is + * detached and there is no need to "unlink" it from + * anything. + */ + merge_or_add_vmap_area(va, + &free_vmap_area_root, &free_vmap_area_list); + atomic_long_sub(nr, &vmap_lazy_nr); if (atomic_long_read(&vmap_lazy_nr) < resched_threshold) @@ -1324,6 +1330,10 @@ static void free_vmap_area_noflush(struct vmap_area *va) { unsigned long nr_lazy; + spin_lock(&vmap_area_lock); + unlink_va(va, &vmap_area_root); + spin_unlock(&vmap_area_lock); + nr_lazy = atomic_long_add_return((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr); @@ -2143,14 +2153,13 @@ struct vm_struct *remove_vm_area(const void *addr) might_sleep(); - va = find_vmap_area((unsigned long)addr); + spin_lock(&vmap_area_lock); + va = __find_vmap_area((unsigned long)addr); if (va && va->flags & VM_VM_AREA) { struct vm_struct *vm = va->vm; - spin_lock(&vmap_area_lock); va->vm = NULL; va->flags &= ~VM_VM_AREA; - va->flags |= VM_LAZY_FREE; spin_unlock(&vmap_area_lock); kasan_free_shadow(vm); @@ -2158,6 +2167,8 @@ struct vm_struct *remove_vm_area(const void *addr) return vm; } + + spin_unlock(&vmap_area_lock); return NULL; } @@ -3450,6 +3461,22 @@ static void show_numa_info(struct seq_file *m, struct vm_struct *v) } } +static void show_purge_info(struct seq_file *m) +{ + struct llist_node *head; + struct vmap_area *va; + + head = READ_ONCE(vmap_purge_list.first); + if (head == NULL) + return; + + llist_for_each_entry(va, head, purge_list) { + seq_printf(m, "0x%pK-0x%pK %7ld unpurged vm_area\n", + (void *)va->va_start, (void *)va->va_end, + va->va_end - va->va_start); + } +} + static int s_show(struct seq_file *m, void *p) { struct vmap_area *va; @@ -3462,10 +3489,9 @@ static int s_show(struct seq_file *m, void *p) * behalf of vmap area is being tear down or vm_map_ram allocation. */ if (!(va->flags & VM_VM_AREA)) { - seq_printf(m, "0x%pK-0x%pK %7ld %s\n", + seq_printf(m, "0x%pK-0x%pK %7ld vm_map_ram\n", (void *)va->va_start, (void *)va->va_end, - va->va_end - va->va_start, - va->flags & VM_LAZY_FREE ? "unpurged vm_area" : "vm_map_ram"); + va->va_end - va->va_start); return 0; } @@ -3504,6 +3530,16 @@ static int s_show(struct seq_file *m, void *p) show_numa_info(m, v); seq_putc(m, '\n'); + + /* + * As a final step, dump "unpurged" areas. Note, + * that entire "/proc/vmallocinfo" output will not + * be address sorted, because the purge list is not + * sorted. + */ + if (list_is_last(&va->list, &vmap_area_list)) + show_purge_info(m); + return 0; } -- cgit v1.2.3-70-g09d2 From 688fcbfc06e4fdfbb7e1d5a942a1460fe6379d2d Mon Sep 17 00:00:00 2001 From: Pengfei Li Date: Mon, 23 Sep 2019 15:36:39 -0700 Subject: mm/vmalloc: modify struct vmap_area to reduce its size Objective --------- The current implementation of struct vmap_area wasted space. After applying this commit, sizeof(struct vmap_area) has been reduced from 11 words to 8 words. Description ----------- 1) Pack "subtree_max_size", "vm" and "purge_list". This is no problem because A) "subtree_max_size" is only used when vmap_area is in "free" tree B) "vm" is only used when vmap_area is in "busy" tree C) "purge_list" is only used when vmap_area is in vmap_purge_list 2) Eliminate "flags". ;Since only one flag VM_VM_AREA is being used, and the same thing can be done by judging whether "vm" is NULL, then the "flags" can be eliminated. Link: http://lkml.kernel.org/r/20190716152656.12255-3-lpf.vector@gmail.com Signed-off-by: Pengfei Li Suggested-by: Uladzislau Rezki (Sony) Reviewed-by: Uladzislau Rezki (Sony) Cc: Hillf Danton Cc: Matthew Wilcox Cc: Michal Hocko Cc: Oleksiy Avramchenko Cc: Roman Gushchin Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 20 +++++++++++++------- mm/vmalloc.c | 24 ++++++++++-------------- 2 files changed, 23 insertions(+), 21 deletions(-) (limited to 'mm/vmalloc.c') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index dfa718ffdd4f..4e7809408073 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -53,15 +53,21 @@ struct vmap_area { unsigned long va_start; unsigned long va_end; - /* - * Largest available free size in subtree. - */ - unsigned long subtree_max_size; - unsigned long flags; struct rb_node rb_node; /* address sorted rbtree */ struct list_head list; /* address sorted list */ - struct llist_node purge_list; /* "lazy purge" list */ - struct vm_struct *vm; + + /* + * The following three variables can be packed, because + * a vmap_area object is always one of the three states: + * 1) in "free" tree (root is vmap_area_root) + * 2) in "busy" tree (root is free_vmap_area_root) + * 3) in purge list (head is vmap_purge_list) + */ + union { + unsigned long subtree_max_size; /* in "free" tree */ + struct vm_struct *vm; /* in "busy" tree */ + struct llist_node purge_list; /* in purge list */ + }; }; /* diff --git a/mm/vmalloc.c b/mm/vmalloc.c index d535ef125bda..f095843fc243 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -329,7 +329,6 @@ EXPORT_SYMBOL(vmalloc_to_pfn); #define DEBUG_AUGMENT_PROPAGATE_CHECK 0 #define DEBUG_AUGMENT_LOWEST_MATCH_CHECK 0 -#define VM_VM_AREA 0x04 static DEFINE_SPINLOCK(vmap_area_lock); /* Export for kexec only */ @@ -1115,7 +1114,7 @@ retry: va->va_start = addr; va->va_end = addr + size; - va->flags = 0; + va->vm = NULL; insert_vmap_area(va, &vmap_area_root, &vmap_area_list); spin_unlock(&vmap_area_lock); @@ -1928,7 +1927,6 @@ void __init vmalloc_init(void) if (WARN_ON_ONCE(!va)) continue; - va->flags = VM_VM_AREA; va->va_start = (unsigned long)tmp->addr; va->va_end = va->va_start + tmp->size; va->vm = tmp; @@ -2026,7 +2024,6 @@ static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va, vm->size = va->va_end - va->va_start; vm->caller = caller; va->vm = vm; - va->flags |= VM_VM_AREA; spin_unlock(&vmap_area_lock); } @@ -2131,10 +2128,10 @@ struct vm_struct *find_vm_area(const void *addr) struct vmap_area *va; va = find_vmap_area((unsigned long)addr); - if (va && va->flags & VM_VM_AREA) - return va->vm; + if (!va) + return NULL; - return NULL; + return va->vm; } /** @@ -2155,11 +2152,10 @@ struct vm_struct *remove_vm_area(const void *addr) spin_lock(&vmap_area_lock); va = __find_vmap_area((unsigned long)addr); - if (va && va->flags & VM_VM_AREA) { + if (va && va->vm) { struct vm_struct *vm = va->vm; va->vm = NULL; - va->flags &= ~VM_VM_AREA; spin_unlock(&vmap_area_lock); kasan_free_shadow(vm); @@ -2862,7 +2858,7 @@ long vread(char *buf, char *addr, unsigned long count) if (!count) break; - if (!(va->flags & VM_VM_AREA)) + if (!va->vm) continue; vm = va->vm; @@ -2942,7 +2938,7 @@ long vwrite(char *buf, char *addr, unsigned long count) if (!count) break; - if (!(va->flags & VM_VM_AREA)) + if (!va->vm) continue; vm = va->vm; @@ -3485,10 +3481,10 @@ static int s_show(struct seq_file *m, void *p) va = list_entry(p, struct vmap_area, list); /* - * s_show can encounter race with remove_vm_area, !VM_VM_AREA on - * behalf of vmap area is being tear down or vm_map_ram allocation. + * s_show can encounter race with remove_vm_area, !vm on behalf + * of vmap area is being tear down or vm_map_ram allocation. */ - if (!(va->flags & VM_VM_AREA)) { + if (!va->vm) { seq_printf(m, "0x%pK-0x%pK %7ld vm_map_ram\n", (void *)va->va_start, (void *)va->va_end, va->va_end - va->va_start); -- cgit v1.2.3-70-g09d2 From 7ea362427c170061b8822dd41bafaa72b3bcb9ad Mon Sep 17 00:00:00 2001 From: Austin Kim Date: Mon, 23 Sep 2019 15:36:42 -0700 Subject: mm/vmalloc.c: move 'area->pages' after if statement If !area->pages statement is true where memory allocation fails, area is freed. In this case 'area->pages = pages' should not executed. So move 'area->pages = pages' after if statement. [akpm@linux-foundation.org: give area->pages the same treatment] Link: http://lkml.kernel.org/r/20190830035716.GA190684@LGEARND20B15 Signed-off-by: Austin Kim Acked-by: Michal Hocko Reviewed-by: Andrew Morton Cc: Uladzislau Rezki (Sony) Cc: Roman Gushchin Cc: Roman Penyaev Cc: Rick Edgecombe Cc: Mike Rapoport Cc: Andrey Ryabinin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'mm/vmalloc.c') diff --git a/mm/vmalloc.c b/mm/vmalloc.c index f095843fc243..fcadd3e25c0c 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2409,7 +2409,6 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, nr_pages = get_vm_area_size(area) >> PAGE_SHIFT; array_size = (nr_pages * sizeof(struct page *)); - area->nr_pages = nr_pages; /* Please note that the recursion is strictly bounded. */ if (array_size > PAGE_SIZE) { pages = __vmalloc_node(array_size, 1, nested_gfp|highmem_mask, @@ -2417,13 +2416,16 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, } else { pages = kmalloc_node(array_size, nested_gfp, node); } - area->pages = pages; - if (!area->pages) { + + if (!pages) { remove_vm_area(area->addr); kfree(area); return NULL; } + area->pages = pages; + area->nr_pages = nr_pages; + for (i = 0; i < area->nr_pages; i++) { struct page *page; -- cgit v1.2.3-70-g09d2