From 4b90951c0bd8ca6695837354a253794192f6dfd5 Mon Sep 17 00:00:00 2001
From: Jianguo Wu
Date: Tue, 12 Nov 2013 15:07:11 -0800
Subject: mm/vmalloc: use NUMA_NO_NODE

Use the more appropriate "if (node == NUMA_NO_NODE)" instead of
"if (node < 0)".

Signed-off-by: Jianguo Wu
Acked-by: David Rientjes
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/vmalloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'mm/vmalloc.c')

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 107454312d5e..dea15e6bfc8d 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1577,7 +1577,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 		struct page *page;
 		gfp_t tmp_mask = gfp_mask | __GFP_NOWARN;
 
-		if (node < 0)
+		if (node == NUMA_NO_NODE)
 			page = alloc_page(tmp_mask);
 		else
 			page = alloc_pages_node(node, tmp_mask, order);
--
cgit v1.2.3-70-g09d2

From 3722e13cff361035583f6ecfa784437b824fe659 Mon Sep 17 00:00:00 2001
From: Wanpeng Li
Date: Tue, 12 Nov 2013 15:07:29 -0800
Subject: mm/vmalloc: don't set area->caller twice

The caller address has already been set in set_vmalloc_vm(); there is no
need to set it again in __vmalloc_area_node().

Reviewed-by: Zhang Yanfei
Signed-off-by: Wanpeng Li
Cc: Joonsoo Kim
Cc: KOSAKI Motohiro
Cc: Mitsuo Hayasaka
Cc: David Rientjes
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/vmalloc.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'mm/vmalloc.c')

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index dea15e6bfc8d..285f0e7d28e7 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1546,7 +1546,7 @@ static void *__vmalloc_node(unsigned long size, unsigned long align,
 			    gfp_t gfp_mask, pgprot_t prot,
 			    int node, const void *caller);
 static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
-				 pgprot_t prot, int node, const void *caller)
+				 pgprot_t prot, int node)
 {
 	const int order = 0;
 	struct page **pages;
@@ -1560,13 +1560,12 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 	/* Please note that the recursion is strictly bounded. */
 	if (array_size > PAGE_SIZE) {
 		pages = __vmalloc_node(array_size, 1, nested_gfp|__GFP_HIGHMEM,
-				PAGE_KERNEL, node, caller);
+				PAGE_KERNEL, node, area->caller);
 		area->flags |= VM_VPAGES;
 	} else {
 		pages = kmalloc_node(array_size, nested_gfp, node);
 	}
 	area->pages = pages;
-	area->caller = caller;
 	if (!area->pages) {
 		remove_vm_area(area->addr);
 		kfree(area);
@@ -1634,7 +1633,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
 	if (!area)
 		goto fail;
 
-	addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller);
+	addr = __vmalloc_area_node(area, gfp_mask, prot, node);
 	if (!addr)
 		goto fail;
 
--
cgit v1.2.3-70-g09d2
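A minimal sketch of the NUMA_NO_NODE idiom the first patch above switches to
(the helper below is illustrative, not from any commit here): callers that
accept an optional node compare against NUMA_NO_NODE instead of testing for a
negative value.

	#include <linux/gfp.h>
	#include <linux/numa.h>

	/* Sketch only: allocate one page, preferring 'node' when one was given. */
	static struct page *alloc_one_page_on(int node, gfp_t gfp_mask)
	{
		gfp_t tmp_mask = gfp_mask | __GFP_NOWARN;

		if (node == NUMA_NO_NODE)	/* caller expressed no node preference */
			return alloc_page(tmp_mask);
		return alloc_pages_node(node, tmp_mask, 0);	/* order-0 page on 'node' */
	}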
From c2ce8c142c43c360047e173d2018d94a4d0f7a59 Mon Sep 17 00:00:00 2001
From: Wanpeng Li
Date: Tue, 12 Nov 2013 15:07:31 -0800
Subject: mm/vmalloc: fix show vmap_area information race with vmap_area tear down

There is a race window between vmap_area tear-down and the dumping of
vmap_area information:

		A					B

					remove_vm_area
					  spin_lock(&vmap_area_lock);
					  va->vm = NULL;
					  va->flags &= ~VM_VM_AREA;
					  spin_unlock(&vmap_area_lock);
	spin_lock(&vmap_area_lock);
	if (va->flags & (VM_LAZY_FREE | VM_LAZY_FREEING))
		return 0;
	if (!(va->flags & VM_VM_AREA)) {
		seq_printf(m, "0x%pK-0x%pK %7ld vm_map_ram\n",
			(void *)va->va_start, (void *)va->va_end,
			va->va_end - va->va_start);
		return 0;
	}
					free_unmap_vmap_area(va);
					  flush_cache_vunmap
					  free_unmap_vmap_area_noflush
					    unmap_vmap_area
					    free_vmap_area_noflush
					      va->flags |= VM_LAZY_FREE

The assumption that !VM_VM_AREA means a vm_map_ram allocation was
introduced by d4033afdf828 ("mm, vmalloc: iterate vmap_area_list,
instead of vmlist, in vmallocinfo()").  However, !VM_VM_AREA can also
mean that the vmap_area is being torn down in the race window shown
above.  Fix this by not dumping any information for the !VM_VM_AREA
case, and also drop the (VM_LAZY_FREE | VM_LAZY_FREEING) check: those
flags are only ever set on areas whose VM_VM_AREA bit is already clear,
so the new check covers them.

Suggested-by: Joonsoo Kim
Acked-by: KOSAKI Motohiro
Signed-off-by: Wanpeng Li
Cc: Mitsuo Hayasaka
Cc: Zhang Yanfei
Cc: David Rientjes
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/vmalloc.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

(limited to 'mm/vmalloc.c')

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 285f0e7d28e7..814ce9122709 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2578,15 +2578,12 @@ static int s_show(struct seq_file *m, void *p)
 	struct vmap_area *va = p;
 	struct vm_struct *v;
 
-	if (va->flags & (VM_LAZY_FREE | VM_LAZY_FREEING))
-		return 0;
-
-	if (!(va->flags & VM_VM_AREA)) {
-		seq_printf(m, "0x%pK-0x%pK %7ld vm_map_ram\n",
-			(void *)va->va_start, (void *)va->va_end,
-			va->va_end - va->va_start);
+	/*
+	 * s_show can encounter race with remove_vm_area, !VM_VM_AREA on
+	 * behalf of vmap area is being tear down or vm_map_ram allocation.
+	 */
+	if (!(va->flags & VM_VM_AREA))
 		return 0;
-	}
 
 	v = va->vm;
 
--
cgit v1.2.3-70-g09d2
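The fix relies on vmap_area_lock being held for the whole /proc/vmallocinfo
walk (it is taken in s_start() and released in s_stop()), so once s_show()
observes VM_VM_AREA it can safely dereference va->vm.  A hedged sketch of that
invariant, as it would sit inside mm/vmalloc.c; the helper name and the lockdep
assertion are illustrative and not part of the patch.

	/* Sketch only: the shape of s_show() after the patch above. */
	static int show_one_area(struct seq_file *m, struct vmap_area *va)
	{
		struct vm_struct *v;

		lockdep_assert_held(&vmap_area_lock);	/* held across the whole seq_file walk */

		if (!(va->flags & VM_VM_AREA))
			return 0;	/* mid-teardown or vm_map_ram: nothing safe to report */

		v = va->vm;		/* cannot become NULL while the lock is held */
		seq_printf(m, "0x%pK-0x%pK %7ld\n",
			v->addr, v->addr + v->size, v->size);
		return 0;
	}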
From af12346cdacda36f0c35c657088282b8ecd0df72 Mon Sep 17 00:00:00 2001
From: Wanpeng Li
Date: Tue, 12 Nov 2013 15:07:32 -0800
Subject: mm/vmalloc: revert "mm/vmalloc.c: check VM_UNINITIALIZED flag in s_show instead of show_numa_info"

The VM_UNINITIALIZED/VM_UNLIST flag introduced by f5252e009d5b ("mm:
avoid null pointer access in vm_struct via /proc/vmallocinfo") is used
to avoid accessing the pages field while the pages are still
unallocated when show_numa_info() is called.  This patch moves the
check back into show_numa_info(), so that the remaining information can
still be dumped via /proc/vmallocinfo.

This patch reverts commit d157a55815ff ("mm/vmalloc.c: check
VM_UNINITIALIZED flag in s_show instead of show_numa_info").

Reviewed-by: Zhang Yanfei
Signed-off-by: Wanpeng Li
Cc: Mitsuo Hayasaka
Cc: Joonsoo Kim
Cc: KOSAKI Motohiro
Cc: David Rientjes
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/vmalloc.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'mm/vmalloc.c')

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 814ce9122709..67535f87846c 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2562,6 +2562,11 @@ static void show_numa_info(struct seq_file *m, struct vm_struct *v)
 		if (!counters)
 			return;
 
+		/* Pair with smp_wmb() in clear_vm_uninitialized_flag() */
+		smp_rmb();
+		if (v->flags & VM_UNINITIALIZED)
+			return;
+
 		memset(counters, 0, nr_node_ids * sizeof(unsigned int));
 
 		for (nr = 0; nr < v->nr_pages; nr++)
@@ -2587,11 +2592,6 @@ static int s_show(struct seq_file *m, void *p)
 
 	v = va->vm;
 
-	/* Pair with smp_wmb() in clear_vm_uninitialized_flag() */
-	smp_rmb();
-	if (v->flags & VM_UNINITIALIZED)
-		return 0;
-
 	seq_printf(m, "0x%pK-0x%pK %7ld",
 		v->addr, v->addr + v->size, v->size);
 
--
cgit v1.2.3-70-g09d2

From b82225f3ff5be4c52cb588a4a53686db50aa6eb6 Mon Sep 17 00:00:00 2001
From: Wanpeng Li
Date: Tue, 12 Nov 2013 15:07:33 -0800
Subject: revert mm/vmalloc.c: emit the failure message before return

Don't warn twice in __vmalloc_area_node() and __vmalloc_node_range()
when the allocation in __vmalloc_area_node() fails.

This patch reverts commit 46c001a2753f ("mm/vmalloc.c: emit the failure
message before return").

Signed-off-by: Wanpeng Li
Reviewed-by: Zhang Yanfei
Cc: Joonsoo Kim
Cc: KOSAKI Motohiro
Cc: Mitsuo Hayasaka
Cc: David Rientjes
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/vmalloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'mm/vmalloc.c')

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 67535f87846c..745fa9567475 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1635,7 +1635,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
 
 	addr = __vmalloc_area_node(area, gfp_mask, prot, node);
 	if (!addr)
-		goto fail;
+		return NULL;
 
 	/*
 	 * In this function, newly allocated vm_struct has VM_UNINITIALIZED
--
cgit v1.2.3-70-g09d2
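For context on the smp_rmb() that the fourth patch moves back into
show_numa_info(): it pairs with the smp_wmb() in clear_vm_uninitialized_flag(),
which publishes a fully initialised vm_struct by clearing VM_UNINITIALIZED only
after its fields have been stored.  Below is a hedged, condensed sketch of that
pairing; the two helper functions are illustrative and not actual kernel code.

	#include <linux/vmalloc.h>
	#include <linux/mm.h>
	#include <asm/barrier.h>

	/* Writer side: make the earlier field stores visible before clearing the flag. */
	static void publish_vm(struct vm_struct *v)
	{
		smp_wmb();			/* order v->nr_pages / v->pages stores before the flag store */
		v->flags &= ~VM_UNINITIALIZED;
	}

	/* Reader side: only walk v->pages once the flag is observed clear. */
	static void count_pages_per_node(struct vm_struct *v, unsigned int *counters)
	{
		unsigned int nr;

		smp_rmb();			/* pairs with smp_wmb() in publish_vm() */
		if (v->flags & VM_UNINITIALIZED)
			return;			/* fields may not be populated yet */

		for (nr = 0; nr < v->nr_pages; nr++)
			counters[page_to_nid(v->pages[nr])]++;
	}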
From 7f88f88f83ed609650a01b18572e605ea50cd163 Mon Sep 17 00:00:00 2001
From: Catalin Marinas
Date: Tue, 12 Nov 2013 15:07:45 -0800
Subject: mm: kmemleak: avoid false negatives on vmalloc'ed objects

Commit 248ac0e1943a ("mm/vmalloc: remove guard page from between vmap
blocks") had the side effect of making vmap_area.va_end member point to
the next vmap_area.va_start.  This was creating an artificial reference
to vmalloc'ed objects and kmemleak was rarely reporting vmalloc() leaks.

This patch marks the vmap_area containing pointers explicitly and
reduces the min ref_count to 2 as vm_struct still contains a reference
to the vmalloc'ed object.  The kmemleak add_scan_area() function has
been improved to allow a SIZE_MAX argument covering the rest of the
object (for simpler calling sites).

Signed-off-by: Catalin Marinas
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/kmemleak.c |  4 +++-
 mm/vmalloc.c  | 14 ++++++++++----
 2 files changed, 13 insertions(+), 5 deletions(-)

(limited to 'mm/vmalloc.c')

diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index e126b0ef9ad2..31f01c5011e5 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -753,7 +753,9 @@ static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp)
 	}
 
 	spin_lock_irqsave(&object->lock, flags);
-	if (ptr + size > object->pointer + object->size) {
+	if (size == SIZE_MAX) {
+		size = object->pointer + object->size - ptr;
+	} else if (ptr + size > object->pointer + object->size) {
 		kmemleak_warn("Scan area larger than object 0x%08lx\n", ptr);
 		dump_object_info(object);
 		kmem_cache_free(scan_area_cache, area);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 745fa9567475..0fdf96803c5b 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -359,6 +359,12 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
 	if (unlikely(!va))
 		return ERR_PTR(-ENOMEM);
 
+	/*
+	 * Only scan the relevant parts containing pointers to other objects
+	 * to avoid false negatives.
+	 */
+	kmemleak_scan_area(&va->rb_node, SIZE_MAX, gfp_mask & GFP_RECLAIM_MASK);
+
 retry:
 	spin_lock(&vmap_area_lock);
 	/*
@@ -1645,11 +1651,11 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
 	clear_vm_uninitialized_flag(area);
 
 	/*
-	 * A ref_count = 3 is needed because the vm_struct and vmap_area
-	 * structures allocated in the __get_vm_area_node() function contain
-	 * references to the virtual address of the vmalloc'ed block.
+	 * A ref_count = 2 is needed because vm_struct allocated in
+	 * __get_vm_area_node() contains a reference to the virtual address of
+	 * the vmalloc'ed block.
 	 */
-	kmemleak_alloc(addr, real_size, 3, gfp_mask);
+	kmemleak_alloc(addr, real_size, 2, gfp_mask);
 
 	return addr;
 
--
cgit v1.2.3-70-g09d2
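The kmemleak calls in the last patch follow a general pattern for tracking
structures whose address-like fields can masquerade as references: restrict
scanning to the part of the object that holds real pointers via
kmemleak_scan_area(), using the SIZE_MAX form added above to mean "up to the
end of the object".  Below is a hedged sketch of that pattern outside of
mm/vmalloc.c; struct tracked_buf and tracked_alloc() are made-up names, and
this is not code from any commit here.

	#include <linux/kernel.h>
	#include <linux/kmemleak.h>
	#include <linux/rbtree.h>
	#include <linux/slab.h>
	#include <linux/vmalloc.h>

	struct tracked_buf {
		unsigned long start;		/* address range that may alias other objects */
		unsigned long end;
		struct rb_node rb_node;		/* links the tracker into an rbtree */
		void *data;			/* the only real reference to the buffer */
	};

	static struct tracked_buf *tracked_alloc(size_t size, gfp_t gfp)
	{
		struct tracked_buf *t = kmalloc(sizeof(*t), gfp);

		if (!t)
			return NULL;

		t->data = vmalloc(size);
		if (!t->data) {
			kfree(t);
			return NULL;
		}
		t->start = (unsigned long)t->data;
		t->end = t->start + size;

		/*
		 * Scan only from rb_node to the end of the tracker, so that
		 * start/end cannot act as accidental references and hide
		 * leaks; the same trick as kmemleak_scan_area(&va->rb_node,
		 * SIZE_MAX, ...) in the patch above.
		 */
		kmemleak_scan_area(&t->rb_node, SIZE_MAX, gfp);
		return t;
	}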