diff options
author | Liam R. Howlett <Liam.Howlett@Oracle.com> | 2024-08-30 00:00:54 -0400 |
---|---|---|
committer | Andrew Morton <akpm@linux-foundation.org> | 2024-09-03 21:15:51 -0700 |
commit | f8d112a4e657c65c888e6b8a8435ef61a66e4ab8 (patch) | |
tree | 29a13a59d9e65ededb27fac972068278dcd80e35 /mm/mmap.c | |
parent | 94f59ea591f17d5fb77f68e820b27522596a7e9e (diff) |
mm/mmap: avoid zeroing vma tree in mmap_region()
Instead of zeroing the vma tree and then overwriting the area, let the
area be overwritten and then clean up the gathered vmas using
vms_complete_munmap_vmas().
To ensure locking is downgraded correctly, the mm is set regardless of
MAP_FIXED or not (NULL vma).
If a driver is mapping over an existing vma, then clear the ptes before
the call_mmap() invocation. This is done using the vms_clean_up_area()
helper. If there is a close vm_ops, that must also be called to ensure
any cleanup is done before mapping over the area. This also means that
calling open has been added to the abort of an unmap operation, for now.
Since vm_ops->open() and vm_ops->close() are not always undo each other
(state cleanup may exist in ->close() that is lost forever), the code
cannot be left in this way, but that change has been isolated to another
commit to make this point very obvious for traceability.
Temporarily keep track of the number of pages that will be removed and
reduce the charged amount.
This also drops the validate_mm() call in the vma_expand() function. It
is necessary to drop the validate as it would fail since the mm map_count
would be incorrect during a vma expansion, prior to the cleanup from
vms_complete_munmap_vmas().
Clean up the error handing of the vms_gather_munmap_vmas() by calling the
verification within the function.
Link: https://lkml.kernel.org/r/20240830040101.822209-15-Liam.Howlett@oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Bert Karwatzki <spasswolf@web.de>
Cc: Jeff Xu <jeffxu@chromium.org>
Cc: Jiri Olsa <olsajiri@gmail.com>
Cc: Kees Cook <kees@kernel.org>
Cc: Lorenzo Stoakes <lstoakes@gmail.com>
Cc: Mark Brown <broonie@kernel.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: "Paul E. McKenney" <paulmck@kernel.org>
Cc: Paul Moore <paul@paul-moore.com>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'mm/mmap.c')
-rw-r--r-- | mm/mmap.c | 57 |
1 files changed, 27 insertions, 30 deletions
diff --git a/mm/mmap.c b/mm/mmap.c index 304dc085533a..405e0432c78e 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1373,23 +1373,19 @@ unsigned long mmap_region(struct file *file, unsigned long addr, unsigned long merge_start = addr, merge_end = end; bool writable_file_mapping = false; pgoff_t vm_pgoff; - int error; + int error = -ENOMEM; VMA_ITERATOR(vmi, mm, addr); + unsigned long nr_pages, nr_accounted; - /* Check against address space limit. */ - if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) { - unsigned long nr_pages; + nr_pages = count_vma_pages_range(mm, addr, end, &nr_accounted); - /* - * MAP_FIXED may remove pages of mappings that intersects with - * requested mapping. Account for the pages it would unmap. - */ - nr_pages = count_vma_pages_range(mm, addr, end); - - if (!may_expand_vm(mm, vm_flags, - (len >> PAGE_SHIFT) - nr_pages)) - return -ENOMEM; - } + /* + * Check against address space limit. + * MAP_FIXED may remove pages of mappings that intersects with requested + * mapping. Account for the pages it would unmap. + */ + if (!may_expand_vm(mm, vm_flags, (len >> PAGE_SHIFT) - nr_pages)) + return -ENOMEM; /* Find the first overlapping VMA */ vma = vma_find(&vmi, end); @@ -1403,13 +1399,6 @@ unsigned long mmap_region(struct file *file, unsigned long addr, if (error) goto gather_failed; - /* Remove any existing mappings from the vma tree */ - error = vma_iter_clear_gfp(&vmi, addr, end, GFP_KERNEL); - if (error) - goto clear_tree_failed; - - /* Unmap any existing mapping in the area */ - vms_complete_munmap_vmas(&vms, &mas_detach); next = vms.next; prev = vms.prev; vma = NULL; @@ -1425,8 +1414,10 @@ unsigned long mmap_region(struct file *file, unsigned long addr, */ if (accountable_mapping(file, vm_flags)) { charged = len >> PAGE_SHIFT; + charged -= nr_accounted; if (security_vm_enough_memory_mm(mm, charged)) - return -ENOMEM; + goto abort_munmap; + vms.nr_accounted = 0; vm_flags |= VM_ACCOUNT; } @@ -1475,10 +1466,8 @@ cannot_expand: * not unmapped, but the maps are removed from the list. */ vma = vm_area_alloc(mm); - if (!vma) { - error = -ENOMEM; + if (!vma) goto unacct_error; - } vma_iter_config(&vmi, addr, end); vma_set_range(vma, addr, end, pgoff); @@ -1487,6 +1476,11 @@ cannot_expand: if (file) { vma->vm_file = get_file(file); + /* + * call_mmap() may map PTE, so ensure there are no existing PTEs + * call the vm_ops close function if one exists. + */ + vms_clean_up_area(&vms, &mas_detach, true); error = call_mmap(file, vma); if (error) goto unmap_and_free_vma; @@ -1577,6 +1571,9 @@ unmap_writable: expanded: perf_event_mmap(vma); + /* Unmap any existing mapping in the area */ + vms_complete_munmap_vmas(&vms, &mas_detach); + vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT); if (vm_flags & VM_LOCKED) { if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) || @@ -1605,7 +1602,7 @@ expanded: return addr; close_and_free_vma: - if (file && vma->vm_ops && vma->vm_ops->close) + if (file && !vms.closed_vm_ops && vma->vm_ops && vma->vm_ops->close) vma->vm_ops->close(vma); if (file || vma->vm_file) { @@ -1625,9 +1622,9 @@ unacct_error: if (charged) vm_unacct_memory(charged); -clear_tree_failed: - if (vms.vma_count) - abort_munmap_vmas(&mas_detach); +abort_munmap: + if (vms.nr_pages) + abort_munmap_vmas(&mas_detach, vms.closed_vm_ops); gather_failed: validate_mm(mm); return error; @@ -1960,7 +1957,7 @@ void exit_mmap(struct mm_struct *mm) do { if (vma->vm_flags & VM_ACCOUNT) nr_accounted += vma_pages(vma); - remove_vma(vma, true); + remove_vma(vma, /* unreachable = */ true, /* closed = */ false); count++; cond_resched(); vma = vma_next(&vmi); |