summaryrefslogtreecommitdiff
path: root/mm/vma.c
diff options
context:
space:
mode:
authorLiam R. Howlett <Liam.Howlett@Oracle.com>2024-08-30 00:00:54 -0400
committerAndrew Morton <akpm@linux-foundation.org>2024-09-03 21:15:51 -0700
commitf8d112a4e657c65c888e6b8a8435ef61a66e4ab8 (patch)
tree29a13a59d9e65ededb27fac972068278dcd80e35 /mm/vma.c
parent94f59ea591f17d5fb77f68e820b27522596a7e9e (diff)
mm/mmap: avoid zeroing vma tree in mmap_region()
Instead of zeroing the vma tree and then overwriting the area, let the area be overwritten and then clean up the gathered vmas using vms_complete_munmap_vmas(). To ensure locking is downgraded correctly, the mm is set regardless of MAP_FIXED or not (NULL vma). If a driver is mapping over an existing vma, then clear the ptes before the call_mmap() invocation. This is done using the vms_clean_up_area() helper. If there is a close vm_ops, that must also be called to ensure any cleanup is done before mapping over the area. This also means that calling open has been added to the abort of an unmap operation, for now. Since vm_ops->open() and vm_ops->close() are not always undo each other (state cleanup may exist in ->close() that is lost forever), the code cannot be left in this way, but that change has been isolated to another commit to make this point very obvious for traceability. Temporarily keep track of the number of pages that will be removed and reduce the charged amount. This also drops the validate_mm() call in the vma_expand() function. It is necessary to drop the validate as it would fail since the mm map_count would be incorrect during a vma expansion, prior to the cleanup from vms_complete_munmap_vmas(). Clean up the error handing of the vms_gather_munmap_vmas() by calling the verification within the function. Link: https://lkml.kernel.org/r/20240830040101.822209-15-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com> Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> Cc: Bert Karwatzki <spasswolf@web.de> Cc: Jeff Xu <jeffxu@chromium.org> Cc: Jiri Olsa <olsajiri@gmail.com> Cc: Kees Cook <kees@kernel.org> Cc: Lorenzo Stoakes <lstoakes@gmail.com> Cc: Mark Brown <broonie@kernel.org> Cc: Matthew Wilcox <willy@infradead.org> Cc: "Paul E. McKenney" <paulmck@kernel.org> Cc: Paul Moore <paul@paul-moore.com> Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com> Cc: Suren Baghdasaryan <surenb@google.com> Cc: Vlastimil Babka <vbabka@suse.cz> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'mm/vma.c')
-rw-r--r--mm/vma.c54
1 files changed, 42 insertions, 12 deletions
diff --git a/mm/vma.c b/mm/vma.c
index 83c5c46c67b9..648c58da8ad4 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -136,10 +136,10 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
/*
* Close a vm structure and free it.
*/
-void remove_vma(struct vm_area_struct *vma, bool unreachable)
+void remove_vma(struct vm_area_struct *vma, bool unreachable, bool closed)
{
might_sleep();
- if (vma->vm_ops && vma->vm_ops->close)
+ if (!closed && vma->vm_ops && vma->vm_ops->close)
vma->vm_ops->close(vma);
if (vma->vm_file)
fput(vma->vm_file);
@@ -521,7 +521,6 @@ int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma,
vma_iter_store(vmi, vma);
vma_complete(&vp, vmi, vma->vm_mm);
- validate_mm(vma->vm_mm);
return 0;
nomem:
@@ -645,11 +644,14 @@ again:
uprobe_mmap(vp->insert);
}
-static void vms_complete_pte_clear(struct vma_munmap_struct *vms,
- struct ma_state *mas_detach, bool mm_wr_locked)
+static inline void vms_clear_ptes(struct vma_munmap_struct *vms,
+ struct ma_state *mas_detach, bool mm_wr_locked)
{
struct mmu_gather tlb;
+ if (!vms->clear_ptes) /* Nothing to do */
+ return;
+
/*
* We can free page tables without write-locking mmap_lock because VMAs
* were isolated before we downgraded mmap_lock.
@@ -658,11 +660,31 @@ static void vms_complete_pte_clear(struct vma_munmap_struct *vms,
lru_add_drain();
tlb_gather_mmu(&tlb, vms->mm);
update_hiwater_rss(vms->mm);
- unmap_vmas(&tlb, mas_detach, vms->vma, vms->start, vms->end, vms->vma_count, mm_wr_locked);
+ unmap_vmas(&tlb, mas_detach, vms->vma, vms->start, vms->end,
+ vms->vma_count, mm_wr_locked);
+
mas_set(mas_detach, 1);
/* start and end may be different if there is no prev or next vma. */
- free_pgtables(&tlb, mas_detach, vms->vma, vms->unmap_start, vms->unmap_end, mm_wr_locked);
+ free_pgtables(&tlb, mas_detach, vms->vma, vms->unmap_start,
+ vms->unmap_end, mm_wr_locked);
tlb_finish_mmu(&tlb);
+ vms->clear_ptes = false;
+}
+
+void vms_clean_up_area(struct vma_munmap_struct *vms,
+ struct ma_state *mas_detach, bool mm_wr_locked)
+{
+ struct vm_area_struct *vma;
+
+ if (!vms->nr_pages)
+ return;
+
+ vms_clear_ptes(vms, mas_detach, mm_wr_locked);
+ mas_set(mas_detach, 0);
+ mas_for_each(mas_detach, vma, ULONG_MAX)
+ if (vma->vm_ops && vma->vm_ops->close)
+ vma->vm_ops->close(vma);
+ vms->closed_vm_ops = true;
}
/*
@@ -686,7 +708,10 @@ void vms_complete_munmap_vmas(struct vma_munmap_struct *vms,
if (vms->unlock)
mmap_write_downgrade(mm);
- vms_complete_pte_clear(vms, mas_detach, !vms->unlock);
+ if (!vms->nr_pages)
+ return;
+
+ vms_clear_ptes(vms, mas_detach, !vms->unlock);
/* Update high watermark before we lower total_vm */
update_hiwater_vm(mm);
/* Stat accounting */
@@ -702,7 +727,7 @@ void vms_complete_munmap_vmas(struct vma_munmap_struct *vms,
/* Remove and clean up vmas */
mas_set(mas_detach, 0);
mas_for_each(mas_detach, vma, ULONG_MAX)
- remove_vma(vma, false);
+ remove_vma(vma, /* = */ false, vms->closed_vm_ops);
vm_unacct_memory(vms->nr_accounted);
validate_mm(mm);
@@ -846,13 +871,14 @@ int vms_gather_munmap_vmas(struct vma_munmap_struct *vms,
while (vma_iter_addr(vms->vmi) > vms->start)
vma_iter_prev_range(vms->vmi);
+ vms->clear_ptes = true;
return 0;
userfaultfd_error:
munmap_gather_failed:
end_split_failed:
modify_vma_failed:
- abort_munmap_vmas(mas_detach);
+ abort_munmap_vmas(mas_detach, /* closed = */ false);
start_split_failed:
map_count_exceeded:
return error;
@@ -897,7 +923,7 @@ int do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
return 0;
clear_tree_failed:
- abort_munmap_vmas(&mas_detach);
+ abort_munmap_vmas(&mas_detach, /* closed = */ false);
gather_failed:
validate_mm(mm);
return error;
@@ -1615,17 +1641,21 @@ bool vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot)
}
unsigned long count_vma_pages_range(struct mm_struct *mm,
- unsigned long addr, unsigned long end)
+ unsigned long addr, unsigned long end,
+ unsigned long *nr_accounted)
{
VMA_ITERATOR(vmi, mm, addr);
struct vm_area_struct *vma;
unsigned long nr_pages = 0;
+ *nr_accounted = 0;
for_each_vma_range(vmi, vma, end) {
unsigned long vm_start = max(addr, vma->vm_start);
unsigned long vm_end = min(end, vma->vm_end);
nr_pages += PHYS_PFN(vm_end - vm_start);
+ if (vma->vm_flags & VM_ACCOUNT)
+ *nr_accounted += PHYS_PFN(vm_end - vm_start);
}
return nr_pages;