Diffstat (limited to 'mm/mmap.c')
-rw-r--r-- | mm/mmap.c | 222
1 file changed, 121 insertions, 101 deletions
diff --git a/mm/mmap.c b/mm/mmap.c
index d600404580b2..8f1000bc35df 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -182,7 +182,8 @@ static int check_brk_limits(unsigned long addr, unsigned long len)
 	if (IS_ERR_VALUE(mapped_addr))
 		return mapped_addr;
 
-	return mlock_future_check(current->mm, current->mm->def_flags, len);
+	return mlock_future_ok(current->mm, current->mm->def_flags, len)
+		? 0 : -EAGAIN;
 }
 
 static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *brkvma,
 		unsigned long addr, unsigned long request, unsigned long flags);
@@ -300,61 +301,40 @@ out:
 }
 
 #if defined(CONFIG_DEBUG_VM_MAPLE_TREE)
-extern void mt_validate(struct maple_tree *mt);
-extern void mt_dump(const struct maple_tree *mt);
-
-/* Validate the maple tree */
-static void validate_mm_mt(struct mm_struct *mm)
-{
-	struct maple_tree *mt = &mm->mm_mt;
-	struct vm_area_struct *vma_mt;
-
-	MA_STATE(mas, mt, 0, 0);
-
-	mt_validate(&mm->mm_mt);
-	mas_for_each(&mas, vma_mt, ULONG_MAX) {
-		if ((vma_mt->vm_start != mas.index) ||
-		    (vma_mt->vm_end - 1 != mas.last)) {
-			pr_emerg("issue in %s\n", current->comm);
-			dump_stack();
-			dump_vma(vma_mt);
-			pr_emerg("mt piv: %p %lu - %lu\n", vma_mt,
-				 mas.index, mas.last);
-			pr_emerg("mt vma: %p %lu - %lu\n", vma_mt,
-				 vma_mt->vm_start, vma_mt->vm_end);
-
-			mt_dump(mas.tree);
-			if (vma_mt->vm_end != mas.last + 1) {
-				pr_err("vma: %p vma_mt %lu-%lu\tmt %lu-%lu\n",
-						mm, vma_mt->vm_start, vma_mt->vm_end,
-						mas.index, mas.last);
-				mt_dump(mas.tree);
-			}
-			VM_BUG_ON_MM(vma_mt->vm_end != mas.last + 1, mm);
-			if (vma_mt->vm_start != mas.index) {
-				pr_err("vma: %p vma_mt %p %lu - %lu doesn't match\n",
-						mm, vma_mt, vma_mt->vm_start, vma_mt->vm_end);
-				mt_dump(mas.tree);
-			}
-			VM_BUG_ON_MM(vma_mt->vm_start != mas.index, mm);
-		}
-	}
-}
-
 static void validate_mm(struct mm_struct *mm)
 {
 	int bug = 0;
 	int i = 0;
 	struct vm_area_struct *vma;
-	MA_STATE(mas, &mm->mm_mt, 0, 0);
-
-	validate_mm_mt(mm);
+	VMA_ITERATOR(vmi, mm, 0);
 
-	mas_for_each(&mas, vma, ULONG_MAX) {
+	mt_validate(&mm->mm_mt);
+	for_each_vma(vmi, vma) {
 #ifdef CONFIG_DEBUG_VM_RB
 		struct anon_vma *anon_vma = vma->anon_vma;
 		struct anon_vma_chain *avc;
+#endif
+		unsigned long vmi_start, vmi_end;
+		bool warn = 0;
+
+		vmi_start = vma_iter_addr(&vmi);
+		vmi_end = vma_iter_end(&vmi);
+		if (VM_WARN_ON_ONCE_MM(vma->vm_end != vmi_end, mm))
+			warn = 1;
+
+		if (VM_WARN_ON_ONCE_MM(vma->vm_start != vmi_start, mm))
+			warn = 1;
 
+		if (warn) {
+			pr_emerg("issue in %s\n", current->comm);
+			dump_stack();
+			dump_vma(vma);
+			pr_emerg("tree range: %px start %lx end %lx\n", vma,
+				 vmi_start, vmi_end - 1);
+			vma_iter_dump_tree(&vmi);
+		}
+
+#ifdef CONFIG_DEBUG_VM_RB
 		if (anon_vma) {
 			anon_vma_lock_read(anon_vma);
 			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
@@ -365,14 +345,13 @@ static void validate_mm(struct mm_struct *mm)
 		i++;
 	}
 	if (i != mm->map_count) {
-		pr_emerg("map_count %d mas_for_each %d\n", mm->map_count, i);
+		pr_emerg("map_count %d vma iterator %d\n", mm->map_count, i);
 		bug = 1;
 	}
 	VM_BUG_ON_MM(bug, mm);
 }
 
 #else /* !CONFIG_DEBUG_VM_MAPLE_TREE */
-#define validate_mm_mt(root) do { } while (0)
 #define validate_mm(mm) do { } while (0)
 #endif /* CONFIG_DEBUG_VM_MAPLE_TREE */
 
@@ -1167,21 +1146,21 @@ static inline unsigned long round_hint_to_min(unsigned long hint)
 	return hint;
 }
 
-int mlock_future_check(struct mm_struct *mm, unsigned long flags,
-		       unsigned long len)
+bool mlock_future_ok(struct mm_struct *mm, unsigned long flags,
+			unsigned long bytes)
 {
-	unsigned long locked, lock_limit;
+	unsigned long locked_pages, limit_pages;
 
-	/*  mlock MCL_FUTURE? */
-	if (flags & VM_LOCKED) {
-		locked = len >> PAGE_SHIFT;
-		locked += mm->locked_vm;
-		lock_limit = rlimit(RLIMIT_MEMLOCK);
-		lock_limit >>= PAGE_SHIFT;
-		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
-			return -EAGAIN;
-	}
-	return 0;
+	if (!(flags & VM_LOCKED) || capable(CAP_IPC_LOCK))
+		return true;
+
+	locked_pages = bytes >> PAGE_SHIFT;
+	locked_pages += mm->locked_vm;
+
+	limit_pages = rlimit(RLIMIT_MEMLOCK);
+	limit_pages >>= PAGE_SHIFT;
+
+	return locked_pages <= limit_pages;
 }
 
 static inline u64 file_mmap_size_max(struct file *file, struct inode *inode)
@@ -1293,7 +1272,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
 		if (!can_do_mlock())
 			return -EPERM;
 
-	if (mlock_future_check(mm, vm_flags, len))
+	if (!mlock_future_ok(mm, vm_flags, len))
 		return -EAGAIN;
 
 	if (file) {
@@ -1475,6 +1454,48 @@ SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
 }
 #endif /* __ARCH_WANT_SYS_OLD_MMAP */
 
+static bool vm_ops_needs_writenotify(const struct vm_operations_struct *vm_ops)
+{
+	return vm_ops && (vm_ops->page_mkwrite || vm_ops->pfn_mkwrite);
+}
+
+static bool vma_is_shared_writable(struct vm_area_struct *vma)
+{
+	return (vma->vm_flags & (VM_WRITE | VM_SHARED)) ==
+		(VM_WRITE | VM_SHARED);
+}
+
+static bool vma_fs_can_writeback(struct vm_area_struct *vma)
+{
+	/* No managed pages to writeback. */
+	if (vma->vm_flags & VM_PFNMAP)
+		return false;
+
+	return vma->vm_file && vma->vm_file->f_mapping &&
+		mapping_can_writeback(vma->vm_file->f_mapping);
+}
+
+/*
+ * Does this VMA require the underlying folios to have their dirty state
+ * tracked?
+ */
+bool vma_needs_dirty_tracking(struct vm_area_struct *vma)
+{
+	/* Only shared, writable VMAs require dirty tracking. */
+	if (!vma_is_shared_writable(vma))
+		return false;
+
+	/* Does the filesystem need to be notified? */
+	if (vm_ops_needs_writenotify(vma->vm_ops))
+		return true;
+
+	/*
+	 * Even if the filesystem doesn't indicate a need for writenotify, if it
+	 * can writeback, dirty tracking is still required.
+	 */
+	return vma_fs_can_writeback(vma);
+}
+
 /*
  * Some shared mappings will want the pages marked read-only
  * to track write events. If so, we'll downgrade vm_page_prot
@@ -1483,21 +1504,18 @@ SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
  */
 int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot)
 {
-	vm_flags_t vm_flags = vma->vm_flags;
-	const struct vm_operations_struct *vm_ops = vma->vm_ops;
-
 	/* If it was private or non-writable, the write bit is already clear */
-	if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
+	if (!vma_is_shared_writable(vma))
 		return 0;
 
 	/* The backer wishes to know when pages are first written to? */
-	if (vm_ops && (vm_ops->page_mkwrite || vm_ops->pfn_mkwrite))
+	if (vm_ops_needs_writenotify(vma->vm_ops))
 		return 1;
 
 	/* The open routine did something to the protections that pgprot_modify
 	 * won't preserve? */
 	if (pgprot_val(vm_page_prot) !=
-	    pgprot_val(vm_pgprot_modify(vm_page_prot, vm_flags)))
+	    pgprot_val(vm_pgprot_modify(vm_page_prot, vma->vm_flags)))
 		return 0;
 
 	/*
@@ -1511,13 +1529,8 @@ int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot)
 	if (userfaultfd_wp(vma))
 		return 1;
 
-	/* Specialty mapping? */
-	if (vm_flags & VM_PFNMAP)
-		return 0;
-
-	/* Can the mapping track the dirty pages? */
-	return vma->vm_file && vma->vm_file->f_mapping &&
-		mapping_can_writeback(vma->vm_file->f_mapping);
+	return vma_fs_can_writeback(vma);
 }
 
 /*
@@ -1911,7 +1924,7 @@ static int acct_stack_growth(struct vm_area_struct *vma,
 		return -ENOMEM;
 
 	/* mlock limit tests */
-	if (mlock_future_check(mm, vma->vm_flags, grow << PAGE_SHIFT))
+	if (!mlock_future_ok(mm, vma->vm_flags, grow << PAGE_SHIFT))
 		return -ENOMEM;
 
 	/* Check to ensure the stack will not grow into a hugetlb-only region */
@@ -2234,7 +2247,7 @@ int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
 	struct vm_area_struct *new;
 	int err;
 
-	validate_mm_mt(vma->vm_mm);
+	validate_mm(vma->vm_mm);
 
 	WARN_ON(vma->vm_start >= addr);
 	WARN_ON(vma->vm_end <= addr);
@@ -2292,7 +2305,7 @@ int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
 	/* Success. */
 	if (new_below)
 		vma_next(vmi);
-	validate_mm_mt(vma->vm_mm);
+	validate_mm(vma->vm_mm);
 	return 0;
 
 out_free_mpol:
@@ -2301,7 +2314,7 @@ out_free_vmi:
 	vma_iter_free(vmi);
 out_free_vma:
 	vm_area_free(new);
-	validate_mm_mt(vma->vm_mm);
+	validate_mm(vma->vm_mm);
 	return err;
 }
 
@@ -2394,28 +2407,32 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
 			locked_vm += vma_pages(next);
 
 		count++;
+		if (unlikely(uf)) {
+			/*
+			 * If userfaultfd_unmap_prep returns an error the vmas
+			 * will remain split, but userland will get a
+			 * highly unexpected error anyway. This is no
+			 * different than the case where the first of the two
+			 * __split_vma fails, but we don't undo the first
+			 * split, despite we could. This is unlikely enough
+			 * failure that it's not worth optimizing it for.
+			 */
+			error = userfaultfd_unmap_prep(next, start, end, uf);
+
+			if (error)
+				goto userfaultfd_error;
+		}
 #ifdef CONFIG_DEBUG_VM_MAPLE_TREE
 		BUG_ON(next->vm_start < start);
 		BUG_ON(next->vm_start > end);
 #endif
 	}
 
-	next = vma_next(vmi);
-	if (unlikely(uf)) {
-		/*
-		 * If userfaultfd_unmap_prep returns an error the vmas
-		 * will remain split, but userland will get a
-		 * highly unexpected error anyway. This is no
-		 * different than the case where the first of the two
-		 * __split_vma fails, but we don't undo the first
-		 * split, despite we could. This is unlikely enough
-		 * failure that it's not worth optimizing it for.
-		 */
-		error = userfaultfd_unmap_prep(mm, start, end, uf);
+	if (vma_iter_end(vmi) > end)
+		next = vma_iter_load(vmi);
 
-		if (error)
-			goto userfaultfd_error;
-	}
+	if (!next)
+		next = vma_next(vmi);
 
 #if defined(CONFIG_DEBUG_VM_MAPLE_TREE)
 	/* Make sure no VMAs are about to be lost. */
@@ -2620,6 +2637,9 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	}
 
 cannot_expand:
+	if (prev)
+		vma_iter_next_range(&vmi);
+
 	/*
 	 * Determine the object being mapped and call the appropriate
 	 * specific mapper. the address has already been validated, but
@@ -2933,7 +2953,7 @@ int do_vma_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
 
 	arch_unmap(mm, start, end);
 	ret = do_vmi_align_munmap(vmi, vma, mm, start, end, uf, downgrade);
-	validate_mm_mt(mm);
+	validate_mm(mm);
 	return ret;
 }
 
@@ -2955,7 +2975,7 @@ static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
 	struct mm_struct *mm = current->mm;
 	struct vma_prepare vp;
 
-	validate_mm_mt(mm);
+	validate_mm(mm);
 	/*
 	 * Check against address space limits by the changed size
 	 * Note: This happens *after* clearing old mappings in some code paths.
@@ -3196,7 +3216,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 	bool faulted_in_anon_vma = true;
 	VMA_ITERATOR(vmi, mm, addr);
 
-	validate_mm_mt(mm);
+	validate_mm(mm);
 	/*
 	 * If anonymous vma has not yet been faulted, update new pgoff
	 * to match new location, to increase its chance of merging.
@@ -3255,7 +3275,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 			goto out_vma_link;
 		*need_rmap_locks = false;
 	}
-	validate_mm_mt(mm);
+	validate_mm(mm);
 	return new_vma;
 
 out_vma_link:
@@ -3271,7 +3291,7 @@ out_free_mempol:
 out_free_vma:
 	vm_area_free(new_vma);
 out:
-	validate_mm_mt(mm);
+	validate_mm(mm);
 	return NULL;
 }
 
@@ -3408,7 +3428,7 @@ static struct vm_area_struct *__install_special_mapping(
 	int ret;
 	struct vm_area_struct *vma;
 
-	validate_mm_mt(mm);
+	validate_mm(mm);
 	vma = vm_area_alloc(mm);
 	if (unlikely(vma == NULL))
 		return ERR_PTR(-ENOMEM);
@@ -3431,12 +3451,12 @@ static struct vm_area_struct *__install_special_mapping(
 
 	perf_event_mmap(vma);
 
-	validate_mm_mt(mm);
+	validate_mm(mm);
 	return vma;
 
 out:
 	vm_area_free(vma);
-	validate_mm_mt(mm);
+	validate_mm(mm);
 	return ERR_PTR(ret);
 }
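
For illustration only: the sketch below is not kernel code. It mirrors, in standalone userspace C, the RLIMIT_MEMLOCK page accounting that the reworked mlock_future_ok() performs above (allow when the mapping is not VM_LOCKED or the caller may bypass the limit, otherwise compare existing locked pages plus the new request against the limit in pages). All names here (PAGE_SHIFT_SKETCH, mm_state, mlock_future_ok_sketch) are hypothetical stand-ins, and the capability/flag inputs are passed as plain booleans.

/* Illustrative userspace sketch of the mlock_future_ok() accounting. */
#include <stdbool.h>
#include <stdio.h>

#define PAGE_SHIFT_SKETCH 12	/* assume 4 KiB pages for the example */

struct mm_state {
	unsigned long locked_vm;	/* pages already mlocked in this mm */
};

static bool mlock_future_ok_sketch(const struct mm_state *mm, bool vm_locked,
				   bool cap_ipc_lock, unsigned long bytes,
				   unsigned long memlock_rlimit_bytes)
{
	unsigned long locked_pages, limit_pages;

	/* Not a locked mapping, or privileged: nothing to account. */
	if (!vm_locked || cap_ipc_lock)
		return true;

	locked_pages = bytes >> PAGE_SHIFT_SKETCH;
	locked_pages += mm->locked_vm;

	limit_pages = memlock_rlimit_bytes >> PAGE_SHIFT_SKETCH;

	return locked_pages <= limit_pages;
}

int main(void)
{
	struct mm_state mm = { .locked_vm = 16 };	/* 16 pages already locked */
	unsigned long rlimit = 64 * 4096;		/* 64-page RLIMIT_MEMLOCK */

	/* 32 pages requested: 16 + 32 <= 64, allowed (prints 1). */
	printf("%d\n", mlock_future_ok_sketch(&mm, true, false, 32 * 4096, rlimit));
	/* 56 pages requested: 16 + 56 > 64, refused (prints 0; callers map this to -EAGAIN). */
	printf("%d\n", mlock_future_ok_sketch(&mm, true, false, 56 * 4096, rlimit));
	return 0;
}

The point of the boolean form is visible at the call sites in the diff: the old helper returned 0 or -EAGAIN, while the new one answers "is this allowed?", so callers such as do_mmap() and acct_stack_growth() now test the negation and supply their own error code.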