path: root/mm/userfaultfd.c
Diffstat (limited to 'mm/userfaultfd.c')
-rw-r--r--  mm/userfaultfd.c  170
1 file changed, 169 insertions, 1 deletion
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index acc56c75ba99..ce13c4062647 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -391,7 +391,7 @@ static int mfill_atomic_pte_continue(pmd_t *dst_pmd,
 	struct page *page;
 	int ret;
 
-	ret = shmem_get_folio(inode, pgoff, &folio, SGP_NOALLOC);
+	ret = shmem_get_folio(inode, pgoff, 0, &folio, SGP_NOALLOC);
 	/* Our caller expects us to return -EFAULT if we failed to find folio */
 	if (ret == -ENOENT)
 		ret = -EFAULT;
@@ -1763,3 +1763,171 @@ out:
 	VM_WARN_ON(!moved && !err);
 	return moved ? moved : err;
 }
+
+static void userfaultfd_set_vm_flags(struct vm_area_struct *vma,
+				      vm_flags_t flags)
+{
+	const bool uffd_wp_changed = (vma->vm_flags ^ flags) & VM_UFFD_WP;
+
+	vm_flags_reset(vma, flags);
+	/*
+	 * For shared mappings, we want to enable writenotify while
+	 * userfaultfd-wp is enabled (see vma_wants_writenotify()). We'll simply
+	 * recalculate vma->vm_page_prot whenever userfaultfd-wp changes.
+	 */
+	if ((vma->vm_flags & VM_SHARED) && uffd_wp_changed)
+		vma_set_page_prot(vma);
+}
+
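+/*
+ * Attach @ctx and the VM_UFFD_* bits in @flags to @vma. The VMA is marked
+ * write-locked first so lockless page faults cannot observe a half-updated
+ * context/flags pair.
+ */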
+static void userfaultfd_set_ctx(struct vm_area_struct *vma,
+				struct userfaultfd_ctx *ctx,
+				unsigned long flags)
+{
+	vma_start_write(vma);
+	vma->vm_userfaultfd_ctx = (struct vm_userfaultfd_ctx){ctx};
+	userfaultfd_set_vm_flags(vma,
+				 (vma->vm_flags & ~__VM_UFFD_FLAGS) | flags);
+}
+
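+/* Drop the userfaultfd context and all VM_UFFD_* flags from @vma. */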
+void userfaultfd_reset_ctx(struct vm_area_struct *vma)
+{
+	userfaultfd_set_ctx(vma, NULL, 0);
+}
+
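+/*
+ * Remove userfaultfd state from @vma over [start, end): clear any
+ * write-protection PTE state, then strip the context and VM_UFFD_* flags,
+ * splitting or merging VMAs as needed. Returns the resulting VMA, or an
+ * ERR_PTR() on failure.
+ */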
+struct vm_area_struct *userfaultfd_clear_vma(struct vma_iterator *vmi,
+					     struct vm_area_struct *prev,
+					     struct vm_area_struct *vma,
+					     unsigned long start,
+					     unsigned long end)
+{
+	struct vm_area_struct *ret;
+
+	/* Reset ptes for the whole vma range if wr-protected */
+	if (userfaultfd_wp(vma))
+		uffd_wp_range(vma, start, end - start, false);
+
+	ret = vma_modify_flags_uffd(vmi, prev, vma, start, end,
+				    vma->vm_flags & ~__VM_UFFD_FLAGS,
+				    NULL_VM_UFFD_CTX);
+
+	/*
+	 * In the vma_merge() successful mprotect-like case 8:
+	 * the next vma was merged into the current one and
+	 * the current one has not been updated yet.
+	 */
+	if (!IS_ERR(ret))
+		userfaultfd_reset_ctx(ret);
+
+	return ret;
+}
+
+/* Assumes mmap write lock taken, and mm_struct pinned. */
+int userfaultfd_register_range(struct userfaultfd_ctx *ctx,
+			       struct vm_area_struct *vma,
+			       unsigned long vm_flags,
+			       unsigned long start, unsigned long end,
+			       bool wp_async)
+{
+	VMA_ITERATOR(vmi, ctx->mm, start);
+	struct vm_area_struct *prev = vma_prev(&vmi);
+	unsigned long vma_end;
+	unsigned long new_flags;
+
+	if (vma->vm_start < start)
+		prev = vma;
+
+	for_each_vma_range(vmi, vma, end) {
+		cond_resched();
+
+		BUG_ON(!vma_can_userfault(vma, vm_flags, wp_async));
+		BUG_ON(vma->vm_userfaultfd_ctx.ctx &&
+		       vma->vm_userfaultfd_ctx.ctx != ctx);
+		WARN_ON(!(vma->vm_flags & VM_MAYWRITE));
+
+		/*
+		 * Nothing to do: this vma is already registered into this
+		 * userfaultfd and with the right tracking mode too.
+		 */
+		if (vma->vm_userfaultfd_ctx.ctx == ctx &&
+		    (vma->vm_flags & vm_flags) == vm_flags)
+			goto skip;
+
+		if (vma->vm_start > start)
+			start = vma->vm_start;
+		vma_end = min(end, vma->vm_end);
+
+		new_flags = (vma->vm_flags & ~__VM_UFFD_FLAGS) | vm_flags;
+		vma = vma_modify_flags_uffd(&vmi, prev, vma, start, vma_end,
+					    new_flags,
+					    (struct vm_userfaultfd_ctx){ctx});
+		if (IS_ERR(vma))
+			return PTR_ERR(vma);
+
+		/*
+		 * In the vma_merge() successful mprotect-like case 8:
+		 * the next vma was merged into the current one and
+		 * the current one has not been updated yet.
+		 */
+		userfaultfd_set_ctx(vma, ctx, vm_flags);
+
+		if (is_vm_hugetlb_page(vma) && uffd_disable_huge_pmd_share(vma))
+			hugetlb_unshare_all_pmds(vma);
+
+skip:
+		prev = vma;
+		start = vma->vm_end;
+	}
+
+	return 0;
+}
+
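+/*
+ * Detach @ctx from every VMA in its mm. Used when a freshly created
+ * context is being torn down before userspace ever received it, while
+ * the VMAs may already point at it.
+ */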
+void userfaultfd_release_new(struct userfaultfd_ctx *ctx)
+{
+	struct mm_struct *mm = ctx->mm;
+	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, mm, 0);
+
+	/* the various vma->vm_userfaultfd_ctx still points to it */
+	mmap_write_lock(mm);
+	for_each_vma(vmi, vma) {
+		if (vma->vm_userfaultfd_ctx.ctx == ctx)
+			userfaultfd_reset_ctx(vma);
+	}
+	mmap_write_unlock(mm);
+}
+
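+/*
+ * Tear down every registration of @ctx in @mm: wr-protection state is
+ * undone and the context and VM_UFFD_* flags are cleared from each VMA
+ * that points at @ctx. Called when the userfaultfd file is released.
+ */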
+void userfaultfd_release_all(struct mm_struct *mm,
+			     struct userfaultfd_ctx *ctx)
+{
+	struct vm_area_struct *vma, *prev;
+	VMA_ITERATOR(vmi, mm, 0);
+
+	if (!mmget_not_zero(mm))
+		return;
+
+	/*
+	 * Flush page faults out of all CPUs. NOTE: all page faults
+	 * must be retried without returning VM_FAULT_SIGBUS if
+	 * userfaultfd_ctx_get() succeeds but vma->vm_userfaultfd_ctx
+	 * changes while handle_userfault released the mmap_lock. So
+	 * it's critical that released is set to true (above), before
+	 * taking the mmap_lock for writing.
+	 */
+	mmap_write_lock(mm);
+	prev = NULL;
+	for_each_vma(vmi, vma) {
+		cond_resched();
+		BUG_ON(!!vma->vm_userfaultfd_ctx.ctx ^
+		       !!(vma->vm_flags & __VM_UFFD_FLAGS));
+		if (vma->vm_userfaultfd_ctx.ctx != ctx) {
+			prev = vma;
+			continue;
+		}
+
+		vma = userfaultfd_clear_vma(&vmi, prev, vma,
+					    vma->vm_start, vma->vm_end);
+		prev = vma;
+	}
+	mmap_write_unlock(mm);
+	mmput(mm);
+}
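
For context only (not part of the patch): the registration path added above is driven from userspace through the userfaultfd(2) UAPI. Below is a minimal sketch of a caller that would exercise userfaultfd_register_range() at UFFDIO_REGISTER time and userfaultfd_release_all() on the final close(); the mapping size and the missing-fault mode are illustrative and error handling is trimmed.

#include <fcntl.h>
#include <linux/userfaultfd.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);

	/* Anonymous mapping the kernel will track for missing-page faults. */
	void *area = mmap(NULL, 4 * page, PROT_READ | PROT_WRITE,
			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	/* Create the userfaultfd and negotiate the API version. */
	int uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
	struct uffdio_api api = { .api = UFFD_API };
	if (uffd < 0 || ioctl(uffd, UFFDIO_API, &api) == -1)
		return 1;

	/*
	 * UFFDIO_REGISTER lands in the userfaultfd_register() ioctl handler,
	 * which with this change calls userfaultfd_register_range() to tag
	 * every VMA in [start, start + len) with the context and the
	 * requested tracking mode.
	 */
	struct uffdio_register reg = {
		.range = { .start = (unsigned long)area, .len = 4 * page },
		.mode = UFFDIO_REGISTER_MODE_MISSING,
	};
	if (ioctl(uffd, UFFDIO_REGISTER, &reg) == -1)
		return 1;

	printf("registered %zu bytes at %p\n", (size_t)(4 * page), area);

	/* Final close() of the fd runs the release path (userfaultfd_release_all()). */
	close(uffd);
	munmap(area, 4 * page);
	return 0;
}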