From 5beaee54a324ba1fe307e341ec825d5d099f4091 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)"
Date: Tue, 26 Mar 2024 20:28:22 +0000
Subject: mm: add is_huge_zero_folio()

This is the folio equivalent of is_huge_zero_page().  It doesn't add any
efficiency, but it does prevent the caller from passing a tail page and
getting confused when the predicate returns false.

Link: https://lkml.kernel.org/r/20240326202833.523759-3-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle)
Reviewed-by: David Hildenbrand
Signed-off-by: Andrew Morton
---
 mm/userfaultfd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'mm/userfaultfd.c')

diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 3c3539c573e7..a0ec14553fbe 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -1664,7 +1664,7 @@ ssize_t move_pages(struct userfaultfd_ctx *ctx, unsigned long dst_start,
 			    !pmd_none(dst_pmdval)) {
 				struct folio *folio = pfn_folio(pmd_pfn(*src_pmd));
 
-				if (!folio || (!is_huge_zero_page(&folio->page) &&
+				if (!folio || (!is_huge_zero_folio(folio) &&
 					       !PageAnonExclusive(&folio->page))) {
 					spin_unlock(ptl);
 					err = -EBUSY;
--
cgit v1.2.3-70-g09d2

From e06d03d5590ae1c257b8aa2cfbfe6765e0755c14 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)"
Date: Tue, 26 Mar 2024 20:28:23 +0000
Subject: mm: add pmd_folio()

Convert directly from a pmd to a folio without going through another
representation first.  For now this is just a slightly shorter way to
write it, but it might end up being more efficient later.

Link: https://lkml.kernel.org/r/20240326202833.523759-4-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle)
Reviewed-by: David Hildenbrand
Signed-off-by: Andrew Morton
---
 include/linux/pgtable.h | 2 ++
 mm/huge_memory.c        | 6 +++---
 mm/madvise.c            | 2 +-
 mm/mempolicy.c          | 2 +-
 mm/mlock.c              | 2 +-
 mm/userfaultfd.c        | 2 +-
 6 files changed, 9 insertions(+), 7 deletions(-)

(limited to 'mm/userfaultfd.c')

diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 600e17d03659..09c85c7bf9c2 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -50,6 +50,8 @@
 #define pmd_pgtable(pmd) pmd_page(pmd)
 #endif
 
+#define pmd_folio(pmd) page_folio(pmd_page(pmd))
+
 /*
  * A page table page can be thought of an array like this: pXd_t[PTRS_PER_PxD]
  *
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 5c043c7b5062..712263e3b1f6 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1816,7 +1816,7 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		goto out;
 	}
 
-	folio = pfn_folio(pmd_pfn(orig_pmd));
+	folio = pmd_folio(orig_pmd);
 	/*
 	 * If other processes are mapping this folio, we couldn't discard
 	 * the folio unless they all do MADV_FREE so let's skip the folio.
@@ -2086,7 +2086,7 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		if (pmd_protnone(*pmd))
 			goto unlock;
 
-		folio = page_folio(pmd_page(*pmd));
+		folio = pmd_folio(*pmd);
 		toptier = node_is_toptier(folio_nid(folio));
 		/*
 		 * Skip scanning top tier node if normal numa
@@ -2663,7 +2663,7 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 	 * It's safe to call pmd_page when folio is set because it's
 	 * guaranteed that pmd is present.
 	 */
-	if (folio && folio != page_folio(pmd_page(*pmd)))
+	if (folio && folio != pmd_folio(*pmd))
 		goto out;
 	__split_huge_pmd_locked(vma, pmd, range.start, freeze);
 }
diff --git a/mm/madvise.c b/mm/madvise.c
index 7625830d6ae9..1f77a51baaac 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -363,7 +363,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
 			goto huge_unlock;
 		}
 
-		folio = pfn_folio(pmd_pfn(orig_pmd));
+		folio = pmd_folio(orig_pmd);
 
 		/* Do not interfere with other mappings of this folio */
 		if (folio_likely_mapped_shared(folio))
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 5743028a63a5..aec756ae5637 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -509,7 +509,7 @@ static void queue_folios_pmd(pmd_t *pmd, struct mm_walk *walk)
 		qp->nr_failed++;
 		return;
 	}
-	folio = pfn_folio(pmd_pfn(*pmd));
+	folio = pmd_folio(*pmd);
 	if (is_huge_zero_folio(folio)) {
 		walk->action = ACTION_CONTINUE;
 		return;
diff --git a/mm/mlock.c b/mm/mlock.c
index 1ed2f2ab37cd..30b51cdea89d 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -378,7 +378,7 @@ static int mlock_pte_range(pmd_t *pmd, unsigned long addr,
 			goto out;
 		if (is_huge_zero_pmd(*pmd))
 			goto out;
-		folio = page_folio(pmd_page(*pmd));
+		folio = pmd_folio(*pmd);
 		if (vma->vm_flags & VM_LOCKED)
 			mlock_folio(folio);
 		else
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index a0ec14553fbe..b70618e8dcd2 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -1662,7 +1662,7 @@ ssize_t move_pages(struct userfaultfd_ctx *ctx, unsigned long dst_start,
 			/* Check if we can move the pmd without splitting it. */
 			if (move_splits_huge_pmd(dst_addr, src_addr, src_start + len) ||
 			    !pmd_none(dst_pmdval)) {
-				struct folio *folio = pfn_folio(pmd_pfn(*src_pmd));
+				struct folio *folio = pmd_folio(*src_pmd);
 
 				if (!folio || (!is_huge_zero_folio(folio) &&
 					       !PageAnonExclusive(&folio->page))) {
--
cgit v1.2.3-70-g09d2
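For reference, the conversion pattern that the two patches above introduce can be sketched as below. The fragment is illustrative only and is not part of any commit: the wrapper function is hypothetical, while pmd_folio(), is_huge_zero_folio(), pfn_folio() and PageAnonExclusive() are the kernel helpers that appear in the diffs.

#include <linux/mm.h>
#include <linux/pgtable.h>
#include <linux/huge_mm.h>

/* Hypothetical helper, for illustration only: does this huge pmd map an
 * exclusive anonymous folio?
 */
static bool pmd_maps_exclusive_anon(pmd_t pmd)
{
	/* Old pattern: pfn_folio(pmd_pfn(pmd)), then test the head page
	 * with is_huge_zero_page(&folio->page).
	 */

	/* New pattern: convert the pmd to a folio in one step ... */
	struct folio *folio = pmd_folio(pmd);

	/* ... and test the folio itself, so a tail page can never be
	 * handed to the predicate by mistake.
	 */
	if (is_huge_zero_folio(folio))
		return false;

	return PageAnonExclusive(&folio->page);
}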
From b5ba3a64279355731252098d92550e12bf9649e4 Mon Sep 17 00:00:00 2001
From: Suren Baghdasaryan
Date: Sun, 14 Apr 2024 19:08:21 -0700
Subject: userfaultfd: remove WRITE_ONCE when setting folio->index during UFFDIO_MOVE

When folio is moved with UFFDIO_MOVE it gets locked before the rmap and
index are modified.  Due to the folio lock being already held, WRITE_ONCE()
is not needed when setting the folio index.  Remove it.

Link: https://lkml.kernel.org/r/20240415020821.1152951-1-surenb@google.com
Reported-by: Matthew Wilcox
Signed-off-by: Suren Baghdasaryan
Reviewed-by: David Hildenbrand
Reviewed-by: Peter Xu
Cc: Lokesh Gidra
Signed-off-by: Andrew Morton
---
 mm/huge_memory.c | 2 +-
 mm/userfaultfd.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'mm/userfaultfd.c')

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 264e09043f09..31b6bbffea52 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2200,7 +2200,7 @@ int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pm
 	}
 
 	folio_move_anon_rmap(src_folio, dst_vma);
-	WRITE_ONCE(src_folio->index, linear_page_index(dst_vma, dst_addr));
+	src_folio->index = linear_page_index(dst_vma, dst_addr);
 
 	_dst_pmd = mk_huge_pmd(&src_folio->page, dst_vma->vm_page_prot);
 	/* Follow mremap() behavior and treat the entry dirty after the move */
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index b70618e8dcd2..575ccf90325a 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -1026,7 +1026,7 @@ static int move_present_pte(struct mm_struct *mm,
 	}
 
 	folio_move_anon_rmap(src_folio, dst_vma);
-	WRITE_ONCE(src_folio->index, linear_page_index(dst_vma, dst_addr));
+	src_folio->index = linear_page_index(dst_vma, dst_addr);
 
 	orig_dst_pte = mk_pte(&src_folio->page, dst_vma->vm_page_prot);
 	/* Follow mremap() behavior and treat the entry dirty after the move */
--
cgit v1.2.3-70-g09d2

From a568b4126b20ebbc01914e12d083379720911799 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)"
Date: Tue, 23 Apr 2024 23:55:36 +0100
Subject: userfault; expand folio use in mfill_atomic_install_pte()

Call page_folio() a little earlier so we can use folio_mapping()
instead of page_mapping(), saving a call to compound_head().

Link: https://lkml.kernel.org/r/20240423225552.4113447-6-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle)
Reviewed-by: David Hildenbrand
Cc: Eric Biggers
Cc: Sidhartha Kumar
Signed-off-by: Andrew Morton
---
 mm/userfaultfd.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'mm/userfaultfd.c')

diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 575ccf90325a..8b1005ef9dfa 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -180,9 +180,9 @@ int mfill_atomic_install_pte(pmd_t *dst_pmd,
 	pte_t _dst_pte, *dst_pte;
 	bool writable = dst_vma->vm_flags & VM_WRITE;
 	bool vm_shared = dst_vma->vm_flags & VM_SHARED;
-	bool page_in_cache = page_mapping(page);
 	spinlock_t *ptl;
-	struct folio *folio;
+	struct folio *folio = page_folio(page);
+	bool page_in_cache = folio_mapping(folio);
 
 	_dst_pte = mk_pte(page, dst_vma->vm_page_prot);
 	_dst_pte = pte_mkdirty(_dst_pte);
@@ -212,7 +212,6 @@ int mfill_atomic_install_pte(pmd_t *dst_pmd,
 	if (!pte_none_mostly(ptep_get(dst_pte)))
 		goto out_unlock;
 
-	folio = page_folio(page);
 	if (page_in_cache) {
 		/* Usually, cache pages are already added to LRU */
 		if (newly_allocated)
--
cgit v1.2.3-70-g09d2
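The point of the patch above can be shown with a small sketch. Again, this is not kernel code and the helper name is made up; it only illustrates that resolving the folio once with page_folio() lets later checks use folio_mapping() directly instead of page_mapping(), which resolves the head page on every call.

#include <linux/mm.h>
#include <linux/pagemap.h>

/* Hypothetical helper, for illustration only: is this page in the page cache? */
static bool page_is_in_cache(struct page *page)
{
	/* Old pattern: return page_mapping(page) != NULL;
	 * (page_mapping() calls compound_head() internally each time.)
	 */

	/* New pattern: resolve the folio once, then reuse it. */
	struct folio *folio = page_folio(page);

	return folio_mapping(folio) != NULL;
}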
From 73b4a0cd8243709870701349611722ba3c351815 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)"
Date: Fri, 26 Apr 2024 15:45:02 +0100
Subject: mm: fix some minor per-VMA lock issues in userfaultfd

Rename lock_vma() to uffd_lock_vma() because it really is uffd specific.
Remove comment referencing unlock_vma() which doesn't exist.
Fix the comment about lock_vma_under_rcu() which I just made incorrect.

Link: https://lkml.kernel.org/r/20240426144506.1290619-4-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle)
Reviewed-by: Suren Baghdasaryan
Cc: David Hildenbrand
Cc: Jann Horn
Signed-off-by: Andrew Morton
---
 mm/userfaultfd.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

(limited to 'mm/userfaultfd.c')

diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 8b1005ef9dfa..d9e82ae68244 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -56,17 +56,16 @@ struct vm_area_struct *find_vma_and_prepare_anon(struct mm_struct *mm,
 
 #ifdef CONFIG_PER_VMA_LOCK
 /*
- * lock_vma() - Lookup and lock vma corresponding to @address.
+ * uffd_lock_vma() - Lookup and lock vma corresponding to @address.
  * @mm: mm to search vma in.
  * @address: address that the vma should contain.
  *
- * Should be called without holding mmap_lock. vma should be unlocked after use
- * with unlock_vma().
+ * Should be called without holding mmap_lock.
 *
 * Return: A locked vma containing @address, -ENOENT if no vma is found, or
 * -ENOMEM if anon_vma couldn't be allocated.
 */
-static struct vm_area_struct *lock_vma(struct mm_struct *mm,
+static struct vm_area_struct *uffd_lock_vma(struct mm_struct *mm,
 				       unsigned long address)
 {
 	struct vm_area_struct *vma;
@@ -74,9 +73,8 @@ static struct vm_area_struct *lock_vma(struct mm_struct *mm,
 	vma = lock_vma_under_rcu(mm, address);
 	if (vma) {
 		/*
-		 * lock_vma_under_rcu() only checks anon_vma for private
-		 * anonymous mappings. But we need to ensure it is assigned in
-		 * private file-backed vmas as well.
+		 * We know we're going to need to use anon_vma, so check
+		 * that early.
 		 */
 		if (!(vma->vm_flags & VM_SHARED) && unlikely(!vma->anon_vma))
 			vma_end_read(vma);
@@ -107,7 +105,7 @@ static struct vm_area_struct *uffd_mfill_lock(struct mm_struct *dst_mm,
 {
 	struct vm_area_struct *dst_vma;
 
-	dst_vma = lock_vma(dst_mm, dst_start);
+	dst_vma = uffd_lock_vma(dst_mm, dst_start);
 	if (IS_ERR(dst_vma) || validate_dst_vma(dst_vma, dst_start + len))
 		return dst_vma;
 
@@ -1401,7 +1399,7 @@ static int uffd_move_lock(struct mm_struct *mm,
 	struct vm_area_struct *vma;
 	int err;
 
-	vma = lock_vma(mm, dst_start);
+	vma = uffd_lock_vma(mm, dst_start);
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
@@ -1416,7 +1414,7 @@ static int uffd_move_lock(struct mm_struct *mm,
 	}
 
 	/*
-	 * Using lock_vma() to get src_vma can lead to following deadlock:
+	 * Using uffd_lock_vma() to get src_vma can lead to following deadlock:
 	 *
 	 * Thread1				Thread2
 	 * -------				-------
@@ -1438,7 +1436,7 @@ static int uffd_move_lock(struct mm_struct *mm,
 	err = find_vmas_mm_locked(mm, dst_start, src_start, dst_vmap, src_vmap);
 	if (!err) {
 		/*
-		 * See comment in lock_vma() as to why not using
+		 * See comment in uffd_lock_vma() as to why not using
 		 * vma_start_read() here.
 		 */
 		down_read(&(*dst_vmap)->vm_lock->lock);
--
cgit v1.2.3-70-g09d2
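Taken together, the hunks above give uffd_lock_vma() roughly the shape sketched below. This is a simplified reading of the patch, not the actual function body: error handling is trimmed, and find_vma() stands in for the real find_vma_and_prepare_anon() helper used on the slow path.

#include <linux/mm.h>

/* Simplified sketch of the uffd_lock_vma() fast/slow path split. */
static struct vm_area_struct *uffd_lock_vma_sketch(struct mm_struct *mm,
						   unsigned long address)
{
	struct vm_area_struct *vma;

	/* Fast path: take the per-VMA read lock under RCU, no mmap_lock. */
	vma = lock_vma_under_rcu(mm, address);
	if (vma) {
		/* We will need anon_vma later, so check for it early. */
		if ((vma->vm_flags & VM_SHARED) || vma->anon_vma)
			return vma;
		vma_end_read(vma);
	}

	/*
	 * Slow path: holding mmap_lock keeps vma_start_write() away, so
	 * taking vm_lock directly cannot race with a writer (this is the
	 * reason the last hunk avoids vma_start_read()).
	 */
	mmap_read_lock(mm);
	vma = find_vma(mm, address);	/* stand-in, see note above */
	if (vma)
		down_read(&vma->vm_lock->lock);
	mmap_read_unlock(mm);

	return vma;
}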