From 5beaee54a324ba1fe307e341ec825d5d099f4091 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)"
Date: Tue, 26 Mar 2024 20:28:22 +0000
Subject: mm: add is_huge_zero_folio()

This is the folio equivalent of is_huge_zero_page().  It doesn't add any
efficiency, but it does prevent the caller from passing a tail page and
getting confused when the predicate returns false.

Link: https://lkml.kernel.org/r/20240326202833.523759-3-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle)
Reviewed-by: David Hildenbrand
Signed-off-by: Andrew Morton
---
 mm/userfaultfd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'mm/userfaultfd.c')

diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 3c3539c573e7..a0ec14553fbe 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -1664,7 +1664,7 @@ ssize_t move_pages(struct userfaultfd_ctx *ctx, unsigned long dst_start,
 			    !pmd_none(dst_pmdval)) {
 				struct folio *folio = pfn_folio(pmd_pfn(*src_pmd));
 
-				if (!folio || (!is_huge_zero_page(&folio->page) &&
+				if (!folio || (!is_huge_zero_folio(folio) &&
 					       !PageAnonExclusive(&folio->page))) {
 					spin_unlock(ptl);
 					err = -EBUSY;
--
cgit v1.2.3-70-g09d2

From e06d03d5590ae1c257b8aa2cfbfe6765e0755c14 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)"
Date: Tue, 26 Mar 2024 20:28:23 +0000
Subject: mm: add pmd_folio()

Convert directly from a pmd to a folio without going through another
representation first.  For now this is just a slightly shorter way to
write it, but it might end up being more efficient later.

Link: https://lkml.kernel.org/r/20240326202833.523759-4-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle)
Reviewed-by: David Hildenbrand
Signed-off-by: Andrew Morton
---
 include/linux/pgtable.h | 2 ++
 mm/huge_memory.c        | 6 +++---
 mm/madvise.c            | 2 +-
 mm/mempolicy.c          | 2 +-
 mm/mlock.c              | 2 +-
 mm/userfaultfd.c        | 2 +-
 6 files changed, 9 insertions(+), 7 deletions(-)

(limited to 'mm/userfaultfd.c')

diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 600e17d03659..09c85c7bf9c2 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -50,6 +50,8 @@
 #define pmd_pgtable(pmd) pmd_page(pmd)
 #endif
 
+#define pmd_folio(pmd) page_folio(pmd_page(pmd))
+
 /*
  * A page table page can be thought of an array like this: pXd_t[PTRS_PER_PxD]
  *
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 5c043c7b5062..712263e3b1f6 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1816,7 +1816,7 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		goto out;
 	}
 
-	folio = pfn_folio(pmd_pfn(orig_pmd));
+	folio = pmd_folio(orig_pmd);
 	/*
 	 * If other processes are mapping this folio, we couldn't discard
 	 * the folio unless they all do MADV_FREE so let's skip the folio.
@@ -2086,7 +2086,7 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		if (pmd_protnone(*pmd))
 			goto unlock;
 
-		folio = page_folio(pmd_page(*pmd));
+		folio = pmd_folio(*pmd);
 		toptier = node_is_toptier(folio_nid(folio));
 		/*
 		 * Skip scanning top tier node if normal numa
@@ -2663,7 +2663,7 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 	 * It's safe to call pmd_page when folio is set because it's
 	 * guaranteed that pmd is present.
 	 */
-	if (folio && folio != page_folio(pmd_page(*pmd)))
+	if (folio && folio != pmd_folio(*pmd))
 		goto out;
 	__split_huge_pmd_locked(vma, pmd, range.start, freeze);
 }
diff --git a/mm/madvise.c b/mm/madvise.c
index 7625830d6ae9..1f77a51baaac 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -363,7 +363,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
 			goto huge_unlock;
 		}
 
-		folio = pfn_folio(pmd_pfn(orig_pmd));
+		folio = pmd_folio(orig_pmd);
 
 		/* Do not interfere with other mappings of this folio */
 		if (folio_likely_mapped_shared(folio))
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 5743028a63a5..aec756ae5637 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -509,7 +509,7 @@ static void queue_folios_pmd(pmd_t *pmd, struct mm_walk *walk)
 		qp->nr_failed++;
 		return;
 	}
-	folio = pfn_folio(pmd_pfn(*pmd));
+	folio = pmd_folio(*pmd);
 	if (is_huge_zero_folio(folio)) {
 		walk->action = ACTION_CONTINUE;
 		return;
diff --git a/mm/mlock.c b/mm/mlock.c
index 1ed2f2ab37cd..30b51cdea89d 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -378,7 +378,7 @@ static int mlock_pte_range(pmd_t *pmd, unsigned long addr,
 			goto out;
 		if (is_huge_zero_pmd(*pmd))
 			goto out;
-		folio = page_folio(pmd_page(*pmd));
+		folio = pmd_folio(*pmd);
 		if (vma->vm_flags & VM_LOCKED)
 			mlock_folio(folio);
 		else
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index a0ec14553fbe..b70618e8dcd2 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -1662,7 +1662,7 @@ ssize_t move_pages(struct userfaultfd_ctx *ctx, unsigned long dst_start,
 			/* Check if we can move the pmd without splitting it. */
 			if (move_splits_huge_pmd(dst_addr, src_addr, src_start + len) ||
 			    !pmd_none(dst_pmdval)) {
-				struct folio *folio = pfn_folio(pmd_pfn(*src_pmd));
+				struct folio *folio = pmd_folio(*src_pmd);
 
 				if (!folio || (!is_huge_zero_folio(folio) &&
 					       !PageAnonExclusive(&folio->page))) {
--
cgit v1.2.3-70-g09d2
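For reference, the conversion pattern that the two patches above introduce can be sketched as below. The fragment is illustrative only and is not part of any commit: the wrapper function is hypothetical, while pmd_folio(), is_huge_zero_folio(), pfn_folio() and PageAnonExclusive() are the kernel helpers that appear in the diffs.

#include <linux/mm.h>
#include <linux/pgtable.h>
#include <linux/huge_mm.h>

/* Hypothetical helper, for illustration only: does this huge pmd map an
 * exclusive anonymous folio?
 */
static bool pmd_maps_exclusive_anon(pmd_t pmd)
{
	/* Old pattern: pfn_folio(pmd_pfn(pmd)), then test the head page
	 * with is_huge_zero_page(&folio->page).
	 */

	/* New pattern: convert the pmd to a folio in one step ... */
	struct folio *folio = pmd_folio(pmd);

	/* ... and test the folio itself, so a tail page can never be
	 * handed to the predicate by mistake.
	 */
	if (is_huge_zero_folio(folio))
		return false;

	return PageAnonExclusive(&folio->page);
}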
From b5ba3a64279355731252098d92550e12bf9649e4 Mon Sep 17 00:00:00 2001
From: Suren Baghdasaryan
Date: Sun, 14 Apr 2024 19:08:21 -0700
Subject: userfaultfd: remove WRITE_ONCE when setting folio->index during UFFDIO_MOVE

When folio is moved with UFFDIO_MOVE it gets locked before the rmap and
index are modified.  Due to the folio lock being already held, WRITE_ONCE()
is not needed when setting the folio index.  Remove it.

Link: https://lkml.kernel.org/r/20240415020821.1152951-1-surenb@google.com
Reported-by: Matthew Wilcox
Signed-off-by: Suren Baghdasaryan
Reviewed-by: David Hildenbrand
Reviewed-by: Peter Xu
Cc: Lokesh Gidra
Signed-off-by: Andrew Morton
---
 mm/huge_memory.c | 2 +-
 mm/userfaultfd.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'mm/userfaultfd.c')

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 264e09043f09..31b6bbffea52 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2200,7 +2200,7 @@ int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pm
 	}
 
 	folio_move_anon_rmap(src_folio, dst_vma);
-	WRITE_ONCE(src_folio->index, linear_page_index(dst_vma, dst_addr));
+	src_folio->index = linear_page_index(dst_vma, dst_addr);
 
 	_dst_pmd = mk_huge_pmd(&src_folio->page, dst_vma->vm_page_prot);
 	/* Follow mremap() behavior and treat the entry dirty after the move */
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index b70618e8dcd2..575ccf90325a 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -1026,7 +1026,7 @@ static int move_present_pte(struct mm_struct *mm,
 	}
 
 	folio_move_anon_rmap(src_folio, dst_vma);
-	WRITE_ONCE(src_folio->index, linear_page_index(dst_vma, dst_addr));
+	src_folio->index = linear_page_index(dst_vma, dst_addr);
 
 	orig_dst_pte = mk_pte(&src_folio->page, dst_vma->vm_page_prot);
 	/* Follow mremap() behavior and treat the entry dirty after the move */
--
cgit v1.2.3-70-g09d2

From a568b4126b20ebbc01914e12d083379720911799 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)"
Date: Tue, 23 Apr 2024 23:55:36 +0100
Subject: userfault; expand folio use in mfill_atomic_install_pte()

Call page_folio() a little earlier so we can use folio_mapping()
instead of page_mapping(), saving a call to compound_head().

Link: https://lkml.kernel.org/r/20240423225552.4113447-6-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle)
Reviewed-by: David Hildenbrand
Cc: Eric Biggers
Cc: Sidhartha Kumar
Signed-off-by: Andrew Morton
---
 mm/userfaultfd.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'mm/userfaultfd.c')

diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 575ccf90325a..8b1005ef9dfa 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -180,9 +180,9 @@ int mfill_atomic_install_pte(pmd_t *dst_pmd,
 	pte_t _dst_pte, *dst_pte;
 	bool writable = dst_vma->vm_flags & VM_WRITE;
 	bool vm_shared = dst_vma->vm_flags & VM_SHARED;
-	bool page_in_cache = page_mapping(page);
 	spinlock_t *ptl;
-	struct folio *folio;
+	struct folio *folio = page_folio(page);
+	bool page_in_cache = folio_mapping(folio);
 
 	_dst_pte = mk_pte(page, dst_vma->vm_page_prot);
 	_dst_pte = pte_mkdirty(_dst_pte);
@@ -212,7 +212,6 @@ int mfill_atomic_install_pte(pmd_t *dst_pmd,
 	if (!pte_none_mostly(ptep_get(dst_pte)))
 		goto out_unlock;
 
-	folio = page_folio(page);
 	if (page_in_cache) {
 		/* Usually, cache pages are already added to LRU */
 		if (newly_allocated)
--
cgit v1.2.3-70-g09d2
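The point of the patch above can be shown with a small sketch. Again, this is not kernel code and the helper name is made up; it only illustrates that resolving the folio once with page_folio() lets later checks use folio_mapping() directly instead of page_mapping(), which resolves the head page on every call.

#include <linux/mm.h>
#include <linux/pagemap.h>

/* Hypothetical helper, for illustration only: is this page in the page cache? */
static bool page_is_in_cache(struct page *page)
{
	/* Old pattern: return page_mapping(page) != NULL;
	 * (page_mapping() calls compound_head() internally each time.)
	 */

	/* New pattern: resolve the folio once, then reuse it. */
	struct folio *folio = page_folio(page);

	return folio_mapping(folio) != NULL;
}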
From 73b4a0cd8243709870701349611722ba3c351815 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)"
Date: Fri, 26 Apr 2024 15:45:02 +0100
Subject: mm: fix some minor per-VMA lock issues in userfaultfd

Rename lock_vma() to uffd_lock_vma() because it really is uffd specific.
Remove comment referencing unlock_vma() which doesn't exist.
Fix the comment about lock_vma_under_rcu() which I just made incorrect.

Link: https://lkml.kernel.org/r/20240426144506.1290619-4-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle)
Reviewed-by: Suren Baghdasaryan
Cc: David Hildenbrand
Cc: Jann Horn
Signed-off-by: Andrew Morton
---
 mm/userfaultfd.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

(limited to 'mm/userfaultfd.c')

diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 8b1005ef9dfa..d9e82ae68244 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -56,17 +56,16 @@ struct vm_area_struct *find_vma_and_prepare_anon(struct mm_struct *mm,
 
 #ifdef CONFIG_PER_VMA_LOCK
 /*
- * lock_vma() - Lookup and lock vma corresponding to @address.
+ * uffd_lock_vma() - Lookup and lock vma corresponding to @address.
  * @mm: mm to search vma in.
  * @address: address that the vma should contain.
  *
- * Should be called without holding mmap_lock. vma should be unlocked after use
- * with unlock_vma().
+ * Should be called without holding mmap_lock.
 *
 * Return: A locked vma containing @address, -ENOENT if no vma is found, or
 * -ENOMEM if anon_vma couldn't be allocated.
 */
-static struct vm_area_struct *lock_vma(struct mm_struct *mm,
+static struct vm_area_struct *uffd_lock_vma(struct mm_struct *mm,
 				       unsigned long address)
 {
 	struct vm_area_struct *vma;
@@ -74,9 +73,8 @@ static struct vm_area_struct *lock_vma(struct mm_struct *mm,
 	vma = lock_vma_under_rcu(mm, address);
 	if (vma) {
 		/*
-		 * lock_vma_under_rcu() only checks anon_vma for private
-		 * anonymous mappings. But we need to ensure it is assigned in
-		 * private file-backed vmas as well.
+		 * We know we're going to need to use anon_vma, so check
+		 * that early.
 		 */
 		if (!(vma->vm_flags & VM_SHARED) && unlikely(!vma->anon_vma))
 			vma_end_read(vma);
@@ -107,7 +105,7 @@ static struct vm_area_struct *uffd_mfill_lock(struct mm_struct *dst_mm,
 {
 	struct vm_area_struct *dst_vma;
 
-	dst_vma = lock_vma(dst_mm, dst_start);
+	dst_vma = uffd_lock_vma(dst_mm, dst_start);
 	if (IS_ERR(dst_vma) || validate_dst_vma(dst_vma, dst_start + len))
 		return dst_vma;
 
@@ -1401,7 +1399,7 @@ static int uffd_move_lock(struct mm_struct *mm,
 	struct vm_area_struct *vma;
 	int err;
 
-	vma = lock_vma(mm, dst_start);
+	vma = uffd_lock_vma(mm, dst_start);
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
@@ -1416,7 +1414,7 @@ static int uffd_move_lock(struct mm_struct *mm,
 	}
 
 	/*
-	 * Using lock_vma() to get src_vma can lead to following deadlock:
+	 * Using uffd_lock_vma() to get src_vma can lead to following deadlock:
 	 *
 	 * Thread1				Thread2
 	 * -------				-------
@@ -1438,7 +1436,7 @@ static int uffd_move_lock(struct mm_struct *mm,
 	err = find_vmas_mm_locked(mm, dst_start, src_start, dst_vmap, src_vmap);
 	if (!err) {
 		/*
-		 * See comment in lock_vma() as to why not using
+		 * See comment in uffd_lock_vma() as to why not using
 		 * vma_start_read() here.
 		 */
 		down_read(&(*dst_vmap)->vm_lock->lock);
--
cgit v1.2.3-70-g09d2
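Taken together, the hunks above give uffd_lock_vma() roughly the shape sketched below. This is a simplified reading of the patch, not the actual function body: error handling is trimmed, and find_vma() stands in for the real find_vma_and_prepare_anon() helper used on the slow path.

#include <linux/mm.h>

/* Simplified sketch of the uffd_lock_vma() fast/slow path split. */
static struct vm_area_struct *uffd_lock_vma_sketch(struct mm_struct *mm,
						   unsigned long address)
{
	struct vm_area_struct *vma;

	/* Fast path: take the per-VMA read lock under RCU, no mmap_lock. */
	vma = lock_vma_under_rcu(mm, address);
	if (vma) {
		/* We will need anon_vma later, so check for it early. */
		if ((vma->vm_flags & VM_SHARED) || vma->anon_vma)
			return vma;
		vma_end_read(vma);
	}

	/*
	 * Slow path: holding mmap_lock keeps vma_start_write() away, so
	 * taking vm_lock directly cannot race with a writer (this is the
	 * reason the last hunk avoids vma_start_read()).
	 */
	mmap_read_lock(mm);
	vma = find_vma(mm, address);	/* stand-in, see note above */
	if (vma)
		down_read(&vma->vm_lock->lock);
	mmap_read_unlock(mm);

	return vma;
}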