author     Linus Torvalds <torvalds@linux-foundation.org>   2023-06-28 10:28:11 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>   2023-06-28 10:28:11 -0700
commit     6e17c6de3ddf3073741d9c91a796ee696914d8a0
tree       2c425707f78642625dbe2c824c7fded2021e3dc7   /mm/mprotect.c
parent     6aeadf7896bff4ca230702daba8788455e6b866e
parent     acc72d59c7509540c27c49625cb4b5a8db1f1a84
Merge tag 'mm-stable-2023-06-24-19-15' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull mm updates from Andrew Morton:
- Yosry Ahmed brought back some cgroup v1 stats in OOM logs
- Yosry has also eliminated cgroup's atomic rstat flushing
- Nhat Pham adds the new cachestat() syscall. It provides userspace
  with the ability to query pagecache status - a similar concept to
  mincore() but more powerful and with improved usability (see the
  usage sketch after this list)
- Mel Gorman provides more optimizations for compaction, reducing the
prevalence of page rescanning
- Lorenzo Stoakes has done some maintenance work on the
  get_user_pages() interface
- Liam Howlett continues with cleanups and maintenance work to the
maple tree code. Peng Zhang also does some work on maple tree
- Johannes Weiner has done some cleanup work on the compaction code
- David Hildenbrand has contributed additional selftests for
get_user_pages()
- Thomas Gleixner has contributed some maintenance and optimization
work for the vmalloc code
- Baolin Wang has provided some compaction cleanups
- SeongJae Park continues maintenance work on the DAMON code
- Huang Ying has done some maintenance on the swap code's usage of
device refcounting
- Christoph Hellwig has some cleanups for the filemap/directio code
- Ryan Roberts provides two patch series which yield some
rationalization of the kernel's access to pte entries - use the
provided APIs rather than open-coding accesses
- Lorenzo Stoakes has some fixes to the interaction between pagecache
and directio access to file mappings
- John Hubbard has a series of fixes to the MM selftesting code
- ZhangPeng continues the folio conversion campaign
- Hugh Dickins has been working on the pagetable handling code, mainly
with a view to reducing the load on the mmap_lock
- Catalin Marinas has reduced the arm64 kmalloc() minimum alignment
from 128 to 8
- Domenico Cerasuolo has improved the zswap reclaim mechanism by
reorganizing the LRU management
- Matthew Wilcox provides some fixups to make gfs2 work better with the
buffer_head code
- Vishal Moola also has done some folio conversion work
- Matthew Wilcox has removed the remnants of the pagevec code - their
  functionality is migrated over to struct folio_batch (a folio_batch
  sketch follows the shortlog below)
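As a rough illustration of the cachestat() interface mentioned above, here is a minimal userspace sketch that goes through syscall(2). It assumes a kernel with this series merged and a libc that does not yet provide a wrapper; the struct layout mirrors the uapi <linux/mman.h> definitions (declared locally here to avoid depending on updated headers), and __NR_cachestat is 451 on x86-64 - other architectures may differ.

```c
/* Hedged sketch, not from the kernel tree: query pagecache state of a file. */
#include <fcntl.h>
#include <stdio.h>
#include <stdint.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef __NR_cachestat
#define __NR_cachestat 451		/* x86-64 value; check your arch */
#endif

/* Local mirror of the uapi structures from <linux/mman.h>. */
struct cachestat_range {
	uint64_t off;			/* byte offset into the file */
	uint64_t len;			/* length of the range in bytes */
};

struct cachestat {
	uint64_t nr_cache;		/* pages present in the page cache */
	uint64_t nr_dirty;		/* dirty pages */
	uint64_t nr_writeback;		/* pages under writeback */
	uint64_t nr_evicted;		/* pages evicted from the cache */
	uint64_t nr_recently_evicted;	/* evicted, but evicted recently */
};

int main(int argc, char **argv)
{
	struct cachestat_range range = { .off = 0 };
	struct cachestat cs;
	struct stat st;
	int fd;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}

	fd = open(argv[1], O_RDONLY);
	if (fd < 0 || fstat(fd, &st)) {
		perror(argv[1]);
		return 1;
	}
	range.len = (uint64_t)st.st_size;	/* query the whole file */

	if (syscall(__NR_cachestat, fd, &range, &cs, 0)) {
		perror("cachestat");
		return 1;
	}

	/* All counters are page counts, not bytes. */
	printf("cache %llu dirty %llu writeback %llu evicted %llu recent %llu\n",
	       (unsigned long long)cs.nr_cache,
	       (unsigned long long)cs.nr_dirty,
	       (unsigned long long)cs.nr_writeback,
	       (unsigned long long)cs.nr_evicted,
	       (unsigned long long)cs.nr_recently_evicted);
	close(fd);
	return 0;
}
```

Unlike mincore(), this works on a file descriptor rather than a mapping, so no mmap() of the file is needed to inspect its pagecache footprint.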
* tag 'mm-stable-2023-06-24-19-15' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (380 commits)
mm/hugetlb: remove hugetlb_set_page_subpool()
mm: nommu: correct the range of mmap_sem_read_lock in task_mem()
hugetlb: revert use of page_cache_next_miss()
Revert "page cache: fix page_cache_next/prev_miss off by one"
mm/vmscan: fix root proactive reclaim unthrottling unbalanced node
mm: memcg: rename and document global_reclaim()
mm: kill [add|del]_page_to_lru_list()
mm: compaction: convert to use a folio in isolate_migratepages_block()
mm: zswap: fix double invalidate with exclusive loads
mm: remove unnecessary pagevec includes
mm: remove references to pagevec
mm: rename invalidate_mapping_pagevec to mapping_try_invalidate
mm: remove struct pagevec
net: convert sunrpc from pagevec to folio_batch
i915: convert i915_gpu_error to use a folio_batch
pagevec: rename fbatch_count()
mm: remove check_move_unevictable_pages()
drm: convert drm_gem_put_pages() to use a folio_batch
i915: convert shmem_sg_free_table() to use a folio_batch
scatterlist: add sg_set_folio()
...
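Several of the shortlog entries above (sunrpc, i915, drm) are mechanical conversions from struct pagevec to struct folio_batch. As a small, hypothetical kernel-style sketch of the pattern that replaces pagevec: walk_mapping() is a made-up example function, while folio_batch_init()/folio_batch_count()/folio_batch_release() and filemap_get_folios() are the existing helpers from <linux/pagevec.h> and <linux/pagemap.h>.

```c
/* Hedged sketch of the folio_batch idiom; not code from this merge. */
#include <linux/pagevec.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/sched.h>

static void walk_mapping(struct address_space *mapping, pgoff_t start,
			 pgoff_t end)
{
	struct folio_batch fbatch;
	unsigned int i;

	folio_batch_init(&fbatch);

	/* Gather up to a batch worth of folios at a time from the mapping. */
	while (filemap_get_folios(mapping, &start, end, &fbatch)) {
		for (i = 0; i < folio_batch_count(&fbatch); i++)
			folio_mark_accessed(fbatch.folios[i]);

		/* Drop the references filemap_get_folios() took on each folio. */
		folio_batch_release(&fbatch);
		cond_resched();
	}
}
```

The batch is a fixed-size on-stack array, so the pattern bounds the number of folio references held at once while still amortising the per-folio locking and refcount costs.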
Diffstat (limited to 'mm/mprotect.c')
-rw-r--r-- | mm/mprotect.c | 87
1 file changed, 22 insertions, 65 deletions
diff --git a/mm/mprotect.c b/mm/mprotect.c
index c59e7561698c..6f658d483704 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -93,22 +93,9 @@ static long change_pte_range(struct mmu_gather *tlb,
 	bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
 
 	tlb_change_page_size(tlb, PAGE_SIZE);
-
-	/*
-	 * Can be called with only the mmap_lock for reading by
-	 * prot_numa so we must check the pmd isn't constantly
-	 * changing from under us from pmd_none to pmd_trans_huge
-	 * and/or the other way around.
-	 */
-	if (pmd_trans_unstable(pmd))
-		return 0;
-
-	/*
-	 * The pmd points to a regular pte so the pmd can't change
-	 * from under us even if the mmap_lock is only hold for
-	 * reading.
-	 */
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+	if (!pte)
+		return -EAGAIN;
 
 	/* Get target node for single threaded private VMAs */
 	if (prot_numa && !(vma->vm_flags & VM_SHARED) &&
@@ -118,7 +105,7 @@ static long change_pte_range(struct mmu_gather *tlb,
 	flush_tlb_batched_pending(vma->vm_mm);
 	arch_enter_lazy_mmu_mode();
 	do {
-		oldpte = *pte;
+		oldpte = ptep_get(pte);
 		if (pte_present(oldpte)) {
 			pte_t ptent;
 
@@ -302,31 +289,6 @@ static long change_pte_range(struct mmu_gather *tlb,
 }
 
 /*
- * Used when setting automatic NUMA hinting protection where it is
- * critical that a numa hinting PMD is not confused with a bad PMD.
- */
-static inline int pmd_none_or_clear_bad_unless_trans_huge(pmd_t *pmd)
-{
-	pmd_t pmdval = pmdp_get_lockless(pmd);
-
-	/* See pmd_none_or_trans_huge_or_clear_bad for info on barrier */
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-	barrier();
-#endif
-
-	if (pmd_none(pmdval))
-		return 1;
-	if (pmd_trans_huge(pmdval))
-		return 0;
-	if (unlikely(pmd_bad(pmdval))) {
-		pmd_clear_bad(pmd);
-		return 1;
-	}
-
-	return 0;
-}
-
-/*
  * Return true if we want to split THPs into PTE mappings in change
  * protection procedure, false otherwise.
  */
@@ -403,7 +365,8 @@ static inline long change_pmd_range(struct mmu_gather *tlb,
 	pmd = pmd_offset(pud, addr);
 	do {
 		long ret;
-
+		pmd_t _pmd;
+again:
 		next = pmd_addr_end(addr, end);
 
 		ret = change_pmd_prepare(vma, pmd, cp_flags);
@@ -411,16 +374,8 @@ static inline long change_pmd_range(struct mmu_gather *tlb,
 			pages = ret;
 			break;
 		}
-		/*
-		 * Automatic NUMA balancing walks the tables with mmap_lock
-		 * held for read. It's possible a parallel update to occur
-		 * between pmd_trans_huge() and a pmd_none_or_clear_bad()
-		 * check leading to a false positive and clearing.
-		 * Hence, it's necessary to atomically read the PMD value
-		 * for all the checks.
-		 */
-		if (!is_swap_pmd(*pmd) && !pmd_devmap(*pmd) &&
-		     pmd_none_or_clear_bad_unless_trans_huge(pmd))
+
+		if (pmd_none(*pmd))
 			goto next;
 
 		/* invoke the mmu notifier if the pmd is populated */
@@ -431,7 +386,8 @@ static inline long change_pmd_range(struct mmu_gather *tlb,
 			mmu_notifier_invalidate_range_start(&range);
 		}
 
-		if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) {
+		_pmd = pmdp_get_lockless(pmd);
+		if (is_swap_pmd(_pmd) || pmd_trans_huge(_pmd) || pmd_devmap(_pmd)) {
 			if ((next - addr != HPAGE_PMD_SIZE) ||
 			    pgtable_split_needed(vma, cp_flags)) {
 				__split_huge_pmd(vma, pmd, addr, false, NULL);
@@ -446,15 +402,10 @@ static inline long change_pmd_range(struct mmu_gather *tlb,
 					break;
 				}
 			} else {
-				/*
-				 * change_huge_pmd() does not defer TLB flushes,
-				 * so no need to propagate the tlb argument.
-				 */
-				int nr_ptes = change_huge_pmd(tlb, vma, pmd,
+				ret = change_huge_pmd(tlb, vma, pmd,
 						addr, newprot, cp_flags);
-
-				if (nr_ptes) {
-					if (nr_ptes == HPAGE_PMD_NR) {
+				if (ret) {
+					if (ret == HPAGE_PMD_NR) {
 						pages += HPAGE_PMD_NR;
 						nr_huge_updates++;
 					}
@@ -465,8 +416,12 @@ static inline long change_pmd_range(struct mmu_gather *tlb,
 			}
 			/* fall through, the trans huge pmd just split */
 		}
-		pages += change_pte_range(tlb, vma, pmd, addr, next,
-					  newprot, cp_flags);
+
+		ret = change_pte_range(tlb, vma, pmd, addr, next, newprot,
+				       cp_flags);
+		if (ret < 0)
+			goto again;
+		pages += ret;
 next:
 		cond_resched();
 	} while (pmd++, addr = next, addr != end);
@@ -589,7 +544,8 @@ long change_protection(struct mmu_gather *tlb,
 static int prot_none_pte_entry(pte_t *pte, unsigned long addr,
 			       unsigned long next, struct mm_walk *walk)
 {
-	return pfn_modify_allowed(pte_pfn(*pte), *(pgprot_t *)(walk->private)) ?
+	return pfn_modify_allowed(pte_pfn(ptep_get(pte)),
+				  *(pgprot_t *)(walk->private)) ?
 		0 : -EACCES;
 }
 
@@ -597,7 +553,8 @@ static int prot_none_hugetlb_entry(pte_t *pte, unsigned long hmask,
 				   unsigned long addr, unsigned long next,
 				   struct mm_walk *walk)
 {
-	return pfn_modify_allowed(pte_pfn(*pte), *(pgprot_t *)(walk->private)) ?
+	return pfn_modify_allowed(pte_pfn(ptep_get(pte)),
+				  *(pgprot_t *)(walk->private)) ?
 		0 : -EACCES;
 }
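The mprotect.c hunks above combine two of the themes from the pull message: Ryan Roberts' conversion of open-coded `*pte` reads to ptep_get(), and Hugh Dickins' pagetable work that lets pte_offset_map_lock() fail so the caller re-checks the PMD and retries. As a distilled, hypothetical sketch of that convention (count_present_ptes() is a made-up name, not code from the tree):

```c
/* Hedged sketch of the post-6.5 pte-walk idiom; assumes kernel context. */
#include <linux/mm.h>
#include <linux/pgtable.h>

static long count_present_ptes(struct vm_area_struct *vma, pmd_t *pmd,
			       unsigned long addr, unsigned long end)
{
	long pages = 0;
	spinlock_t *ptl;
	pte_t *pte;

	/* May fail if the pmd changed under us (e.g. a racing THP collapse). */
	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	if (!pte)
		return -EAGAIN;	/* caller re-inspects the pmd and retries */

	do {
		pte_t entry = ptep_get(pte);	/* no open-coded "*pte" reads */

		if (pte_present(entry))
			pages++;
	} while (pte++, addr += PAGE_SIZE, addr != end);

	pte_unmap_unlock(pte - 1, ptl);
	return pages;
}
```

The split of responsibilities mirrors the diff: the PTE-level helper reports -EAGAIN instead of looping itself, and the PMD-level caller owns the retry (the `again:` label in change_pmd_range() above).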