Diffstat (limited to 'mm/rmap.c')
-rw-r--r--	mm/rmap.c	169
1 file changed, 87 insertions, 82 deletions
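The largest mechanically repetitive change below is in try_to_unmap_one(): the scattered "ret = false; page_vma_mapped_walk_done(&pvmw); break;" exit sequences are folded into two shared labels at the bottom of the page-walk loop, and TTU_SPLIT_HUGE_PMD handling moves from an up-front split_huge_pmd_address() call into the loop itself. The fragment below is only a condensed control-flow sketch assembled from the hunks that follow; it is not the patched code, and the PTE-level unmap work is elided behind a comment:

	/*
	 * Condensed sketch of the reworked try_to_unmap_one() page-walk loop.
	 * Only the new exit/restart structure is shown.
	 */
	while (page_vma_mapped_walk(&pvmw)) {
		/* mlocked VMA: the folio must stay resident */
		if (!(flags & TTU_IGNORE_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
			if (!folio_test_large(folio))
				mlock_vma_folio(folio, vma);
			goto walk_abort;
		}

		/* PMD-mapped THP: unmap it directly or split it under the held PTL */
		if (!pvmw.pte) {
			if (unmap_huge_pmd_locked(vma, pvmw.address, pvmw.pmd, folio))
				goto walk_done;
			if (flags & TTU_SPLIT_HUGE_PMD) {
				split_huge_pmd_locked(vma, pvmw.address, pvmw.pmd,
						      false, folio);
				flags &= ~TTU_SPLIT_HUGE_PMD;
				page_vma_mapped_walk_restart(&pvmw);
				continue;
			}
		}

		/* ... PTE-level unmap work; failure paths use "goto walk_abort" ... */
		continue;

	walk_abort:
		ret = false;
	walk_done:
		page_vma_mapped_walk_done(&pvmw);
		break;
	}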
diff --git a/mm/rmap.c b/mm/rmap.c
index e8fc5ecb59b2..8616308610b9 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1269,33 +1269,42 @@ static void __page_check_anon_rmap(struct folio *folio, struct page *page,
 		       page);
 }
 
+static void __folio_mod_stat(struct folio *folio, int nr, int nr_pmdmapped)
+{
+	int idx;
+
+	if (nr) {
+		idx = folio_test_anon(folio) ? NR_ANON_MAPPED : NR_FILE_MAPPED;
+		__lruvec_stat_mod_folio(folio, idx, nr);
+	}
+	if (nr_pmdmapped) {
+		if (folio_test_anon(folio)) {
+			idx = NR_ANON_THPS;
+			__lruvec_stat_mod_folio(folio, idx, nr_pmdmapped);
+		} else {
+			/* NR_*_PMDMAPPED are not maintained per-memcg */
+			idx = folio_test_swapbacked(folio) ?
+				NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED;
+			__mod_node_page_state(folio_pgdat(folio), idx,
+					      nr_pmdmapped);
+		}
+	}
+}
+
 static __always_inline void __folio_add_anon_rmap(struct folio *folio,
 		struct page *page, int nr_pages, struct vm_area_struct *vma,
 		unsigned long address, rmap_t flags, enum rmap_level level)
 {
 	int i, nr, nr_pmdmapped = 0;
 
+	VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio);
+
 	nr = __folio_add_rmap(folio, page, nr_pages, level, &nr_pmdmapped);
-	if (nr_pmdmapped)
-		__lruvec_stat_mod_folio(folio, NR_ANON_THPS, nr_pmdmapped);
-	if (nr)
-		__lruvec_stat_mod_folio(folio, NR_ANON_MAPPED, nr);
 
-	if (unlikely(!folio_test_anon(folio))) {
-		VM_WARN_ON_FOLIO(!folio_test_locked(folio), folio);
-		/*
-		 * For a PTE-mapped large folio, we only know that the single
-		 * PTE is exclusive. Further, __folio_set_anon() might not get
-		 * folio->index right when not given the address of the head
-		 * page.
-		 */
-		VM_WARN_ON_FOLIO(folio_test_large(folio) &&
-				 level != RMAP_LEVEL_PMD, folio);
-		__folio_set_anon(folio, vma, address,
-				 !!(flags & RMAP_EXCLUSIVE));
-	} else if (likely(!folio_test_ksm(folio))) {
+	if (likely(!folio_test_ksm(folio)))
 		__page_check_anon_rmap(folio, page, vma, address);
-	}
+
+	__folio_mod_stat(folio, nr, nr_pmdmapped);
 
 	if (flags & RMAP_EXCLUSIVE) {
 		switch (level) {
@@ -1381,29 +1390,37 @@ void folio_add_anon_rmap_pmd(struct folio *folio, struct page *page,
  * @folio:	The folio to add the mapping to.
  * @vma:	the vm area in which the mapping is added
  * @address:	the user virtual address mapped
+ * @flags:	The rmap flags
  *
  * Like folio_add_anon_rmap_*() but must only be called on *new* folios.
  * This means the inc-and-test can be bypassed.
- * The folio does not have to be locked.
+ * The folio doesn't necessarily need to be locked while it's exclusive
+ * unless two threads map it concurrently. However, the folio must be
+ * locked if it's shared.
  *
- * If the folio is pmd-mappable, it is accounted as a THP. As the folio
- * is new, it's assumed to be mapped exclusively by a single process.
+ * If the folio is pmd-mappable, it is accounted as a THP.
  */
 void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
-		unsigned long address)
+		unsigned long address, rmap_t flags)
 {
-	int nr = folio_nr_pages(folio);
+	const int nr = folio_nr_pages(folio);
+	const bool exclusive = flags & RMAP_EXCLUSIVE;
+	int nr_pmdmapped = 0;
 
 	VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio);
+	VM_WARN_ON_FOLIO(!exclusive && !folio_test_locked(folio), folio);
 	VM_BUG_ON_VMA(address < vma->vm_start ||
 			address + (nr << PAGE_SHIFT) > vma->vm_end, vma);
-	__folio_set_swapbacked(folio);
-	__folio_set_anon(folio, vma, address, true);
+
+	if (!folio_test_swapbacked(folio))
+		__folio_set_swapbacked(folio);
+	__folio_set_anon(folio, vma, address, exclusive);
 
 	if (likely(!folio_test_large(folio))) {
 		/* increment count (starts at -1) */
 		atomic_set(&folio->_mapcount, 0);
-		SetPageAnonExclusive(&folio->page);
+		if (exclusive)
+			SetPageAnonExclusive(&folio->page);
 	} else if (!folio_test_pmd_mappable(folio)) {
 		int i;
 
@@ -1412,7 +1429,8 @@ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
 
 			/* increment count (starts at -1) */
 			atomic_set(&page->_mapcount, 0);
-			SetPageAnonExclusive(page);
+			if (exclusive)
+				SetPageAnonExclusive(page);
 		}
 
 		/* increment count (starts at -1) */
@@ -1424,28 +1442,24 @@ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
 		/* increment count (starts at -1) */
 		atomic_set(&folio->_large_mapcount, 0);
 		atomic_set(&folio->_nr_pages_mapped, ENTIRELY_MAPPED);
-		SetPageAnonExclusive(&folio->page);
-		__lruvec_stat_mod_folio(folio, NR_ANON_THPS, nr);
+		if (exclusive)
+			SetPageAnonExclusive(&folio->page);
+		nr_pmdmapped = nr;
 	}
 
-	__lruvec_stat_mod_folio(folio, NR_ANON_MAPPED, nr);
+	__folio_mod_stat(folio, nr, nr_pmdmapped);
 }
 
 static __always_inline void __folio_add_file_rmap(struct folio *folio,
 		struct page *page, int nr_pages, struct vm_area_struct *vma,
 		enum rmap_level level)
 {
-	pg_data_t *pgdat = folio_pgdat(folio);
 	int nr, nr_pmdmapped = 0;
 
 	VM_WARN_ON_FOLIO(folio_test_anon(folio), folio);
 
 	nr = __folio_add_rmap(folio, page, nr_pages, level, &nr_pmdmapped);
-	if (nr_pmdmapped)
-		__mod_node_page_state(pgdat, folio_test_swapbacked(folio) ?
-			NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED, nr_pmdmapped);
-	if (nr)
-		__lruvec_stat_mod_folio(folio, NR_FILE_MAPPED, nr);
+	__folio_mod_stat(folio, nr, nr_pmdmapped);
 
 	/* See comments in folio_add_anon_rmap_*() */
 	if (!folio_test_large(folio))
@@ -1494,10 +1508,8 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
 		enum rmap_level level)
 {
 	atomic_t *mapped = &folio->_nr_pages_mapped;
-	pg_data_t *pgdat = folio_pgdat(folio);
 	int last, nr = 0, nr_pmdmapped = 0;
 	bool partially_mapped = false;
-	enum node_stat_item idx;
 
 	__folio_rmap_sanity_checks(folio, page, nr_pages, level);
 
@@ -1541,20 +1553,7 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
 		break;
 	}
 
-	if (nr_pmdmapped) {
-		/* NR_{FILE/SHMEM}_PMDMAPPED are not maintained per-memcg */
-		if (folio_test_anon(folio))
-			__lruvec_stat_mod_folio(folio, NR_ANON_THPS, -nr_pmdmapped);
-		else
-			__mod_node_page_state(pgdat,
-					folio_test_swapbacked(folio) ?
-					NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED,
-					-nr_pmdmapped);
-	}
 	if (nr) {
-		idx = folio_test_anon(folio) ? NR_ANON_MAPPED : NR_FILE_MAPPED;
-		__lruvec_stat_mod_folio(folio, idx, -nr);
-
 		/*
 		 * Queue anon large folio for deferred split if at least one
 		 * page of the folio is unmapped and at least one page
@@ -1566,6 +1565,7 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
 		    list_empty(&folio->_deferred_list))
 			deferred_split_folio(folio);
 	}
+	__folio_mod_stat(folio, -nr, -nr_pmdmapped);
 
 	/*
 	 * It would be tidy to reset folio_test_anon mapping when fully
@@ -1640,9 +1640,6 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 	if (flags & TTU_SYNC)
 		pvmw.flags = PVMW_SYNC;
 
-	if (flags & TTU_SPLIT_HUGE_PMD)
-		split_huge_pmd_address(vma, address, false, folio);
-
 	/*
 	 * For THP, we have to assume the worse case ie pmd for invalidation.
	 * For hugetlb, it could be much worse if we need to do pud
@@ -1668,9 +1665,6 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 	mmu_notifier_invalidate_range_start(&range);
 
 	while (page_vma_mapped_walk(&pvmw)) {
-		/* Unexpected PMD-mapped THP? */
-		VM_BUG_ON_FOLIO(!pvmw.pte, folio);
-
 		/*
 		 * If the folio is in an mlock()d vma, we must not swap it out.
 		 */
@@ -1679,11 +1673,30 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 			/* Restore the mlock which got missed */
 			if (!folio_test_large(folio))
 				mlock_vma_folio(folio, vma);
-			page_vma_mapped_walk_done(&pvmw);
-			ret = false;
-			break;
+			goto walk_abort;
+		}
+
+		if (!pvmw.pte) {
+			if (unmap_huge_pmd_locked(vma, pvmw.address, pvmw.pmd,
+						  folio))
+				goto walk_done;
+
+			if (flags & TTU_SPLIT_HUGE_PMD) {
+				/*
+				 * We temporarily have to drop the PTL and
+				 * restart so we can process the PTE-mapped THP.
+				 */
+				split_huge_pmd_locked(vma, pvmw.address,
+						      pvmw.pmd, false, folio);
+				flags &= ~TTU_SPLIT_HUGE_PMD;
+				page_vma_mapped_walk_restart(&pvmw);
+				continue;
+			}
 		}
 
+		/* Unexpected PMD-mapped THP? */
+		VM_BUG_ON_FOLIO(!pvmw.pte, folio);
+
 		pfn = pte_pfn(ptep_get(pvmw.pte));
 		subpage = folio_page(folio, pfn - folio_pfn(folio));
 		address = pvmw.address;
@@ -1719,11 +1732,8 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 			 */
 			if (!anon) {
 				VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
-				if (!hugetlb_vma_trylock_write(vma)) {
-					page_vma_mapped_walk_done(&pvmw);
-					ret = false;
-					break;
-				}
+				if (!hugetlb_vma_trylock_write(vma))
+					goto walk_abort;
 				if (huge_pmd_unshare(mm, vma, address, pvmw.pte)) {
 					hugetlb_vma_unlock_write(vma);
 					flush_tlb_range(vma,
@@ -1738,8 +1748,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 					 * actual page and drop map count
 					 * to zero.
 					 */
-					page_vma_mapped_walk_done(&pvmw);
-					break;
+					goto walk_done;
 				}
 				hugetlb_vma_unlock_write(vma);
 			}
@@ -1811,9 +1820,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 			if (unlikely(folio_test_swapbacked(folio) !=
 					folio_test_swapcache(folio))) {
 				WARN_ON_ONCE(1);
-				ret = false;
-				page_vma_mapped_walk_done(&pvmw);
-				break;
+				goto walk_abort;
 			}
 
 			/* MADV_FREE page check */
@@ -1852,23 +1859,17 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 				 */
 				set_pte_at(mm, address, pvmw.pte, pteval);
 				folio_set_swapbacked(folio);
-				ret = false;
-				page_vma_mapped_walk_done(&pvmw);
-				break;
+				goto walk_abort;
 			}
 
 			if (swap_duplicate(entry) < 0) {
 				set_pte_at(mm, address, pvmw.pte, pteval);
-				ret = false;
-				page_vma_mapped_walk_done(&pvmw);
-				break;
+				goto walk_abort;
 			}
 			if (arch_unmap_one(mm, vma, address, pteval) < 0) {
 				swap_free(entry);
 				set_pte_at(mm, address, pvmw.pte, pteval);
-				ret = false;
-				page_vma_mapped_walk_done(&pvmw);
-				break;
+				goto walk_abort;
 			}
 
 			/* See folio_try_share_anon_rmap(): clear PTE first. */
@@ -1876,9 +1877,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 			    folio_try_share_anon_rmap_pte(folio, subpage)) {
 				swap_free(entry);
 				set_pte_at(mm, address, pvmw.pte, pteval);
-				ret = false;
-				page_vma_mapped_walk_done(&pvmw);
-				break;
+				goto walk_abort;
 			}
 			if (list_empty(&mm->mmlist)) {
 				spin_lock(&mmlist_lock);
@@ -1918,6 +1917,12 @@ discard:
 		if (vma->vm_flags & VM_LOCKED)
 			mlock_drain_local();
 		folio_put(folio);
+		continue;
+walk_abort:
+		ret = false;
+walk_done:
+		page_vma_mapped_walk_done(&pvmw);
+		break;
 	}
 
 	mmu_notifier_invalidate_range_end(&range);
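Note on the caller-visible change above: folio_add_new_anon_rmap() now takes an rmap_t flags argument. A caller passes RMAP_EXCLUSIVE when the new folio is mapped exclusively; for a shared folio it passes RMAP_NONE and must hold the folio lock, which the new VM_WARN_ON_FOLIO() enforces. The snippet below is a hypothetical caller sketch, not part of this patch; map_new_anon_folio() and its parameters are made up purely to illustrate the new contract:

	#include <linux/mm.h>
	#include <linux/rmap.h>

	/* Hypothetical helper illustrating the new folio_add_new_anon_rmap() contract. */
	static void map_new_anon_folio(struct folio *folio, struct vm_area_struct *vma,
				       unsigned long addr, bool exclusive)
	{
		if (exclusive) {
			/* Exclusive new folio: no folio lock needed. */
			folio_add_new_anon_rmap(folio, vma, addr, RMAP_EXCLUSIVE);
		} else {
			/* Shared folio: must already be locked by the caller. */
			VM_WARN_ON_FOLIO(!folio_test_locked(folio), folio);
			folio_add_new_anon_rmap(folio, vma, addr, RMAP_NONE);
		}
	}

Either way, the mapping statistics end up going through the new __folio_mod_stat() helper, which the add and remove paths now share.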