summary | refs | log | tree | commit | diff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r-- mm/debug.c 5
-rw-r--r-- mm/folio-compat.c 6
-rw-r--r-- mm/huge_memory.c 36
-rw-r--r-- mm/hugetlb.c 2
-rw-r--r-- mm/khugepaged.c 11
-rw-r--r-- mm/page_alloc.c 27
-rw-r--r-- mm/rmap.c 142
-rw-r--r-- mm/util.c 79
8 files changed, 112 insertions, 196 deletions
diff --git a/mm/debug.c b/mm/debug.c
index 0fd15ba70d16..7f8e5f744e42 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -94,9 +94,10 @@ static void __dump_page(struct page *page)
page, page_ref_count(head), mapcount, mapping,
page_to_pgoff(page), page_to_pfn(page));
if (compound) {
- pr_warn("head:%p order:%u compound_mapcount:%d compound_pincount:%d\n",
+ pr_warn("head:%p order:%u compound_mapcount:%d subpages_mapcount:%d compound_pincount:%d\n",
head, compound_order(head),
- folio_entire_mapcount(folio),
+ head_compound_mapcount(head),
+ head_subpages_mapcount(head),
head_compound_pincount(head));
}
diff --git a/mm/folio-compat.c b/mm/folio-compat.c
index bac2a366aada..cbfe51091c39 100644
--- a/mm/folio-compat.c
+++ b/mm/folio-compat.c
@@ -39,12 +39,6 @@ void wait_for_stable_page(struct page *page)
}
EXPORT_SYMBOL_GPL(wait_for_stable_page);
-bool page_mapped(struct page *page)
-{
- return folio_mapped(page_folio(page));
-}
-EXPORT_SYMBOL(page_mapped);
-
void mark_page_accessed(struct page *page)
{
folio_mark_accessed(page_folio(page));
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index b26998d1845f..7703169107c6 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2142,6 +2142,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
VM_BUG_ON_PAGE(!page_count(page), page);
page_ref_add(page, HPAGE_PMD_NR - 1);
+ atomic_add(HPAGE_PMD_NR, subpages_mapcount_ptr(page));
/*
* Without "freeze", we'll simply split the PMD, propagating the
@@ -2225,33 +2226,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
pte_unmap(pte);
}
- if (!pmd_migration) {
- /*
- * Set PG_double_map before dropping compound_mapcount to avoid
- * false-negative page_mapped().
- */
- if (compound_mapcount(page) > 1 &&
- !TestSetPageDoubleMap(page)) {
- for (i = 0; i < HPAGE_PMD_NR; i++)
- atomic_inc(&page[i]._mapcount);
- }
-
- lock_page_memcg(page);
- if (atomic_add_negative(-1, compound_mapcount_ptr(page))) {
- /* Last compound_mapcount is gone. */
- __mod_lruvec_page_state(page, NR_ANON_THPS,
- -HPAGE_PMD_NR);
- if (TestClearPageDoubleMap(page)) {
- /* No need in mapcount reference anymore */
- for (i = 0; i < HPAGE_PMD_NR; i++)
- atomic_dec(&page[i]._mapcount);
- }
- }
- unlock_page_memcg(page);
-
- /* Above is effectively page_remove_rmap(page, vma, true) */
- munlock_vma_page(page, vma, true);
- }
+ if (!pmd_migration)
+ page_remove_rmap(page, vma, true);
smp_wmb(); /* make pte visible before pmd */
pmd_populate(mm, pmd, pgtable);
@@ -2453,7 +2429,7 @@ static void __split_huge_page_tail(struct page *head, int tail,
(1L << PG_dirty) |
LRU_GEN_MASK | LRU_REFS_MASK));
- /* ->mapping in first tail page is compound_mapcount */
+ /* ->mapping in first and second tail page is replaced by other uses */
VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING,
page_tail);
page_tail->mapping = head->mapping;
@@ -2463,6 +2439,10 @@ static void __split_huge_page_tail(struct page *head, int tail,
* page->private should not be set in tail pages with the exception
* of swap cache pages that store the swp_entry_t in tail pages.
* Fix up and warn once if private is unexpectedly set.
+ *
+ * What of 32-bit systems, on which head[1].compound_pincount overlays
+ * head[1].private? No problem: THP_SWAP is not enabled on 32-bit, and
+ * compound_pincount must be 0 for folio_ref_freeze() to have succeeded.
*/
if (!folio_test_swapcache(page_folio(head))) {
VM_WARN_ON_ONCE_PAGE(page_tail->private != 0, page_tail);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 76ebefe02827..4f1338d82aab 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1333,6 +1333,7 @@ static void __destroy_compound_gigantic_page(struct page *page,
struct page *p;
atomic_set(compound_mapcount_ptr(page), 0);
+ atomic_set(subpages_mapcount_ptr(page), 0);
atomic_set(compound_pincount_ptr(page), 0);
for (i = 1; i < nr_pages; i++) {
@@ -1852,6 +1853,7 @@ static bool __prep_compound_gigantic_page(struct page *page, unsigned int order,
set_compound_head(p, page);
}
atomic_set(compound_mapcount_ptr(page), -1);
+ atomic_set(subpages_mapcount_ptr(page), 0);
atomic_set(compound_pincount_ptr(page), 0);
return true;
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 9c111273bbf9..0d8f548d9d7e 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1238,15 +1238,8 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm,
/*
* Check if the page has any GUP (or other external) pins.
*
- * Here the check is racy it may see total_mapcount > refcount
- * in some cases.
- * For example, one process with one forked child process.
- * The parent has the PMD split due to MADV_DONTNEED, then
- * the child is trying unmap the whole PMD, but khugepaged
- * may be scanning the parent between the child has
- * PageDoubleMap flag cleared and dec the mapcount. So
- * khugepaged may see total_mapcount > refcount.
- *
+ * Here the check may be racy:
+ * it may see total_mapcount > refcount in some cases?
* But such case is ephemeral we could always retry collapse
* later. However it may report false positive if the page
* has excessive GUP pins (i.e. 512). Anyway the same check
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6e60657875d3..0705917ddf54 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -798,6 +798,7 @@ static void prep_compound_head(struct page *page, unsigned int order)
set_compound_page_dtor(page, COMPOUND_PAGE_DTOR);
set_compound_order(page, order);
atomic_set(compound_mapcount_ptr(page), -1);
+ atomic_set(subpages_mapcount_ptr(page), 0);
atomic_set(compound_pincount_ptr(page), 0);
}
@@ -1324,11 +1325,19 @@ static int free_tail_pages_check(struct page *head_page, struct page *page)
}
switch (page - head_page) {
case 1:
- /* the first tail page: ->mapping may be compound_mapcount() */
- if (unlikely(compound_mapcount(page))) {
+ /* the first tail page: these may be in place of ->mapping */
+ if (unlikely(head_compound_mapcount(head_page))) {
bad_page(page, "nonzero compound_mapcount");
goto out;
}
+ if (unlikely(head_subpages_mapcount(head_page))) {
+ bad_page(page, "nonzero subpages_mapcount");
+ goto out;
+ }
+ if (unlikely(head_compound_pincount(head_page))) {
+ bad_page(page, "nonzero compound_pincount");
+ goto out;
+ }
break;
case 2:
/*
@@ -1431,10 +1440,8 @@ static __always_inline bool free_pages_prepare(struct page *page,
VM_BUG_ON_PAGE(compound && compound_order(page) != order, page);
- if (compound) {
- ClearPageDoubleMap(page);
+ if (compound)
ClearPageHasHWPoisoned(page);
- }
for (i = 1; i < (1 << order); i++) {
if (compound)
bad += free_tail_pages_check(page, page + i);
@@ -6874,13 +6881,11 @@ static void __ref memmap_init_compound(struct page *head,
set_page_count(page, 0);
/*
- * The first tail page stores compound_mapcount_ptr() and
- * compound_order() and the second tail page stores
- * compound_pincount_ptr(). Call prep_compound_head() after
- * the first and second tail pages have been initialized to
- * not have the data overwritten.
+ * The first tail page stores important compound page info.
+ * Call prep_compound_head() after the first tail page has
+ * been initialized, to not have the data overwritten.
*/
- if (pfn == head_pfn + 2)
+ if (pfn == head_pfn + 1)
prep_compound_head(head, order);
}
}
diff --git a/mm/rmap.c b/mm/rmap.c
index 3b2d18bbdc44..f43339ea4970 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1085,6 +1085,24 @@ int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff,
return page_vma_mkclean_one(&pvmw);
}
+/*
+ * When mapping a THP's first pmd, or unmapping its last pmd, if that THP
+ * also has pte mappings, then those must be discounted: in order to maintain
+ * NR_ANON_MAPPED and NR_FILE_MAPPED statistics exactly, without any drift,
+ * and to decide when an anon THP should be put on the deferred split queue.
+ */
+static int nr_subpages_unmapped(struct page *head, int nr_subpages)
+{
+ int nr = nr_subpages;
+ int i;
+
+ /* Discount those subpages mapped by pte */
+ for (i = 0; i < nr_subpages; i++)
+ if (atomic_read(&head[i]._mapcount) >= 0)
+ nr--;
+ return nr;
+}
+
/**
* page_move_anon_rmap - move a page to our anon_vma
* @page: the page to move to our anon_vma
@@ -1194,6 +1212,7 @@ static void __page_check_anon_rmap(struct page *page,
void page_add_anon_rmap(struct page *page,
struct vm_area_struct *vma, unsigned long address, rmap_t flags)
{
+ int nr, nr_pages;
bool compound = flags & RMAP_COMPOUND;
bool first;
@@ -1202,28 +1221,32 @@ void page_add_anon_rmap(struct page *page,
else
VM_BUG_ON_PAGE(!PageLocked(page), page);
- if (compound) {
+ if (compound && PageTransHuge(page)) {
atomic_t *mapcount;
VM_BUG_ON_PAGE(!PageLocked(page), page);
- VM_BUG_ON_PAGE(!PageTransHuge(page), page);
mapcount = compound_mapcount_ptr(page);
first = atomic_inc_and_test(mapcount);
+
+ nr = nr_pages = thp_nr_pages(page);
+ if (first && head_subpages_mapcount(page))
+ nr = nr_subpages_unmapped(page, nr_pages);
} else {
+ nr = 1;
+ if (PageTransCompound(page)) {
+ struct page *head = compound_head(page);
+
+ atomic_inc(subpages_mapcount_ptr(head));
+ nr = !head_compound_mapcount(head);
+ }
first = atomic_inc_and_test(&page->_mapcount);
}
+
VM_BUG_ON_PAGE(!first && (flags & RMAP_EXCLUSIVE), page);
VM_BUG_ON_PAGE(!first && PageAnonExclusive(page), page);
if (first) {
- int nr = compound ? thp_nr_pages(page) : 1;
- /*
- * We use the irq-unsafe __{inc|mod}_zone_page_stat because
- * these counters are not modified in interrupt context, and
- * pte lock(a spinlock) is held, which implies preemption
- * disabled.
- */
if (compound)
- __mod_lruvec_page_state(page, NR_ANON_THPS, nr);
+ __mod_lruvec_page_state(page, NR_ANON_THPS, nr_pages);
__mod_lruvec_page_state(page, NR_ANON_MAPPED, nr);
}
@@ -1265,8 +1288,6 @@ void page_add_new_anon_rmap(struct page *page,
VM_BUG_ON_PAGE(!PageTransHuge(page), page);
/* increment count (starts at -1) */
atomic_set(compound_mapcount_ptr(page), 0);
- atomic_set(compound_pincount_ptr(page), 0);
-
__mod_lruvec_page_state(page, NR_ANON_THPS, nr);
} else {
/* increment count (starts at -1) */
@@ -1287,29 +1308,19 @@ void page_add_new_anon_rmap(struct page *page,
void page_add_file_rmap(struct page *page,
struct vm_area_struct *vma, bool compound)
{
- int i, nr = 0;
+ int nr = 0;
VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
lock_page_memcg(page);
if (compound && PageTransHuge(page)) {
- int nr_pages = thp_nr_pages(page);
+ int nr_pages;
- for (i = 0; i < nr_pages; i++) {
- if (atomic_inc_and_test(&page[i]._mapcount))
- nr++;
- }
if (!atomic_inc_and_test(compound_mapcount_ptr(page)))
goto out;
- /*
- * It is racy to ClearPageDoubleMap in page_remove_file_rmap();
- * but page lock is held by all page_add_file_rmap() compound
- * callers, and SetPageDoubleMap below warns if !PageLocked:
- * so here is a place that DoubleMap can be safely cleared.
- */
- VM_WARN_ON_ONCE(!PageLocked(page));
- if (nr == nr_pages && PageDoubleMap(page))
- ClearPageDoubleMap(page);
+ nr = nr_pages = thp_nr_pages(page);
+ if (head_subpages_mapcount(page))
+ nr = nr_subpages_unmapped(page, nr_pages);
if (PageSwapBacked(page))
__mod_lruvec_page_state(page, NR_SHMEM_PMDMAPPED,
@@ -1318,11 +1329,15 @@ void page_add_file_rmap(struct page *page,
__mod_lruvec_page_state(page, NR_FILE_PMDMAPPED,
nr_pages);
} else {
- if (PageTransCompound(page) && page_mapping(page)) {
- VM_WARN_ON_ONCE(!PageLocked(page));
- SetPageDoubleMap(compound_head(page));
+ bool pmd_mapped = false;
+
+ if (PageTransCompound(page)) {
+ struct page *head = compound_head(page);
+
+ atomic_inc(subpages_mapcount_ptr(head));
+ pmd_mapped = head_compound_mapcount(head);
}
- if (atomic_inc_and_test(&page->_mapcount))
+ if (atomic_inc_and_test(&page->_mapcount) && !pmd_mapped)
nr++;
}
out:
@@ -1335,7 +1350,7 @@ out:
static void page_remove_file_rmap(struct page *page, bool compound)
{
- int i, nr = 0;
+ int nr = 0;
VM_BUG_ON_PAGE(compound && !PageHead(page), page);
@@ -1348,14 +1363,15 @@ static void page_remove_file_rmap(struct page *page, bool compound)
/* page still mapped by someone else? */
if (compound && PageTransHuge(page)) {
- int nr_pages = thp_nr_pages(page);
+ int nr_pages;
- for (i = 0; i < nr_pages; i++) {
- if (atomic_add_negative(-1, &page[i]._mapcount))
- nr++;
- }
if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
- goto out;
+ return;
+
+ nr = nr_pages = thp_nr_pages(page);
+ if (head_subpages_mapcount(page))
+ nr = nr_subpages_unmapped(page, nr_pages);
+
if (PageSwapBacked(page))
__mod_lruvec_page_state(page, NR_SHMEM_PMDMAPPED,
-nr_pages);
@@ -1363,17 +1379,25 @@ static void page_remove_file_rmap(struct page *page, bool compound)
__mod_lruvec_page_state(page, NR_FILE_PMDMAPPED,
-nr_pages);
} else {
- if (atomic_add_negative(-1, &page->_mapcount))
+ bool pmd_mapped = false;
+
+ if (PageTransCompound(page)) {
+ struct page *head = compound_head(page);
+
+ atomic_dec(subpages_mapcount_ptr(head));
+ pmd_mapped = head_compound_mapcount(head);
+ }
+ if (atomic_add_negative(-1, &page->_mapcount) && !pmd_mapped)
nr++;
}
-out:
+
if (nr)
__mod_lruvec_page_state(page, NR_FILE_MAPPED, -nr);
}
static void page_remove_anon_compound_rmap(struct page *page)
{
- int i, nr;
+ int nr, nr_pages;
if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
return;
@@ -1385,27 +1409,19 @@ static void page_remove_anon_compound_rmap(struct page *page)
if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
return;
- __mod_lruvec_page_state(page, NR_ANON_THPS, -thp_nr_pages(page));
+ nr = nr_pages = thp_nr_pages(page);
+ __mod_lruvec_page_state(page, NR_ANON_THPS, -nr);
- if (TestClearPageDoubleMap(page)) {
- /*
- * Subpages can be mapped with PTEs too. Check how many of
- * them are still mapped.
- */
- for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
- if (atomic_add_negative(-1, &page[i]._mapcount))
- nr++;
- }
+ if (head_subpages_mapcount(page)) {
+ nr = nr_subpages_unmapped(page, nr_pages);
/*
* Queue the page for deferred split if at least one small
* page of the compound page is unmapped, but at least one
* small page is still mapped.
*/
- if (nr && nr < thp_nr_pages(page))
+ if (nr && nr < nr_pages)
deferred_split_huge_page(page);
- } else {
- nr = thp_nr_pages(page);
}
if (nr)
@@ -1423,6 +1439,8 @@ static void page_remove_anon_compound_rmap(struct page *page)
void page_remove_rmap(struct page *page,
struct vm_area_struct *vma, bool compound)
{
+ bool pmd_mapped = false;
+
lock_page_memcg(page);
if (!PageAnon(page)) {
@@ -1435,15 +1453,17 @@ void page_remove_rmap(struct page *page,
goto out;
}
+ if (PageTransCompound(page)) {
+ struct page *head = compound_head(page);
+
+ atomic_dec(subpages_mapcount_ptr(head));
+ pmd_mapped = head_compound_mapcount(head);
+ }
+
/* page still mapped by someone else? */
- if (!atomic_add_negative(-1, &page->_mapcount))
+ if (!atomic_add_negative(-1, &page->_mapcount) || pmd_mapped)
goto out;
- /*
- * We use the irq-unsafe __{inc|mod}_zone_page_stat because
- * these counters are not modified in interrupt context, and
- * pte lock(a spinlock) is held, which implies preemption disabled.
- */
__dec_lruvec_page_state(page, NR_ANON_MAPPED);
if (PageTransCompound(page))
@@ -2569,8 +2589,8 @@ void hugepage_add_new_anon_rmap(struct page *page,
struct vm_area_struct *vma, unsigned long address)
{
BUG_ON(address < vma->vm_start || address >= vma->vm_end);
+ /* increment count (starts at -1) */
atomic_set(compound_mapcount_ptr(page), 0);
- atomic_set(compound_pincount_ptr(page), 0);
ClearHPageRestoreReserve(page);
__page_set_anon_rmap(page, vma, address, 1);
}
diff --git a/mm/util.c b/mm/util.c
index 12984e76767e..b56c92fb910f 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -717,32 +717,6 @@ void *page_rmapping(struct page *page)
return folio_raw_mapping(page_folio(page));
}
-/**
- * folio_mapped - Is this folio mapped into userspace?
- * @folio: The folio.
- *
- * Return: True if any page in this folio is referenced by user page tables.
- */
-bool folio_mapped(struct folio *folio)
-{
- long i, nr;
-
- if (!folio_test_large(folio))
- return atomic_read(&folio->_mapcount) >= 0;
- if (atomic_read(folio_mapcount_ptr(folio)) >= 0)
- return true;
- if (folio_test_hugetlb(folio))
- return false;
-
- nr = folio_nr_pages(folio);
- for (i = 0; i < nr; i++) {
- if (atomic_read(&folio_page(folio, i)->_mapcount) >= 0)
- return true;
- }
- return false;
-}
-EXPORT_SYMBOL(folio_mapped);
-
struct anon_vma *folio_anon_vma(struct folio *folio)
{
unsigned long mapping = (unsigned long)folio->mapping;
@@ -783,59 +757,6 @@ struct address_space *folio_mapping(struct folio *folio)
}
EXPORT_SYMBOL(folio_mapping);
-/* Slow path of page_mapcount() for compound pages */
-int __page_mapcount(struct page *page)
-{
- int ret;
-
- ret = atomic_read(&page->_mapcount) + 1;
- /*
- * For file THP page->_mapcount contains total number of mapping
- * of the page: no need to look into compound_mapcount.
- */
- if (!PageAnon(page) && !PageHuge(page))
- return ret;
- page = compound_head(page);
- ret += atomic_read(compound_mapcount_ptr(page)) + 1;
- if (PageDoubleMap(page))
- ret--;
- return ret;
-}
-EXPORT_SYMBOL_GPL(__page_mapcount);
-
-/**
- * folio_mapcount() - Calculate the number of mappings of this folio.
- * @folio: The folio.
- *
- * A large folio tracks both how many times the entire folio is mapped,
- * and how many times each individual page in the folio is mapped.
- * This function calculates the total number of times the folio is
- * mapped.
- *
- * Return: The number of times this folio is mapped.
- */
-int folio_mapcount(struct folio *folio)
-{
- int i, compound, nr, ret;
-
- if (likely(!folio_test_large(folio)))
- return atomic_read(&folio->_mapcount) + 1;
-
- compound = folio_entire_mapcount(folio);
- if (folio_test_hugetlb(folio))
- return compound;
- ret = compound;
- nr = folio_nr_pages(folio);
- for (i = 0; i < nr; i++)
- ret += atomic_read(&folio_page(folio, i)->_mapcount) + 1;
- /* File pages has compound_mapcount included in _mapcount */
- if (!folio_test_anon(folio))
- return ret - compound * nr;
- if (folio_test_double_map(folio))
- ret -= nr;
- return ret;
-}
-
/**
* folio_copy - Copy the contents of one folio to another.
* @dst: Folio to copy to.