-rw-r--r--  Documentation/mm/transhuge.rst | 12
-rw-r--r--  include/linux/mm.h             | 44
-rw-r--r--  include/linux/mm_types.h       |  5
-rw-r--r--  include/linux/rmap.h           | 10
-rw-r--r--  mm/debug.c                     |  3
-rw-r--r--  mm/hugetlb.c                   |  4
-rw-r--r--  mm/internal.h                  |  3
-rw-r--r--  mm/khugepaged.c                |  2
-rw-r--r--  mm/page_alloc.c                |  4
-rw-r--r--  mm/rmap.c                      | 34
10 files changed, 62 insertions, 59 deletions
diff --git a/Documentation/mm/transhuge.rst b/Documentation/mm/transhuge.rst
index 93c9239b9ebe..1ba0ad63246c 100644
--- a/Documentation/mm/transhuge.rst
+++ b/Documentation/mm/transhuge.rst
@@ -116,14 +116,14 @@ pages:
succeeds on tail pages.
- map/unmap of a PMD entry for the whole THP increment/decrement
- folio->_entire_mapcount and also increment/decrement
- folio->_nr_pages_mapped by ENTIRELY_MAPPED when _entire_mapcount
- goes from -1 to 0 or 0 to -1.
+ folio->_entire_mapcount, increment/decrement folio->_large_mapcount
+ and also increment/decrement folio->_nr_pages_mapped by ENTIRELY_MAPPED
+ when _entire_mapcount goes from -1 to 0 or 0 to -1.
- map/unmap of individual pages with PTE entry increment/decrement
- page->_mapcount and also increment/decrement folio->_nr_pages_mapped
- when page->_mapcount goes from -1 to 0 or 0 to -1 as this counts
- the number of pages mapped by PTE.
+ page->_mapcount, increment/decrement folio->_large_mapcount and also
+ increment/decrement folio->_nr_pages_mapped when page->_mapcount goes
+ from -1 to 0 or 0 to -1 as this counts the number of pages mapped by PTE.
split_huge_page internally has to distribute the refcounts in the head
page to the tail pages before clearing all PG_head/tail bits from the page
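
To make the rewritten rules concrete, here is a minimal userspace C sketch of
the three counters (illustrative only, not kernel code; the struct, helper
names and the ENTIRELY_MAPPED value mirror the kernel's, and only the map
side is modeled):

#include <stdatomic.h>
#include <stdio.h>

#define ENTIRELY_MAPPED	0x800000
#define NR_PAGES	512	/* PMD-sized folio on x86-64 */

struct folio_model {
	atomic_int entire_mapcount;		/* -1: not entirely mapped */
	atomic_int large_mapcount;		/* -1: no mappings at all */
	atomic_int nr_pages_mapped;
	atomic_int page_mapcount[NR_PAGES];	/* -1: page not PTE-mapped */
};

static void map_pmd(struct folio_model *f)
{
	/* _entire_mapcount going from -1 to 0: first entire mapping */
	if (atomic_fetch_add(&f->entire_mapcount, 1) == -1)
		atomic_fetch_add(&f->nr_pages_mapped, ENTIRELY_MAPPED);
	atomic_fetch_add(&f->large_mapcount, 1);
}

static void map_pte(struct folio_model *f, int idx)
{
	/* page->_mapcount going from -1 to 0: page newly PTE-mapped */
	if (atomic_fetch_add(&f->page_mapcount[idx], 1) == -1)
		atomic_fetch_add(&f->nr_pages_mapped, 1);
	atomic_fetch_add(&f->large_mapcount, 1);
}

int main(void)
{
	struct folio_model f;

	atomic_init(&f.entire_mapcount, -1);
	atomic_init(&f.large_mapcount, -1);
	atomic_init(&f.nr_pages_mapped, 0);
	for (int i = 0; i < NR_PAGES; i++)
		atomic_init(&f.page_mapcount[i], -1);

	map_pmd(&f);		/* one PMD mapping of the whole folio */
	map_pte(&f, 0);		/* plus one PTE mapping of page 0 */

	/* the total is now a single read: _large_mapcount + 1 */
	printf("mapcount=%d\n", atomic_load(&f.large_mapcount) + 1);
	return 0;
}

This prints mapcount=2: one PMD mapping plus one PTE mapping, read from the
single _large_mapcount instead of being summed over all pages.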
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8fa4d78bcc09..059477821625 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1240,16 +1240,26 @@ static inline int page_mapcount(struct page *page)
return mapcount;
}
-int folio_total_mapcount(const struct folio *folio);
+static inline int folio_large_mapcount(const struct folio *folio)
+{
+ VM_WARN_ON_FOLIO(!folio_test_large(folio), folio);
+ return atomic_read(&folio->_large_mapcount) + 1;
+}
/**
- * folio_mapcount() - Calculate the number of mappings of this folio.
+ * folio_mapcount() - Number of mappings of this folio.
* @folio: The folio.
*
- * A large folio tracks both how many times the entire folio is mapped,
- * and how many times each individual page in the folio is mapped.
- * This function calculates the total number of times the folio is
- * mapped.
+ * The folio mapcount corresponds to the number of present user page table
+ * entries that reference any part of a folio. Each such present user page
+ * table entry must be paired with exactly one folio reference.
+ *
+ * For ordinary folios, each user page table entry (PTE/PMD/PUD/...) counts
+ * exactly once.
+ *
+ * For hugetlb folios, each abstracted "hugetlb" user page table entry that
+ * references the entire folio counts exactly once, even when such special
+ * page table entries are composed of multiple ordinary page table entries.
*
* Return: The number of times this folio is mapped.
*/
@@ -1257,17 +1267,7 @@ static inline int folio_mapcount(const struct folio *folio)
{
if (likely(!folio_test_large(folio)))
return atomic_read(&folio->_mapcount) + 1;
- return folio_total_mapcount(folio);
-}
-
-static inline bool folio_large_is_mapped(const struct folio *folio)
-{
- /*
- * Reading _entire_mapcount below could be omitted if hugetlb
- * participated in incrementing nr_pages_mapped when compound mapped.
- */
- return atomic_read(&folio->_nr_pages_mapped) > 0 ||
- atomic_read(&folio->_entire_mapcount) >= 0;
+ return folio_large_mapcount(folio);
}
/**
@@ -1276,11 +1276,9 @@ static inline bool folio_large_is_mapped(const struct folio *folio)
*
* Return: True if any page in this folio is referenced by user page tables.
*/
-static inline bool folio_mapped(struct folio *folio)
+static inline bool folio_mapped(const struct folio *folio)
{
- if (likely(!folio_test_large(folio)))
- return atomic_read(&folio->_mapcount) >= 0;
- return folio_large_is_mapped(folio);
+ return folio_mapcount(folio) >= 1;
}
/*
@@ -1290,9 +1288,7 @@ static inline bool folio_mapped(struct folio *folio)
*/
static inline bool page_mapped(const struct page *page)
{
- if (likely(!PageCompound(page)))
- return atomic_read(&page->_mapcount) >= 0;
- return folio_large_is_mapped(page_folio(page));
+ return folio_mapped(page_folio(page));
}
static inline struct page *virt_to_head_page(const void *x)
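
With the total kept in one field, folio_mapped() and page_mapped() collapse
to folio_mapcount(folio) >= 1. The -1 bias that makes this work can be
sketched in plain C (illustrative, not kernel code):

#include <stdatomic.h>
#include <stdio.h>

int main(void)
{
	atomic_int large_mapcount;
	int mapcount;

	atomic_init(&large_mapcount, -1);	/* no mappings yet */
	mapcount = atomic_load(&large_mapcount) + 1;
	printf("mapcount=%d mapped=%d\n", mapcount, mapcount >= 1); /* 0 0 */

	atomic_fetch_add(&large_mapcount, 1);	/* first mapping */
	mapcount = atomic_load(&large_mapcount) + 1;
	printf("mapcount=%d mapped=%d\n", mapcount, mapcount >= 1); /* 1 1 */
	return 0;
}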
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index fa0d6995706f..db0adf5721cc 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -289,7 +289,8 @@ typedef struct {
* @virtual: Virtual address in the kernel direct map.
* @_last_cpupid: IDs of last CPU and last process that accessed the folio.
* @_entire_mapcount: Do not use directly, call folio_entire_mapcount().
- * @_nr_pages_mapped: Do not use directly, call folio_mapcount().
+ * @_large_mapcount: Do not use directly, call folio_mapcount().
+ * @_nr_pages_mapped: Do not use outside of rmap and debug code.
* @_pincount: Do not use directly, call folio_maybe_dma_pinned().
* @_folio_nr_pages: Do not use directly, call folio_nr_pages().
* @_hugetlb_subpool: Do not use directly, use accessor in hugetlb.h.
@@ -348,8 +349,8 @@ struct folio {
struct {
unsigned long _flags_1;
unsigned long _head_1;
- unsigned long _folio_avail;
/* public: */
+ atomic_t _large_mapcount;
atomic_t _entire_mapcount;
atomic_t _nr_pages_mapped;
atomic_t _pincount;
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 327f1ca5a487..0f906dc6d280 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -273,6 +273,7 @@ static inline int hugetlb_try_dup_anon_rmap(struct folio *folio,
ClearPageAnonExclusive(&folio->page);
}
atomic_inc(&folio->_entire_mapcount);
+ atomic_inc(&folio->_large_mapcount);
return 0;
}
@@ -306,6 +307,7 @@ static inline void hugetlb_add_file_rmap(struct folio *folio)
VM_WARN_ON_FOLIO(folio_test_anon(folio), folio);
atomic_inc(&folio->_entire_mapcount);
+ atomic_inc(&folio->_large_mapcount);
}
static inline void hugetlb_remove_rmap(struct folio *folio)
@@ -313,11 +315,14 @@ static inline void hugetlb_remove_rmap(struct folio *folio)
VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio);
atomic_dec(&folio->_entire_mapcount);
+ atomic_dec(&folio->_large_mapcount);
}
static __always_inline void __folio_dup_file_rmap(struct folio *folio,
struct page *page, int nr_pages, enum rmap_level level)
{
+ const int orig_nr_pages = nr_pages;
+
__folio_rmap_sanity_checks(folio, page, nr_pages, level);
switch (level) {
@@ -330,9 +335,11 @@ static __always_inline void __folio_dup_file_rmap(struct folio *folio,
do {
atomic_inc(&page->_mapcount);
} while (page++, --nr_pages > 0);
+ atomic_add(orig_nr_pages, &folio->_large_mapcount);
break;
case RMAP_LEVEL_PMD:
atomic_inc(&folio->_entire_mapcount);
+ atomic_inc(&folio->_large_mapcount);
break;
}
}
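
The new orig_nr_pages local is needed because the do/while loop consumes
nr_pages, so the batched _large_mapcount update must capture the count up
front. A self-contained sketch of the same pattern (illustrative names, not
kernel code):

#include <stdatomic.h>
#include <stdio.h>

static void dup_ptes(atomic_int *page_mapcount, atomic_int *large_mapcount,
		     int nr_pages)
{
	const int orig_nr_pages = nr_pages;

	do {
		atomic_fetch_add(page_mapcount, 1);
	} while (page_mapcount++, --nr_pages > 0);

	/* one batched add instead of nr_pages individual increments */
	atomic_fetch_add(large_mapcount, orig_nr_pages);
}

int main(void)
{
	atomic_int pages[4] = {-1, -1, -1, -1};
	atomic_int large_mapcount = -1;

	dup_ptes(pages, &large_mapcount, 4);
	printf("mapcount=%d\n", atomic_load(&large_mapcount) + 1); /* 4 */
	return 0;
}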
@@ -382,6 +389,7 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio,
struct page *page, int nr_pages, struct vm_area_struct *src_vma,
enum rmap_level level)
{
+ const int orig_nr_pages = nr_pages;
bool maybe_pinned;
int i;
@@ -423,6 +431,7 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio,
ClearPageAnonExclusive(page);
atomic_inc(&page->_mapcount);
} while (page++, --nr_pages > 0);
+ atomic_add(orig_nr_pages, &folio->_large_mapcount);
break;
case RMAP_LEVEL_PMD:
if (PageAnonExclusive(page)) {
@@ -431,6 +440,7 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio,
ClearPageAnonExclusive(page);
}
atomic_inc(&folio->_entire_mapcount);
+ atomic_inc(&folio->_large_mapcount);
break;
}
return 0;
diff --git a/mm/debug.c b/mm/debug.c
index b71186f1fb0b..d064db42af54 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -68,8 +68,9 @@ static void __dump_folio(struct folio *folio, struct page *page,
folio_ref_count(folio), mapcount, mapping,
folio->index + idx, pfn);
if (folio_test_large(folio)) {
- pr_warn("head: order:%u entire_mapcount:%d nr_pages_mapped:%d pincount:%d\n",
+ pr_warn("head: order:%u mapcount:%d entire_mapcount:%d nr_pages_mapped:%d pincount:%d\n",
folio_order(folio),
+ folio_mapcount(folio),
folio_entire_mapcount(folio),
folio_nr_pages_mapped(folio),
atomic_read(&folio->_pincount));
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 5dc3f5ea3a2e..d74289d3f30d 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1517,7 +1517,7 @@ static void __destroy_compound_gigantic_folio(struct folio *folio,
struct page *p;
atomic_set(&folio->_entire_mapcount, 0);
- atomic_set(&folio->_nr_pages_mapped, 0);
+ atomic_set(&folio->_large_mapcount, 0);
atomic_set(&folio->_pincount, 0);
for (i = 1; i < nr_pages; i++) {
@@ -2120,7 +2120,7 @@ static bool __prep_compound_gigantic_folio(struct folio *folio,
/* we rely on prep_new_hugetlb_folio to set the hugetlb flag */
folio_set_order(folio, order);
atomic_set(&folio->_entire_mapcount, -1);
- atomic_set(&folio->_nr_pages_mapped, 0);
+ atomic_set(&folio->_large_mapcount, -1);
atomic_set(&folio->_pincount, 0);
return true;
diff --git a/mm/internal.h b/mm/internal.h
index 22152e0c8494..2adc3f616b71 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -72,6 +72,8 @@ void page_writeback_init(void);
/*
* How many individual pages have an elevated _mapcount. Excludes
* the folio's entire_mapcount.
+ *
+ * Don't use this function outside of debugging code.
*/
static inline int folio_nr_pages_mapped(const struct folio *folio)
{
@@ -611,6 +613,7 @@ static inline void prep_compound_head(struct page *page, unsigned int order)
struct folio *folio = (struct folio *)page;
folio_set_order(folio, order);
+ atomic_set(&folio->_large_mapcount, -1);
atomic_set(&folio->_entire_mapcount, -1);
atomic_set(&folio->_nr_pages_mapped, 0);
atomic_set(&folio->_pincount, 0);
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 89e2624fb3ff..2f73d2aa9ae8 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1358,7 +1358,7 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm,
* Check if the page has any GUP (or other external) pins.
*
* Here the check may be racy:
- * it may see total_mapcount > refcount in some cases?
+ * it may see folio_mapcount() > folio_ref_count().
* But such case is ephemeral we could always retry collapse
* later. However it may report false positive if the page
* has excessive GUP pins (i.e. 512). Anyway the same check
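
The corrected comment describes the pin heuristic: every mapping holds one
folio reference, so references in excess of the mapcount (beyond the extra
references the caller expects) hint at GUP or other external pins. A hedged
sketch of that comparison, not the kernel's actual helper:

#include <stdbool.h>
#include <stdio.h>

static bool maybe_externally_pinned(int ref_count, int mapcount,
				    int expected_extra_refs)
{
	/* Both counters are read unsynchronized: only a racy hint. */
	return ref_count - mapcount > expected_extra_refs;
}

int main(void)
{
	/* 3 refs vs 1 mapping and 1 expected extra ref => suspicious */
	printf("%d\n", maybe_externally_pinned(3, 1, 1));
	return 0;
}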
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 22e8b9f1d710..dd4265c760ff 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -935,6 +935,10 @@ static int free_tail_page_prepare(struct page *head_page, struct page *page)
bad_page(page, "nonzero entire_mapcount");
goto out;
}
+ if (unlikely(folio_large_mapcount(folio))) {
+ bad_page(page, "nonzero large_mapcount");
+ goto out;
+ }
if (unlikely(atomic_read(&folio->_nr_pages_mapped))) {
bad_page(page, "nonzero nr_pages_mapped");
goto out;
diff --git a/mm/rmap.c b/mm/rmap.c
index 4bde6d60db6c..2608c40dffad 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1138,34 +1138,12 @@ int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff,
return page_vma_mkclean_one(&pvmw);
}
-int folio_total_mapcount(const struct folio *folio)
-{
- int mapcount = folio_entire_mapcount(folio);
- int nr_pages;
- int i;
-
- /* In the common case, avoid the loop when no pages mapped by PTE */
- if (folio_nr_pages_mapped(folio) == 0)
- return mapcount;
- /*
- * Add all the PTE mappings of those pages mapped by PTE.
- * Limit the loop to folio_nr_pages_mapped()?
- * Perhaps: given all the raciness, that may be a good or a bad idea.
- */
- nr_pages = folio_nr_pages(folio);
- for (i = 0; i < nr_pages; i++)
- mapcount += atomic_read(&folio_page(folio, i)->_mapcount);
-
- /* But each of those _mapcounts was based on -1 */
- mapcount += nr_pages;
- return mapcount;
-}
-
static __always_inline unsigned int __folio_add_rmap(struct folio *folio,
struct page *page, int nr_pages, enum rmap_level level,
int *nr_pmdmapped)
{
atomic_t *mapped = &folio->_nr_pages_mapped;
+ const int orig_nr_pages = nr_pages;
int first, nr = 0;
__folio_rmap_sanity_checks(folio, page, nr_pages, level);
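
The deleted folio_total_mapcount() had to read a counter for every page in
the folio; with _large_mapcount maintained at (un)map time, the read becomes
a single atomic load. A side-by-side userspace sketch (illustrative names):

#include <stdatomic.h>
#include <stdio.h>

/* Old scheme (sketch): sum the biased per-page counts, O(nr_pages). */
static int total_mapcount_slow(atomic_int *entire_mapcount,
			       atomic_int *page_mapcount, int nr_pages)
{
	int mapcount = atomic_load(entire_mapcount) + 1;

	for (int i = 0; i < nr_pages; i++)
		mapcount += atomic_load(&page_mapcount[i]) + 1;
	return mapcount;
}

/* New scheme: the total is kept up to date by rmap code, O(1). */
static int total_mapcount_fast(atomic_int *large_mapcount)
{
	return atomic_load(large_mapcount) + 1;
}

int main(void)
{
	atomic_int entire = 0;			/* one PMD mapping */
	atomic_int pages[4] = {0, -1, -1, -1};	/* page 0 also PTE-mapped */
	atomic_int large = 1;			/* maintained total - 1 */

	printf("slow=%d fast=%d\n",
	       total_mapcount_slow(&entire, pages, 4),
	       total_mapcount_fast(&large));	/* both print 2 */
	return 0;
}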
@@ -1185,6 +1163,7 @@ static __always_inline unsigned int __folio_add_rmap(struct folio *folio,
nr++;
}
} while (page++, --nr_pages > 0);
+ atomic_add(orig_nr_pages, &folio->_large_mapcount);
break;
case RMAP_LEVEL_PMD:
first = atomic_inc_and_test(&folio->_entire_mapcount);
@@ -1201,6 +1180,7 @@ static __always_inline unsigned int __folio_add_rmap(struct folio *folio,
nr = 0;
}
}
+ atomic_inc(&folio->_large_mapcount);
break;
}
return nr;
@@ -1436,10 +1416,14 @@ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
SetPageAnonExclusive(page);
}
+ /* increment count (starts at -1) */
+ atomic_set(&folio->_large_mapcount, nr - 1);
atomic_set(&folio->_nr_pages_mapped, nr);
} else {
/* increment count (starts at -1) */
atomic_set(&folio->_entire_mapcount, 0);
+ /* increment count (starts at -1) */
+ atomic_set(&folio->_large_mapcount, 0);
atomic_set(&folio->_nr_pages_mapped, ENTIRELY_MAPPED);
SetPageAnonExclusive(&folio->page);
__lruvec_stat_mod_folio(folio, NR_ANON_THPS, nr);
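
The two atomic_set() calls above encode the same -1 bias at initialization
time: nr PTE mappings of a new folio are recorded as nr - 1, a single PMD
mapping as 0. A trivial check of that arithmetic (illustrative only):

#include <assert.h>
#include <stdatomic.h>

int main(void)
{
	int nr = 512;			/* new folio mapped by nr PTEs */
	atomic_int large_mapcount;

	atomic_init(&large_mapcount, nr - 1);
	assert(atomic_load(&large_mapcount) + 1 == nr);

	atomic_store(&large_mapcount, 0);	/* single PMD mapping */
	assert(atomic_load(&large_mapcount) + 1 == 1);
	return 0;
}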
@@ -1522,6 +1506,7 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
break;
}
+ atomic_sub(nr_pages, &folio->_large_mapcount);
do {
last = atomic_add_negative(-1, &page->_mapcount);
if (last) {
@@ -1532,6 +1517,7 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
} while (page++, --nr_pages > 0);
break;
case RMAP_LEVEL_PMD:
+ atomic_dec(&folio->_large_mapcount);
last = atomic_add_negative(-1, &folio->_entire_mapcount);
if (last) {
nr = atomic_sub_return_relaxed(ENTIRELY_MAPPED, mapped);
@@ -2714,6 +2700,7 @@ void hugetlb_add_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio);
atomic_inc(&folio->_entire_mapcount);
+ atomic_inc(&folio->_large_mapcount);
if (flags & RMAP_EXCLUSIVE)
SetPageAnonExclusive(&folio->page);
VM_WARN_ON_FOLIO(folio_entire_mapcount(folio) > 1 &&
@@ -2728,6 +2715,7 @@ void hugetlb_add_new_anon_rmap(struct folio *folio,
BUG_ON(address < vma->vm_start || address >= vma->vm_end);
/* increment count (starts at -1) */
atomic_set(&folio->_entire_mapcount, 0);
+ atomic_set(&folio->_large_mapcount, 0);
folio_clear_hugetlb_restore_reserve(folio);
__folio_set_anon(folio, vma, address, true);
SetPageAnonExclusive(&folio->page);