diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Makefile | 2 | ||||
-rw-r--r-- | mm/compaction.c | 4 | ||||
-rw-r--r-- | mm/filemap.c | 567 | ||||
-rw-r--r-- | mm/folio-compat.c | 142 | ||||
-rw-r--r-- | mm/huge_memory.c | 7 | ||||
-rw-r--r-- | mm/hugetlb.c | 2 | ||||
-rw-r--r-- | mm/internal.h | 36 | ||||
-rw-r--r-- | mm/khugepaged.c | 8 | ||||
-rw-r--r-- | mm/ksm.c | 34 | ||||
-rw-r--r-- | mm/memcontrol.c | 356 | ||||
-rw-r--r-- | mm/memory-failure.c | 2 | ||||
-rw-r--r-- | mm/memory.c | 20 | ||||
-rw-r--r-- | mm/mempolicy.c | 10 | ||||
-rw-r--r-- | mm/memremap.c | 2 | ||||
-rw-r--r-- | mm/migrate.c | 189 | ||||
-rw-r--r-- | mm/mlock.c | 3 | ||||
-rw-r--r-- | mm/page-writeback.c | 476 | ||||
-rw-r--r-- | mm/page_alloc.c | 14 | ||||
-rw-r--r-- | mm/page_io.c | 4 | ||||
-rw-r--r-- | mm/page_owner.c | 10 | ||||
-rw-r--r-- | mm/rmap.c | 14 | ||||
-rw-r--r-- | mm/shmem.c | 7 | ||||
-rw-r--r-- | mm/swap.c | 197 | ||||
-rw-r--r-- | mm/swap_state.c | 2 | ||||
-rw-r--r-- | mm/swapfile.c | 8 | ||||
-rw-r--r-- | mm/userfaultfd.c | 2 | ||||
-rw-r--r-- | mm/util.c | 111 | ||||
-rw-r--r-- | mm/vmscan.c | 8 | ||||
-rw-r--r-- | mm/workingset.c | 52 |
29 files changed, 1269 insertions, 1020 deletions
diff --git a/mm/Makefile b/mm/Makefile index fc60a40ce954..d6c0042e3aa0 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -46,7 +46,7 @@ mmu-$(CONFIG_MMU) += process_vm_access.o endif obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ - maccess.o page-writeback.o \ + maccess.o page-writeback.o folio-compat.o \ readahead.o swap.o truncate.o vmscan.o shmem.o \ util.o mmzone.o vmstat.o backing-dev.o \ mm_init.o percpu.o slab_common.o \ diff --git a/mm/compaction.c b/mm/compaction.c index bfc93da1c2c7..fbc60f964c38 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1022,7 +1022,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, if (!TestClearPageLRU(page)) goto isolate_fail_put; - lruvec = mem_cgroup_page_lruvec(page); + lruvec = folio_lruvec(page_folio(page)); /* If we already hold the lock, we can skip some rechecking */ if (lruvec != locked) { @@ -1032,7 +1032,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, compact_lock_irqsave(&lruvec->lru_lock, &flags, cc); locked = lruvec; - lruvec_memcg_debug(lruvec, page); + lruvec_memcg_debug(lruvec, page_folio(page)); /* Try get exclusive access under lock */ if (!skip_updated) { diff --git a/mm/filemap.c b/mm/filemap.c index dae481293b5d..3e9feb5cc570 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -835,6 +835,8 @@ EXPORT_SYMBOL(file_write_and_wait_range); */ void replace_page_cache_page(struct page *old, struct page *new) { + struct folio *fold = page_folio(old); + struct folio *fnew = page_folio(new); struct address_space *mapping = old->mapping; void (*freepage)(struct page *) = mapping->a_ops->freepage; pgoff_t offset = old->index; @@ -848,7 +850,7 @@ void replace_page_cache_page(struct page *old, struct page *new) new->mapping = mapping; new->index = offset; - mem_cgroup_migrate(old, new); + mem_cgroup_migrate(fold, fnew); xas_lock_irq(&xas); xas_store(&xas, new); @@ -870,26 +872,25 @@ void replace_page_cache_page(struct page *old, struct page *new) } EXPORT_SYMBOL_GPL(replace_page_cache_page); -noinline int __add_to_page_cache_locked(struct page *page, - struct address_space *mapping, - pgoff_t offset, gfp_t gfp, - void **shadowp) +noinline int __filemap_add_folio(struct address_space *mapping, + struct folio *folio, pgoff_t index, gfp_t gfp, void **shadowp) { - XA_STATE(xas, &mapping->i_pages, offset); - int huge = PageHuge(page); + XA_STATE(xas, &mapping->i_pages, index); + int huge = folio_test_hugetlb(folio); int error; bool charged = false; - VM_BUG_ON_PAGE(!PageLocked(page), page); - VM_BUG_ON_PAGE(PageSwapBacked(page), page); + VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); + VM_BUG_ON_FOLIO(folio_test_swapbacked(folio), folio); mapping_set_update(&xas, mapping); - get_page(page); - page->mapping = mapping; - page->index = offset; + folio_get(folio); + folio->mapping = mapping; + folio->index = index; if (!huge) { - error = mem_cgroup_charge(page, NULL, gfp); + error = mem_cgroup_charge(folio, NULL, gfp); + VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio); if (error) goto error; charged = true; @@ -901,7 +902,7 @@ noinline int __add_to_page_cache_locked(struct page *page, unsigned int order = xa_get_order(xas.xa, xas.xa_index); void *entry, *old = NULL; - if (order > thp_order(page)) + if (order > folio_order(folio)) xas_split_alloc(&xas, xa_load(xas.xa, xas.xa_index), order, gfp); xas_lock_irq(&xas); @@ -918,13 +919,13 @@ noinline int __add_to_page_cache_locked(struct page *page, *shadowp = old; /* entry may have been split before we acquired lock */ order = xa_get_order(xas.xa, xas.xa_index); - if (order > thp_order(page)) { + if (order > folio_order(folio)) { xas_split(&xas, old, order); xas_reset(&xas); } } - xas_store(&xas, page); + xas_store(&xas, folio); if (xas_error(&xas)) goto unlock; @@ -932,7 +933,7 @@ noinline int __add_to_page_cache_locked(struct page *page, /* hugetlb pages do not participate in page cache accounting */ if (!huge) - __inc_lruvec_page_state(page, NR_FILE_PAGES); + __lruvec_stat_add_folio(folio, NR_FILE_PAGES); unlock: xas_unlock_irq(&xas); } while (xas_nomem(&xas, gfp)); @@ -940,19 +941,19 @@ unlock: if (xas_error(&xas)) { error = xas_error(&xas); if (charged) - mem_cgroup_uncharge(page); + mem_cgroup_uncharge(folio); goto error; } - trace_mm_filemap_add_to_page_cache(page); + trace_mm_filemap_add_to_page_cache(&folio->page); return 0; error: - page->mapping = NULL; + folio->mapping = NULL; /* Leave page->index set: truncation relies upon it */ - put_page(page); + folio_put(folio); return error; } -ALLOW_ERROR_INJECTION(__add_to_page_cache_locked, ERRNO); +ALLOW_ERROR_INJECTION(__filemap_add_folio, ERRNO); /** * add_to_page_cache_locked - add a locked page to the pagecache @@ -969,59 +970,58 @@ ALLOW_ERROR_INJECTION(__add_to_page_cache_locked, ERRNO); int add_to_page_cache_locked(struct page *page, struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask) { - return __add_to_page_cache_locked(page, mapping, offset, + return __filemap_add_folio(mapping, page_folio(page), offset, gfp_mask, NULL); } EXPORT_SYMBOL(add_to_page_cache_locked); -int add_to_page_cache_lru(struct page *page, struct address_space *mapping, - pgoff_t offset, gfp_t gfp_mask) +int filemap_add_folio(struct address_space *mapping, struct folio *folio, + pgoff_t index, gfp_t gfp) { void *shadow = NULL; int ret; - __SetPageLocked(page); - ret = __add_to_page_cache_locked(page, mapping, offset, - gfp_mask, &shadow); + __folio_set_locked(folio); + ret = __filemap_add_folio(mapping, folio, index, gfp, &shadow); if (unlikely(ret)) - __ClearPageLocked(page); + __folio_clear_locked(folio); else { /* - * The page might have been evicted from cache only + * The folio might have been evicted from cache only * recently, in which case it should be activated like - * any other repeatedly accessed page. - * The exception is pages getting rewritten; evicting other + * any other repeatedly accessed folio. + * The exception is folios getting rewritten; evicting other * data from the working set, only to cache data that will * get overwritten with something else, is a waste of memory. */ - WARN_ON_ONCE(PageActive(page)); - if (!(gfp_mask & __GFP_WRITE) && shadow) - workingset_refault(page, shadow); - lru_cache_add(page); + WARN_ON_ONCE(folio_test_active(folio)); + if (!(gfp & __GFP_WRITE) && shadow) + workingset_refault(folio, shadow); + folio_add_lru(folio); } return ret; } -EXPORT_SYMBOL_GPL(add_to_page_cache_lru); +EXPORT_SYMBOL_GPL(filemap_add_folio); #ifdef CONFIG_NUMA -struct page *__page_cache_alloc(gfp_t gfp) +struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order) { int n; - struct page *page; + struct folio *folio; if (cpuset_do_page_mem_spread()) { unsigned int cpuset_mems_cookie; do { cpuset_mems_cookie = read_mems_allowed_begin(); n = cpuset_mem_spread_node(); - page = __alloc_pages_node(n, gfp, 0); - } while (!page && read_mems_allowed_retry(cpuset_mems_cookie)); + folio = __folio_alloc_node(gfp, order, n); + } while (!folio && read_mems_allowed_retry(cpuset_mems_cookie)); - return page; + return folio; } - return alloc_pages(gfp, 0); + return folio_alloc(gfp, order); } -EXPORT_SYMBOL(__page_cache_alloc); +EXPORT_SYMBOL(filemap_alloc_folio); #endif /* @@ -1074,11 +1074,11 @@ EXPORT_SYMBOL(filemap_invalidate_unlock_two); */ #define PAGE_WAIT_TABLE_BITS 8 #define PAGE_WAIT_TABLE_SIZE (1 << PAGE_WAIT_TABLE_BITS) -static wait_queue_head_t page_wait_table[PAGE_WAIT_TABLE_SIZE] __cacheline_aligned; +static wait_queue_head_t folio_wait_table[PAGE_WAIT_TABLE_SIZE] __cacheline_aligned; -static wait_queue_head_t *page_waitqueue(struct page *page) +static wait_queue_head_t *folio_waitqueue(struct folio *folio) { - return &page_wait_table[hash_ptr(page, PAGE_WAIT_TABLE_BITS)]; + return &folio_wait_table[hash_ptr(folio, PAGE_WAIT_TABLE_BITS)]; } void __init pagecache_init(void) @@ -1086,7 +1086,7 @@ void __init pagecache_init(void) int i; for (i = 0; i < PAGE_WAIT_TABLE_SIZE; i++) - init_waitqueue_head(&page_wait_table[i]); + init_waitqueue_head(&folio_wait_table[i]); page_writeback_init(); } @@ -1141,10 +1141,10 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, */ flags = wait->flags; if (flags & WQ_FLAG_EXCLUSIVE) { - if (test_bit(key->bit_nr, &key->page->flags)) + if (test_bit(key->bit_nr, &key->folio->flags)) return -1; if (flags & WQ_FLAG_CUSTOM) { - if (test_and_set_bit(key->bit_nr, &key->page->flags)) + if (test_and_set_bit(key->bit_nr, &key->folio->flags)) return -1; flags |= WQ_FLAG_DONE; } @@ -1157,7 +1157,7 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, * * So update the flags atomically, and wake up the waiter * afterwards to avoid any races. This store-release pairs - * with the load-acquire in wait_on_page_bit_common(). + * with the load-acquire in folio_wait_bit_common(). */ smp_store_release(&wait->flags, flags | WQ_FLAG_WOKEN); wake_up_state(wait->private, mode); @@ -1176,14 +1176,14 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, return (flags & WQ_FLAG_EXCLUSIVE) != 0; } -static void wake_up_page_bit(struct page *page, int bit_nr) +static void folio_wake_bit(struct folio *folio, int bit_nr) { - wait_queue_head_t *q = page_waitqueue(page); + wait_queue_head_t *q = folio_waitqueue(folio); struct wait_page_key key; unsigned long flags; wait_queue_entry_t bookmark; - key.page = page; + key.folio = folio; key.bit_nr = bit_nr; key.page_match = 0; @@ -1218,7 +1218,7 @@ static void wake_up_page_bit(struct page *page, int bit_nr) * page waiters. */ if (!waitqueue_active(q) || !key.page_match) { - ClearPageWaiters(page); + folio_clear_waiters(folio); /* * It's possible to miss clearing Waiters here, when we woke * our page waiters, but the hashed waitqueue has waiters for @@ -1230,19 +1230,19 @@ static void wake_up_page_bit(struct page *page, int bit_nr) spin_unlock_irqrestore(&q->lock, flags); } -static void wake_up_page(struct page *page, int bit) +static void folio_wake(struct folio *folio, int bit) { - if (!PageWaiters(page)) + if (!folio_test_waiters(folio)) return; - wake_up_page_bit(page, bit); + folio_wake_bit(folio, bit); } /* - * A choice of three behaviors for wait_on_page_bit_common(): + * A choice of three behaviors for folio_wait_bit_common(): */ enum behavior { EXCLUSIVE, /* Hold ref to page and take the bit when woken, like - * __lock_page() waiting on then setting PG_locked. + * __folio_lock() waiting on then setting PG_locked. */ SHARED, /* Hold ref to page and check the bit when woken, like * wait_on_page_writeback() waiting on PG_writeback. @@ -1253,16 +1253,16 @@ enum behavior { }; /* - * Attempt to check (or get) the page bit, and mark us done + * Attempt to check (or get) the folio flag, and mark us done * if successful. */ -static inline bool trylock_page_bit_common(struct page *page, int bit_nr, +static inline bool folio_trylock_flag(struct folio *folio, int bit_nr, struct wait_queue_entry *wait) { if (wait->flags & WQ_FLAG_EXCLUSIVE) { - if (test_and_set_bit(bit_nr, &page->flags)) + if (test_and_set_bit(bit_nr, &folio->flags)) return false; - } else if (test_bit(bit_nr, &page->flags)) + } else if (test_bit(bit_nr, &folio->flags)) return false; wait->flags |= WQ_FLAG_WOKEN | WQ_FLAG_DONE; @@ -1272,9 +1272,10 @@ static inline bool trylock_page_bit_common(struct page *page, int bit_nr, /* How many times do we accept lock stealing from under a waiter? */ int sysctl_page_lock_unfairness = 5; -static inline int wait_on_page_bit_common(wait_queue_head_t *q, - struct page *page, int bit_nr, int state, enum behavior behavior) +static inline int folio_wait_bit_common(struct folio *folio, int bit_nr, + int state, enum behavior behavior) { + wait_queue_head_t *q = folio_waitqueue(folio); int unfairness = sysctl_page_lock_unfairness; struct wait_page_queue wait_page; wait_queue_entry_t *wait = &wait_page.wait; @@ -1283,8 +1284,8 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q, unsigned long pflags; if (bit_nr == PG_locked && - !PageUptodate(page) && PageWorkingset(page)) { - if (!PageSwapBacked(page)) { + !folio_test_uptodate(folio) && folio_test_workingset(folio)) { + if (!folio_test_swapbacked(folio)) { delayacct_thrashing_start(); delayacct = true; } @@ -1294,7 +1295,7 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q, init_wait(wait); wait->func = wake_page_function; - wait_page.page = page; + wait_page.folio = folio; wait_page.bit_nr = bit_nr; repeat: @@ -1309,7 +1310,7 @@ repeat: * Do one last check whether we can get the * page bit synchronously. * - * Do the SetPageWaiters() marking before that + * Do the folio_set_waiters() marking before that * to let any waker we _just_ missed know they * need to wake us up (otherwise they'll never * even go to the slow case that looks at the @@ -1320,8 +1321,8 @@ repeat: * lock to avoid races. */ spin_lock_irq(&q->lock); - SetPageWaiters(page); - if (!trylock_page_bit_common(page, bit_nr, wait)) + folio_set_waiters(folio); + if (!folio_trylock_flag(folio, bit_nr, wait)) __add_wait_queue_entry_tail(q, wait); spin_unlock_irq(&q->lock); @@ -1331,10 +1332,10 @@ repeat: * see whether the page bit testing has already * been done by the wake function. * - * We can drop our reference to the page. + * We can drop our reference to the folio. */ if (behavior == DROP) - put_page(page); + folio_put(folio); /* * Note that until the "finish_wait()", or until @@ -1371,7 +1372,7 @@ repeat: * * And if that fails, we'll have to retry this all. */ - if (unlikely(test_and_set_bit(bit_nr, &page->flags))) + if (unlikely(test_and_set_bit(bit_nr, folio_flags(folio, 0)))) goto repeat; wait->flags |= WQ_FLAG_DONE; @@ -1380,7 +1381,7 @@ repeat: /* * If a signal happened, this 'finish_wait()' may remove the last - * waiter from the wait-queues, but the PageWaiters bit will remain + * waiter from the wait-queues, but the folio waiters bit will remain * set. That's ok. The next wakeup will take care of it, and trying * to do it here would be difficult and prone to races. */ @@ -1411,19 +1412,17 @@ repeat: return wait->flags & WQ_FLAG_WOKEN ? 0 : -EINTR; } -void wait_on_page_bit(struct page *page, int bit_nr) +void folio_wait_bit(struct folio *folio, int bit_nr) { - wait_queue_head_t *q = page_waitqueue(page); - wait_on_page_bit_common(q, page, bit_nr, TASK_UNINTERRUPTIBLE, SHARED); + folio_wait_bit_common(folio, bit_nr, TASK_UNINTERRUPTIBLE, SHARED); } -EXPORT_SYMBOL(wait_on_page_bit); +EXPORT_SYMBOL(folio_wait_bit); -int wait_on_page_bit_killable(struct page *page, int bit_nr) +int folio_wait_bit_killable(struct folio *folio, int bit_nr) { - wait_queue_head_t *q = page_waitqueue(page); - return wait_on_page_bit_common(q, page, bit_nr, TASK_KILLABLE, SHARED); + return folio_wait_bit_common(folio, bit_nr, TASK_KILLABLE, SHARED); } -EXPORT_SYMBOL(wait_on_page_bit_killable); +EXPORT_SYMBOL(folio_wait_bit_killable); /** * put_and_wait_on_page_locked - Drop a reference and wait for it to be unlocked @@ -1440,31 +1439,28 @@ EXPORT_SYMBOL(wait_on_page_bit_killable); */ int put_and_wait_on_page_locked(struct page *page, int state) { - wait_queue_head_t *q; - - page = compound_head(page); - q = page_waitqueue(page); - return wait_on_page_bit_common(q, page, PG_locked, state, DROP); + return folio_wait_bit_common(page_folio(page), PG_locked, state, + DROP); } /** - * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue - * @page: Page defining the wait queue of interest + * folio_add_wait_queue - Add an arbitrary waiter to a folio's wait queue + * @folio: Folio defining the wait queue of interest * @waiter: Waiter to add to the queue * - * Add an arbitrary @waiter to the wait queue for the nominated @page. + * Add an arbitrary @waiter to the wait queue for the nominated @folio. */ -void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter) +void folio_add_wait_queue(struct folio *folio, wait_queue_entry_t *waiter) { - wait_queue_head_t *q = page_waitqueue(page); + wait_queue_head_t *q = folio_waitqueue(folio); unsigned long flags; spin_lock_irqsave(&q->lock, flags); __add_wait_queue_entry_tail(q, waiter); - SetPageWaiters(page); + folio_set_waiters(folio); spin_unlock_irqrestore(&q->lock, flags); } -EXPORT_SYMBOL_GPL(add_page_wait_queue); +EXPORT_SYMBOL_GPL(folio_add_wait_queue); #ifndef clear_bit_unlock_is_negative_byte @@ -1490,124 +1486,116 @@ static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile void *mem #endif /** - * unlock_page - unlock a locked page - * @page: the page + * folio_unlock - Unlock a locked folio. + * @folio: The folio. * - * Unlocks the page and wakes up sleepers in wait_on_page_locked(). - * Also wakes sleepers in wait_on_page_writeback() because the wakeup - * mechanism between PageLocked pages and PageWriteback pages is shared. - * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep. + * Unlocks the folio and wakes up any thread sleeping on the page lock. * - * Note that this depends on PG_waiters being the sign bit in the byte - * that contains PG_locked - thus the BUILD_BUG_ON(). That allows us to - * clear the PG_locked bit and test PG_waiters at the same time fairly - * portably (architectures that do LL/SC can test any bit, while x86 can - * test the sign bit). + * Context: May be called from interrupt or process context. May not be + * called from NMI context. */ -void unlock_page(struct page *page) +void folio_unlock(struct folio *folio) { + /* Bit 7 allows x86 to check the byte's sign bit */ BUILD_BUG_ON(PG_waiters != 7); - page = compound_head(page); - VM_BUG_ON_PAGE(!PageLocked(page), page); - if (clear_bit_unlock_is_negative_byte(PG_locked, &page->flags)) - wake_up_page_bit(page, PG_locked); + BUILD_BUG_ON(PG_locked > 7); + VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); + if (clear_bit_unlock_is_negative_byte(PG_locked, folio_flags(folio, 0))) + folio_wake_bit(folio, PG_locked); } -EXPORT_SYMBOL(unlock_page); +EXPORT_SYMBOL(folio_unlock); /** - * end_page_private_2 - Clear PG_private_2 and release any waiters - * @page: The page + * folio_end_private_2 - Clear PG_private_2 and wake any waiters. + * @folio: The folio. * - * Clear the PG_private_2 bit on a page and wake up any sleepers waiting for - * this. The page ref held for PG_private_2 being set is released. + * Clear the PG_private_2 bit on a folio and wake up any sleepers waiting for + * it. The folio reference held for PG_private_2 being set is released. * - * This is, for example, used when a netfs page is being written to a local - * disk cache, thereby allowing writes to the cache for the same page to be + * This is, for example, used when a netfs folio is being written to a local + * disk cache, thereby allowing writes to the cache for the same folio to be * serialised. */ -void end_page_private_2(struct page *page) +void folio_end_private_2(struct folio *folio) { - page = compound_head(page); - VM_BUG_ON_PAGE(!PagePrivate2(page), page); - clear_bit_unlock(PG_private_2, &page->flags); - wake_up_page_bit(page, PG_private_2); - put_page(page); + VM_BUG_ON_FOLIO(!folio_test_private_2(folio), folio); + clear_bit_unlock(PG_private_2, folio_flags(folio, 0)); + folio_wake_bit(folio, PG_private_2); + folio_put(folio); } -EXPORT_SYMBOL(end_page_private_2); +EXPORT_SYMBOL(folio_end_private_2); /** - * wait_on_page_private_2 - Wait for PG_private_2 to be cleared on a page - * @page: The page to wait on + * folio_wait_private_2 - Wait for PG_private_2 to be cleared on a folio. + * @folio: The folio to wait on. * - * Wait for PG_private_2 (aka PG_fscache) to be cleared on a page. + * Wait for PG_private_2 (aka PG_fscache) to be cleared on a folio. */ -void wait_on_page_private_2(struct page *page) +void folio_wait_private_2(struct folio *folio) { - page = compound_head(page); - while (PagePrivate2(page)) - wait_on_page_bit(page, PG_private_2); + while (folio_test_private_2(folio)) + folio_wait_bit(folio, PG_private_2); } -EXPORT_SYMBOL(wait_on_page_private_2); +EXPORT_SYMBOL(folio_wait_private_2); /** - * wait_on_page_private_2_killable - Wait for PG_private_2 to be cleared on a page - * @page: The page to wait on + * folio_wait_private_2_killable - Wait for PG_private_2 to be cleared on a folio. + * @folio: The folio to wait on. * - * Wait for PG_private_2 (aka PG_fscache) to be cleared on a page or until a + * Wait for PG_private_2 (aka PG_fscache) to be cleared on a folio or until a * fatal signal is received by the calling task. * * Return: * - 0 if successful. * - -EINTR if a fatal signal was encountered. */ -int wait_on_page_private_2_killable(struct page *page) +int folio_wait_private_2_killable(struct folio *folio) { int ret = 0; - page = compound_head(page); - while (PagePrivate2(page)) { - ret = wait_on_page_bit_killable(page, PG_private_2); + while (folio_test_private_2(folio)) { + ret = folio_wait_bit_killable(folio, PG_private_2); if (ret < 0) break; } return ret; } -EXPORT_SYMBOL(wait_on_page_private_2_killable); +EXPORT_SYMBOL(folio_wait_private_2_killable); /** - * end_page_writeback - end writeback against a page - * @page: the page + * folio_end_writeback - End writeback against a folio. + * @folio: The folio. */ -void end_page_writeback(struct page *page) +void folio_end_writeback(struct folio *folio) { /* - * TestClearPageReclaim could be used here but it is an atomic - * operation and overkill in this particular case. Failing to - * shuffle a page marked for immediate reclaim is too mild to - * justify taking an atomic operation penalty at the end of - * ever page writeback. + * folio_test_clear_reclaim() could be used here but it is an + * atomic operation and overkill in this particular case. Failing + * to shuffle a folio marked for immediate reclaim is too mild + * a gain to justify taking an atomic operation penalty at the + * end of every folio writeback. */ - if (PageReclaim(page)) { - ClearPageReclaim(page); - rotate_reclaimable_page(page); + if (folio_test_reclaim(folio)) { + folio_clear_reclaim(folio); + folio_rotate_reclaimable(folio); } /* - * Writeback does not hold a page reference of its own, relying + * Writeback does not hold a folio reference of its own, relying * on truncation to wait for the clearing of PG_writeback. - * But here we must make sure that the page is not freed and - * reused before the wake_up_page(). + * But here we must make sure that the folio is not freed and + * reused before the folio_wake(). */ - get_page(page); - if (!test_clear_page_writeback(page)) + folio_get(folio); + if (!__folio_end_writeback(folio)) BUG(); smp_mb__after_atomic(); - wake_up_page(page, PG_writeback); - put_page(page); + folio_wake(folio, PG_writeback); + folio_put(folio); } -EXPORT_SYMBOL(end_page_writeback); +EXPORT_SYMBOL(folio_end_writeback); /* * After completing I/O on a page, call this routine to update the page @@ -1638,39 +1626,35 @@ void page_endio(struct page *page, bool is_write, int err) EXPORT_SYMBOL_GPL(page_endio); /** - * __lock_page - get a lock on the page, assuming we need to sleep to get it - * @__page: the page to lock + * __folio_lock - Get a lock on the folio, assuming we need to sleep to get it. + * @folio: The folio to lock */ -void __lock_page(struct page *__page) +void __folio_lock(struct folio *folio) { - struct page *page = compound_head(__page); - wait_queue_head_t *q = page_waitqueue(page); - wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE, + folio_wait_bit_common(folio, PG_locked, TASK_UNINTERRUPTIBLE, EXCLUSIVE); } -EXPORT_SYMBOL(__lock_page); +EXPORT_SYMBOL(__folio_lock); -int __lock_page_killable(struct page *__page) +int __folio_lock_killable(struct folio *folio) { - struct page *page = compound_head(__page); - wait_queue_head_t *q = page_waitqueue(page); - return wait_on_page_bit_common(q, page, PG_locked, TASK_KILLABLE, + return folio_wait_bit_common(folio, PG_locked, TASK_KILLABLE, EXCLUSIVE); } -EXPORT_SYMBOL_GPL(__lock_page_killable); +EXPORT_SYMBOL_GPL(__folio_lock_killable); -int __lock_page_async(struct page *page, struct wait_page_queue *wait) +static int __folio_lock_async(struct folio *folio, struct wait_page_queue *wait) { - struct wait_queue_head *q = page_waitqueue(page); + struct wait_queue_head *q = folio_waitqueue(folio); int ret = 0; - wait->page = page; + wait->folio = folio; wait->bit_nr = PG_locked; spin_lock_irq(&q->lock); __add_wait_queue_entry_tail(q, &wait->wait); - SetPageWaiters(page); - ret = !trylock_page(page); + folio_set_waiters(folio); + ret = !folio_trylock(folio); /* * If we were successful now, we know we're still on the * waitqueue as we're still under the lock. This means it's @@ -1687,16 +1671,16 @@ int __lock_page_async(struct page *page, struct wait_page_queue *wait) /* * Return values: - * 1 - page is locked; mmap_lock is still held. - * 0 - page is not locked. + * true - folio is locked; mmap_lock is still held. + * false - folio is not locked. * mmap_lock has been released (mmap_read_unlock(), unless flags had both * FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_RETRY_NOWAIT set, in * which case mmap_lock is still held. * - * If neither ALLOW_RETRY nor KILLABLE are set, will always return 1 - * with the page locked and the mmap_lock unperturbed. + * If neither ALLOW_RETRY nor KILLABLE are set, will always return true + * with the folio locked and the mmap_lock unperturbed. */ -int __lock_page_or_retry(struct page *page, struct mm_struct *mm, +bool __folio_lock_or_retry(struct folio *folio, struct mm_struct *mm, unsigned int flags) { if (fault_flag_allow_retry_first(flags)) { @@ -1705,28 +1689,28 @@ int __lock_page_or_retry(struct page *page, struct mm_struct *mm, * even though return 0. */ if (flags & FAULT_FLAG_RETRY_NOWAIT) - return 0; + return false; mmap_read_unlock(mm); if (flags & FAULT_FLAG_KILLABLE) - wait_on_page_locked_killable(page); + folio_wait_locked_killable(folio); else - wait_on_page_locked(page); - return 0; + folio_wait_locked(folio); + return false; } if (flags & FAULT_FLAG_KILLABLE) { - int ret; + bool ret; - ret = __lock_page_killable(page); + ret = __folio_lock_killable(folio); if (ret) { mmap_read_unlock(mm); - return 0; + return false; } } else { - __lock_page(page); + __folio_lock(folio); } - return 1; + return true; } /** @@ -1802,143 +1786,155 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping, EXPORT_SYMBOL(page_cache_prev_miss); /* + * Lockless page cache protocol: + * On the lookup side: + * 1. Load the folio from i_pages + * 2. Increment the refcount if it's not zero + * 3. If the folio is not found by xas_reload(), put the refcount and retry + * + * On the removal side: + * A. Freeze the page (by zeroing the refcount if nobody else has a reference) + * B. Remove the page from i_pages + * C. Return the page to the page allocator + * + * This means that any page may have its reference count temporarily + * increased by a speculative page cache (or fast GUP) lookup as it can + * be allocated by another user before the RCU grace period expires. + * Because the refcount temporarily acquired here may end up being the + * last refcount on the page, any page allocation must be freeable by + * folio_put(). + */ + +/* * mapping_get_entry - Get a page cache entry. * @mapping: the address_space to search * @index: The page cache index. * - * Looks up the page cache slot at @mapping & @index. If there is a - * page cache page, the head page is returned with an increased refcount. + * Looks up the page cache entry at @mapping & @index. If it is a folio, + * it is returned with an increased refcount. If it is a shadow entry + * of a previously evicted folio, or a swap entry from shmem/tmpfs, + * it is returned without further action. * - * If the slot holds a shadow entry of a previously evicted page, or a - * swap entry from shmem/tmpfs, it is returned. - * - * Return: The head page or shadow entry, %NULL if nothing is found. + * Return: The folio, swap or shadow entry, %NULL if nothing is found. */ -static struct page *mapping_get_entry(struct address_space *mapping, - pgoff_t index) +static void *mapping_get_entry(struct address_space *mapping, pgoff_t index) { XA_STATE(xas, &mapping->i_pages, index); - struct page *page; + struct folio *folio; rcu_read_lock(); repeat: xas_reset(&xas); - page = xas_load(&xas); - if (xas_retry(&xas, page)) + folio = xas_load(&xas); + if (xas_retry(&xas, folio)) goto repeat; /* * A shadow entry of a recently evicted page, or a swap entry from * shmem/tmpfs. Return it without attempting to raise page count. */ - if (!page || xa_is_value(page)) + if (!folio || xa_is_value(folio)) goto out; - if (!page_cache_get_speculative(page)) + if (!folio_try_get_rcu(folio)) goto repeat; - /* - * Has the page moved or been split? - * This is part of the lockless pagecache protocol. See - * include/linux/pagemap.h for details. - */ - if (unlikely(page != xas_reload(&xas))) { - put_page(page); + if (unlikely(folio != xas_reload(&xas))) { + folio_put(folio); goto repeat; } out: rcu_read_unlock(); - return page; + return folio; } /** - * pagecache_get_page - Find and get a reference to a page. + * __filemap_get_folio - Find and get a reference to a folio. * @mapping: The address_space to search. * @index: The page index. - * @fgp_flags: %FGP flags modify how the page is returned. - * @gfp_mask: Memory allocation flags to use if %FGP_CREAT is specified. + * @fgp_flags: %FGP flags modify how the folio is returned. + * @gfp: Memory allocation flags to use if %FGP_CREAT is specified. * * Looks up the page cache entry at @mapping & @index. * * @fgp_flags can be zero or more of these flags: * - * * %FGP_ACCESSED - The page will be marked accessed. - * * %FGP_LOCK - The page is returned locked. - * * %FGP_HEAD - If the page is present and a THP, return the head page - * rather than the exact page specified by the index. + * * %FGP_ACCESSED - The folio will be marked accessed. + * * %FGP_LOCK - The folio is returned locked. * * %FGP_ENTRY - If there is a shadow / swap / DAX entry, return it - * instead of allocating a new page to replace it. + * instead of allocating a new folio to replace it. * * %FGP_CREAT - If no page is present then a new page is allocated using - * @gfp_mask and added to the page cache and the VM's LRU list. + * @gfp and added to the page cache and the VM's LRU list. * The page is returned locked and with an increased refcount. * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the * page is already in cache. If the page was allocated, unlock it before * returning so the caller can do the same dance. - * * %FGP_WRITE - The page will be written - * * %FGP_NOFS - __GFP_FS will get cleared in gfp mask - * * %FGP_NOWAIT - Don't get blocked by page lock + * * %FGP_WRITE - The page will be written to by the caller. + * * %FGP_NOFS - __GFP_FS will get cleared in gfp. + * * %FGP_NOWAIT - Don't get blocked by page lock. + * * %FGP_STABLE - Wait for the folio to be stable (finished writeback) * * If %FGP_LOCK or %FGP_CREAT are specified then the function may sleep even * if the %GFP flags specified for %FGP_CREAT are atomic. * * If there is a page cache page, it is returned with an increased refcount. * - * Return: The found page or %NULL otherwise. + * Return: The found folio or %NULL otherwise. */ -struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index, - int fgp_flags, gfp_t gfp_mask) +struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index, + int fgp_flags, gfp_t gfp) { - struct page *page; + struct folio *folio; repeat: - page = mapping_get_entry(mapping, index); - if (xa_is_value(page)) { + folio = mapping_get_entry(mapping, index); + if (xa_is_value(folio)) { if (fgp_flags & FGP_ENTRY) - return page; - page = NULL; + return folio; + folio = NULL; } - if (!page) + if (!folio) goto no_page; if (fgp_flags & FGP_LOCK) { if (fgp_flags & FGP_NOWAIT) { - if (!trylock_page(page)) { - put_page(page); + if (!folio_trylock(folio)) { + folio_put(folio); return NULL; } } else { - lock_page(page); + folio_lock(folio); } /* Has the page been truncated? */ - if (unlikely(page->mapping != mapping)) { - unlock_page(page); - put_page(page); + if (unlikely(folio->mapping != mapping)) { + folio_unlock(folio); + folio_put(folio); goto repeat; } - VM_BUG_ON_PAGE(!thp_contains(page, index), page); + VM_BUG_ON_FOLIO(!folio_contains(folio, index), folio); } if (fgp_flags & FGP_ACCESSED) - mark_page_accessed(page); + folio_mark_accessed(folio); else if (fgp_flags & FGP_WRITE) { /* Clear idle flag for buffer write */ - if (page_is_idle(page)) - clear_page_idle(page); + if (folio_test_idle(folio)) + folio_clear_idle(folio); } - if (!(fgp_flags & FGP_HEAD)) - page = find_subpage(page, index); + if (fgp_flags & FGP_STABLE) + folio_wait_stable(folio); no_page: - if (!page && (fgp_flags & FGP_CREAT)) { + if (!folio && (fgp_flags & FGP_CREAT)) { int err; if ((fgp_flags & FGP_WRITE) && mapping_can_writeback(mapping)) - gfp_mask |= __GFP_WRITE; + gfp |= __GFP_WRITE; if (fgp_flags & FGP_NOFS) - gfp_mask &= ~__GFP_FS; + gfp &= ~__GFP_FS; - page = __page_cache_alloc(gfp_mask); - if (!page) + folio = filemap_alloc_folio(gfp, 0); + if (!folio) return NULL; if (WARN_ON_ONCE(!(fgp_flags & (FGP_LOCK | FGP_FOR_MMAP)))) @@ -1946,27 +1942,27 @@ no_page: /* Init accessed so avoid atomic mark_page_accessed later */ if (fgp_flags & FGP_ACCESSED) - __SetPageReferenced(page); + __folio_set_referenced(folio); - err = add_to_page_cache_lru(page, mapping, index, gfp_mask); + err = filemap_add_folio(mapping, folio, index, gfp); if (unlikely(err)) { - put_page(page); - page = NULL; + folio_put(folio); + folio = NULL; if (err == -EEXIST) goto repeat; } /* - * add_to_page_cache_lru locks the page, and for mmap we expect - * an unlocked page. + * filemap_add_folio locks the page, and for mmap + * we expect an unlocked page. */ - if (page && (fgp_flags & FGP_FOR_MMAP)) - unlock_page(page); + if (folio && (fgp_flags & FGP_FOR_MMAP)) + folio_unlock(folio); } - return page; + return folio; } -EXPORT_SYMBOL(pagecache_get_page); +EXPORT_SYMBOL(__filemap_get_folio); static inline struct page *find_get_entry(struct xa_state *xas, pgoff_t max, xa_mark_t mark) @@ -2421,6 +2417,7 @@ static int filemap_update_page(struct kiocb *iocb, struct address_space *mapping, struct iov_iter *iter, struct page *page) { + struct folio *folio = page_folio(page); int error; if (iocb->ki_flags & IOCB_NOWAIT) { @@ -2430,40 +2427,40 @@ static int filemap_update_page(struct kiocb *iocb, filemap_invalidate_lock_shared(mapping); } - if (!trylock_page(page)) { + if (!folio_trylock(folio)) { error = -EAGAIN; if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_NOIO)) goto unlock_mapping; if (!(iocb->ki_flags & IOCB_WAITQ)) { filemap_invalidate_unlock_shared(mapping); - put_and_wait_on_page_locked(page, TASK_KILLABLE); + put_and_wait_on_page_locked(&folio->page, TASK_KILLABLE); return AOP_TRUNCATED_PAGE; } - error = __lock_page_async(page, iocb->ki_waitq); + error = __folio_lock_async(folio, iocb->ki_waitq); if (error) goto unlock_mapping; } error = AOP_TRUNCATED_PAGE; - if (!page->mapping) + if (!folio->mapping) goto unlock; error = 0; - if (filemap_range_uptodate(mapping, iocb->ki_pos, iter, page)) + if (filemap_range_uptodate(mapping, iocb->ki_pos, iter, &folio->page)) goto unlock; error = -EAGAIN; if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT | IOCB_WAITQ)) goto unlock; - error = filemap_read_page(iocb->ki_filp, mapping, page); + error = filemap_read_page(iocb->ki_filp, mapping, &folio->page); goto unlock_mapping; unlock: - unlock_page(page); + folio_unlock(folio); unlock_mapping: filemap_invalidate_unlock_shared(mapping); if (error == AOP_TRUNCATED_PAGE) - put_page(page); + folio_put(folio); return error; } @@ -2900,7 +2897,9 @@ unlock: static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page, struct file **fpin) { - if (trylock_page(page)) + struct folio *folio = page_folio(page); + + if (folio_trylock(folio)) return 1; /* @@ -2913,7 +2912,7 @@ static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page, *fpin = maybe_unlock_mmap_for_io(vmf, *fpin); if (vmf->flags & FAULT_FLAG_KILLABLE) { - if (__lock_page_killable(page)) { + if (__folio_lock_killable(folio)) { /* * We didn't have the right flags to drop the mmap_lock, * but all fault_handlers only check for fatal signals @@ -2925,11 +2924,11 @@ static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page, return 0; } } else - __lock_page(page); + __folio_lock(folio); + return 1; } - /* * Synchronous readahead happens when we don't even find a page in the page * cache at all. We don't want to perform IO under the mmap sem, so if we have @@ -3708,28 +3707,6 @@ out: } EXPORT_SYMBOL(generic_file_direct_write); -/* - * Find or create a page at the given pagecache position. Return the locked - * page. This function is specifically for buffered writes. - */ -struct page *grab_cache_page_write_begin(struct address_space *mapping, - pgoff_t index, unsigned flags) -{ - struct page *page; - int fgp_flags = FGP_LOCK|FGP_WRITE|FGP_CREAT; - - if (flags & AOP_FLAG_NOFS) - fgp_flags |= FGP_NOFS; - - page = pagecache_get_page(mapping, index, fgp_flags, - mapping_gfp_mask(mapping)); - if (page) - wait_for_stable_page(page); - - return page; -} -EXPORT_SYMBOL(grab_cache_page_write_begin); - ssize_t generic_perform_write(struct file *file, struct iov_iter *i, loff_t pos) { diff --git a/mm/folio-compat.c b/mm/folio-compat.c new file mode 100644 index 000000000000..5b6ae1da314e --- /dev/null +++ b/mm/folio-compat.c @@ -0,0 +1,142 @@ +/* + * Compatibility functions which bloat the callers too much to make inline. + * All of the callers of these functions should be converted to use folios + * eventually. + */ + +#include <linux/migrate.h> +#include <linux/pagemap.h> +#include <linux/swap.h> + +struct address_space *page_mapping(struct page *page) +{ + return folio_mapping(page_folio(page)); +} +EXPORT_SYMBOL(page_mapping); + +void unlock_page(struct page *page) +{ + return folio_unlock(page_folio(page)); +} +EXPORT_SYMBOL(unlock_page); + +void end_page_writeback(struct page *page) +{ + return folio_end_writeback(page_folio(page)); +} +EXPORT_SYMBOL(end_page_writeback); + +void wait_on_page_writeback(struct page *page) +{ + return folio_wait_writeback(page_folio(page)); +} +EXPORT_SYMBOL_GPL(wait_on_page_writeback); + +void wait_for_stable_page(struct page *page) +{ + return folio_wait_stable(page_folio(page)); +} +EXPORT_SYMBOL_GPL(wait_for_stable_page); + +bool page_mapped(struct page *page) +{ + return folio_mapped(page_folio(page)); +} +EXPORT_SYMBOL(page_mapped); + +void mark_page_accessed(struct page *page) +{ + folio_mark_accessed(page_folio(page)); +} +EXPORT_SYMBOL(mark_page_accessed); + +#ifdef CONFIG_MIGRATION +int migrate_page_move_mapping(struct address_space *mapping, + struct page *newpage, struct page *page, int extra_count) +{ + return folio_migrate_mapping(mapping, page_folio(newpage), + page_folio(page), extra_count); +} +EXPORT_SYMBOL(migrate_page_move_mapping); + +void migrate_page_states(struct page *newpage, struct page *page) +{ + folio_migrate_flags(page_folio(newpage), page_folio(page)); +} +EXPORT_SYMBOL(migrate_page_states); + +void migrate_page_copy(struct page *newpage, struct page *page) +{ + folio_migrate_copy(page_folio(newpage), page_folio(page)); +} +EXPORT_SYMBOL(migrate_page_copy); +#endif + +bool set_page_writeback(struct page *page) +{ + return folio_start_writeback(page_folio(page)); +} +EXPORT_SYMBOL(set_page_writeback); + +bool set_page_dirty(struct page *page) +{ + return folio_mark_dirty(page_folio(page)); +} +EXPORT_SYMBOL(set_page_dirty); + +int __set_page_dirty_nobuffers(struct page *page) +{ + return filemap_dirty_folio(page_mapping(page), page_folio(page)); +} +EXPORT_SYMBOL(__set_page_dirty_nobuffers); + +bool clear_page_dirty_for_io(struct page *page) +{ + return folio_clear_dirty_for_io(page_folio(page)); +} +EXPORT_SYMBOL(clear_page_dirty_for_io); + +bool redirty_page_for_writepage(struct writeback_control *wbc, + struct page *page) +{ + return folio_redirty_for_writepage(wbc, page_folio(page)); +} +EXPORT_SYMBOL(redirty_page_for_writepage); + +void lru_cache_add(struct page *page) +{ + folio_add_lru(page_folio(page)); +} +EXPORT_SYMBOL(lru_cache_add); + +int add_to_page_cache_lru(struct page *page, struct address_space *mapping, + pgoff_t index, gfp_t gfp) +{ + return filemap_add_folio(mapping, page_folio(page), index, gfp); +} +EXPORT_SYMBOL(add_to_page_cache_lru); + +noinline +struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index, + int fgp_flags, gfp_t gfp) +{ + struct folio *folio; + + folio = __filemap_get_folio(mapping, index, fgp_flags, gfp); + if ((fgp_flags & FGP_HEAD) || !folio || xa_is_value(folio)) + return &folio->page; + return folio_file_page(folio, index); +} +EXPORT_SYMBOL(pagecache_get_page); + +struct page *grab_cache_page_write_begin(struct address_space *mapping, + pgoff_t index, unsigned flags) +{ + unsigned fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE; + + if (flags & AOP_FLAG_NOFS) + fgp_flags |= FGP_NOFS; + return pagecache_get_page(mapping, index, fgp_flags, + mapping_gfp_mask(mapping)); +} +EXPORT_SYMBOL(grab_cache_page_write_begin); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index c5142d237e48..e5483347291c 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -603,7 +603,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf, VM_BUG_ON_PAGE(!PageCompound(page), page); - if (mem_cgroup_charge(page, vma->vm_mm, gfp)) { + if (mem_cgroup_charge(page_folio(page), vma->vm_mm, gfp)) { put_page(page); count_vm_event(THP_FAULT_FALLBACK); count_vm_event(THP_FAULT_FALLBACK_CHARGE); @@ -2405,7 +2405,8 @@ static void __split_huge_page_tail(struct page *head, int tail, static void __split_huge_page(struct page *page, struct list_head *list, pgoff_t end) { - struct page *head = compound_head(page); + struct folio *folio = page_folio(page); + struct page *head = &folio->page; struct lruvec *lruvec; struct address_space *swap_cache = NULL; unsigned long offset = 0; @@ -2424,7 +2425,7 @@ static void __split_huge_page(struct page *page, struct list_head *list, } /* lock lru list/PageCompound, ref frozen by page_ref_freeze */ - lruvec = lock_page_lruvec(head); + lruvec = folio_lruvec_lock(folio); ClearPageHasHWPoisoned(head); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 95dc7b83381f..6378c1066459 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5302,7 +5302,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, *pagep = NULL; goto out; } - copy_huge_page(page, *pagep); + folio_copy(page_folio(page), page_folio(*pagep)); put_page(*pagep); *pagep = NULL; } diff --git a/mm/internal.h b/mm/internal.h index cf3cb933eba3..b1001ebeb286 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -34,7 +34,16 @@ void page_writeback_init(void); +static inline void *folio_raw_mapping(struct folio *folio) +{ + unsigned long mapping = (unsigned long)folio->mapping; + + return (void *)(mapping & ~PAGE_MAPPING_FLAGS); +} + vm_fault_t do_swap_page(struct vm_fault *vmf); +void folio_rotate_reclaimable(struct folio *folio); +bool __folio_end_writeback(struct folio *folio); void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma, unsigned long floor, unsigned long ceiling); @@ -63,17 +72,28 @@ unsigned find_lock_entries(struct address_space *mapping, pgoff_t start, pgoff_t end, struct pagevec *pvec, pgoff_t *indices); /** - * page_evictable - test whether a page is evictable - * @page: the page to test + * folio_evictable - Test whether a folio is evictable. + * @folio: The folio to test. * - * Test whether page is evictable--i.e., should be placed on active/inactive - * lists vs unevictable list. - * - * Reasons page might not be evictable: - * (1) page's mapping marked unevictable - * (2) page is part of an mlocked VMA + * Test whether @folio is evictable -- i.e., should be placed on + * active/inactive lists vs unevictable list. * + * Reasons folio might not be evictable: + * 1. folio's mapping marked unevictable + * 2. One of the pages in the folio is part of an mlocked VMA */ +static inline bool folio_evictable(struct folio *folio) +{ + bool ret; + + /* Prevent address_space of inode and swap cache from being freed */ + rcu_read_lock(); + ret = !mapping_unevictable(folio_mapping(folio)) && + !folio_test_mlocked(folio); + rcu_read_unlock(); + return ret; +} + static inline bool page_evictable(struct page *page) { bool ret; diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 8a8b3aa92937..5f02fda6f265 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1090,7 +1090,7 @@ static void collapse_huge_page(struct mm_struct *mm, goto out_nolock; } - if (unlikely(mem_cgroup_charge(new_page, mm, gfp))) { + if (unlikely(mem_cgroup_charge(page_folio(new_page), mm, gfp))) { result = SCAN_CGROUP_CHARGE_FAIL; goto out_nolock; } @@ -1214,7 +1214,7 @@ out_up_write: mmap_write_unlock(mm); out_nolock: if (!IS_ERR_OR_NULL(*hpage)) - mem_cgroup_uncharge(*hpage); + mem_cgroup_uncharge(page_folio(*hpage)); trace_mm_collapse_huge_page(mm, isolated, result); return; } @@ -1661,7 +1661,7 @@ static void collapse_file(struct mm_struct *mm, goto out; } - if (unlikely(mem_cgroup_charge(new_page, mm, gfp))) { + if (unlikely(mem_cgroup_charge(page_folio(new_page), mm, gfp))) { result = SCAN_CGROUP_CHARGE_FAIL; goto out; } @@ -1983,7 +1983,7 @@ xa_unlocked: out: VM_BUG_ON(!list_empty(&pagelist)); if (!IS_ERR_OR_NULL(*hpage)) - mem_cgroup_uncharge(*hpage); + mem_cgroup_uncharge(page_folio(*hpage)); /* TODO: tracepoints */ } @@ -751,7 +751,7 @@ stale: /* * We come here from above when page->mapping or !PageSwapCache * suggests that the node is stale; but it might be under migration. - * We need smp_rmb(), matching the smp_wmb() in ksm_migrate_page(), + * We need smp_rmb(), matching the smp_wmb() in folio_migrate_ksm(), * before checking whether node->kpfn has been changed. */ smp_rmb(); @@ -852,9 +852,14 @@ static int unmerge_ksm_pages(struct vm_area_struct *vma, return err; } +static inline struct stable_node *folio_stable_node(struct folio *folio) +{ + return folio_test_ksm(folio) ? folio_raw_mapping(folio) : NULL; +} + static inline struct stable_node *page_stable_node(struct page *page) { - return PageKsm(page) ? page_rmapping(page) : NULL; + return folio_stable_node(page_folio(page)); } static inline void set_page_stable_node(struct page *page, @@ -2578,7 +2583,8 @@ struct page *ksm_might_need_to_copy(struct page *page, return page; /* let do_swap_page report the error */ new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); - if (new_page && mem_cgroup_charge(new_page, vma->vm_mm, GFP_KERNEL)) { + if (new_page && + mem_cgroup_charge(page_folio(new_page), vma->vm_mm, GFP_KERNEL)) { put_page(new_page); new_page = NULL; } @@ -2658,26 +2664,26 @@ again: } #ifdef CONFIG_MIGRATION -void ksm_migrate_page(struct page *newpage, struct page *oldpage) +void folio_migrate_ksm(struct folio *newfolio, struct folio *folio) { struct stable_node *stable_node; - VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage); - VM_BUG_ON_PAGE(!PageLocked(newpage), newpage); - VM_BUG_ON_PAGE(newpage->mapping != oldpage->mapping, newpage); + VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); + VM_BUG_ON_FOLIO(!folio_test_locked(newfolio), newfolio); + VM_BUG_ON_FOLIO(newfolio->mapping != folio->mapping, newfolio); - stable_node = page_stable_node(newpage); + stable_node = folio_stable_node(folio); if (stable_node) { - VM_BUG_ON_PAGE(stable_node->kpfn != page_to_pfn(oldpage), oldpage); - stable_node->kpfn = page_to_pfn(newpage); + VM_BUG_ON_FOLIO(stable_node->kpfn != folio_pfn(folio), folio); + stable_node->kpfn = folio_pfn(newfolio); /* - * newpage->mapping was set in advance; now we need smp_wmb() + * newfolio->mapping was set in advance; now we need smp_wmb() * to make sure that the new stable_node->kpfn is visible - * to get_ksm_page() before it can see that oldpage->mapping - * has gone stale (or that PageSwapCache has been cleared). + * to get_ksm_page() before it can see that folio->mapping + * has gone stale (or that folio_test_swapcache has been cleared). */ smp_wmb(); - set_page_stable_node(oldpage, NULL); + set_page_stable_node(&folio->page, NULL); } } #endif /* CONFIG_MIGRATION */ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 6da5020a8656..8dab23a71fc4 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -456,28 +456,6 @@ ino_t page_cgroup_ino(struct page *page) return ino; } -static struct mem_cgroup_per_node * -mem_cgroup_page_nodeinfo(struct mem_cgroup *memcg, struct page *page) -{ - int nid = page_to_nid(page); - - return memcg->nodeinfo[nid]; -} - -static struct mem_cgroup_tree_per_node * -soft_limit_tree_node(int nid) -{ - return soft_limit_tree.rb_tree_per_node[nid]; -} - -static struct mem_cgroup_tree_per_node * -soft_limit_tree_from_page(struct page *page) -{ - int nid = page_to_nid(page); - - return soft_limit_tree.rb_tree_per_node[nid]; -} - static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_node *mz, struct mem_cgroup_tree_per_node *mctz, unsigned long new_usage_in_excess) @@ -548,13 +526,13 @@ static unsigned long soft_limit_excess(struct mem_cgroup *memcg) return excess; } -static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page) +static void mem_cgroup_update_tree(struct mem_cgroup *memcg, int nid) { unsigned long excess; struct mem_cgroup_per_node *mz; struct mem_cgroup_tree_per_node *mctz; - mctz = soft_limit_tree_from_page(page); + mctz = soft_limit_tree.rb_tree_per_node[nid]; if (!mctz) return; /* @@ -562,7 +540,7 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page) * because their event counter is not touched. */ for (; memcg; memcg = parent_mem_cgroup(memcg)) { - mz = mem_cgroup_page_nodeinfo(memcg, page); + mz = memcg->nodeinfo[nid]; excess = soft_limit_excess(memcg); /* * We have to update the tree if mz is on RB-tree or @@ -593,7 +571,7 @@ static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg) for_each_node(nid) { mz = memcg->nodeinfo[nid]; - mctz = soft_limit_tree_node(nid); + mctz = soft_limit_tree.rb_tree_per_node[nid]; if (mctz) mem_cgroup_remove_exceeded(mz, mctz); } @@ -799,7 +777,6 @@ static unsigned long memcg_events_local(struct mem_cgroup *memcg, int event) } static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg, - struct page *page, int nr_pages) { /* pagein of a big page is an event. So, ignore page size */ @@ -842,7 +819,7 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg, * Check events in order. * */ -static void memcg_check_events(struct mem_cgroup *memcg, struct page *page) +static void memcg_check_events(struct mem_cgroup *memcg, int nid) { /* threshold event is triggered in finer grain than soft limit */ if (unlikely(mem_cgroup_event_ratelimit(memcg, @@ -853,7 +830,7 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page) MEM_CGROUP_TARGET_SOFTLIMIT); mem_cgroup_threshold(memcg); if (unlikely(do_softlimit)) - mem_cgroup_update_tree(memcg, page); + mem_cgroup_update_tree(memcg, nid); } } @@ -1149,64 +1126,88 @@ int mem_cgroup_scan_tasks(struct mem_cgroup *memcg, } #ifdef CONFIG_DEBUG_VM -void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page) +void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio) { struct mem_cgroup *memcg; if (mem_cgroup_disabled()) return; - memcg = page_memcg(page); + memcg = folio_memcg(folio); if (!memcg) - VM_BUG_ON_PAGE(lruvec_memcg(lruvec) != root_mem_cgroup, page); + VM_BUG_ON_FOLIO(lruvec_memcg(lruvec) != root_mem_cgroup, folio); else - VM_BUG_ON_PAGE(lruvec_memcg(lruvec) != memcg, page); + VM_BUG_ON_FOLIO(lruvec_memcg(lruvec) != memcg, folio); } #endif /** - * lock_page_lruvec - lock and return lruvec for a given page. - * @page: the page + * folio_lruvec_lock - Lock the lruvec for a folio. + * @folio: Pointer to the folio. * * These functions are safe to use under any of the following conditions: - * - page locked - * - PageLRU cleared - * - lock_page_memcg() - * - page->_refcount is zero + * - folio locked + * - folio_test_lru false + * - folio_memcg_lock() + * - folio frozen (refcount of 0) + * + * Return: The lruvec this folio is on with its lock held. */ -struct lruvec *lock_page_lruvec(struct page *page) +struct lruvec *folio_lruvec_lock(struct folio *folio) { - struct lruvec *lruvec; + struct lruvec *lruvec = folio_lruvec(folio); - lruvec = mem_cgroup_page_lruvec(page); spin_lock(&lruvec->lru_lock); - - lruvec_memcg_debug(lruvec, page); + lruvec_memcg_debug(lruvec, folio); return lruvec; } -struct lruvec *lock_page_lruvec_irq(struct page *page) +/** + * folio_lruvec_lock_irq - Lock the lruvec for a folio. + * @folio: Pointer to the folio. + * + * These functions are safe to use under any of the following conditions: + * - folio locked + * - folio_test_lru false + * - folio_memcg_lock() + * - folio frozen (refcount of 0) + * + * Return: The lruvec this folio is on with its lock held and interrupts + * disabled. + */ +struct lruvec *folio_lruvec_lock_irq(struct folio *folio) { - struct lruvec *lruvec; + struct lruvec *lruvec = folio_lruvec(folio); - lruvec = mem_cgroup_page_lruvec(page); spin_lock_irq(&lruvec->lru_lock); - - lruvec_memcg_debug(lruvec, page); + lruvec_memcg_debug(lruvec, folio); return lruvec; } -struct lruvec *lock_page_lruvec_irqsave(struct page *page, unsigned long *flags) +/** + * folio_lruvec_lock_irqsave - Lock the lruvec for a folio. + * @folio: Pointer to the folio. + * @flags: Pointer to irqsave flags. + * + * These functions are safe to use under any of the following conditions: + * - folio locked + * - folio_test_lru false + * - folio_memcg_lock() + * - folio frozen (refcount of 0) + * + * Return: The lruvec this folio is on with its lock held and interrupts + * disabled. + */ +struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio, + unsigned long *flags) { - struct lruvec *lruvec; + struct lruvec *lruvec = folio_lruvec(folio); - lruvec = mem_cgroup_page_lruvec(page); spin_lock_irqsave(&lruvec->lru_lock, *flags); - - lruvec_memcg_debug(lruvec, page); + lruvec_memcg_debug(lruvec, folio); return lruvec; } @@ -1956,18 +1957,17 @@ void mem_cgroup_print_oom_group(struct mem_cgroup *memcg) } /** - * lock_page_memcg - lock a page and memcg binding - * @page: the page + * folio_memcg_lock - Bind a folio to its memcg. + * @folio: The folio. * - * This function protects unlocked LRU pages from being moved to + * This function prevents unlocked LRU folios from being moved to * another cgroup. * - * It ensures lifetime of the locked memcg. Caller is responsible - * for the lifetime of the page. + * It ensures lifetime of the bound memcg. The caller is responsible + * for the lifetime of the folio. */ -void lock_page_memcg(struct page *page) +void folio_memcg_lock(struct folio *folio) { - struct page *head = compound_head(page); /* rmap on tail pages */ struct mem_cgroup *memcg; unsigned long flags; @@ -1981,7 +1981,7 @@ void lock_page_memcg(struct page *page) if (mem_cgroup_disabled()) return; again: - memcg = page_memcg(head); + memcg = folio_memcg(folio); if (unlikely(!memcg)) return; @@ -1995,7 +1995,7 @@ again: return; spin_lock_irqsave(&memcg->move_lock, flags); - if (memcg != page_memcg(head)) { + if (memcg != folio_memcg(folio)) { spin_unlock_irqrestore(&memcg->move_lock, flags); goto again; } @@ -2009,9 +2009,15 @@ again: memcg->move_lock_task = current; memcg->move_lock_flags = flags; } +EXPORT_SYMBOL(folio_memcg_lock); + +void lock_page_memcg(struct page *page) +{ + folio_memcg_lock(page_folio(page)); +} EXPORT_SYMBOL(lock_page_memcg); -static void __unlock_page_memcg(struct mem_cgroup *memcg) +static void __folio_memcg_unlock(struct mem_cgroup *memcg) { if (memcg && memcg->move_lock_task == current) { unsigned long flags = memcg->move_lock_flags; @@ -2026,14 +2032,22 @@ static void __unlock_page_memcg(struct mem_cgroup *memcg) } /** - * unlock_page_memcg - unlock a page and memcg binding - * @page: the page + * folio_memcg_unlock - Release the binding between a folio and its memcg. + * @folio: The folio. + * + * This releases the binding created by folio_memcg_lock(). This does + * not change the accounting of this folio to its memcg, but it does + * permit others to change it. */ -void unlock_page_memcg(struct page *page) +void folio_memcg_unlock(struct folio *folio) { - struct page *head = compound_head(page); + __folio_memcg_unlock(folio_memcg(folio)); +} +EXPORT_SYMBOL(folio_memcg_unlock); - __unlock_page_memcg(page_memcg(head)); +void unlock_page_memcg(struct page *page) +{ + folio_memcg_unlock(page_folio(page)); } EXPORT_SYMBOL(unlock_page_memcg); @@ -2734,9 +2748,9 @@ static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages) } #endif -static void commit_charge(struct page *page, struct mem_cgroup *memcg) +static void commit_charge(struct folio *folio, struct mem_cgroup *memcg) { - VM_BUG_ON_PAGE(page_memcg(page), page); + VM_BUG_ON_FOLIO(folio_memcg(folio), folio); /* * Any of the following ensures page's memcg stability: * @@ -2745,7 +2759,7 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg) * - lock_page_memcg() * - exclusive reference */ - page->memcg_data = (unsigned long)memcg; + folio->memcg_data = (unsigned long)memcg; } static struct mem_cgroup *get_mem_cgroup_from_objcg(struct obj_cgroup *objcg) @@ -3015,15 +3029,16 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order) */ void __memcg_kmem_uncharge_page(struct page *page, int order) { + struct folio *folio = page_folio(page); struct obj_cgroup *objcg; unsigned int nr_pages = 1 << order; - if (!PageMemcgKmem(page)) + if (!folio_memcg_kmem(folio)) return; - objcg = __page_objcg(page); + objcg = __folio_objcg(folio); obj_cgroup_uncharge_pages(objcg, nr_pages); - page->memcg_data = 0; + folio->memcg_data = 0; obj_cgroup_put(objcg); } @@ -3257,17 +3272,18 @@ void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size) */ void split_page_memcg(struct page *head, unsigned int nr) { - struct mem_cgroup *memcg = page_memcg(head); + struct folio *folio = page_folio(head); + struct mem_cgroup *memcg = folio_memcg(folio); int i; if (mem_cgroup_disabled() || !memcg) return; for (i = 1; i < nr; i++) - head[i].memcg_data = head->memcg_data; + folio_page(folio, i)->memcg_data = folio->memcg_data; - if (PageMemcgKmem(head)) - obj_cgroup_get_many(__page_objcg(head), nr - 1); + if (folio_memcg_kmem(folio)) + obj_cgroup_get_many(__folio_objcg(folio), nr - 1); else css_get_many(&memcg->css, nr - 1); } @@ -3381,7 +3397,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, if (order > 0) return 0; - mctz = soft_limit_tree_node(pgdat->node_id); + mctz = soft_limit_tree.rb_tree_per_node[pgdat->node_id]; /* * Do not even bother to check the largest node if the root @@ -4537,17 +4553,17 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages, * As being wrong occasionally doesn't matter, updates and accesses to the * records are lockless and racy. */ -void mem_cgroup_track_foreign_dirty_slowpath(struct page *page, +void mem_cgroup_track_foreign_dirty_slowpath(struct folio *folio, struct bdi_writeback *wb) { - struct mem_cgroup *memcg = page_memcg(page); + struct mem_cgroup *memcg = folio_memcg(folio); struct memcg_cgwb_frn *frn; u64 now = get_jiffies_64(); u64 oldest_at = now; int oldest = -1; int i; - trace_track_foreign_dirty(page, wb); + trace_track_foreign_dirty(folio, wb); /* * Pick the slot to use. If there is already a slot for @wb, keep @@ -5575,38 +5591,39 @@ static int mem_cgroup_move_account(struct page *page, struct mem_cgroup *from, struct mem_cgroup *to) { + struct folio *folio = page_folio(page); struct lruvec *from_vec, *to_vec; struct pglist_data *pgdat; - unsigned int nr_pages = compound ? thp_nr_pages(page) : 1; - int ret; + unsigned int nr_pages = compound ? folio_nr_pages(folio) : 1; + int nid, ret; VM_BUG_ON(from == to); - VM_BUG_ON_PAGE(PageLRU(page), page); - VM_BUG_ON(compound && !PageTransHuge(page)); + VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); + VM_BUG_ON(compound && !folio_test_multi(folio)); /* * Prevent mem_cgroup_migrate() from looking at * page's memory cgroup of its source page while we change it. */ ret = -EBUSY; - if (!trylock_page(page)) + if (!folio_trylock(folio)) goto out; ret = -EINVAL; - if (page_memcg(page) != from) + if (folio_memcg(folio) != from) goto out_unlock; - pgdat = page_pgdat(page); + pgdat = folio_pgdat(folio); from_vec = mem_cgroup_lruvec(from, pgdat); to_vec = mem_cgroup_lruvec(to, pgdat); - lock_page_memcg(page); + folio_memcg_lock(folio); - if (PageAnon(page)) { - if (page_mapped(page)) { + if (folio_test_anon(folio)) { + if (folio_mapped(folio)) { __mod_lruvec_state(from_vec, NR_ANON_MAPPED, -nr_pages); __mod_lruvec_state(to_vec, NR_ANON_MAPPED, nr_pages); - if (PageTransHuge(page)) { + if (folio_test_transhuge(folio)) { __mod_lruvec_state(from_vec, NR_ANON_THPS, -nr_pages); __mod_lruvec_state(to_vec, NR_ANON_THPS, @@ -5617,18 +5634,18 @@ static int mem_cgroup_move_account(struct page *page, __mod_lruvec_state(from_vec, NR_FILE_PAGES, -nr_pages); __mod_lruvec_state(to_vec, NR_FILE_PAGES, nr_pages); - if (PageSwapBacked(page)) { + if (folio_test_swapbacked(folio)) { __mod_lruvec_state(from_vec, NR_SHMEM, -nr_pages); __mod_lruvec_state(to_vec, NR_SHMEM, nr_pages); } - if (page_mapped(page)) { + if (folio_mapped(folio)) { __mod_lruvec_state(from_vec, NR_FILE_MAPPED, -nr_pages); __mod_lruvec_state(to_vec, NR_FILE_MAPPED, nr_pages); } - if (PageDirty(page)) { - struct address_space *mapping = page_mapping(page); + if (folio_test_dirty(folio)) { + struct address_space *mapping = folio_mapping(folio); if (mapping_can_writeback(mapping)) { __mod_lruvec_state(from_vec, NR_FILE_DIRTY, @@ -5639,7 +5656,7 @@ static int mem_cgroup_move_account(struct page *page, } } - if (PageWriteback(page)) { + if (folio_test_writeback(folio)) { __mod_lruvec_state(from_vec, NR_WRITEBACK, -nr_pages); __mod_lruvec_state(to_vec, NR_WRITEBACK, nr_pages); } @@ -5662,20 +5679,21 @@ static int mem_cgroup_move_account(struct page *page, css_get(&to->css); css_put(&from->css); - page->memcg_data = (unsigned long)to; + folio->memcg_data = (unsigned long)to; - __unlock_page_memcg(from); + __folio_memcg_unlock(from); ret = 0; + nid = folio_nid(folio); local_irq_disable(); - mem_cgroup_charge_statistics(to, page, nr_pages); - memcg_check_events(to, page); - mem_cgroup_charge_statistics(from, page, -nr_pages); - memcg_check_events(from, page); + mem_cgroup_charge_statistics(to, nr_pages); + memcg_check_events(to, nid); + mem_cgroup_charge_statistics(from, -nr_pages); + memcg_check_events(from, nid); local_irq_enable(); out_unlock: - unlock_page(page); + folio_unlock(folio); out: return ret; } @@ -6680,9 +6698,10 @@ void mem_cgroup_calculate_protection(struct mem_cgroup *root, atomic_long_read(&parent->memory.children_low_usage))); } -static int charge_memcg(struct page *page, struct mem_cgroup *memcg, gfp_t gfp) +static int charge_memcg(struct folio *folio, struct mem_cgroup *memcg, + gfp_t gfp) { - unsigned int nr_pages = thp_nr_pages(page); + long nr_pages = folio_nr_pages(folio); int ret; ret = try_charge(memcg, gfp, nr_pages); @@ -6690,38 +6709,23 @@ static int charge_memcg(struct page *page, struct mem_cgroup *memcg, gfp_t gfp) goto out; css_get(&memcg->css); - commit_charge(page, memcg); + commit_charge(folio, memcg); local_irq_disable(); - mem_cgroup_charge_statistics(memcg, page, nr_pages); - memcg_check_events(memcg, page); + mem_cgroup_charge_statistics(memcg, nr_pages); + memcg_check_events(memcg, folio_nid(folio)); local_irq_enable(); out: return ret; } -/** - * __mem_cgroup_charge - charge a newly allocated page to a cgroup - * @page: page to charge - * @mm: mm context of the victim - * @gfp_mask: reclaim mode - * - * Try to charge @page to the memcg that @mm belongs to, reclaiming - * pages according to @gfp_mask if necessary. if @mm is NULL, try to - * charge to the active memcg. - * - * Do not use this for pages allocated for swapin. - * - * Returns 0 on success. Otherwise, an error code is returned. - */ -int __mem_cgroup_charge(struct page *page, struct mm_struct *mm, - gfp_t gfp_mask) +int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp) { struct mem_cgroup *memcg; int ret; memcg = get_mem_cgroup_from_mm(mm); - ret = charge_memcg(page, memcg, gfp_mask); + ret = charge_memcg(folio, memcg, gfp); css_put(&memcg->css); return ret; @@ -6742,6 +6746,7 @@ int __mem_cgroup_charge(struct page *page, struct mm_struct *mm, int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm, gfp_t gfp, swp_entry_t entry) { + struct folio *folio = page_folio(page); struct mem_cgroup *memcg; unsigned short id; int ret; @@ -6756,7 +6761,7 @@ int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm, memcg = get_mem_cgroup_from_mm(mm); rcu_read_unlock(); - ret = charge_memcg(page, memcg, gfp); + ret = charge_memcg(folio, memcg, gfp); css_put(&memcg->css); return ret; @@ -6800,7 +6805,7 @@ struct uncharge_gather { unsigned long nr_memory; unsigned long pgpgout; unsigned long nr_kmem; - struct page *dummy_page; + int nid; }; static inline void uncharge_gather_clear(struct uncharge_gather *ug) @@ -6824,36 +6829,36 @@ static void uncharge_batch(const struct uncharge_gather *ug) local_irq_save(flags); __count_memcg_events(ug->memcg, PGPGOUT, ug->pgpgout); __this_cpu_add(ug->memcg->vmstats_percpu->nr_page_events, ug->nr_memory); - memcg_check_events(ug->memcg, ug->dummy_page); + memcg_check_events(ug->memcg, ug->nid); local_irq_restore(flags); - /* drop reference from uncharge_page */ + /* drop reference from uncharge_folio */ css_put(&ug->memcg->css); } -static void uncharge_page(struct page *page, struct uncharge_gather *ug) +static void uncharge_folio(struct folio *folio, struct uncharge_gather *ug) { - unsigned long nr_pages; + long nr_pages; struct mem_cgroup *memcg; struct obj_cgroup *objcg; - bool use_objcg = PageMemcgKmem(page); + bool use_objcg = folio_memcg_kmem(folio); - VM_BUG_ON_PAGE(PageLRU(page), page); + VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); /* * Nobody should be changing or seriously looking at - * page memcg or objcg at this point, we have fully - * exclusive access to the page. + * folio memcg or objcg at this point, we have fully + * exclusive access to the folio. */ if (use_objcg) { - objcg = __page_objcg(page); + objcg = __folio_objcg(folio); /* * This get matches the put at the end of the function and * kmem pages do not hold memcg references anymore. */ memcg = get_mem_cgroup_from_objcg(objcg); } else { - memcg = __page_memcg(page); + memcg = __folio_memcg(folio); } if (!memcg) @@ -6865,19 +6870,19 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug) uncharge_gather_clear(ug); } ug->memcg = memcg; - ug->dummy_page = page; + ug->nid = folio_nid(folio); /* pairs with css_put in uncharge_batch */ css_get(&memcg->css); } - nr_pages = compound_nr(page); + nr_pages = folio_nr_pages(folio); if (use_objcg) { ug->nr_memory += nr_pages; ug->nr_kmem += nr_pages; - page->memcg_data = 0; + folio->memcg_data = 0; obj_cgroup_put(objcg); } else { /* LRU pages aren't accounted at the root level */ @@ -6885,28 +6890,22 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug) ug->nr_memory += nr_pages; ug->pgpgout++; - page->memcg_data = 0; + folio->memcg_data = 0; } css_put(&memcg->css); } -/** - * __mem_cgroup_uncharge - uncharge a page - * @page: page to uncharge - * - * Uncharge a page previously charged with __mem_cgroup_charge(). - */ -void __mem_cgroup_uncharge(struct page *page) +void __mem_cgroup_uncharge(struct folio *folio) { struct uncharge_gather ug; - /* Don't touch page->lru of any random page, pre-check: */ - if (!page_memcg(page)) + /* Don't touch folio->lru of any random page, pre-check: */ + if (!folio_memcg(folio)) return; uncharge_gather_clear(&ug); - uncharge_page(page, &ug); + uncharge_folio(folio, &ug); uncharge_batch(&ug); } @@ -6920,52 +6919,49 @@ void __mem_cgroup_uncharge(struct page *page) void __mem_cgroup_uncharge_list(struct list_head *page_list) { struct uncharge_gather ug; - struct page *page; + struct folio *folio; uncharge_gather_clear(&ug); - list_for_each_entry(page, page_list, lru) - uncharge_page(page, &ug); + list_for_each_entry(folio, page_list, lru) + uncharge_folio(folio, &ug); if (ug.memcg) uncharge_batch(&ug); } /** - * mem_cgroup_migrate - charge a page's replacement - * @oldpage: currently circulating page - * @newpage: replacement page + * mem_cgroup_migrate - Charge a folio's replacement. + * @old: Currently circulating folio. + * @new: Replacement folio. * - * Charge @newpage as a replacement page for @oldpage. @oldpage will + * Charge @new as a replacement folio for @old. @old will * be uncharged upon free. * - * Both pages must be locked, @newpage->mapping must be set up. + * Both folios must be locked, @new->mapping must be set up. */ -void mem_cgroup_migrate(struct page *oldpage, struct page *newpage) +void mem_cgroup_migrate(struct folio *old, struct folio *new) { struct mem_cgroup *memcg; - unsigned int nr_pages; + long nr_pages = folio_nr_pages(new); unsigned long flags; - VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage); - VM_BUG_ON_PAGE(!PageLocked(newpage), newpage); - VM_BUG_ON_PAGE(PageAnon(oldpage) != PageAnon(newpage), newpage); - VM_BUG_ON_PAGE(PageTransHuge(oldpage) != PageTransHuge(newpage), - newpage); + VM_BUG_ON_FOLIO(!folio_test_locked(old), old); + VM_BUG_ON_FOLIO(!folio_test_locked(new), new); + VM_BUG_ON_FOLIO(folio_test_anon(old) != folio_test_anon(new), new); + VM_BUG_ON_FOLIO(folio_nr_pages(old) != nr_pages, new); if (mem_cgroup_disabled()) return; - /* Page cache replacement: new page already charged? */ - if (page_memcg(newpage)) + /* Page cache replacement: new folio already charged? */ + if (folio_memcg(new)) return; - memcg = page_memcg(oldpage); - VM_WARN_ON_ONCE_PAGE(!memcg, oldpage); + memcg = folio_memcg(old); + VM_WARN_ON_ONCE_FOLIO(!memcg, old); if (!memcg) return; /* Force-charge the new page. The old one will be freed soon */ - nr_pages = thp_nr_pages(newpage); - if (!mem_cgroup_is_root(memcg)) { page_counter_charge(&memcg->memory, nr_pages); if (do_memsw_account()) @@ -6973,11 +6969,11 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage) } css_get(&memcg->css); - commit_charge(newpage, memcg); + commit_charge(new, memcg); local_irq_save(flags); - mem_cgroup_charge_statistics(memcg, newpage, nr_pages); - memcg_check_events(memcg, newpage); + mem_cgroup_charge_statistics(memcg, nr_pages); + memcg_check_events(memcg, folio_nid(new)); local_irq_restore(flags); } @@ -7204,8 +7200,8 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) * only synchronisation we have for updating the per-CPU variables. */ VM_BUG_ON(!irqs_disabled()); - mem_cgroup_charge_statistics(memcg, page, -nr_entries); - memcg_check_events(memcg, page); + mem_cgroup_charge_statistics(memcg, -nr_entries); + memcg_check_events(memcg, page_to_nid(page)); css_put(&memcg->css); } diff --git a/mm/memory-failure.c b/mm/memory-failure.c index bdbbb32211a5..93078a2859a7 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -762,7 +762,7 @@ static int delete_from_lru_cache(struct page *p) * Poisoned page might never drop its ref count to 0 so we have * to uncharge it manually from its memcg. */ - mem_cgroup_uncharge(p); + mem_cgroup_uncharge(page_folio(p)); /* * drop the page count elevated by isolate_lru_page() diff --git a/mm/memory.c b/mm/memory.c index c52be6d6b605..4b1de80c2a9c 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -990,7 +990,7 @@ page_copy_prealloc(struct mm_struct *src_mm, struct vm_area_struct *vma, if (!new_page) return NULL; - if (mem_cgroup_charge(new_page, src_mm, GFP_KERNEL)) { + if (mem_cgroup_charge(page_folio(new_page), src_mm, GFP_KERNEL)) { put_page(new_page); return NULL; } @@ -3019,7 +3019,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) } } - if (mem_cgroup_charge(new_page, mm, GFP_KERNEL)) + if (mem_cgroup_charge(page_folio(new_page), mm, GFP_KERNEL)) goto oom_free_new; cgroup_throttle_swaprate(new_page, GFP_KERNEL); @@ -3539,7 +3539,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) shadow = get_shadow_from_swap_cache(entry); if (shadow) - workingset_refault(page, shadow); + workingset_refault(page_folio(page), + shadow); lru_cache_add(page); @@ -3769,7 +3770,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) if (!page) goto oom; - if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL)) + if (mem_cgroup_charge(page_folio(page), vma->vm_mm, GFP_KERNEL)) goto oom_free_page; cgroup_throttle_swaprate(page, GFP_KERNEL); @@ -4202,7 +4203,8 @@ static vm_fault_t do_cow_fault(struct vm_fault *vmf) if (!vmf->cow_page) return VM_FAULT_OOM; - if (mem_cgroup_charge(vmf->cow_page, vma->vm_mm, GFP_KERNEL)) { + if (mem_cgroup_charge(page_folio(vmf->cow_page), vma->vm_mm, + GFP_KERNEL)) { put_page(vmf->cow_page); return VM_FAULT_OOM; } @@ -4267,7 +4269,7 @@ static vm_fault_t do_shared_fault(struct vm_fault *vmf) * We enter with non-exclusive mmap_lock (to exclude vma changes, * but allow concurrent faults). * The mmap_lock may have been released depending on flags and our - * return value. See filemap_fault() and __lock_page_or_retry(). + * return value. See filemap_fault() and __folio_lock_or_retry(). * If mmap_lock is released, vma may become invalid (for example * by other thread calling munmap()). */ @@ -4508,7 +4510,7 @@ static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud) * concurrent faults). * * The mmap_lock may have been released depending on flags and our return value. - * See filemap_fault() and __lock_page_or_retry(). + * See filemap_fault() and __folio_lock_or_retry(). */ static vm_fault_t handle_pte_fault(struct vm_fault *vmf) { @@ -4612,7 +4614,7 @@ unlock: * By the time we get here, we already hold the mm semaphore * * The mmap_lock may have been released depending on flags and our - * return value. See filemap_fault() and __lock_page_or_retry(). + * return value. See filemap_fault() and __folio_lock_or_retry(). */ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, unsigned int flags) @@ -4768,7 +4770,7 @@ static inline void mm_account_fault(struct pt_regs *regs, * By the time we get here, we already hold the mm semaphore * * The mmap_lock may have been released depending on flags and our - * return value. See filemap_fault() and __lock_page_or_retry(). + * return value. See filemap_fault() and __folio_lock_or_retry(). */ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address, unsigned int flags, struct pt_regs *regs) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index d12e0608fced..f4b4be7af4d3 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2196,6 +2196,16 @@ struct page *alloc_pages(gfp_t gfp, unsigned order) } EXPORT_SYMBOL(alloc_pages); +struct folio *folio_alloc(gfp_t gfp, unsigned order) +{ + struct page *page = alloc_pages(gfp | __GFP_COMP, order); + + if (page && order > 1) + prep_transhuge_page(page); + return (struct folio *)page; +} +EXPORT_SYMBOL(folio_alloc); + int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst) { struct mempolicy *pol = mpol_dup(vma_policy(src)); diff --git a/mm/memremap.c b/mm/memremap.c index ed593bf87109..5a66a71ab591 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -505,7 +505,7 @@ void free_devmap_managed_page(struct page *page) __ClearPageWaiters(page); - mem_cgroup_uncharge(page); + mem_cgroup_uncharge(page_folio(page)); /* * When a device_private page is freed, the page->mapping field diff --git a/mm/migrate.c b/mm/migrate.c index 1852d787e6ab..efa9941ebe03 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -364,7 +364,7 @@ static int expected_page_refs(struct address_space *mapping, struct page *page) */ expected_count += is_device_private_page(page); if (mapping) - expected_count += thp_nr_pages(page) + page_has_private(page); + expected_count += compound_nr(page) + page_has_private(page); return expected_count; } @@ -377,74 +377,75 @@ static int expected_page_refs(struct address_space *mapping, struct page *page) * 2 for pages with a mapping * 3 for pages with a mapping and PagePrivate/PagePrivate2 set. */ -int migrate_page_move_mapping(struct address_space *mapping, - struct page *newpage, struct page *page, int extra_count) +int folio_migrate_mapping(struct address_space *mapping, + struct folio *newfolio, struct folio *folio, int extra_count) { - XA_STATE(xas, &mapping->i_pages, page_index(page)); + XA_STATE(xas, &mapping->i_pages, folio_index(folio)); struct zone *oldzone, *newzone; int dirty; - int expected_count = expected_page_refs(mapping, page) + extra_count; - int nr = thp_nr_pages(page); + int expected_count = expected_page_refs(mapping, &folio->page) + extra_count; + long nr = folio_nr_pages(folio); if (!mapping) { /* Anonymous page without mapping */ - if (page_count(page) != expected_count) + if (folio_ref_count(folio) != expected_count) return -EAGAIN; /* No turning back from here */ - newpage->index = page->index; - newpage->mapping = page->mapping; - if (PageSwapBacked(page)) - __SetPageSwapBacked(newpage); + newfolio->index = folio->index; + newfolio->mapping = folio->mapping; + if (folio_test_swapbacked(folio)) + __folio_set_swapbacked(newfolio); return MIGRATEPAGE_SUCCESS; } - oldzone = page_zone(page); - newzone = page_zone(newpage); + oldzone = folio_zone(folio); + newzone = folio_zone(newfolio); xas_lock_irq(&xas); - if (page_count(page) != expected_count || xas_load(&xas) != page) { + if (folio_ref_count(folio) != expected_count || + xas_load(&xas) != folio) { xas_unlock_irq(&xas); return -EAGAIN; } - if (!page_ref_freeze(page, expected_count)) { + if (!folio_ref_freeze(folio, expected_count)) { xas_unlock_irq(&xas); return -EAGAIN; } /* - * Now we know that no one else is looking at the page: + * Now we know that no one else is looking at the folio: * no turning back from here. */ - newpage->index = page->index; - newpage->mapping = page->mapping; - page_ref_add(newpage, nr); /* add cache reference */ - if (PageSwapBacked(page)) { - __SetPageSwapBacked(newpage); - if (PageSwapCache(page)) { - SetPageSwapCache(newpage); - set_page_private(newpage, page_private(page)); + newfolio->index = folio->index; + newfolio->mapping = folio->mapping; + folio_ref_add(newfolio, nr); /* add cache reference */ + if (folio_test_swapbacked(folio)) { + __folio_set_swapbacked(newfolio); + if (folio_test_swapcache(folio)) { + folio_set_swapcache(newfolio); + newfolio->private = folio_get_private(folio); } } else { - VM_BUG_ON_PAGE(PageSwapCache(page), page); + VM_BUG_ON_FOLIO(folio_test_swapcache(folio), folio); } /* Move dirty while page refs frozen and newpage not yet exposed */ - dirty = PageDirty(page); + dirty = folio_test_dirty(folio); if (dirty) { - ClearPageDirty(page); - SetPageDirty(newpage); + folio_clear_dirty(folio); + folio_set_dirty(newfolio); } - xas_store(&xas, newpage); - if (PageTransHuge(page)) { + xas_store(&xas, newfolio); + if (nr > 1) { int i; for (i = 1; i < nr; i++) { xas_next(&xas); - xas_store(&xas, newpage); + xas_store(&xas, newfolio); } } @@ -453,7 +454,7 @@ int migrate_page_move_mapping(struct address_space *mapping, * to one less reference. * We know this isn't the last reference. */ - page_ref_unfreeze(page, expected_count - nr); + folio_ref_unfreeze(folio, expected_count - nr); xas_unlock(&xas); /* Leave irq disabled to prevent preemption while updating stats */ @@ -472,18 +473,18 @@ int migrate_page_move_mapping(struct address_space *mapping, struct lruvec *old_lruvec, *new_lruvec; struct mem_cgroup *memcg; - memcg = page_memcg(page); + memcg = folio_memcg(folio); old_lruvec = mem_cgroup_lruvec(memcg, oldzone->zone_pgdat); new_lruvec = mem_cgroup_lruvec(memcg, newzone->zone_pgdat); __mod_lruvec_state(old_lruvec, NR_FILE_PAGES, -nr); __mod_lruvec_state(new_lruvec, NR_FILE_PAGES, nr); - if (PageSwapBacked(page) && !PageSwapCache(page)) { + if (folio_test_swapbacked(folio) && !folio_test_swapcache(folio)) { __mod_lruvec_state(old_lruvec, NR_SHMEM, -nr); __mod_lruvec_state(new_lruvec, NR_SHMEM, nr); } #ifdef CONFIG_SWAP - if (PageSwapCache(page)) { + if (folio_test_swapcache(folio)) { __mod_lruvec_state(old_lruvec, NR_SWAPCACHE, -nr); __mod_lruvec_state(new_lruvec, NR_SWAPCACHE, nr); } @@ -499,11 +500,11 @@ int migrate_page_move_mapping(struct address_space *mapping, return MIGRATEPAGE_SUCCESS; } -EXPORT_SYMBOL(migrate_page_move_mapping); +EXPORT_SYMBOL(folio_migrate_mapping); /* * The expected number of remaining references is the same as that - * of migrate_page_move_mapping(). + * of folio_migrate_mapping(). */ int migrate_huge_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page) @@ -538,91 +539,87 @@ int migrate_huge_page_move_mapping(struct address_space *mapping, } /* - * Copy the page to its new location + * Copy the flags and some other ancillary information */ -void migrate_page_states(struct page *newpage, struct page *page) +void folio_migrate_flags(struct folio *newfolio, struct folio *folio) { int cpupid; - if (PageError(page)) - SetPageError(newpage); - if (PageReferenced(page)) - SetPageReferenced(newpage); - if (PageUptodate(page)) - SetPageUptodate(newpage); - if (TestClearPageActive(page)) { - VM_BUG_ON_PAGE(PageUnevictable(page), page); - SetPageActive(newpage); - } else if (TestClearPageUnevictable(page)) - SetPageUnevictable(newpage); - if (PageWorkingset(page)) - SetPageWorkingset(newpage); - if (PageChecked(page)) - SetPageChecked(newpage); - if (PageMappedToDisk(page)) - SetPageMappedToDisk(newpage); - - /* Move dirty on pages not done by migrate_page_move_mapping() */ - if (PageDirty(page)) - SetPageDirty(newpage); - - if (page_is_young(page)) - set_page_young(newpage); - if (page_is_idle(page)) - set_page_idle(newpage); + if (folio_test_error(folio)) + folio_set_error(newfolio); + if (folio_test_referenced(folio)) + folio_set_referenced(newfolio); + if (folio_test_uptodate(folio)) + folio_mark_uptodate(newfolio); + if (folio_test_clear_active(folio)) { + VM_BUG_ON_FOLIO(folio_test_unevictable(folio), folio); + folio_set_active(newfolio); + } else if (folio_test_clear_unevictable(folio)) + folio_set_unevictable(newfolio); + if (folio_test_workingset(folio)) + folio_set_workingset(newfolio); + if (folio_test_checked(folio)) + folio_set_checked(newfolio); + if (folio_test_mappedtodisk(folio)) + folio_set_mappedtodisk(newfolio); + + /* Move dirty on pages not done by folio_migrate_mapping() */ + if (folio_test_dirty(folio)) + folio_set_dirty(newfolio); + + if (folio_test_young(folio)) + folio_set_young(newfolio); + if (folio_test_idle(folio)) + folio_set_idle(newfolio); /* * Copy NUMA information to the new page, to prevent over-eager * future migrations of this same page. */ - cpupid = page_cpupid_xchg_last(page, -1); - page_cpupid_xchg_last(newpage, cpupid); + cpupid = page_cpupid_xchg_last(&folio->page, -1); + page_cpupid_xchg_last(&newfolio->page, cpupid); - ksm_migrate_page(newpage, page); + folio_migrate_ksm(newfolio, folio); /* * Please do not reorder this without considering how mm/ksm.c's * get_ksm_page() depends upon ksm_migrate_page() and PageSwapCache(). */ - if (PageSwapCache(page)) - ClearPageSwapCache(page); - ClearPagePrivate(page); + if (folio_test_swapcache(folio)) + folio_clear_swapcache(folio); + folio_clear_private(folio); /* page->private contains hugetlb specific flags */ - if (!PageHuge(page)) - set_page_private(page, 0); + if (!folio_test_hugetlb(folio)) + folio->private = NULL; /* * If any waiters have accumulated on the new page then * wake them up. */ - if (PageWriteback(newpage)) - end_page_writeback(newpage); + if (folio_test_writeback(newfolio)) + folio_end_writeback(newfolio); /* * PG_readahead shares the same bit with PG_reclaim. The above * end_page_writeback() may clear PG_readahead mistakenly, so set the * bit after that. */ - if (PageReadahead(page)) - SetPageReadahead(newpage); + if (folio_test_readahead(folio)) + folio_set_readahead(newfolio); - copy_page_owner(page, newpage); + folio_copy_owner(newfolio, folio); - if (!PageHuge(page)) - mem_cgroup_migrate(page, newpage); + if (!folio_test_hugetlb(folio)) + mem_cgroup_migrate(folio, newfolio); } -EXPORT_SYMBOL(migrate_page_states); +EXPORT_SYMBOL(folio_migrate_flags); -void migrate_page_copy(struct page *newpage, struct page *page) +void folio_migrate_copy(struct folio *newfolio, struct folio *folio) { - if (PageHuge(page) || PageTransHuge(page)) - copy_huge_page(newpage, page); - else - copy_highpage(newpage, page); - - migrate_page_states(newpage, page); + folio_copy(newfolio, folio); + folio_migrate_flags(newfolio, folio); } -EXPORT_SYMBOL(migrate_page_copy); +EXPORT_SYMBOL(folio_migrate_copy); /************************************************************ * Migration functions @@ -638,19 +635,21 @@ int migrate_page(struct address_space *mapping, struct page *newpage, struct page *page, enum migrate_mode mode) { + struct folio *newfolio = page_folio(newpage); + struct folio *folio = page_folio(page); int rc; - BUG_ON(PageWriteback(page)); /* Writeback must be complete */ + BUG_ON(folio_test_writeback(folio)); /* Writeback must be complete */ - rc = migrate_page_move_mapping(mapping, newpage, page, 0); + rc = folio_migrate_mapping(mapping, newfolio, folio, 0); if (rc != MIGRATEPAGE_SUCCESS) return rc; if (mode != MIGRATE_SYNC_NO_COPY) - migrate_page_copy(newpage, page); + folio_migrate_copy(newfolio, folio); else - migrate_page_states(newpage, page); + folio_migrate_flags(newfolio, folio); return MIGRATEPAGE_SUCCESS; } EXPORT_SYMBOL(migrate_page); @@ -2468,7 +2467,7 @@ static void migrate_vma_collect(struct migrate_vma *migrate) * @page: struct page to check * * Pinned pages cannot be migrated. This is the same test as in - * migrate_page_move_mapping(), except that here we allow migration of a + * folio_migrate_mapping(), except that here we allow migration of a * ZONE_DEVICE page. */ static bool migrate_vma_check_page(struct page *page) @@ -2846,7 +2845,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate, if (unlikely(anon_vma_prepare(vma))) goto abort; - if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL)) + if (mem_cgroup_charge(page_folio(page), vma->vm_mm, GFP_KERNEL)) goto abort; /* diff --git a/mm/mlock.c b/mm/mlock.c index 16d2ee160d43..e263d62ae2d0 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -271,6 +271,7 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) /* Phase 1: page isolation */ for (i = 0; i < nr; i++) { struct page *page = pvec->pages[i]; + struct folio *folio = page_folio(page); if (TestClearPageMlocked(page)) { /* @@ -278,7 +279,7 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) * so we can spare the get_page() here. */ if (TestClearPageLRU(page)) { - lruvec = relock_page_lruvec_irq(page, lruvec); + lruvec = folio_lruvec_relock_irq(folio, lruvec); del_page_from_lru_list(page, lruvec); continue; } else diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 4812a17b288c..9c64490171e0 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -562,12 +562,12 @@ static unsigned long wp_next_time(unsigned long cur_time) return cur_time; } -static void wb_domain_writeout_inc(struct wb_domain *dom, +static void wb_domain_writeout_add(struct wb_domain *dom, struct fprop_local_percpu *completions, - unsigned int max_prop_frac) + unsigned int max_prop_frac, long nr) { - __fprop_inc_percpu_max(&dom->completions, completions, - max_prop_frac); + __fprop_add_percpu_max(&dom->completions, completions, + max_prop_frac, nr); /* First event after period switching was turned off? */ if (unlikely(!dom->period_time)) { /* @@ -583,20 +583,20 @@ static void wb_domain_writeout_inc(struct wb_domain *dom, /* * Increment @wb's writeout completion count and the global writeout - * completion count. Called from test_clear_page_writeback(). + * completion count. Called from __folio_end_writeback(). */ -static inline void __wb_writeout_inc(struct bdi_writeback *wb) +static inline void __wb_writeout_add(struct bdi_writeback *wb, long nr) { struct wb_domain *cgdom; - inc_wb_stat(wb, WB_WRITTEN); - wb_domain_writeout_inc(&global_wb_domain, &wb->completions, - wb->bdi->max_prop_frac); + wb_stat_mod(wb, WB_WRITTEN, nr); + wb_domain_writeout_add(&global_wb_domain, &wb->completions, + wb->bdi->max_prop_frac, nr); cgdom = mem_cgroup_wb_domain(wb); if (cgdom) - wb_domain_writeout_inc(cgdom, wb_memcg_completions(wb), - wb->bdi->max_prop_frac); + wb_domain_writeout_add(cgdom, wb_memcg_completions(wb), + wb->bdi->max_prop_frac, nr); } void wb_writeout_inc(struct bdi_writeback *wb) @@ -604,7 +604,7 @@ void wb_writeout_inc(struct bdi_writeback *wb) unsigned long flags; local_irq_save(flags); - __wb_writeout_inc(wb); + __wb_writeout_add(wb, 1); local_irq_restore(flags); } EXPORT_SYMBOL_GPL(wb_writeout_inc); @@ -1084,7 +1084,7 @@ static void wb_update_write_bandwidth(struct bdi_writeback *wb, * write_bandwidth = --------------------------------------------------- * period * - * @written may have decreased due to account_page_redirty(). + * @written may have decreased due to folio_account_redirty(). * Avoid underflowing @bw calculation. */ bw = written - min(written, wb->written_stamp); @@ -2381,44 +2381,44 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc) } /** - * write_one_page - write out a single page and wait on I/O - * @page: the page to write + * folio_write_one - write out a single folio and wait on I/O. + * @folio: The folio to write. * - * The page must be locked by the caller and will be unlocked upon return. + * The folio must be locked by the caller and will be unlocked upon return. * * Note that the mapping's AS_EIO/AS_ENOSPC flags will be cleared when this * function returns. * * Return: %0 on success, negative error code otherwise */ -int write_one_page(struct page *page) +int folio_write_one(struct folio *folio) { - struct address_space *mapping = page->mapping; + struct address_space *mapping = folio->mapping; int ret = 0; struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, - .nr_to_write = 1, + .nr_to_write = folio_nr_pages(folio), }; - BUG_ON(!PageLocked(page)); + BUG_ON(!folio_test_locked(folio)); - wait_on_page_writeback(page); + folio_wait_writeback(folio); - if (clear_page_dirty_for_io(page)) { - get_page(page); - ret = mapping->a_ops->writepage(page, &wbc); + if (folio_clear_dirty_for_io(folio)) { + folio_get(folio); + ret = mapping->a_ops->writepage(&folio->page, &wbc); if (ret == 0) - wait_on_page_writeback(page); - put_page(page); + folio_wait_writeback(folio); + folio_put(folio); } else { - unlock_page(page); + folio_unlock(folio); } if (!ret) ret = filemap_check_errors(mapping); return ret; } -EXPORT_SYMBOL(write_one_page); +EXPORT_SYMBOL(folio_write_one); /* * For address_spaces which do not use buffers nor write back. @@ -2438,29 +2438,30 @@ EXPORT_SYMBOL(__set_page_dirty_no_writeback); * * NOTE: This relies on being atomic wrt interrupts. */ -static void account_page_dirtied(struct page *page, +static void folio_account_dirtied(struct folio *folio, struct address_space *mapping) { struct inode *inode = mapping->host; - trace_writeback_dirty_page(page, mapping); + trace_writeback_dirty_folio(folio, mapping); if (mapping_can_writeback(mapping)) { struct bdi_writeback *wb; + long nr = folio_nr_pages(folio); - inode_attach_wb(inode, page); + inode_attach_wb(inode, &folio->page); wb = inode_to_wb(inode); - __inc_lruvec_page_state(page, NR_FILE_DIRTY); - __inc_zone_page_state(page, NR_ZONE_WRITE_PENDING); - __inc_node_page_state(page, NR_DIRTIED); - inc_wb_stat(wb, WB_RECLAIMABLE); - inc_wb_stat(wb, WB_DIRTIED); - task_io_account_write(PAGE_SIZE); - current->nr_dirtied++; - __this_cpu_inc(bdp_ratelimits); + __lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, nr); + __zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, nr); + __node_stat_mod_folio(folio, NR_DIRTIED, nr); + wb_stat_mod(wb, WB_RECLAIMABLE, nr); + wb_stat_mod(wb, WB_DIRTIED, nr); + task_io_account_write(nr * PAGE_SIZE); + current->nr_dirtied += nr; + __this_cpu_add(bdp_ratelimits, nr); - mem_cgroup_track_foreign_dirty(page, wb); + mem_cgroup_track_foreign_dirty(folio, wb); } } @@ -2469,130 +2470,152 @@ static void account_page_dirtied(struct page *page, * * Caller must hold lock_page_memcg(). */ -void account_page_cleaned(struct page *page, struct address_space *mapping, +void folio_account_cleaned(struct folio *folio, struct address_space *mapping, struct bdi_writeback *wb) { if (mapping_can_writeback(mapping)) { - dec_lruvec_page_state(page, NR_FILE_DIRTY); - dec_zone_page_state(page, NR_ZONE_WRITE_PENDING); - dec_wb_stat(wb, WB_RECLAIMABLE); - task_io_account_cancelled_write(PAGE_SIZE); + long nr = folio_nr_pages(folio); + lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, -nr); + zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr); + wb_stat_mod(wb, WB_RECLAIMABLE, -nr); + task_io_account_cancelled_write(nr * PAGE_SIZE); } } /* - * Mark the page dirty, and set it dirty in the page cache, and mark the inode - * dirty. + * Mark the folio dirty, and set it dirty in the page cache, and mark + * the inode dirty. * - * If warn is true, then emit a warning if the page is not uptodate and has + * If warn is true, then emit a warning if the folio is not uptodate and has * not been truncated. * * The caller must hold lock_page_memcg(). */ -void __set_page_dirty(struct page *page, struct address_space *mapping, +void __folio_mark_dirty(struct folio *folio, struct address_space *mapping, int warn) { unsigned long flags; xa_lock_irqsave(&mapping->i_pages, flags); - if (page->mapping) { /* Race with truncate? */ - WARN_ON_ONCE(warn && !PageUptodate(page)); - account_page_dirtied(page, mapping); - __xa_set_mark(&mapping->i_pages, page_index(page), + if (folio->mapping) { /* Race with truncate? */ + WARN_ON_ONCE(warn && !folio_test_uptodate(folio)); + folio_account_dirtied(folio, mapping); + __xa_set_mark(&mapping->i_pages, folio_index(folio), PAGECACHE_TAG_DIRTY); } xa_unlock_irqrestore(&mapping->i_pages, flags); } -/* - * For address_spaces which do not use buffers. Just tag the page as dirty in - * the xarray. +/** + * filemap_dirty_folio - Mark a folio dirty for filesystems which do not use buffer_heads. + * @mapping: Address space this folio belongs to. + * @folio: Folio to be marked as dirty. + * + * Filesystems which do not use buffer heads should call this function + * from their set_page_dirty address space operation. It ignores the + * contents of folio_get_private(), so if the filesystem marks individual + * blocks as dirty, the filesystem should handle that itself. * - * This is also used when a single buffer is being dirtied: we want to set the - * page dirty in that case, but not all the buffers. This is a "bottom-up" - * dirtying, whereas __set_page_dirty_buffers() is a "top-down" dirtying. + * This is also sometimes used by filesystems which use buffer_heads when + * a single buffer is being dirtied: we want to set the folio dirty in + * that case, but not all the buffers. This is a "bottom-up" dirtying, + * whereas __set_page_dirty_buffers() is a "top-down" dirtying. * - * The caller must ensure this doesn't race with truncation. Most will simply - * hold the page lock, but e.g. zap_pte_range() calls with the page mapped and - * the pte lock held, which also locks out truncation. + * The caller must ensure this doesn't race with truncation. Most will + * simply hold the folio lock, but e.g. zap_pte_range() calls with the + * folio mapped and the pte lock held, which also locks out truncation. */ -int __set_page_dirty_nobuffers(struct page *page) +bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio) { - lock_page_memcg(page); - if (!TestSetPageDirty(page)) { - struct address_space *mapping = page_mapping(page); + folio_memcg_lock(folio); + if (folio_test_set_dirty(folio)) { + folio_memcg_unlock(folio); + return false; + } - if (!mapping) { - unlock_page_memcg(page); - return 1; - } - __set_page_dirty(page, mapping, !PagePrivate(page)); - unlock_page_memcg(page); + __folio_mark_dirty(folio, mapping, !folio_test_private(folio)); + folio_memcg_unlock(folio); - if (mapping->host) { - /* !PageAnon && !swapper_space */ - __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); - } - return 1; + if (mapping->host) { + /* !PageAnon && !swapper_space */ + __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); } - unlock_page_memcg(page); - return 0; + return true; } -EXPORT_SYMBOL(__set_page_dirty_nobuffers); +EXPORT_SYMBOL(filemap_dirty_folio); -/* - * Call this whenever redirtying a page, to de-account the dirty counters - * (NR_DIRTIED, WB_DIRTIED, tsk->nr_dirtied), so that they match the written - * counters (NR_WRITTEN, WB_WRITTEN) in long term. The mismatches will lead to - * systematic errors in balanced_dirty_ratelimit and the dirty pages position - * control. +/** + * folio_account_redirty - Manually account for redirtying a page. + * @folio: The folio which is being redirtied. + * + * Most filesystems should call folio_redirty_for_writepage() instead + * of this fuction. If your filesystem is doing writeback outside the + * context of a writeback_control(), it can call this when redirtying + * a folio, to de-account the dirty counters (NR_DIRTIED, WB_DIRTIED, + * tsk->nr_dirtied), so that they match the written counters (NR_WRITTEN, + * WB_WRITTEN) in long term. The mismatches will lead to systematic errors + * in balanced_dirty_ratelimit and the dirty pages position control. */ -void account_page_redirty(struct page *page) +void folio_account_redirty(struct folio *folio) { - struct address_space *mapping = page->mapping; + struct address_space *mapping = folio->mapping; if (mapping && mapping_can_writeback(mapping)) { struct inode *inode = mapping->host; struct bdi_writeback *wb; struct wb_lock_cookie cookie = {}; + long nr = folio_nr_pages(folio); wb = unlocked_inode_to_wb_begin(inode, &cookie); - current->nr_dirtied--; - dec_node_page_state(page, NR_DIRTIED); - dec_wb_stat(wb, WB_DIRTIED); + current->nr_dirtied -= nr; + node_stat_mod_folio(folio, NR_DIRTIED, -nr); + wb_stat_mod(wb, WB_DIRTIED, -nr); unlocked_inode_to_wb_end(inode, &cookie); } } -EXPORT_SYMBOL(account_page_redirty); +EXPORT_SYMBOL(folio_account_redirty); -/* - * When a writepage implementation decides that it doesn't want to write this - * page for some reason, it should redirty the locked page via - * redirty_page_for_writepage() and it should then unlock the page and return 0 +/** + * folio_redirty_for_writepage - Decline to write a dirty folio. + * @wbc: The writeback control. + * @folio: The folio. + * + * When a writepage implementation decides that it doesn't want to write + * @folio for some reason, it should call this function, unlock @folio and + * return 0. + * + * Return: True if we redirtied the folio. False if someone else dirtied + * it first. */ -int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page) +bool folio_redirty_for_writepage(struct writeback_control *wbc, + struct folio *folio) { - int ret; + bool ret; + long nr = folio_nr_pages(folio); + + wbc->pages_skipped += nr; + ret = filemap_dirty_folio(folio->mapping, folio); + folio_account_redirty(folio); - wbc->pages_skipped++; - ret = __set_page_dirty_nobuffers(page); - account_page_redirty(page); return ret; } -EXPORT_SYMBOL(redirty_page_for_writepage); +EXPORT_SYMBOL(folio_redirty_for_writepage); -/* - * Dirty a page. +/** + * folio_mark_dirty - Mark a folio as being modified. + * @folio: The folio. * - * For pages with a mapping this should be done under the page lock for the - * benefit of asynchronous memory errors who prefer a consistent dirty state. - * This rule can be broken in some special cases, but should be better not to. + * For folios with a mapping this should be done under the page lock + * for the benefit of asynchronous memory errors who prefer a consistent + * dirty state. This rule can be broken in some special cases, + * but should be better not to. + * + * Return: True if the folio was newly dirtied, false if it was already dirty. */ -int set_page_dirty(struct page *page) +bool folio_mark_dirty(struct folio *folio) { - struct address_space *mapping = page_mapping(page); + struct address_space *mapping = folio_mapping(folio); - page = compound_head(page); if (likely(mapping)) { /* * readahead/lru_deactivate_page could remain @@ -2604,17 +2627,17 @@ int set_page_dirty(struct page *page) * it will confuse readahead and make it restart the size rampup * process. But it's a trivial problem. */ - if (PageReclaim(page)) - ClearPageReclaim(page); - return mapping->a_ops->set_page_dirty(page); + if (folio_test_reclaim(folio)) + folio_clear_reclaim(folio); + return mapping->a_ops->set_page_dirty(&folio->page); } - if (!PageDirty(page)) { - if (!TestSetPageDirty(page)) - return 1; + if (!folio_test_dirty(folio)) { + if (!folio_test_set_dirty(folio)) + return true; } - return 0; + return false; } -EXPORT_SYMBOL(set_page_dirty); +EXPORT_SYMBOL(folio_mark_dirty); /* * set_page_dirty() is racy if the caller has no reference against @@ -2650,49 +2673,49 @@ EXPORT_SYMBOL(set_page_dirty_lock); * page without actually doing it through the VM. Can you say "ext3 is * horribly ugly"? Thought you could. */ -void __cancel_dirty_page(struct page *page) +void __folio_cancel_dirty(struct folio *folio) { - struct address_space *mapping = page_mapping(page); + struct address_space *mapping = folio_mapping(folio); if (mapping_can_writeback(mapping)) { struct inode *inode = mapping->host; struct bdi_writeback *wb; struct wb_lock_cookie cookie = {}; - lock_page_memcg(page); + folio_memcg_lock(folio); wb = unlocked_inode_to_wb_begin(inode, &cookie); - if (TestClearPageDirty(page)) - account_page_cleaned(page, mapping, wb); + if (folio_test_clear_dirty(folio)) + folio_account_cleaned(folio, mapping, wb); unlocked_inode_to_wb_end(inode, &cookie); - unlock_page_memcg(page); + folio_memcg_unlock(folio); } else { - ClearPageDirty(page); + folio_clear_dirty(folio); } } -EXPORT_SYMBOL(__cancel_dirty_page); +EXPORT_SYMBOL(__folio_cancel_dirty); /* - * Clear a page's dirty flag, while caring for dirty memory accounting. - * Returns true if the page was previously dirty. - * - * This is for preparing to put the page under writeout. We leave the page - * tagged as dirty in the xarray so that a concurrent write-for-sync - * can discover it via a PAGECACHE_TAG_DIRTY walk. The ->writepage - * implementation will run either set_page_writeback() or set_page_dirty(), - * at which stage we bring the page's dirty flag and xarray dirty tag - * back into sync. - * - * This incoherency between the page's dirty flag and xarray tag is - * unfortunate, but it only exists while the page is locked. + * Clear a folio's dirty flag, while caring for dirty memory accounting. + * Returns true if the folio was previously dirty. + * + * This is for preparing to put the folio under writeout. We leave + * the folio tagged as dirty in the xarray so that a concurrent + * write-for-sync can discover it via a PAGECACHE_TAG_DIRTY walk. + * The ->writepage implementation will run either folio_start_writeback() + * or folio_mark_dirty(), at which stage we bring the folio's dirty flag + * and xarray dirty tag back into sync. + * + * This incoherency between the folio's dirty flag and xarray tag is + * unfortunate, but it only exists while the folio is locked. */ -int clear_page_dirty_for_io(struct page *page) +bool folio_clear_dirty_for_io(struct folio *folio) { - struct address_space *mapping = page_mapping(page); - int ret = 0; + struct address_space *mapping = folio_mapping(folio); + bool ret = false; - VM_BUG_ON_PAGE(!PageLocked(page), page); + VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); if (mapping && mapping_can_writeback(mapping)) { struct inode *inode = mapping->host; @@ -2705,48 +2728,49 @@ int clear_page_dirty_for_io(struct page *page) * We use this sequence to make sure that * (a) we account for dirty stats properly * (b) we tell the low-level filesystem to - * mark the whole page dirty if it was + * mark the whole folio dirty if it was * dirty in a pagetable. Only to then - * (c) clean the page again and return 1 to + * (c) clean the folio again and return 1 to * cause the writeback. * * This way we avoid all nasty races with the * dirty bit in multiple places and clearing * them concurrently from different threads. * - * Note! Normally the "set_page_dirty(page)" + * Note! Normally the "folio_mark_dirty(folio)" * has no effect on the actual dirty bit - since * that will already usually be set. But we * need the side effects, and it can help us * avoid races. * - * We basically use the page "master dirty bit" + * We basically use the folio "master dirty bit" * as a serialization point for all the different * threads doing their things. */ - if (page_mkclean(page)) - set_page_dirty(page); + if (folio_mkclean(folio)) + folio_mark_dirty(folio); /* * We carefully synchronise fault handlers against - * installing a dirty pte and marking the page dirty + * installing a dirty pte and marking the folio dirty * at this point. We do this by having them hold the - * page lock while dirtying the page, and pages are + * page lock while dirtying the folio, and folios are * always locked coming in here, so we get the desired * exclusion. */ wb = unlocked_inode_to_wb_begin(inode, &cookie); - if (TestClearPageDirty(page)) { - dec_lruvec_page_state(page, NR_FILE_DIRTY); - dec_zone_page_state(page, NR_ZONE_WRITE_PENDING); - dec_wb_stat(wb, WB_RECLAIMABLE); - ret = 1; + if (folio_test_clear_dirty(folio)) { + long nr = folio_nr_pages(folio); + lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, -nr); + zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr); + wb_stat_mod(wb, WB_RECLAIMABLE, -nr); + ret = true; } unlocked_inode_to_wb_end(inode, &cookie); return ret; } - return TestClearPageDirty(page); + return folio_test_clear_dirty(folio); } -EXPORT_SYMBOL(clear_page_dirty_for_io); +EXPORT_SYMBOL(folio_clear_dirty_for_io); static void wb_inode_writeback_start(struct bdi_writeback *wb) { @@ -2766,27 +2790,28 @@ static void wb_inode_writeback_end(struct bdi_writeback *wb) queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL); } -int test_clear_page_writeback(struct page *page) +bool __folio_end_writeback(struct folio *folio) { - struct address_space *mapping = page_mapping(page); - int ret; + long nr = folio_nr_pages(folio); + struct address_space *mapping = folio_mapping(folio); + bool ret; - lock_page_memcg(page); + folio_memcg_lock(folio); if (mapping && mapping_use_writeback_tags(mapping)) { struct inode *inode = mapping->host; struct backing_dev_info *bdi = inode_to_bdi(inode); unsigned long flags; xa_lock_irqsave(&mapping->i_pages, flags); - ret = TestClearPageWriteback(page); + ret = folio_test_clear_writeback(folio); if (ret) { - __xa_clear_mark(&mapping->i_pages, page_index(page), + __xa_clear_mark(&mapping->i_pages, folio_index(folio), PAGECACHE_TAG_WRITEBACK); if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) { struct bdi_writeback *wb = inode_to_wb(inode); - dec_wb_stat(wb, WB_WRITEBACK); - __wb_writeout_inc(wb); + wb_stat_mod(wb, WB_WRITEBACK, -nr); + __wb_writeout_add(wb, nr); if (!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) wb_inode_writeback_end(wb); @@ -2799,32 +2824,34 @@ int test_clear_page_writeback(struct page *page) xa_unlock_irqrestore(&mapping->i_pages, flags); } else { - ret = TestClearPageWriteback(page); + ret = folio_test_clear_writeback(folio); } if (ret) { - dec_lruvec_page_state(page, NR_WRITEBACK); - dec_zone_page_state(page, NR_ZONE_WRITE_PENDING); - inc_node_page_state(page, NR_WRITTEN); + lruvec_stat_mod_folio(folio, NR_WRITEBACK, -nr); + zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr); + node_stat_mod_folio(folio, NR_WRITTEN, nr); } - unlock_page_memcg(page); + folio_memcg_unlock(folio); return ret; } -int __test_set_page_writeback(struct page *page, bool keep_write) +bool __folio_start_writeback(struct folio *folio, bool keep_write) { - struct address_space *mapping = page_mapping(page); - int ret, access_ret; + long nr = folio_nr_pages(folio); + struct address_space *mapping = folio_mapping(folio); + bool ret; + int access_ret; - lock_page_memcg(page); + folio_memcg_lock(folio); if (mapping && mapping_use_writeback_tags(mapping)) { - XA_STATE(xas, &mapping->i_pages, page_index(page)); + XA_STATE(xas, &mapping->i_pages, folio_index(folio)); struct inode *inode = mapping->host; struct backing_dev_info *bdi = inode_to_bdi(inode); unsigned long flags; xas_lock_irqsave(&xas, flags); xas_load(&xas); - ret = TestSetPageWriteback(page); + ret = folio_test_set_writeback(folio); if (!ret) { bool on_wblist; @@ -2835,84 +2862,105 @@ int __test_set_page_writeback(struct page *page, bool keep_write) if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) { struct bdi_writeback *wb = inode_to_wb(inode); - inc_wb_stat(wb, WB_WRITEBACK); + wb_stat_mod(wb, WB_WRITEBACK, nr); if (!on_wblist) wb_inode_writeback_start(wb); } /* - * We can come through here when swapping anonymous - * pages, so we don't necessarily have an inode to track - * for sync. + * We can come through here when swapping + * anonymous folios, so we don't necessarily + * have an inode to track for sync. */ if (mapping->host && !on_wblist) sb_mark_inode_writeback(mapping->host); } - if (!PageDirty(page)) + if (!folio_test_dirty(folio)) xas_clear_mark(&xas, PAGECACHE_TAG_DIRTY); if (!keep_write) xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE); xas_unlock_irqrestore(&xas, flags); } else { - ret = TestSetPageWriteback(page); + ret = folio_test_set_writeback(folio); } if (!ret) { - inc_lruvec_page_state(page, NR_WRITEBACK); - inc_zone_page_state(page, NR_ZONE_WRITE_PENDING); + lruvec_stat_mod_folio(folio, NR_WRITEBACK, nr); + zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, nr); } - unlock_page_memcg(page); - access_ret = arch_make_page_accessible(page); + folio_memcg_unlock(folio); + access_ret = arch_make_folio_accessible(folio); /* * If writeback has been triggered on a page that cannot be made * accessible, it is too late to recover here. */ - VM_BUG_ON_PAGE(access_ret != 0, page); + VM_BUG_ON_FOLIO(access_ret != 0, folio); return ret; - } -EXPORT_SYMBOL(__test_set_page_writeback); +EXPORT_SYMBOL(__folio_start_writeback); -/* - * Wait for a page to complete writeback +/** + * folio_wait_writeback - Wait for a folio to finish writeback. + * @folio: The folio to wait for. + * + * If the folio is currently being written back to storage, wait for the + * I/O to complete. + * + * Context: Sleeps. Must be called in process context and with + * no spinlocks held. Caller should hold a reference on the folio. + * If the folio is not locked, writeback may start again after writeback + * has finished. */ -void wait_on_page_writeback(struct page *page) +void folio_wait_writeback(struct folio *folio) { - while (PageWriteback(page)) { - trace_wait_on_page_writeback(page, page_mapping(page)); - wait_on_page_bit(page, PG_writeback); + while (folio_test_writeback(folio)) { + trace_folio_wait_writeback(folio, folio_mapping(folio)); + folio_wait_bit(folio, PG_writeback); } } -EXPORT_SYMBOL_GPL(wait_on_page_writeback); +EXPORT_SYMBOL_GPL(folio_wait_writeback); -/* - * Wait for a page to complete writeback. Returns -EINTR if we get a - * fatal signal while waiting. +/** + * folio_wait_writeback_killable - Wait for a folio to finish writeback. + * @folio: The folio to wait for. + * + * If the folio is currently being written back to storage, wait for the + * I/O to complete or a fatal signal to arrive. + * + * Context: Sleeps. Must be called in process context and with + * no spinlocks held. Caller should hold a reference on the folio. + * If the folio is not locked, writeback may start again after writeback + * has finished. + * Return: 0 on success, -EINTR if we get a fatal signal while waiting. */ -int wait_on_page_writeback_killable(struct page *page) +int folio_wait_writeback_killable(struct folio *folio) { - while (PageWriteback(page)) { - trace_wait_on_page_writeback(page, page_mapping(page)); - if (wait_on_page_bit_killable(page, PG_writeback)) + while (folio_test_writeback(folio)) { + trace_folio_wait_writeback(folio, folio_mapping(folio)); + if (folio_wait_bit_killable(folio, PG_writeback)) return -EINTR; } return 0; } -EXPORT_SYMBOL_GPL(wait_on_page_writeback_killable); +EXPORT_SYMBOL_GPL(folio_wait_writeback_killable); /** - * wait_for_stable_page() - wait for writeback to finish, if necessary. - * @page: The page to wait on. + * folio_wait_stable() - wait for writeback to finish, if necessary. + * @folio: The folio to wait on. + * + * This function determines if the given folio is related to a backing + * device that requires folio contents to be held stable during writeback. + * If so, then it will wait for any pending writeback to complete. * - * This function determines if the given page is related to a backing device - * that requires page contents to be held stable during writeback. If so, then - * it will wait for any pending writeback to complete. + * Context: Sleeps. Must be called in process context and with + * no spinlocks held. Caller should hold a reference on the folio. + * If the folio is not locked, writeback may start again after writeback + * has finished. */ -void wait_for_stable_page(struct page *page) +void folio_wait_stable(struct folio *folio) { - page = thp_head(page); - if (page->mapping->host->i_sb->s_iflags & SB_I_STABLE_WRITES) - wait_on_page_writeback(page); + if (folio->mapping->host->i_sb->s_iflags & SB_I_STABLE_WRITES) + folio_wait_writeback(folio); } -EXPORT_SYMBOL_GPL(wait_for_stable_page); +EXPORT_SYMBOL_GPL(folio_wait_stable); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 23d3339ac4e8..fee18ada46a2 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -724,7 +724,7 @@ static inline void free_the_page(struct page *page, unsigned int order) void free_compound_page(struct page *page) { - mem_cgroup_uncharge(page); + mem_cgroup_uncharge(page_folio(page)); free_the_page(page, compound_order(page)); } @@ -5406,6 +5406,18 @@ out: } EXPORT_SYMBOL(__alloc_pages); +struct folio *__folio_alloc(gfp_t gfp, unsigned int order, int preferred_nid, + nodemask_t *nodemask) +{ + struct page *page = __alloc_pages(gfp | __GFP_COMP, order, + preferred_nid, nodemask); + + if (page && order > 1) + prep_transhuge_page(page); + return (struct folio *)page; +} +EXPORT_SYMBOL(__folio_alloc); + /* * Common helper functions. Never use with __GFP_HIGHMEM because the returned * address cannot represent highmem pages. Use alloc_pages and then kmap if diff --git a/mm/page_io.c b/mm/page_io.c index c493ce9ebcf5..d597bc6e6e45 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -38,7 +38,7 @@ void end_swap_bio_write(struct bio *bio) * Also print a dire warning that things will go BAD (tm) * very quickly. * - * Also clear PG_reclaim to avoid rotate_reclaimable_page() + * Also clear PG_reclaim to avoid folio_rotate_reclaimable() */ set_page_dirty(page); pr_alert_ratelimited("Write-error on swap-device (%u:%u:%llu)\n", @@ -317,7 +317,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, * temporary failure if the system has limited * memory for allocating transmit buffers. * Mark the page dirty and avoid - * rotate_reclaimable_page but rate-limit the + * folio_rotate_reclaimable but rate-limit the * messages but do not flag PageError like * the normal direct-to-bio case as it could * be temporary. diff --git a/mm/page_owner.c b/mm/page_owner.c index 62402d22539b..d24ed221357c 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -210,10 +210,10 @@ void __split_page_owner(struct page *page, unsigned int nr) } } -void __copy_page_owner(struct page *oldpage, struct page *newpage) +void __folio_copy_owner(struct folio *newfolio, struct folio *old) { - struct page_ext *old_ext = lookup_page_ext(oldpage); - struct page_ext *new_ext = lookup_page_ext(newpage); + struct page_ext *old_ext = lookup_page_ext(&old->page); + struct page_ext *new_ext = lookup_page_ext(&newfolio->page); struct page_owner *old_page_owner, *new_page_owner; if (unlikely(!old_ext || !new_ext)) @@ -231,11 +231,11 @@ void __copy_page_owner(struct page *oldpage, struct page *newpage) new_page_owner->free_ts_nsec = old_page_owner->ts_nsec; /* - * We don't clear the bit on the oldpage as it's going to be freed + * We don't clear the bit on the old folio as it's going to be freed * after migration. Until then, the info can be useful in case of * a bug, and the overall stats will be off a bit only temporarily. * Also, migrate_misplaced_transhuge_page() can still fail the - * migration and then we want the oldpage to retain the info. But + * migration and then we want the old folio to retain the info. But * in that case we also don't need to explicitly clear the info from * the new page, which will be freed. */ diff --git a/mm/rmap.c b/mm/rmap.c index 6aebd1747251..3a1059c284c3 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -34,7 +34,7 @@ * mapping->private_lock (in __set_page_dirty_buffers) * lock_page_memcg move_lock (in __set_page_dirty_buffers) * i_pages lock (widely used) - * lruvec->lru_lock (in lock_page_lruvec_irq) + * lruvec->lru_lock (in folio_lruvec_lock_irq) * inode->i_lock (in set_page_dirty's __mark_inode_dirty) * bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty) * sb_lock (within inode_lock in fs/fs-writeback.c) @@ -981,7 +981,7 @@ static bool invalid_mkclean_vma(struct vm_area_struct *vma, void *arg) return true; } -int page_mkclean(struct page *page) +int folio_mkclean(struct folio *folio) { int cleaned = 0; struct address_space *mapping; @@ -991,20 +991,20 @@ int page_mkclean(struct page *page) .invalid_vma = invalid_mkclean_vma, }; - BUG_ON(!PageLocked(page)); + BUG_ON(!folio_test_locked(folio)); - if (!page_mapped(page)) + if (!folio_mapped(folio)) return 0; - mapping = page_mapping(page); + mapping = folio_mapping(folio); if (!mapping) return 0; - rmap_walk(page, &rwc); + rmap_walk(&folio->page, &rwc); return cleaned; } -EXPORT_SYMBOL_GPL(page_mkclean); +EXPORT_SYMBOL_GPL(folio_mkclean); /** * page_move_anon_rmap - move a page to our anon_vma diff --git a/mm/shmem.c b/mm/shmem.c index b5860f4a2738..1588f33d009a 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -710,7 +710,7 @@ static int shmem_add_to_page_cache(struct page *page, page->index = index; if (!PageSwapCache(page)) { - error = mem_cgroup_charge(page, charge_mm, gfp); + error = mem_cgroup_charge(page_folio(page), charge_mm, gfp); if (error) { if (PageTransHuge(page)) { count_vm_event(THP_FILE_FALLBACK); @@ -1637,6 +1637,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, struct shmem_inode_info *info, pgoff_t index) { struct page *oldpage, *newpage; + struct folio *old, *new; struct address_space *swap_mapping; swp_entry_t entry; pgoff_t swap_index; @@ -1673,7 +1674,9 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, xa_lock_irq(&swap_mapping->i_pages); error = shmem_replace_entry(swap_mapping, swap_index, oldpage, newpage); if (!error) { - mem_cgroup_migrate(oldpage, newpage); + old = page_folio(oldpage); + new = page_folio(newpage); + mem_cgroup_migrate(old, new); __inc_lruvec_page_state(newpage, NR_FILE_PAGES); __dec_lruvec_page_state(oldpage, NR_FILE_PAGES); } diff --git a/mm/swap.c b/mm/swap.c index af3cad4e5378..8ff9ba7cf2de 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -80,10 +80,11 @@ static DEFINE_PER_CPU(struct lru_pvecs, lru_pvecs) = { static void __page_cache_release(struct page *page) { if (PageLRU(page)) { + struct folio *folio = page_folio(page); struct lruvec *lruvec; unsigned long flags; - lruvec = lock_page_lruvec_irqsave(page, &flags); + lruvec = folio_lruvec_lock_irqsave(folio, &flags); del_page_from_lru_list(page, lruvec); __clear_page_lru_flags(page); unlock_page_lruvec_irqrestore(lruvec, flags); @@ -94,7 +95,7 @@ static void __page_cache_release(struct page *page) static void __put_single_page(struct page *page) { __page_cache_release(page); - mem_cgroup_uncharge(page); + mem_cgroup_uncharge(page_folio(page)); free_unref_page(page, 0); } @@ -188,12 +189,13 @@ static void pagevec_lru_move_fn(struct pagevec *pvec, for (i = 0; i < pagevec_count(pvec); i++) { struct page *page = pvec->pages[i]; + struct folio *folio = page_folio(page); /* block memcg migration during page moving between lru */ if (!TestClearPageLRU(page)) continue; - lruvec = relock_page_lruvec_irqsave(page, lruvec, &flags); + lruvec = folio_lruvec_relock_irqsave(folio, lruvec, &flags); (*move_fn)(page, lruvec); SetPageLRU(page); @@ -206,11 +208,13 @@ static void pagevec_lru_move_fn(struct pagevec *pvec, static void pagevec_move_tail_fn(struct page *page, struct lruvec *lruvec) { - if (!PageUnevictable(page)) { - del_page_from_lru_list(page, lruvec); - ClearPageActive(page); - add_page_to_lru_list_tail(page, lruvec); - __count_vm_events(PGROTATED, thp_nr_pages(page)); + struct folio *folio = page_folio(page); + + if (!folio_test_unevictable(folio)) { + lruvec_del_folio(lruvec, folio); + folio_clear_active(folio); + lruvec_add_folio_tail(lruvec, folio); + __count_vm_events(PGROTATED, folio_nr_pages(folio)); } } @@ -227,23 +231,23 @@ static bool pagevec_add_and_need_flush(struct pagevec *pvec, struct page *page) } /* - * Writeback is about to end against a page which has been marked for immediate - * reclaim. If it still appears to be reclaimable, move it to the tail of the - * inactive list. + * Writeback is about to end against a folio which has been marked for + * immediate reclaim. If it still appears to be reclaimable, move it + * to the tail of the inactive list. * - * rotate_reclaimable_page() must disable IRQs, to prevent nasty races. + * folio_rotate_reclaimable() must disable IRQs, to prevent nasty races. */ -void rotate_reclaimable_page(struct page *page) +void folio_rotate_reclaimable(struct folio *folio) { - if (!PageLocked(page) && !PageDirty(page) && - !PageUnevictable(page) && PageLRU(page)) { + if (!folio_test_locked(folio) && !folio_test_dirty(folio) && + !folio_test_unevictable(folio) && folio_test_lru(folio)) { struct pagevec *pvec; unsigned long flags; - get_page(page); + folio_get(folio); local_lock_irqsave(&lru_rotate.lock, flags); pvec = this_cpu_ptr(&lru_rotate.pvec); - if (pagevec_add_and_need_flush(pvec, page)) + if (pagevec_add_and_need_flush(pvec, &folio->page)) pagevec_lru_move_fn(pvec, pagevec_move_tail_fn); local_unlock_irqrestore(&lru_rotate.lock, flags); } @@ -289,21 +293,21 @@ void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages) } while ((lruvec = parent_lruvec(lruvec))); } -void lru_note_cost_page(struct page *page) +void lru_note_cost_folio(struct folio *folio) { - lru_note_cost(mem_cgroup_page_lruvec(page), - page_is_file_lru(page), thp_nr_pages(page)); + lru_note_cost(folio_lruvec(folio), folio_is_file_lru(folio), + folio_nr_pages(folio)); } -static void __activate_page(struct page *page, struct lruvec *lruvec) +static void __folio_activate(struct folio *folio, struct lruvec *lruvec) { - if (!PageActive(page) && !PageUnevictable(page)) { - int nr_pages = thp_nr_pages(page); + if (!folio_test_active(folio) && !folio_test_unevictable(folio)) { + long nr_pages = folio_nr_pages(folio); - del_page_from_lru_list(page, lruvec); - SetPageActive(page); - add_page_to_lru_list(page, lruvec); - trace_mm_lru_activate(page); + lruvec_del_folio(lruvec, folio); + folio_set_active(folio); + lruvec_add_folio(lruvec, folio); + trace_mm_lru_activate(folio); __count_vm_events(PGACTIVATE, nr_pages); __count_memcg_events(lruvec_memcg(lruvec), PGACTIVATE, @@ -312,6 +316,11 @@ static void __activate_page(struct page *page, struct lruvec *lruvec) } #ifdef CONFIG_SMP +static void __activate_page(struct page *page, struct lruvec *lruvec) +{ + return __folio_activate(page_folio(page), lruvec); +} + static void activate_page_drain(int cpu) { struct pagevec *pvec = &per_cpu(lru_pvecs.activate_page, cpu); @@ -325,16 +334,16 @@ static bool need_activate_page_drain(int cpu) return pagevec_count(&per_cpu(lru_pvecs.activate_page, cpu)) != 0; } -static void activate_page(struct page *page) +static void folio_activate(struct folio *folio) { - page = compound_head(page); - if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { + if (folio_test_lru(folio) && !folio_test_active(folio) && + !folio_test_unevictable(folio)) { struct pagevec *pvec; + folio_get(folio); local_lock(&lru_pvecs.lock); pvec = this_cpu_ptr(&lru_pvecs.activate_page); - get_page(page); - if (pagevec_add_and_need_flush(pvec, page)) + if (pagevec_add_and_need_flush(pvec, &folio->page)) pagevec_lru_move_fn(pvec, __activate_page); local_unlock(&lru_pvecs.lock); } @@ -345,21 +354,20 @@ static inline void activate_page_drain(int cpu) { } -static void activate_page(struct page *page) +static void folio_activate(struct folio *folio) { struct lruvec *lruvec; - page = compound_head(page); - if (TestClearPageLRU(page)) { - lruvec = lock_page_lruvec_irq(page); - __activate_page(page, lruvec); + if (folio_test_clear_lru(folio)) { + lruvec = folio_lruvec_lock_irq(folio); + __folio_activate(folio, lruvec); unlock_page_lruvec_irq(lruvec); - SetPageLRU(page); + folio_set_lru(folio); } } #endif -static void __lru_cache_activate_page(struct page *page) +static void __lru_cache_activate_folio(struct folio *folio) { struct pagevec *pvec; int i; @@ -380,8 +388,8 @@ static void __lru_cache_activate_page(struct page *page) for (i = pagevec_count(pvec) - 1; i >= 0; i--) { struct page *pagevec_page = pvec->pages[i]; - if (pagevec_page == page) { - SetPageActive(page); + if (pagevec_page == &folio->page) { + folio_set_active(folio); break; } } @@ -399,61 +407,59 @@ static void __lru_cache_activate_page(struct page *page) * When a newly allocated page is not yet visible, so safe for non-atomic ops, * __SetPageReferenced(page) may be substituted for mark_page_accessed(page). */ -void mark_page_accessed(struct page *page) +void folio_mark_accessed(struct folio *folio) { - page = compound_head(page); - - if (!PageReferenced(page)) { - SetPageReferenced(page); - } else if (PageUnevictable(page)) { + if (!folio_test_referenced(folio)) { + folio_set_referenced(folio); + } else if (folio_test_unevictable(folio)) { /* * Unevictable pages are on the "LRU_UNEVICTABLE" list. But, * this list is never rotated or maintained, so marking an * evictable page accessed has no effect. */ - } else if (!PageActive(page)) { + } else if (!folio_test_active(folio)) { /* * If the page is on the LRU, queue it for activation via * lru_pvecs.activate_page. Otherwise, assume the page is on a * pagevec, mark it active and it'll be moved to the active * LRU on the next drain. */ - if (PageLRU(page)) - activate_page(page); + if (folio_test_lru(folio)) + folio_activate(folio); else - __lru_cache_activate_page(page); - ClearPageReferenced(page); - workingset_activation(page); + __lru_cache_activate_folio(folio); + folio_clear_referenced(folio); + workingset_activation(folio); } - if (page_is_idle(page)) - clear_page_idle(page); + if (folio_test_idle(folio)) + folio_clear_idle(folio); } -EXPORT_SYMBOL(mark_page_accessed); +EXPORT_SYMBOL(folio_mark_accessed); /** - * lru_cache_add - add a page to a page list - * @page: the page to be added to the LRU. + * folio_add_lru - Add a folio to an LRU list. + * @folio: The folio to be added to the LRU. * - * Queue the page for addition to the LRU via pagevec. The decision on whether + * Queue the folio for addition to the LRU. The decision on whether * to add the page to the [in]active [file|anon] list is deferred until the - * pagevec is drained. This gives a chance for the caller of lru_cache_add() - * have the page added to the active list using mark_page_accessed(). + * pagevec is drained. This gives a chance for the caller of folio_add_lru() + * have the folio added to the active list using folio_mark_accessed(). */ -void lru_cache_add(struct page *page) +void folio_add_lru(struct folio *folio) { struct pagevec *pvec; - VM_BUG_ON_PAGE(PageActive(page) && PageUnevictable(page), page); - VM_BUG_ON_PAGE(PageLRU(page), page); + VM_BUG_ON_FOLIO(folio_test_active(folio) && folio_test_unevictable(folio), folio); + VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); - get_page(page); + folio_get(folio); local_lock(&lru_pvecs.lock); pvec = this_cpu_ptr(&lru_pvecs.lru_add); - if (pagevec_add_and_need_flush(pvec, page)) + if (pagevec_add_and_need_flush(pvec, &folio->page)) __pagevec_lru_add(pvec); local_unlock(&lru_pvecs.lock); } -EXPORT_SYMBOL(lru_cache_add); +EXPORT_SYMBOL(folio_add_lru); /** * lru_cache_add_inactive_or_unevictable @@ -888,11 +894,12 @@ void release_pages(struct page **pages, int nr) int i; LIST_HEAD(pages_to_free); struct lruvec *lruvec = NULL; - unsigned long flags; + unsigned long flags = 0; unsigned int lock_batch; for (i = 0; i < nr; i++) { struct page *page = pages[i]; + struct folio *folio = page_folio(page); /* * Make sure the IRQ-safe lock-holding time does not get @@ -904,7 +911,7 @@ void release_pages(struct page **pages, int nr) lruvec = NULL; } - page = compound_head(page); + page = &folio->page; if (is_huge_zero_page(page)) continue; @@ -943,7 +950,7 @@ void release_pages(struct page **pages, int nr) if (PageLRU(page)) { struct lruvec *prev_lruvec = lruvec; - lruvec = relock_page_lruvec_irqsave(page, lruvec, + lruvec = folio_lruvec_relock_irqsave(folio, lruvec, &flags); if (prev_lruvec != lruvec) lock_batch = 0; @@ -985,17 +992,18 @@ void __pagevec_release(struct pagevec *pvec) } EXPORT_SYMBOL(__pagevec_release); -static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec) +static void __pagevec_lru_add_fn(struct folio *folio, struct lruvec *lruvec) { - int was_unevictable = TestClearPageUnevictable(page); - int nr_pages = thp_nr_pages(page); + int was_unevictable = folio_test_clear_unevictable(folio); + long nr_pages = folio_nr_pages(folio); - VM_BUG_ON_PAGE(PageLRU(page), page); + VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); /* - * Page becomes evictable in two ways: + * A folio becomes evictable in two ways: * 1) Within LRU lock [munlock_vma_page() and __munlock_pagevec()]. - * 2) Before acquiring LRU lock to put the page to correct LRU and then + * 2) Before acquiring LRU lock to put the folio on the correct LRU + * and then * a) do PageLRU check with lock [check_move_unevictable_pages] * b) do PageLRU check before lock [clear_page_mlock] * @@ -1004,35 +1012,36 @@ static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec) * * #0: __pagevec_lru_add_fn #1: clear_page_mlock * - * SetPageLRU() TestClearPageMlocked() + * folio_set_lru() folio_test_clear_mlocked() * smp_mb() // explicit ordering // above provides strict * // ordering - * PageMlocked() PageLRU() + * folio_test_mlocked() folio_test_lru() * * - * if '#1' does not observe setting of PG_lru by '#0' and fails - * isolation, the explicit barrier will make sure that page_evictable - * check will put the page in correct LRU. Without smp_mb(), SetPageLRU - * can be reordered after PageMlocked check and can make '#1' to fail - * the isolation of the page whose Mlocked bit is cleared (#0 is also - * looking at the same page) and the evictable page will be stranded - * in an unevictable LRU. + * if '#1' does not observe setting of PG_lru by '#0' and + * fails isolation, the explicit barrier will make sure that + * folio_evictable check will put the folio on the correct + * LRU. Without smp_mb(), folio_set_lru() can be reordered + * after folio_test_mlocked() check and can make '#1' fail the + * isolation of the folio whose mlocked bit is cleared (#0 is + * also looking at the same folio) and the evictable folio will + * be stranded on an unevictable LRU. */ - SetPageLRU(page); + folio_set_lru(folio); smp_mb__after_atomic(); - if (page_evictable(page)) { + if (folio_evictable(folio)) { if (was_unevictable) __count_vm_events(UNEVICTABLE_PGRESCUED, nr_pages); } else { - ClearPageActive(page); - SetPageUnevictable(page); + folio_clear_active(folio); + folio_set_unevictable(folio); if (!was_unevictable) __count_vm_events(UNEVICTABLE_PGCULLED, nr_pages); } - add_page_to_lru_list(page, lruvec); - trace_mm_lru_insertion(page); + lruvec_add_folio(lruvec, folio); + trace_mm_lru_insertion(folio); } /* @@ -1046,10 +1055,10 @@ void __pagevec_lru_add(struct pagevec *pvec) unsigned long flags = 0; for (i = 0; i < pagevec_count(pvec); i++) { - struct page *page = pvec->pages[i]; + struct folio *folio = page_folio(pvec->pages[i]); - lruvec = relock_page_lruvec_irqsave(page, lruvec, &flags); - __pagevec_lru_add_fn(page, lruvec); + lruvec = folio_lruvec_relock_irqsave(folio, lruvec, &flags); + __pagevec_lru_add_fn(folio, lruvec); } if (lruvec) unlock_page_lruvec_irqrestore(lruvec, flags); diff --git a/mm/swap_state.c b/mm/swap_state.c index bc7cee6b2ec5..8d4104242100 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -498,7 +498,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, mem_cgroup_swapin_uncharge_swap(entry); if (shadow) - workingset_refault(page, shadow); + workingset_refault(page_folio(page), shadow); /* Caller will initiate read into locked page */ lru_cache_add(page); diff --git a/mm/swapfile.c b/mm/swapfile.c index 22d10f713848..e3dcaeecc50f 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3534,13 +3534,13 @@ struct swap_info_struct *page_swap_info(struct page *page) } /* - * out-of-line __page_file_ methods to avoid include hell. + * out-of-line methods to avoid include hell. */ -struct address_space *__page_file_mapping(struct page *page) +struct address_space *swapcache_mapping(struct folio *folio) { - return page_swap_info(page)->swap_file->f_mapping; + return page_swap_info(&folio->page)->swap_file->f_mapping; } -EXPORT_SYMBOL_GPL(__page_file_mapping); +EXPORT_SYMBOL_GPL(swapcache_mapping); pgoff_t __page_file_index(struct page *page) { diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 7a9008415534..36e5f6ab976f 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -164,7 +164,7 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm, __SetPageUptodate(page); ret = -ENOMEM; - if (mem_cgroup_charge(page, dst_mm, GFP_KERNEL)) + if (mem_cgroup_charge(page_folio(page), dst_mm, GFP_KERNEL)) goto out_release; ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr, diff --git a/mm/util.c b/mm/util.c index bacabe446906..e58151a61255 100644 --- a/mm/util.c +++ b/mm/util.c @@ -654,81 +654,78 @@ void *kvrealloc(const void *p, size_t oldsize, size_t newsize, gfp_t flags) } EXPORT_SYMBOL(kvrealloc); -static inline void *__page_rmapping(struct page *page) -{ - unsigned long mapping; - - mapping = (unsigned long)page->mapping; - mapping &= ~PAGE_MAPPING_FLAGS; - - return (void *)mapping; -} - /* Neutral page->mapping pointer to address_space or anon_vma or other */ void *page_rmapping(struct page *page) { - page = compound_head(page); - return __page_rmapping(page); + return folio_raw_mapping(page_folio(page)); } -/* - * Return true if this page is mapped into pagetables. - * For compound page it returns true if any subpage of compound page is mapped. +/** + * folio_mapped - Is this folio mapped into userspace? + * @folio: The folio. + * + * Return: True if any page in this folio is referenced by user page tables. */ -bool page_mapped(struct page *page) +bool folio_mapped(struct folio *folio) { - int i; + long i, nr; - if (likely(!PageCompound(page))) - return atomic_read(&page->_mapcount) >= 0; - page = compound_head(page); - if (atomic_read(compound_mapcount_ptr(page)) >= 0) + if (folio_test_single(folio)) + return atomic_read(&folio->_mapcount) >= 0; + if (atomic_read(folio_mapcount_ptr(folio)) >= 0) return true; - if (PageHuge(page)) + if (folio_test_hugetlb(folio)) return false; - for (i = 0; i < compound_nr(page); i++) { - if (atomic_read(&page[i]._mapcount) >= 0) + + nr = folio_nr_pages(folio); + for (i = 0; i < nr; i++) { + if (atomic_read(&folio_page(folio, i)->_mapcount) >= 0) return true; } return false; } -EXPORT_SYMBOL(page_mapped); +EXPORT_SYMBOL(folio_mapped); struct anon_vma *page_anon_vma(struct page *page) { - unsigned long mapping; + struct folio *folio = page_folio(page); + unsigned long mapping = (unsigned long)folio->mapping; - page = compound_head(page); - mapping = (unsigned long)page->mapping; if ((mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON) return NULL; - return __page_rmapping(page); + return (void *)(mapping - PAGE_MAPPING_ANON); } -struct address_space *page_mapping(struct page *page) +/** + * folio_mapping - Find the mapping where this folio is stored. + * @folio: The folio. + * + * For folios which are in the page cache, return the mapping that this + * page belongs to. Folios in the swap cache return the swap mapping + * this page is stored in (which is different from the mapping for the + * swap file or swap device where the data is stored). + * + * You can call this for folios which aren't in the swap cache or page + * cache and it will return NULL. + */ +struct address_space *folio_mapping(struct folio *folio) { struct address_space *mapping; - page = compound_head(page); - /* This happens if someone calls flush_dcache_page on slab page */ - if (unlikely(PageSlab(page))) + if (unlikely(folio_test_slab(folio))) return NULL; - if (unlikely(PageSwapCache(page))) { - swp_entry_t entry; + if (unlikely(folio_test_swapcache(folio))) + return swap_address_space(folio_swap_entry(folio)); - entry.val = page_private(page); - return swap_address_space(entry); - } - - mapping = page->mapping; + mapping = folio->mapping; if ((unsigned long)mapping & PAGE_MAPPING_ANON) return NULL; return (void *)((unsigned long)mapping & ~PAGE_MAPPING_FLAGS); } -EXPORT_SYMBOL(page_mapping); +EXPORT_SYMBOL(folio_mapping); /* Slow path of page_mapcount() for compound pages */ int __page_mapcount(struct page *page) @@ -750,13 +747,26 @@ int __page_mapcount(struct page *page) } EXPORT_SYMBOL_GPL(__page_mapcount); -void copy_huge_page(struct page *dst, struct page *src) +/** + * folio_copy - Copy the contents of one folio to another. + * @dst: Folio to copy to. + * @src: Folio to copy from. + * + * The bytes in the folio represented by @src are copied to @dst. + * Assumes the caller has validated that @dst is at least as large as @src. + * Can be called in atomic context for order-0 folios, but if the folio is + * larger, it may sleep. + */ +void folio_copy(struct folio *dst, struct folio *src) { - unsigned i, nr = compound_nr(src); + long i = 0; + long nr = folio_nr_pages(src); - for (i = 0; i < nr; i++) { + for (;;) { + copy_highpage(folio_page(dst, i), folio_page(src, i)); + if (++i == nr) + break; cond_resched(); - copy_highpage(nth_page(dst, i), nth_page(src, i)); } } @@ -1079,3 +1089,14 @@ void page_offline_end(void) up_write(&page_offline_rwsem); } EXPORT_SYMBOL(page_offline_end); + +#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO +void flush_dcache_folio(struct folio *folio) +{ + long i, nr = folio_nr_pages(folio); + + for (i = 0; i < nr; i++) + flush_dcache_page(folio_page(folio, i)); +} +EXPORT_SYMBOL(flush_dcache_folio); +#endif diff --git a/mm/vmscan.c b/mm/vmscan.c index 74296c2d1fed..306229c4313f 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2090,6 +2090,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, */ int isolate_lru_page(struct page *page) { + struct folio *folio = page_folio(page); int ret = -EBUSY; VM_BUG_ON_PAGE(!page_count(page), page); @@ -2099,7 +2100,7 @@ int isolate_lru_page(struct page *page) struct lruvec *lruvec; get_page(page); - lruvec = lock_page_lruvec_irq(page); + lruvec = folio_lruvec_lock_irq(folio); del_page_from_lru_list(page, lruvec); unlock_page_lruvec_irq(lruvec); ret = 0; @@ -2199,7 +2200,7 @@ static unsigned int move_pages_to_lru(struct lruvec *lruvec, * All pages were isolated from the same lruvec (and isolation * inhibits memcg migration). */ - VM_BUG_ON_PAGE(!page_matches_lruvec(page, lruvec), page); + VM_BUG_ON_PAGE(!folio_matches_lruvec(page_folio(page), lruvec), page); add_page_to_lru_list(page, lruvec); nr_pages = thp_nr_pages(page); nr_moved += nr_pages; @@ -4665,6 +4666,7 @@ void check_move_unevictable_pages(struct pagevec *pvec) for (i = 0; i < pvec->nr; i++) { struct page *page = pvec->pages[i]; + struct folio *folio = page_folio(page); int nr_pages; if (PageTransTail(page)) @@ -4677,7 +4679,7 @@ void check_move_unevictable_pages(struct pagevec *pvec) if (!TestClearPageLRU(page)) continue; - lruvec = relock_page_lruvec_irq(page, lruvec); + lruvec = folio_lruvec_relock_irq(folio, lruvec); if (page_evictable(page) && PageUnevictable(page)) { del_page_from_lru_list(page, lruvec); ClearPageUnevictable(page); diff --git a/mm/workingset.c b/mm/workingset.c index d5b81e4f4cbe..109ab978251a 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -273,17 +273,17 @@ void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg) } /** - * workingset_refault - evaluate the refault of a previously evicted page - * @page: the freshly allocated replacement page - * @shadow: shadow entry of the evicted page + * workingset_refault - Evaluate the refault of a previously evicted folio. + * @folio: The freshly allocated replacement folio. + * @shadow: Shadow entry of the evicted folio. * * Calculates and evaluates the refault distance of the previously - * evicted page in the context of the node and the memcg whose memory + * evicted folio in the context of the node and the memcg whose memory * pressure caused the eviction. */ -void workingset_refault(struct page *page, void *shadow) +void workingset_refault(struct folio *folio, void *shadow) { - bool file = page_is_file_lru(page); + bool file = folio_is_file_lru(folio); struct mem_cgroup *eviction_memcg; struct lruvec *eviction_lruvec; unsigned long refault_distance; @@ -295,16 +295,17 @@ void workingset_refault(struct page *page, void *shadow) unsigned long refault; bool workingset; int memcgid; + long nr; unpack_shadow(shadow, &memcgid, &pgdat, &eviction, &workingset); rcu_read_lock(); /* * Look up the memcg associated with the stored ID. It might - * have been deleted since the page's eviction. + * have been deleted since the folio's eviction. * * Note that in rare events the ID could have been recycled - * for a new cgroup that refaults a shared page. This is + * for a new cgroup that refaults a shared folio. This is * impossible to tell from the available data. However, this * should be a rare and limited disturbance, and activations * are always speculative anyway. Ultimately, it's the aging @@ -340,17 +341,18 @@ void workingset_refault(struct page *page, void *shadow) refault_distance = (refault - eviction) & EVICTION_MASK; /* - * The activation decision for this page is made at the level + * The activation decision for this folio is made at the level * where the eviction occurred, as that is where the LRU order - * during page reclaim is being determined. + * during folio reclaim is being determined. * - * However, the cgroup that will own the page is the one that + * However, the cgroup that will own the folio is the one that * is actually experiencing the refault event. */ - memcg = page_memcg(page); + nr = folio_nr_pages(folio); + memcg = folio_memcg(folio); lruvec = mem_cgroup_lruvec(memcg, pgdat); - inc_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file); + mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file, nr); mem_cgroup_flush_stats(); /* @@ -376,16 +378,16 @@ void workingset_refault(struct page *page, void *shadow) if (refault_distance > workingset_size) goto out; - SetPageActive(page); - workingset_age_nonresident(lruvec, thp_nr_pages(page)); - inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + file); + folio_set_active(folio); + workingset_age_nonresident(lruvec, nr); + mod_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + file, nr); - /* Page was active prior to eviction */ + /* Folio was active prior to eviction */ if (workingset) { - SetPageWorkingset(page); + folio_set_workingset(folio); /* XXX: Move to lru_cache_add() when it supports new vs putback */ - lru_note_cost_page(page); - inc_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + file); + lru_note_cost_folio(folio); + mod_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + file, nr); } out: rcu_read_unlock(); @@ -393,12 +395,11 @@ out: /** * workingset_activation - note a page activation - * @page: page that is being activated + * @folio: Folio that is being activated. */ -void workingset_activation(struct page *page) +void workingset_activation(struct folio *folio) { struct mem_cgroup *memcg; - struct lruvec *lruvec; rcu_read_lock(); /* @@ -408,11 +409,10 @@ void workingset_activation(struct page *page) * XXX: See workingset_refault() - this should return * root_mem_cgroup even for !CONFIG_MEMCG. */ - memcg = page_memcg_rcu(page); + memcg = folio_memcg_rcu(folio); if (!mem_cgroup_disabled() && !memcg) goto out; - lruvec = mem_cgroup_page_lruvec(page); - workingset_age_nonresident(lruvec, thp_nr_pages(page)); + workingset_age_nonresident(folio_lruvec(folio), folio_nr_pages(folio)); out: rcu_read_unlock(); } |