From 4c85c0be3d7a9a7ffe48bfe0954eacc0ba9d3c75 Mon Sep 17 00:00:00 2001 From: Hyeonggon Yoo <42.hyeyoo@gmail.com> Date: Mon, 30 Jan 2023 13:25:13 +0900 Subject: mm, printk: introduce new format %pGt for page_type %pGp format is used to display 'flags' field of a struct page. However, some page flags (i.e. PG_buddy, see page-flags.h for more details) are stored in page_type field. To display human-readable output of page_type, introduce %pGt format. It is important to note the meaning of bits are different in page_type. if page_type is 0xffffffff, no flags are set. Setting PG_buddy (0x00000080) flag results in a page_type of 0xffffff7f. Clearing a bit actually means setting a flag. Bits in page_type are inverted when displaying type names. Only values for which page_type_has_type() returns true are considered as page_type, to avoid confusion with mapcount values. if it returns false, only raw values are displayed and not page type names. Link: https://lkml.kernel.org/r/20230130042514.2418-3-42.hyeyoo@gmail.com Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Reviewed-by: Petr Mladek [vsprintf part] Cc: Andy Shevchenko Cc: David Hildenbrand Cc: Joe Perches Cc: John Ogness Cc: Matthew Wilcox Cc: Sergey Senozhatsky Cc: Steven Rostedt (Google) Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux/page-flags.h') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index a7e3a3405520..57287102c5bd 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -926,9 +926,14 @@ static inline bool is_page_hwpoison(struct page *page) #define PageType(page, flag) \ ((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) +static inline int page_type_has_type(unsigned int page_type) +{ + return (int)page_type < PAGE_MAPCOUNT_RESERVE; +} + static inline int page_has_type(struct page *page) { - return (int)page->page_type < PAGE_MAPCOUNT_RESERVE; + return page_type_has_type(page->page_type); } #define PAGE_TYPE_OPS(uname, lname) \ -- cgit v1.2.3-70-g09d2 From 0a54864f8dfb64b64c84c9db6ff70e0e93690a33 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 9 Mar 2023 20:29:14 -0800 Subject: kasan: remove PG_skip_kasan_poison flag Code inspection reveals that PG_skip_kasan_poison is redundant with kasantag, because the former is intended to be set iff the latter is the match-all tag. It can also be observed that it's basically pointless to poison pages which have kasantag=0, because any pages with this tag would have been pointed to by pointers with match-all tags, so poisoning the pages would have little to no effect in terms of bug detection. Therefore, change the condition in should_skip_kasan_poison() to check kasantag instead, and remove PG_skip_kasan_poison and associated flags. Link: https://lkml.kernel.org/r/20230310042914.3805818-3-pcc@google.com Link: https://linux-review.googlesource.com/id/I57f825f2eaeaf7e8389d6cf4597c8a5821359838 Signed-off-by: Peter Collingbourne Reviewed-by: Andrey Konovalov Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: Evgenii Stepanov Cc: Vincenzo Frascino Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/gfp_types.h | 30 +++++++--------- include/linux/page-flags.h | 9 ----- include/trace/events/mmflags.h | 13 ++----- mm/kasan/hw_tags.c | 2 +- mm/page_alloc.c | 81 ++++++++++++++++-------------------------- mm/vmalloc.c | 2 +- 6 files changed, 47 insertions(+), 90 deletions(-) (limited to 'include/linux/page-flags.h') diff --git a/include/linux/gfp_types.h b/include/linux/gfp_types.h index 5088637fe5c2..6583a58670c5 100644 --- a/include/linux/gfp_types.h +++ b/include/linux/gfp_types.h @@ -47,16 +47,14 @@ typedef unsigned int __bitwise gfp_t; #define ___GFP_ACCOUNT 0x400000u #define ___GFP_ZEROTAGS 0x800000u #ifdef CONFIG_KASAN_HW_TAGS -#define ___GFP_SKIP_ZERO 0x1000000u -#define ___GFP_SKIP_KASAN_UNPOISON 0x2000000u -#define ___GFP_SKIP_KASAN_POISON 0x4000000u +#define ___GFP_SKIP_ZERO 0x1000000u +#define ___GFP_SKIP_KASAN 0x2000000u #else -#define ___GFP_SKIP_ZERO 0 -#define ___GFP_SKIP_KASAN_UNPOISON 0 -#define ___GFP_SKIP_KASAN_POISON 0 +#define ___GFP_SKIP_ZERO 0 +#define ___GFP_SKIP_KASAN 0 #endif #ifdef CONFIG_LOCKDEP -#define ___GFP_NOLOCKDEP 0x8000000u +#define ___GFP_NOLOCKDEP 0x4000000u #else #define ___GFP_NOLOCKDEP 0 #endif @@ -234,25 +232,24 @@ typedef unsigned int __bitwise gfp_t; * memory tags at the same time as zeroing memory has minimal additional * performace impact. * - * %__GFP_SKIP_KASAN_UNPOISON makes KASAN skip unpoisoning on page allocation. - * Only effective in HW_TAGS mode. - * - * %__GFP_SKIP_KASAN_POISON makes KASAN skip poisoning on page deallocation. - * Typically, used for userspace pages. Only effective in HW_TAGS mode. + * %__GFP_SKIP_KASAN makes KASAN skip unpoisoning on page allocation. + * Used for userspace and vmalloc pages; the latter are unpoisoned by + * kasan_unpoison_vmalloc instead. For userspace pages, results in + * poisoning being skipped as well, see should_skip_kasan_poison for + * details. Only effective in HW_TAGS mode. */ #define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN) #define __GFP_COMP ((__force gfp_t)___GFP_COMP) #define __GFP_ZERO ((__force gfp_t)___GFP_ZERO) #define __GFP_ZEROTAGS ((__force gfp_t)___GFP_ZEROTAGS) #define __GFP_SKIP_ZERO ((__force gfp_t)___GFP_SKIP_ZERO) -#define __GFP_SKIP_KASAN_UNPOISON ((__force gfp_t)___GFP_SKIP_KASAN_UNPOISON) -#define __GFP_SKIP_KASAN_POISON ((__force gfp_t)___GFP_SKIP_KASAN_POISON) +#define __GFP_SKIP_KASAN ((__force gfp_t)___GFP_SKIP_KASAN) /* Disable lockdep for GFP context tracking */ #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP) /* Room for N __GFP_FOO bits */ -#define __GFP_BITS_SHIFT (27 + IS_ENABLED(CONFIG_LOCKDEP)) +#define __GFP_BITS_SHIFT (26 + IS_ENABLED(CONFIG_LOCKDEP)) #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) /** @@ -335,8 +332,7 @@ typedef unsigned int __bitwise gfp_t; #define GFP_DMA __GFP_DMA #define GFP_DMA32 __GFP_DMA32 #define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM) -#define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE | \ - __GFP_SKIP_KASAN_POISON | __GFP_SKIP_KASAN_UNPOISON) +#define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE | __GFP_SKIP_KASAN) #define GFP_TRANSHUGE_LIGHT ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ __GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM) #define GFP_TRANSHUGE (GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 57287102c5bd..dcda20c47b8f 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -135,9 +135,6 @@ enum pageflags { #ifdef CONFIG_ARCH_USES_PG_ARCH_X PG_arch_2, PG_arch_3, -#endif -#ifdef CONFIG_KASAN_HW_TAGS - PG_skip_kasan_poison, #endif __NR_PAGEFLAGS, @@ -594,12 +591,6 @@ TESTCLEARFLAG(Young, young, PF_ANY) PAGEFLAG(Idle, idle, PF_ANY) #endif -#ifdef CONFIG_KASAN_HW_TAGS -PAGEFLAG(SkipKASanPoison, skip_kasan_poison, PF_HEAD) -#else -PAGEFLAG_FALSE(SkipKASanPoison, skip_kasan_poison) -#endif - /* * PageReported() is used to track reported free pages within the Buddy * allocator. We can use the non-atomic version of the test and set diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index b28218b7998e..b63e7c0fbbe5 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -55,8 +55,7 @@ #ifdef CONFIG_KASAN_HW_TAGS #define __def_gfpflag_names_kasan , \ gfpflag_string(__GFP_SKIP_ZERO), \ - gfpflag_string(__GFP_SKIP_KASAN_POISON), \ - gfpflag_string(__GFP_SKIP_KASAN_UNPOISON) + gfpflag_string(__GFP_SKIP_KASAN) #else #define __def_gfpflag_names_kasan #endif @@ -96,13 +95,6 @@ #define IF_HAVE_PG_ARCH_X(_name) #endif -#ifdef CONFIG_KASAN_HW_TAGS -#define IF_HAVE_PG_SKIP_KASAN_POISON(_name) \ - ,{1UL << PG_##_name, __stringify(_name)} -#else -#define IF_HAVE_PG_SKIP_KASAN_POISON(_name) -#endif - #define DEF_PAGEFLAG_NAME(_name) { 1UL << PG_##_name, __stringify(_name) } #define __def_pageflag_names \ @@ -133,8 +125,7 @@ IF_HAVE_PG_HWPOISON(hwpoison) \ IF_HAVE_PG_IDLE(idle) \ IF_HAVE_PG_IDLE(young) \ IF_HAVE_PG_ARCH_X(arch_2) \ -IF_HAVE_PG_ARCH_X(arch_3) \ -IF_HAVE_PG_SKIP_KASAN_POISON(skip_kasan_poison) +IF_HAVE_PG_ARCH_X(arch_3) #define show_page_flags(flags) \ (flags) ? __print_flags(flags, "|", \ diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c index d1bcb0205327..bb4f56e5bdec 100644 --- a/mm/kasan/hw_tags.c +++ b/mm/kasan/hw_tags.c @@ -318,7 +318,7 @@ void *__kasan_unpoison_vmalloc(const void *start, unsigned long size, * Thus, for VM_ALLOC mappings, hardware tag-based KASAN only tags * the first virtual mapping, which is created by vmalloc(). * Tagging the page_alloc memory backing that vmalloc() allocation is - * skipped, see ___GFP_SKIP_KASAN_UNPOISON. + * skipped, see ___GFP_SKIP_KASAN. * * For non-VM_ALLOC allocations, page_alloc memory is tagged as usual. */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a109444e9f44..3737f9d58f5f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -112,17 +112,6 @@ typedef int __bitwise fpi_t; */ #define FPI_TO_TAIL ((__force fpi_t)BIT(1)) -/* - * Don't poison memory with KASAN (only for the tag-based modes). - * During boot, all non-reserved memblock memory is exposed to page_alloc. - * Poisoning all that memory lengthens boot time, especially on systems with - * large amount of RAM. This flag is used to skip that poisoning. - * This is only done for the tag-based KASAN modes, as those are able to - * detect memory corruptions with the memory tags assigned by default. - * All memory allocated normally after boot gets poisoned as usual. - */ -#define FPI_SKIP_KASAN_POISON ((__force fpi_t)BIT(2)) - /* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */ static DEFINE_MUTEX(pcp_batch_high_lock); #define MIN_PERCPU_PAGELIST_HIGH_FRACTION (8) @@ -1370,13 +1359,19 @@ out: /* * Skip KASAN memory poisoning when either: * - * 1. Deferred memory initialization has not yet completed, - * see the explanation below. - * 2. Skipping poisoning is requested via FPI_SKIP_KASAN_POISON, - * see the comment next to it. - * 3. Skipping poisoning is requested via __GFP_SKIP_KASAN_POISON, - * see the comment next to it. - * 4. The allocation is excluded from being checked due to sampling, + * 1. For generic KASAN: deferred memory initialization has not yet completed. + * Tag-based KASAN modes skip pages freed via deferred memory initialization + * using page tags instead (see below). + * 2. For tag-based KASAN modes: the page has a match-all KASAN tag, indicating + * that error detection is disabled for accesses via the page address. + * + * Pages will have match-all tags in the following circumstances: + * + * 1. Pages are being initialized for the first time, including during deferred + * memory init; see the call to page_kasan_tag_reset in __init_single_page. + * 2. The allocation was not unpoisoned due to __GFP_SKIP_KASAN, with the + * exception of pages unpoisoned by kasan_unpoison_vmalloc. + * 3. The allocation was excluded from being checked due to sampling, * see the call to kasan_unpoison_pages. * * Poisoning pages during deferred memory init will greatly lengthen the @@ -1392,10 +1387,10 @@ out: */ static inline bool should_skip_kasan_poison(struct page *page, fpi_t fpi_flags) { - return deferred_pages_enabled() || - (!IS_ENABLED(CONFIG_KASAN_GENERIC) && - (fpi_flags & FPI_SKIP_KASAN_POISON)) || - PageSkipKASanPoison(page); + if (IS_ENABLED(CONFIG_KASAN_GENERIC)) + return deferred_pages_enabled(); + + return page_kasan_tag(page) == 0xff; } static void kernel_init_pages(struct page *page, int numpages) @@ -1730,7 +1725,7 @@ void __free_pages_core(struct page *page, unsigned int order) * Bypass PCP and place fresh pages right to the tail, primarily * relevant for memory onlining. */ - __free_pages_ok(page, order, FPI_TO_TAIL | FPI_SKIP_KASAN_POISON); + __free_pages_ok(page, order, FPI_TO_TAIL); } #ifdef CONFIG_NUMA @@ -2396,9 +2391,9 @@ static inline bool should_skip_kasan_unpoison(gfp_t flags) /* * With hardware tag-based KASAN enabled, skip if this has been - * requested via __GFP_SKIP_KASAN_UNPOISON. + * requested via __GFP_SKIP_KASAN. */ - return flags & __GFP_SKIP_KASAN_UNPOISON; + return flags & __GFP_SKIP_KASAN; } static inline bool should_skip_init(gfp_t flags) @@ -2417,7 +2412,6 @@ inline void post_alloc_hook(struct page *page, unsigned int order, bool init = !want_init_on_free() && want_init_on_alloc(gfp_flags) && !should_skip_init(gfp_flags); bool zero_tags = init && (gfp_flags & __GFP_ZEROTAGS); - bool reset_tags = true; int i; set_page_private(page, 0); @@ -2451,37 +2445,22 @@ inline void post_alloc_hook(struct page *page, unsigned int order, /* Take note that memory was initialized by the loop above. */ init = false; } - if (!should_skip_kasan_unpoison(gfp_flags)) { - /* Try unpoisoning (or setting tags) and initializing memory. */ - if (kasan_unpoison_pages(page, order, init)) { - /* Take note that memory was initialized by KASAN. */ - if (kasan_has_integrated_init()) - init = false; - /* Take note that memory tags were set by KASAN. */ - reset_tags = false; - } else { - /* - * KASAN decided to exclude this allocation from being - * (un)poisoned due to sampling. Make KASAN skip - * poisoning when the allocation is freed. - */ - SetPageSkipKASanPoison(page); - } - } - /* - * If memory tags have not been set by KASAN, reset the page tags to - * ensure page_address() dereferencing does not fault. - */ - if (reset_tags) { + if (!should_skip_kasan_unpoison(gfp_flags) && + kasan_unpoison_pages(page, order, init)) { + /* Take note that memory was initialized by KASAN. */ + if (kasan_has_integrated_init()) + init = false; + } else { + /* + * If memory tags have not been set by KASAN, reset the page + * tags to ensure page_address() dereferencing does not fault. + */ for (i = 0; i != 1 << order; ++i) page_kasan_tag_reset(page + i); } /* If memory is still not initialized, initialize it now. */ if (init) kernel_init_pages(page, 1 << order); - /* Propagate __GFP_SKIP_KASAN_POISON to page flags. */ - if (kasan_hw_tags_enabled() && (gfp_flags & __GFP_SKIP_KASAN_POISON)) - SetPageSkipKASanPoison(page); set_page_owner(page, order, gfp_flags); page_table_check_alloc(page, order); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index bef6cf2b4d46..5e60e9792cbf 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -3188,7 +3188,7 @@ again: * pages backing VM_ALLOC mapping. Memory is instead * poisoned and zeroed by kasan_unpoison_vmalloc(). */ - gfp_mask |= __GFP_SKIP_KASAN_UNPOISON | __GFP_SKIP_ZERO; + gfp_mask |= __GFP_SKIP_KASAN | __GFP_SKIP_ZERO; } /* Take note that the mapping is PAGE_KERNEL. */ -- cgit v1.2.3-70-g09d2 From c4ba69f00c18524eb89990e9afda9d2a9b54de2f Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 28 Feb 2023 10:59:54 +0100 Subject: mm, page_flags: remove PG_slob_free With SLOB removed we no longer need the PG_slob_free alias for PG_private. Also update tools/mm/page-types. Signed-off-by: Vlastimil Babka Acked-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Acked-by: Lorenzo Stoakes Acked-by: Mike Rapoport (IBM) --- include/linux/page-flags.h | 4 ---- tools/mm/page-types.c | 6 +----- 2 files changed, 1 insertion(+), 9 deletions(-) (limited to 'include/linux/page-flags.h') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index a7e3a3405520..2bdc41cb0594 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -174,9 +174,6 @@ enum pageflags { /* Remapped by swiotlb-xen. */ PG_xen_remapped = PG_owner_priv_1, - /* SLOB */ - PG_slob_free = PG_private, - #ifdef CONFIG_MEMORY_FAILURE /* * Compound pages. Stored in first tail page's flags. @@ -483,7 +480,6 @@ PAGEFLAG(Active, active, PF_HEAD) __CLEARPAGEFLAG(Active, active, PF_HEAD) PAGEFLAG(Workingset, workingset, PF_HEAD) TESTCLEARFLAG(Workingset, workingset, PF_HEAD) __PAGEFLAG(Slab, slab, PF_NO_TAIL) -__PAGEFLAG(SlobFree, slob_free, PF_NO_TAIL) PAGEFLAG(Checked, checked, PF_NO_COMPOUND) /* Used by some filesystems */ /* Xen */ diff --git a/tools/mm/page-types.c b/tools/mm/page-types.c index 381dcc00cb62..8d5595b6c59f 100644 --- a/tools/mm/page-types.c +++ b/tools/mm/page-types.c @@ -85,7 +85,6 @@ */ #define KPF_ANON_EXCLUSIVE 47 #define KPF_READAHEAD 48 -#define KPF_SLOB_FREE 49 #define KPF_SLUB_FROZEN 50 #define KPF_SLUB_DEBUG 51 #define KPF_FILE 61 @@ -141,7 +140,6 @@ static const char * const page_flag_names[] = { [KPF_ANON_EXCLUSIVE] = "d:anon_exclusive", [KPF_READAHEAD] = "I:readahead", - [KPF_SLOB_FREE] = "P:slob_free", [KPF_SLUB_FROZEN] = "A:slub_frozen", [KPF_SLUB_DEBUG] = "E:slub_debug", @@ -478,10 +476,8 @@ static uint64_t expand_overloaded_flags(uint64_t flags, uint64_t pme) if ((flags & BIT(ANON)) && (flags & BIT(MAPPEDTODISK))) flags ^= BIT(MAPPEDTODISK) | BIT(ANON_EXCLUSIVE); - /* SLOB/SLUB overload several page flags */ + /* SLUB overloads several page flags */ if (flags & BIT(SLAB)) { - if (flags & BIT(PRIVATE)) - flags ^= BIT(PRIVATE) | BIT(SLOB_FREE); if (flags & BIT(ACTIVE)) flags ^= BIT(ACTIVE) | BIT(SLUB_FROZEN); if (flags & BIT(ERROR)) -- cgit v1.2.3-70-g09d2 From cd57b77197a434709aec0e7fb8b2e6ec8479aa4e Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 24 Mar 2023 18:01:03 +0000 Subject: ext4: Convert ext4_bio_write_page() to use a folio Remove several calls to compound_head() and the last caller of set_page_writeback_keepwrite(), so remove the wrapper too. Also export bio_add_folio() as this is the first caller from a module. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Ritesh Harjani (IBM) Reviewed-by: Theodore Ts'o Link: https://lore.kernel.org/r/20230324180129.1220691-4-willy@infradead.org Signed-off-by: Theodore Ts'o --- block/bio.c | 1 + fs/ext4/page-io.c | 58 +++++++++++++++++++++------------------------- include/linux/page-flags.h | 5 ---- 3 files changed, 28 insertions(+), 36 deletions(-) (limited to 'include/linux/page-flags.h') diff --git a/block/bio.c b/block/bio.c index fd11614bba4d..043944fd46eb 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1159,6 +1159,7 @@ bool bio_add_folio(struct bio *bio, struct folio *folio, size_t len, return false; return bio_add_page(bio, &folio->page, len, off) > 0; } +EXPORT_SYMBOL(bio_add_folio); void __bio_release_pages(struct bio *bio, bool mark_dirty) { diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 8703fd732abb..7850d2cb2e08 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -409,12 +409,10 @@ static void io_submit_init_bio(struct ext4_io_submit *io, static void io_submit_add_bh(struct ext4_io_submit *io, struct inode *inode, - struct page *pagecache_page, - struct page *bounce_page, + struct folio *folio, + struct folio *io_folio, struct buffer_head *bh) { - int ret; - if (io->io_bio && (bh->b_blocknr != io->io_next_block || !fscrypt_mergeable_bio_bh(io->io_bio, bh))) { submit_and_retry: @@ -422,11 +420,9 @@ submit_and_retry: } if (io->io_bio == NULL) io_submit_init_bio(io, bh); - ret = bio_add_page(io->io_bio, bounce_page ?: pagecache_page, - bh->b_size, bh_offset(bh)); - if (ret != bh->b_size) + if (!bio_add_folio(io->io_bio, io_folio, bh->b_size, bh_offset(bh))) goto submit_and_retry; - wbc_account_cgroup_owner(io->io_wbc, pagecache_page, bh->b_size); + wbc_account_cgroup_owner(io->io_wbc, &folio->page, bh->b_size); io->io_next_block++; } @@ -434,8 +430,9 @@ int ext4_bio_write_page(struct ext4_io_submit *io, struct page *page, int len) { - struct page *bounce_page = NULL; - struct inode *inode = page->mapping->host; + struct folio *folio = page_folio(page); + struct folio *io_folio = folio; + struct inode *inode = folio->mapping->host; unsigned block_start; struct buffer_head *bh, *head; int ret = 0; @@ -443,30 +440,30 @@ int ext4_bio_write_page(struct ext4_io_submit *io, struct writeback_control *wbc = io->io_wbc; bool keep_towrite = false; - BUG_ON(!PageLocked(page)); - BUG_ON(PageWriteback(page)); + BUG_ON(!folio_test_locked(folio)); + BUG_ON(folio_test_writeback(folio)); - ClearPageError(page); + folio_clear_error(folio); /* * Comments copied from block_write_full_page: * - * The page straddles i_size. It must be zeroed out on each and every + * The folio straddles i_size. It must be zeroed out on each and every * writepage invocation because it may be mmapped. "A file is mapped * in multiples of the page size. For a file that is not a multiple of * the page size, the remaining memory is zeroed when mapped, and * writes to that region are not written out to the file." */ - if (len < PAGE_SIZE) - zero_user_segment(page, len, PAGE_SIZE); + if (len < folio_size(folio)) + folio_zero_segment(folio, len, folio_size(folio)); /* * In the first loop we prepare and mark buffers to submit. We have to - * mark all buffers in the page before submitting so that - * end_page_writeback() cannot be called from ext4_end_bio() when IO + * mark all buffers in the folio before submitting so that + * folio_end_writeback() cannot be called from ext4_end_bio() when IO * on the first buffer finishes and we are still working on submitting * the second buffer. */ - bh = head = page_buffers(page); + bh = head = folio_buffers(folio); do { block_start = bh_offset(bh); if (block_start >= len) { @@ -481,14 +478,14 @@ int ext4_bio_write_page(struct ext4_io_submit *io, clear_buffer_dirty(bh); /* * Keeping dirty some buffer we cannot write? Make sure - * to redirty the page and keep TOWRITE tag so that - * racing WB_SYNC_ALL writeback does not skip the page. + * to redirty the folio and keep TOWRITE tag so that + * racing WB_SYNC_ALL writeback does not skip the folio. * This happens e.g. when doing writeout for * transaction commit. */ if (buffer_dirty(bh)) { - if (!PageDirty(page)) - redirty_page_for_writepage(wbc, page); + if (!folio_test_dirty(folio)) + folio_redirty_for_writepage(wbc, folio); keep_towrite = true; } continue; @@ -500,11 +497,11 @@ int ext4_bio_write_page(struct ext4_io_submit *io, nr_to_submit++; } while ((bh = bh->b_this_page) != head); - /* Nothing to submit? Just unlock the page... */ + /* Nothing to submit? Just unlock the folio... */ if (!nr_to_submit) return 0; - bh = head = page_buffers(page); + bh = head = folio_buffers(folio); /* * If any blocks are being written to an encrypted file, encrypt them @@ -516,6 +513,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, if (fscrypt_inode_uses_fs_layer_crypto(inode) && nr_to_submit) { gfp_t gfp_flags = GFP_NOFS; unsigned int enc_bytes = round_up(len, i_blocksize(inode)); + struct page *bounce_page; /* * Since bounce page allocation uses a mempool, we can only use @@ -542,7 +540,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, } printk_ratelimited(KERN_ERR "%s: ret = %d\n", __func__, ret); - redirty_page_for_writepage(wbc, page); + folio_redirty_for_writepage(wbc, folio); do { if (buffer_async_write(bh)) { clear_buffer_async_write(bh); @@ -553,18 +551,16 @@ int ext4_bio_write_page(struct ext4_io_submit *io, return ret; } + io_folio = page_folio(bounce_page); } - if (keep_towrite) - set_page_writeback_keepwrite(page); - else - set_page_writeback(page); + __folio_start_writeback(folio, keep_towrite); /* Now submit buffers to write */ do { if (!buffer_async_write(bh)) continue; - io_submit_add_bh(io, inode, page, bounce_page, bh); + io_submit_add_bh(io, inode, folio, io_folio, bh); } while ((bh = bh->b_this_page) != head); return 0; diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index a7e3a3405520..2e00cc14a81c 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -766,11 +766,6 @@ bool set_page_writeback(struct page *page); #define folio_start_writeback_keepwrite(folio) \ __folio_start_writeback(folio, true) -static inline void set_page_writeback_keepwrite(struct page *page) -{ - folio_start_writeback_keepwrite(page_folio(page)); -} - static inline bool test_set_page_writeback(struct page *page) { return set_page_writeback(page); -- cgit v1.2.3-70-g09d2 From 957ebbdf434013ee01f29f6c9174eced995ebbe7 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 27 Mar 2023 16:10:50 +0100 Subject: hugetlb: remove PageHeadHuge() Sidhartha Kumar removed the last caller of PageHeadHuge(), so we can now remove it and make folio_test_hugetlb() the real implementation. Add kernel-doc for folio_test_hugetlb(). Link: https://lkml.kernel.org/r/20230327151050.1787744-1-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Reviewed-by: David Hildenbrand Reviewed-by: Muchun Song Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 7 +------ mm/hugetlb.c | 18 +++++++++++------- 2 files changed, 12 insertions(+), 13 deletions(-) (limited to 'include/linux/page-flags.h') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index dcda20c47b8f..efc42fae3d96 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -815,14 +815,9 @@ static inline void ClearPageCompound(struct page *page) #ifdef CONFIG_HUGETLB_PAGE int PageHuge(struct page *page); -int PageHeadHuge(struct page *page); -static inline bool folio_test_hugetlb(struct folio *folio) -{ - return PageHeadHuge(&folio->page); -} +bool folio_test_hugetlb(struct folio *folio); #else TESTPAGEFLAG_FALSE(Huge, hugetlb) -TESTPAGEFLAG_FALSE(HeadHuge, headhuge) #endif #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/mm/hugetlb.c b/mm/hugetlb.c index a58b3739ed4b..7e4a80769c9e 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2050,19 +2050,23 @@ int PageHuge(struct page *page) } EXPORT_SYMBOL_GPL(PageHuge); -/* - * PageHeadHuge() only returns true for hugetlbfs head page, but not for - * normal or transparent huge pages. +/** + * folio_test_hugetlb - Determine if the folio belongs to hugetlbfs + * @folio: The folio to test. + * + * Context: Any context. Caller should have a reference on the folio to + * prevent it from being turned into a tail page. + * Return: True for hugetlbfs folios, false for anon folios or folios + * belonging to other filesystems. */ -int PageHeadHuge(struct page *page_head) +bool folio_test_hugetlb(struct folio *folio) { - struct folio *folio = (struct folio *)page_head; if (!folio_test_large(folio)) - return 0; + return false; return folio->_folio_dtor == HUGETLB_PAGE_DTOR; } -EXPORT_SYMBOL_GPL(PageHeadHuge); +EXPORT_SYMBOL_GPL(folio_test_hugetlb); /* * Find and lock address space (mapping) in write mode. -- cgit v1.2.3-70-g09d2 From 44d0fb387b53e56c8a050bac5c7d460e21eb226f Mon Sep 17 00:00:00 2001 From: Ruihan Li Date: Mon, 15 May 2023 21:09:58 +0800 Subject: mm: page_table_check: Ensure user pages are not slab pages The current uses of PageAnon in page table check functions can lead to type confusion bugs between struct page and slab [1], if slab pages are accidentally mapped into the user space. This is because slab reuses the bits in struct page to store its internal states, which renders PageAnon ineffective on slab pages. Since slab pages are not expected to be mapped into the user space, this patch adds BUG_ON(PageSlab(page)) checks to make sure that slab pages are not inadvertently mapped. Otherwise, there must be some bugs in the kernel. Reported-by: syzbot+fcf1a817ceb50935ce99@syzkaller.appspotmail.com Closes: https://lore.kernel.org/lkml/000000000000258e5e05fae79fc1@google.com/ [1] Fixes: df4e817b7108 ("mm: page table check") Cc: # 5.17 Signed-off-by: Ruihan Li Acked-by: Pasha Tatashin Link: https://lore.kernel.org/r/20230515130958.32471-5-lrh2000@pku.edu.cn Signed-off-by: Greg Kroah-Hartman --- include/linux/page-flags.h | 6 ++++++ mm/page_table_check.c | 6 ++++++ 2 files changed, 12 insertions(+) (limited to 'include/linux/page-flags.h') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 1c68d67b832f..92a2063a0a23 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -617,6 +617,12 @@ PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted) * Please note that, confusingly, "page_mapping" refers to the inode * address_space which maps the page from disk; whereas "page_mapped" * refers to user virtual address space into which the page is mapped. + * + * For slab pages, since slab reuses the bits in struct page to store its + * internal states, the page->mapping does not exist as such, nor do these + * flags below. So in order to avoid testing non-existent bits, please + * make sure that PageSlab(page) actually evaluates to false before calling + * the following functions (e.g., PageAnon). See mm/slab.h. */ #define PAGE_MAPPING_ANON 0x1 #define PAGE_MAPPING_MOVABLE 0x2 diff --git a/mm/page_table_check.c b/mm/page_table_check.c index 25d8610c0042..f2baf97d5f38 100644 --- a/mm/page_table_check.c +++ b/mm/page_table_check.c @@ -71,6 +71,8 @@ static void page_table_check_clear(struct mm_struct *mm, unsigned long addr, page = pfn_to_page(pfn); page_ext = page_ext_get(page); + + BUG_ON(PageSlab(page)); anon = PageAnon(page); for (i = 0; i < pgcnt; i++) { @@ -107,6 +109,8 @@ static void page_table_check_set(struct mm_struct *mm, unsigned long addr, page = pfn_to_page(pfn); page_ext = page_ext_get(page); + + BUG_ON(PageSlab(page)); anon = PageAnon(page); for (i = 0; i < pgcnt; i++) { @@ -133,6 +137,8 @@ void __page_table_check_zero(struct page *page, unsigned int order) struct page_ext *page_ext; unsigned long i; + BUG_ON(PageSlab(page)); + page_ext = page_ext_get(page); BUG_ON(!page_ext); for (i = 0; i < (1ul << order); i++) { -- cgit v1.2.3-70-g09d2