From f099c961f4998ad7107b1c6a7d6efb225e9a4614 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 15 Dec 2023 20:02:32 +0000 Subject: fs: remove clean_page_buffers() Patch series "Clean up the writeback paths". Most of these patches verge on the trivial, converting filesystems that just use block_write_full_page() to use mpage_writepages(). But as we saw with Christoph's earlier patchset, there can be some "interesting" gotchas, and I clearly haven't tested the majority of filesystems I've touched here. Patches 3 & 4 get rid of a lot of stack usage on architectures with larger page sizes; 1024 bytes on 64-bit systems with 64KiB pages. It starts to open the door to larger folio sizes on all architectures, but it's certainly not enough yet. Patch 14 is kind of trivial, but it's nice to get that simplification in. This patch (of 14): This function has been unused since the removal of bdev_write_page(). Link: https://lkml.kernel.org/r/20231215200245.748418-1-willy@infradead.org Link: https://lkml.kernel.org/r/20231215200245.748418-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Jens Axboe Signed-off-by: Andrew Morton --- fs/mpage.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'fs/mpage.c') diff --git a/fs/mpage.c b/fs/mpage.c index ffb064ed9d04..63bf99856024 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -455,16 +455,6 @@ static void clean_buffers(struct page *page, unsigned first_unmapped) try_to_free_buffers(page_folio(page)); } -/* - * For situations where we want to clean all buffers attached to a page. - * We don't need to calculate how many buffers are attached to the page, - * we just need to specify a number larger than the maximum number of buffers. - */ -void clean_page_buffers(struct page *page) -{ - clean_buffers(page, ~0U); -} - static int __mpage_writepage(struct folio *folio, struct writeback_control *wbc, void *data) { -- cgit v1.2.3-70-g09d2 From e8ff8248d37718da1a678648a4485b76f64b9d29 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 15 Dec 2023 20:02:33 +0000 Subject: fs: convert clean_buffers() to take a folio The only caller already has a folio, so pass it in and use it throughout. Saves two calls to compound_head(). Link: https://lkml.kernel.org/r/20231215200245.748418-3-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Jens Axboe Signed-off-by: Andrew Morton --- fs/mpage.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'fs/mpage.c') diff --git a/fs/mpage.c b/fs/mpage.c index 63bf99856024..630f4a7c7d03 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -430,13 +430,13 @@ struct mpage_data { * We have our BIO, so we can now mark the buffers clean. Make * sure to only clean buffers which we know we'll be writing. */ -static void clean_buffers(struct page *page, unsigned first_unmapped) +static void clean_buffers(struct folio *folio, unsigned first_unmapped) { unsigned buffer_counter = 0; - struct buffer_head *bh, *head; - if (!page_has_buffers(page)) + struct buffer_head *bh, *head = folio_buffers(folio); + + if (!head) return; - head = page_buffers(page); bh = head; do { @@ -451,8 +451,8 @@ static void clean_buffers(struct page *page, unsigned first_unmapped) * read_folio would fail to serialize with the bh and it would read from * disk before we reach the platter. */ - if (buffer_heads_over_limit && PageUptodate(page)) - try_to_free_buffers(page_folio(page)); + if (buffer_heads_over_limit && folio_test_uptodate(folio)) + try_to_free_buffers(folio); } static int __mpage_writepage(struct folio *folio, struct writeback_control *wbc, @@ -615,7 +615,7 @@ alloc_new: goto alloc_new; } - clean_buffers(&folio->page, first_unmapped); + clean_buffers(folio, first_unmapped); BUG_ON(folio_test_writeback(folio)); folio_start_writeback(folio); -- cgit v1.2.3-70-g09d2 From 6ad7c607b125ce02bd0870f490d990aee8609070 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 15 Dec 2023 20:02:34 +0000 Subject: fs: reduce stack usage in __mpage_writepage Some architectures support a very large PAGE_SIZE, so instead of the 8 pointers we see with a 4kB PAGE_SIZE, we can see 128 pointers with 64kB or so many on Hexagon that it trips compiler warnings about exceeding stack frame size. All we're doing with this array is checking for block contiguity, which we can as well do by remembering the address of the first block in the page and checking this block is at the appropriate offset from that address. Link: https://lkml.kernel.org/r/20231215200245.748418-4-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Jens Axboe Signed-off-by: Andrew Morton --- fs/mpage.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'fs/mpage.c') diff --git a/fs/mpage.c b/fs/mpage.c index 630f4a7c7d03..84b02098e7a5 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -466,7 +466,7 @@ static int __mpage_writepage(struct folio *folio, struct writeback_control *wbc, const unsigned blocks_per_page = PAGE_SIZE >> blkbits; sector_t last_block; sector_t block_in_file; - sector_t blocks[MAX_BUF_PER_PAGE]; + sector_t first_block; unsigned page_block; unsigned first_unmapped = blocks_per_page; struct block_device *bdev = NULL; @@ -504,10 +504,12 @@ static int __mpage_writepage(struct folio *folio, struct writeback_control *wbc, if (!buffer_dirty(bh) || !buffer_uptodate(bh)) goto confused; if (page_block) { - if (bh->b_blocknr != blocks[page_block-1] + 1) + if (bh->b_blocknr != first_block + page_block) goto confused; + } else { + first_block = bh->b_blocknr; } - blocks[page_block++] = bh->b_blocknr; + page_block++; boundary = buffer_boundary(bh); if (boundary) { boundary_block = bh->b_blocknr; @@ -556,10 +558,12 @@ static int __mpage_writepage(struct folio *folio, struct writeback_control *wbc, boundary_bdev = map_bh.b_bdev; } if (page_block) { - if (map_bh.b_blocknr != blocks[page_block-1] + 1) + if (map_bh.b_blocknr != first_block + page_block) goto confused; + } else { + first_block = map_bh.b_blocknr; } - blocks[page_block++] = map_bh.b_blocknr; + page_block++; boundary = buffer_boundary(&map_bh); bdev = map_bh.b_bdev; if (block_in_file == last_block) @@ -591,7 +595,7 @@ page_is_mapped: /* * This page will go to BIO. Do we need to send this BIO off first? */ - if (bio && mpd->last_block_in_bio != blocks[0] - 1) + if (bio && mpd->last_block_in_bio != first_block - 1) bio = mpage_bio_submit_write(bio); alloc_new: @@ -599,7 +603,7 @@ alloc_new: bio = bio_alloc(bdev, BIO_MAX_VECS, REQ_OP_WRITE | wbc_to_write_flags(wbc), GFP_NOFS); - bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9); + bio->bi_iter.bi_sector = first_block << (blkbits - 9); wbc_init_bio(wbc, bio); } @@ -627,7 +631,7 @@ alloc_new: boundary_block, 1 << blkbits); } } else { - mpd->last_block_in_bio = blocks[blocks_per_page - 1]; + mpd->last_block_in_bio = first_block + blocks_per_page - 1; } goto out; -- cgit v1.2.3-70-g09d2 From 12ac5a65cb5612e938abdd58c2dcc9cdc80b6101 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 15 Dec 2023 20:02:35 +0000 Subject: fs: reduce stack usage in do_mpage_readpage Some architectures support a very large PAGE_SIZE, so instead of the 8 pointers we see with a 4kB PAGE_SIZE, we can see 128 pointers with 64kB or so many on Hexagon that it trips compiler warnings about exceeding stack frame size. All we're doing with this array is checking for block contiguity, which we can as well do by remembering the address of the first block in the page and checking this block is at the appropriate offset from that address. Link: https://lkml.kernel.org/r/20231215200245.748418-5-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Jens Axboe Signed-off-by: Andrew Morton --- fs/mpage.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'fs/mpage.c') diff --git a/fs/mpage.c b/fs/mpage.c index 84b02098e7a5..d4963f3d8051 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -166,7 +166,7 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args) sector_t block_in_file; sector_t last_block; sector_t last_block_in_file; - sector_t blocks[MAX_BUF_PER_PAGE]; + sector_t first_block; unsigned page_block; unsigned first_hole = blocks_per_page; struct block_device *bdev = NULL; @@ -205,6 +205,7 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args) unsigned map_offset = block_in_file - args->first_logical_block; unsigned last = nblocks - map_offset; + first_block = map_bh->b_blocknr + map_offset; for (relative_block = 0; ; relative_block++) { if (relative_block == last) { clear_buffer_mapped(map_bh); @@ -212,8 +213,6 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args) } if (page_block == blocks_per_page) break; - blocks[page_block] = map_bh->b_blocknr + map_offset + - relative_block; page_block++; block_in_file++; } @@ -259,7 +258,9 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args) goto confused; /* hole -> non-hole */ /* Contiguous blocks? */ - if (page_block && blocks[page_block-1] != map_bh->b_blocknr-1) + if (!page_block) + first_block = map_bh->b_blocknr; + else if (first_block + page_block != map_bh->b_blocknr) goto confused; nblocks = map_bh->b_size >> blkbits; for (relative_block = 0; ; relative_block++) { @@ -268,7 +269,6 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args) break; } else if (page_block == blocks_per_page) break; - blocks[page_block] = map_bh->b_blocknr+relative_block; page_block++; block_in_file++; } @@ -289,7 +289,7 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args) /* * This folio will go to BIO. Do we need to send this BIO off first? */ - if (args->bio && (args->last_block_in_bio != blocks[0] - 1)) + if (args->bio && (args->last_block_in_bio != first_block - 1)) args->bio = mpage_bio_submit_read(args->bio); alloc_new: @@ -298,7 +298,7 @@ alloc_new: gfp); if (args->bio == NULL) goto confused; - args->bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9); + args->bio->bi_iter.bi_sector = first_block << (blkbits - 9); } length = first_hole << blkbits; @@ -313,7 +313,7 @@ alloc_new: (first_hole != blocks_per_page)) args->bio = mpage_bio_submit_read(args->bio); else - args->last_block_in_bio = blocks[blocks_per_page - 1]; + args->last_block_in_bio = first_block + blocks_per_page - 1; out: return args->bio; -- cgit v1.2.3-70-g09d2 From 17bf23a981be9c6629198a76940c777eb5c8c521 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 15 Dec 2023 20:02:44 +0000 Subject: fs: convert block_write_full_page to block_write_full_folio Convert the function to be compatible with writepage_t so that it can be passed to write_cache_pages() by blkdev. This removes a call to compound_head(). We can also remove the function export as both callers are built-in. Link: https://lkml.kernel.org/r/20231215200245.748418-14-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Jens Axboe Signed-off-by: Andrew Morton --- block/fops.c | 21 ++++++++++++++++++--- fs/buffer.c | 16 +++++++--------- fs/ext4/page-io.c | 2 +- fs/gfs2/aops.c | 4 ++-- fs/mpage.c | 2 +- fs/ntfs/aops.c | 4 ++-- fs/ocfs2/alloc.c | 2 +- fs/ocfs2/file.c | 2 +- include/linux/buffer_head.h | 4 ++-- 9 files changed, 35 insertions(+), 22 deletions(-) (limited to 'fs/mpage.c') diff --git a/block/fops.c b/block/fops.c index 0bdad1e8d514..0cf8cf72cdfa 100644 --- a/block/fops.c +++ b/block/fops.c @@ -410,9 +410,24 @@ static int blkdev_get_block(struct inode *inode, sector_t iblock, return 0; } -static int blkdev_writepage(struct page *page, struct writeback_control *wbc) +/* + * We cannot call mpage_writepages() as it does not take the buffer lock. + * We must use block_write_full_folio() directly which holds the buffer + * lock. The buffer lock provides the synchronisation with writeback + * that filesystems rely on when they use the blockdev's mapping. + */ +static int blkdev_writepages(struct address_space *mapping, + struct writeback_control *wbc) { - return block_write_full_page(page, blkdev_get_block, wbc); + struct blk_plug plug; + int err; + + blk_start_plug(&plug); + err = write_cache_pages(mapping, wbc, block_write_full_folio, + blkdev_get_block); + blk_finish_plug(&plug); + + return err; } static int blkdev_read_folio(struct file *file, struct folio *folio) @@ -449,7 +464,7 @@ const struct address_space_operations def_blk_aops = { .invalidate_folio = block_invalidate_folio, .read_folio = blkdev_read_folio, .readahead = blkdev_readahead, - .writepage = blkdev_writepage, + .writepages = blkdev_writepages, .write_begin = blkdev_write_begin, .write_end = blkdev_write_end, .migrate_folio = buffer_migrate_folio_norefs, diff --git a/fs/buffer.c b/fs/buffer.c index 3a8c8322ed28..c838b4a31009 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -372,7 +372,7 @@ static void end_buffer_async_read_io(struct buffer_head *bh, int uptodate) } /* - * Completion handler for block_write_full_page() - pages which are unlocked + * Completion handler for block_write_full_folio() - pages which are unlocked * during I/O, and which have PageWriteback cleared upon I/O completion. */ void end_buffer_async_write(struct buffer_head *bh, int uptodate) @@ -1771,18 +1771,18 @@ static struct buffer_head *folio_create_buffers(struct folio *folio, */ /* - * While block_write_full_page is writing back the dirty buffers under + * While block_write_full_folio is writing back the dirty buffers under * the page lock, whoever dirtied the buffers may decide to clean them * again at any time. We handle that by only looking at the buffer * state inside lock_buffer(). * - * If block_write_full_page() is called for regular writeback + * If block_write_full_folio() is called for regular writeback * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a * locked buffer. This only can happen if someone has written the buffer * directly, with submit_bh(). At the address_space level PageWriteback * prevents this contention from occurring. * - * If block_write_full_page() is called with wbc->sync_mode == + * If block_write_full_folio() is called with wbc->sync_mode == * WB_SYNC_ALL, the writes are posted using REQ_SYNC; this * causes the writes to be flagged as synchronous writes. */ @@ -1829,7 +1829,7 @@ int __block_write_full_folio(struct inode *inode, struct folio *folio, * truncate in progress. */ /* - * The buffer was zeroed by block_write_full_page() + * The buffer was zeroed by block_write_full_folio() */ clear_buffer_dirty(bh); set_buffer_uptodate(bh); @@ -2696,10 +2696,9 @@ EXPORT_SYMBOL(block_truncate_page); /* * The generic ->writepage function for buffer-backed address_spaces */ -int block_write_full_page(struct page *page, get_block_t *get_block, - struct writeback_control *wbc) +int block_write_full_folio(struct folio *folio, struct writeback_control *wbc, + void *get_block) { - struct folio *folio = page_folio(page); struct inode * const inode = folio->mapping->host; loff_t i_size = i_size_read(inode); @@ -2726,7 +2725,6 @@ int block_write_full_page(struct page *page, get_block_t *get_block, return __block_write_full_folio(inode, folio, get_block, wbc, end_buffer_async_write); } -EXPORT_SYMBOL(block_write_full_page); sector_t generic_block_bmap(struct address_space *mapping, sector_t block, get_block_t *get_block) diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index dfdd7e5cf038..312bc6813357 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -444,7 +444,7 @@ int ext4_bio_write_folio(struct ext4_io_submit *io, struct folio *folio, folio_clear_error(folio); /* - * Comments copied from block_write_full_page: + * Comments copied from block_write_full_folio: * * The folio straddles i_size. It must be zeroed out on each and every * writepage invocation because it may be mmapped. "A file is mapped diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 5cffb079b87c..f986cd032b76 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -82,11 +82,11 @@ static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock, } /** - * gfs2_write_jdata_folio - gfs2 jdata-specific version of block_write_full_page + * gfs2_write_jdata_folio - gfs2 jdata-specific version of block_write_full_folio * @folio: The folio to write * @wbc: The writeback control * - * This is the same as calling block_write_full_page, but it also + * This is the same as calling block_write_full_folio, but it also * writes pages outside of i_size */ static int gfs2_write_jdata_folio(struct folio *folio, diff --git a/fs/mpage.c b/fs/mpage.c index d4963f3d8051..738882e0766d 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -642,7 +642,7 @@ confused: /* * The caller has a ref on the inode, so *mapping is stable */ - ret = block_write_full_page(&folio->page, mpd->get_block, wbc); + ret = block_write_full_folio(folio, wbc, mpd->get_block); mapping_set_error(mapping, ret); out: mpd->bio = bio; diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index 70479ce915e8..6c414957e2c2 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -1304,7 +1304,7 @@ done: * page cleaned. The VM has already locked the page and marked it clean. * * For non-resident attributes, ntfs_writepage() writes the @page by calling - * the ntfs version of the generic block_write_full_page() function, + * the ntfs version of the generic block_write_full_folio() function, * ntfs_write_block(), which in turn if necessary creates and writes the * buffers associated with the page asynchronously. * @@ -1314,7 +1314,7 @@ done: * vfs inode dirty code path for the inode the mft record belongs to or via the * vm page dirty code path for the page the mft record is in. * - * Based on ntfs_read_folio() and fs/buffer.c::block_write_full_page(). + * Based on ntfs_read_folio() and fs/buffer.c::block_write_full_folio(). * * Return 0 on success and -errno on error. */ diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 91b32b2377ac..ea9127ba3208 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -6934,7 +6934,7 @@ static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end, * nonzero data on subsequent file extends. * * We need to call this before i_size is updated on the inode because - * otherwise block_write_full_page() will skip writeout of pages past + * otherwise block_write_full_folio() will skip writeout of pages past * i_size. */ int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle, diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 94e2a1244442..8b6d15010703 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -818,7 +818,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, /* * fs-writeback will release the dirty pages without page lock * whose offset are over inode size, the release happens at - * block_write_full_page(). + * block_write_full_folio(). */ i_size_write(inode, abs_to); inode->i_blocks = ocfs2_inode_sector_count(inode); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 94f6161eb45e..396b2adf24bf 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -252,8 +252,8 @@ void __bh_read_batch(int nr, struct buffer_head *bhs[], * address_spaces. */ void block_invalidate_folio(struct folio *folio, size_t offset, size_t length); -int block_write_full_page(struct page *page, get_block_t *get_block, - struct writeback_control *wbc); +int block_write_full_folio(struct folio *folio, struct writeback_control *wbc, + void *get_block); int __block_write_full_folio(struct inode *inode, struct folio *folio, get_block_t *get_block, struct writeback_control *wbc, bh_end_io_t *handler); -- cgit v1.2.3-70-g09d2