diff options
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- | fs/ext4/inode.c | 810 |
1 files changed, 302 insertions, 508 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index bf0b7dea4900..8dbd352e3986 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -136,7 +136,6 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode, new_size); } -static int __ext4_journalled_writepage(struct page *page, unsigned int len); static int ext4_meta_trans_blocks(struct inode *inode, int lblocks, int pextents); @@ -180,33 +179,6 @@ void ext4_evict_inode(struct inode *inode) if (EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL) ext4_evict_ea_inode(inode); if (inode->i_nlink) { - /* - * When journalling data dirty buffers are tracked only in the - * journal. So although mm thinks everything is clean and - * ready for reaping the inode might still have some pages to - * write in the running transaction or waiting to be - * checkpointed. Thus calling jbd2_journal_invalidate_folio() - * (via truncate_inode_pages()) to discard these buffers can - * cause data loss. Also even if we did not discard these - * buffers, we would have no way to find them after the inode - * is reaped and thus user could see stale data if he tries to - * read them before the transaction is checkpointed. So be - * careful and force everything to disk here... We use - * ei->i_datasync_tid to store the newest transaction - * containing inode's data. - * - * Note that directories do not have this problem because they - * don't use page cache. 
- */ - if (inode->i_ino != EXT4_JOURNAL_INO && - ext4_should_journal_data(inode) && - S_ISREG(inode->i_mode) && inode->i_data.nrpages) { - journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; - tid_t commit_tid = EXT4_I(inode)->i_datasync_tid; - - jbd2_complete_transaction(journal, commit_tid); - filemap_write_and_wait(&inode->i_data); - } truncate_inode_pages_final(&inode->i_data); goto no_delete; @@ -1005,29 +977,17 @@ int ext4_walk_page_buffers(handle_t *handle, struct inode *inode, } /* - * To preserve ordering, it is essential that the hole instantiation and - * the data write be encapsulated in a single transaction. We cannot - * close off a transaction and start a new one between the ext4_get_block() - * and the commit_write(). So doing the jbd2_journal_start at the start of - * prepare_write() is the right place. - * - * Also, this function can nest inside ext4_writepage(). In that case, we - * *know* that ext4_writepage() has generated enough buffer credits to do the - * whole page. So we won't block on the journal in that case, which is good, - * because the caller may be PF_MEMALLOC. - * - * By accident, ext4 can be reentered when a transaction is open via - * quota file writes. If we were to commit the transaction while thus - * reentered, there can be a deadlock - we would be holding a quota - * lock, and the commit would never complete if another thread had a - * transaction open and was blocking on the quota lock - a ranking - * violation. - * - * So what we do is to rely on the fact that jbd2_journal_stop/journal_start - * will _not_ run commit under these circumstances because handle->h_ref - * is elevated. We'll still have enough credits for the tiny quotafile - * write. + * Helper for handling dirtying of journalled data. We also mark the folio as + * dirty so that writeback code knows about this page (and inode) contains + * dirty data. ext4_writepages() then commits appropriate transaction to + * make data stable. 
*/ +static int ext4_dirty_journalled_data(handle_t *handle, struct buffer_head *bh) +{ + folio_mark_dirty(bh->b_folio); + return ext4_handle_dirty_metadata(handle, NULL, bh); +} + int do_journal_get_write_access(handle_t *handle, struct inode *inode, struct buffer_head *bh) { @@ -1050,17 +1010,17 @@ int do_journal_get_write_access(handle_t *handle, struct inode *inode, ret = ext4_journal_get_write_access(handle, inode->i_sb, bh, EXT4_JTR_NONE); if (!ret && dirty) - ret = ext4_handle_dirty_metadata(handle, NULL, bh); + ret = ext4_dirty_journalled_data(handle, bh); return ret; } #ifdef CONFIG_FS_ENCRYPTION -static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, +static int ext4_block_write_begin(struct folio *folio, loff_t pos, unsigned len, get_block_t *get_block) { unsigned from = pos & (PAGE_SIZE - 1); unsigned to = from + len; - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; unsigned block_start, block_end; sector_t block; int err = 0; @@ -1070,22 +1030,24 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, int nr_wait = 0; int i; - BUG_ON(!PageLocked(page)); + BUG_ON(!folio_test_locked(folio)); BUG_ON(from > PAGE_SIZE); BUG_ON(to > PAGE_SIZE); BUG_ON(from > to); - if (!page_has_buffers(page)) - create_empty_buffers(page, blocksize, 0); - head = page_buffers(page); + head = folio_buffers(folio); + if (!head) { + create_empty_buffers(&folio->page, blocksize, 0); + head = folio_buffers(folio); + } bbits = ilog2(blocksize); - block = (sector_t)page->index << (PAGE_SHIFT - bbits); + block = (sector_t)folio->index << (PAGE_SHIFT - bbits); for (bh = head, block_start = 0; bh != head || !block_start; block++, block_start = block_end, bh = bh->b_this_page) { block_end = block_start + blocksize; if (block_end <= from || block_start >= to) { - if (PageUptodate(page)) { + if (folio_test_uptodate(folio)) { set_buffer_uptodate(bh); } continue; @@ -1098,19 +1060,20 @@ static int 
ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, if (err) break; if (buffer_new(bh)) { - if (PageUptodate(page)) { + if (folio_test_uptodate(folio)) { clear_buffer_new(bh); set_buffer_uptodate(bh); mark_buffer_dirty(bh); continue; } if (block_end > to || block_start < from) - zero_user_segments(page, to, block_end, - block_start, from); + folio_zero_segments(folio, to, + block_end, + block_start, from); continue; } } - if (PageUptodate(page)) { + if (folio_test_uptodate(folio)) { set_buffer_uptodate(bh); continue; } @@ -1130,14 +1093,13 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, err = -EIO; } if (unlikely(err)) { - page_zero_new_buffers(page, from, to); + page_zero_new_buffers(&folio->page, from, to); } else if (fscrypt_inode_uses_fs_layer_crypto(inode)) { for (i = 0; i < nr_wait; i++) { int err2; - err2 = fscrypt_decrypt_pagecache_blocks(page_folio(page), - blocksize, - bh_offset(wait[i])); + err2 = fscrypt_decrypt_pagecache_blocks(folio, + blocksize, bh_offset(wait[i])); if (err2) { clear_buffer_uptodate(wait[i]); err = err2; @@ -1149,6 +1111,13 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, } #endif +/* + * To preserve ordering, it is essential that the hole instantiation and + * the data write be encapsulated in a single transaction. We cannot + * close off a transaction and start a new one between the ext4_get_block() + * and the ext4_write_end(). So doing the jbd2_journal_start at the start of + * ext4_write_begin() is the right place. 
+ */ static int ext4_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, struct page **pagep, void **fsdata) @@ -1157,7 +1126,7 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, int ret, needed_blocks; handle_t *handle; int retries = 0; - struct page *page; + struct folio *folio; pgoff_t index; unsigned from, to; @@ -1184,68 +1153,68 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, } /* - * grab_cache_page_write_begin() can take a long time if the - * system is thrashing due to memory pressure, or if the page + * __filemap_get_folio() can take a long time if the + * system is thrashing due to memory pressure, or if the folio * is being written back. So grab it first before we start * the transaction handle. This also allows us to allocate - * the page (if needed) without using GFP_NOFS. + * the folio (if needed) without using GFP_NOFS. */ retry_grab: - page = grab_cache_page_write_begin(mapping, index); - if (!page) + folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN, + mapping_gfp_mask(mapping)); + if (!folio) return -ENOMEM; /* * The same as page allocation, we prealloc buffer heads before * starting the handle. 
*/ - if (!page_has_buffers(page)) - create_empty_buffers(page, inode->i_sb->s_blocksize, 0); + if (!folio_buffers(folio)) + create_empty_buffers(&folio->page, inode->i_sb->s_blocksize, 0); - unlock_page(page); + folio_unlock(folio); retry_journal: handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks); if (IS_ERR(handle)) { - put_page(page); + folio_put(folio); return PTR_ERR(handle); } - lock_page(page); - if (page->mapping != mapping) { - /* The page got truncated from under us */ - unlock_page(page); - put_page(page); + folio_lock(folio); + if (folio->mapping != mapping) { + /* The folio got truncated from under us */ + folio_unlock(folio); + folio_put(folio); ext4_journal_stop(handle); goto retry_grab; } - /* In case writeback began while the page was unlocked */ - wait_for_stable_page(page); + /* In case writeback began while the folio was unlocked */ + folio_wait_stable(folio); #ifdef CONFIG_FS_ENCRYPTION if (ext4_should_dioread_nolock(inode)) - ret = ext4_block_write_begin(page, pos, len, + ret = ext4_block_write_begin(folio, pos, len, ext4_get_block_unwritten); else - ret = ext4_block_write_begin(page, pos, len, - ext4_get_block); + ret = ext4_block_write_begin(folio, pos, len, ext4_get_block); #else if (ext4_should_dioread_nolock(inode)) - ret = __block_write_begin(page, pos, len, + ret = __block_write_begin(&folio->page, pos, len, ext4_get_block_unwritten); else - ret = __block_write_begin(page, pos, len, ext4_get_block); + ret = __block_write_begin(&folio->page, pos, len, ext4_get_block); #endif if (!ret && ext4_should_journal_data(inode)) { ret = ext4_walk_page_buffers(handle, inode, - page_buffers(page), from, to, NULL, - do_journal_get_write_access); + folio_buffers(folio), from, to, + NULL, do_journal_get_write_access); } if (ret) { bool extended = (pos + len > inode->i_size) && !ext4_verity_in_progress(inode); - unlock_page(page); + folio_unlock(folio); /* * __block_write_begin may have instantiated a few blocks * outside i_size. 
Trim these off again. Don't need @@ -1273,10 +1242,10 @@ retry_journal: if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry_journal; - put_page(page); + folio_put(folio); return ret; } - *pagep = page; + *pagep = &folio->page; return ret; } @@ -1288,7 +1257,7 @@ static int write_end_fn(handle_t *handle, struct inode *inode, if (!buffer_mapped(bh) || buffer_freed(bh)) return 0; set_buffer_uptodate(bh); - ret = ext4_handle_dirty_metadata(handle, NULL, bh); + ret = ext4_dirty_journalled_data(handle, bh); clear_buffer_meta(bh); clear_buffer_prio(bh); return ret; @@ -1306,6 +1275,7 @@ static int ext4_write_end(struct file *file, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) { + struct folio *folio = page_folio(page); handle_t *handle = ext4_journal_current_handle(); struct inode *inode = mapping->host; loff_t old_size = inode->i_size; @@ -1321,7 +1291,7 @@ static int ext4_write_end(struct file *file, copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); /* - * it's important to update i_size while still holding page lock: + * it's important to update i_size while still holding folio lock: * page writeout could otherwise come in and zero beyond i_size. * * If FS_IOC_ENABLE_VERITY is running on this inode, then Merkle tree @@ -1329,15 +1299,15 @@ static int ext4_write_end(struct file *file, */ if (!verity) i_size_changed = ext4_update_inode_size(inode, pos + copied); - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); if (old_size < pos && !verity) pagecache_isize_extended(inode, old_size, pos); /* - * Don't mark the inode dirty under page lock. First, it unnecessarily - * makes the holding time of page lock longer. Second, it forces lock - * ordering of page lock and transaction start for journaling + * Don't mark the inode dirty under folio lock. First, it unnecessarily + * makes the holding time of folio lock longer. 
Second, it forces lock + * ordering of folio lock and transaction start for journaling * filesystems. */ if (i_size_changed) @@ -1371,28 +1341,28 @@ static int ext4_write_end(struct file *file, /* * This is a private version of page_zero_new_buffers() which doesn't * set the buffer to be dirty, since in data=journalled mode we need - * to call ext4_handle_dirty_metadata() instead. + * to call ext4_dirty_journalled_data() instead. */ static void ext4_journalled_zero_new_buffers(handle_t *handle, struct inode *inode, - struct page *page, + struct folio *folio, unsigned from, unsigned to) { unsigned int block_start = 0, block_end; struct buffer_head *head, *bh; - bh = head = page_buffers(page); + bh = head = folio_buffers(folio); do { block_end = block_start + bh->b_size; if (buffer_new(bh)) { if (block_end > from && block_start < to) { - if (!PageUptodate(page)) { + if (!folio_test_uptodate(folio)) { unsigned start, size; start = max(from, block_start); size = min(to, block_end) - start; - zero_user(page, start, size); + folio_zero_range(folio, start, size); write_end_fn(handle, inode, bh); } clear_buffer_new(bh); @@ -1408,6 +1378,7 @@ static int ext4_journalled_write_end(struct file *file, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) { + struct folio *folio = page_folio(page); handle_t *handle = ext4_journal_current_handle(); struct inode *inode = mapping->host; loff_t old_size = inode->i_size; @@ -1426,25 +1397,26 @@ static int ext4_journalled_write_end(struct file *file, if (ext4_has_inline_data(inode)) return ext4_write_inline_data_end(inode, pos, len, copied, page); - if (unlikely(copied < len) && !PageUptodate(page)) { + if (unlikely(copied < len) && !folio_test_uptodate(folio)) { copied = 0; - ext4_journalled_zero_new_buffers(handle, inode, page, from, to); + ext4_journalled_zero_new_buffers(handle, inode, folio, + from, to); } else { if (unlikely(copied < len)) - ext4_journalled_zero_new_buffers(handle, inode, page, + 
ext4_journalled_zero_new_buffers(handle, inode, folio, from + copied, to); - ret = ext4_walk_page_buffers(handle, inode, page_buffers(page), + ret = ext4_walk_page_buffers(handle, inode, + folio_buffers(folio), from, from + copied, &partial, write_end_fn); if (!partial) - SetPageUptodate(page); + folio_mark_uptodate(folio); } if (!verity) size_changed = ext4_update_inode_size(inode, pos + copied); - ext4_set_inode_state(inode, EXT4_STATE_JDATA); EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid; - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); if (old_size < pos && !verity) pagecache_isize_extended(inode, old_size, pos); @@ -1568,6 +1540,7 @@ struct mpage_da_data { struct ext4_io_submit io_submit; /* IO submission data */ unsigned int do_map:1; unsigned int scanned_until_end:1; + unsigned int journalled_more_data:1; }; static void mpage_release_unused_pages(struct mpage_da_data *mpd, @@ -1649,12 +1622,6 @@ static void ext4_print_free_blocks(struct inode *inode) return; } -static int ext4_bh_delay_or_unwritten(handle_t *handle, struct inode *inode, - struct buffer_head *bh) -{ - return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh); -} - /* * ext4_insert_delayed_block - adds a delayed block to the extents status * tree, incrementing the reserved cluster/block @@ -1887,249 +1854,41 @@ int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, return 0; } -static int __ext4_journalled_writepage(struct page *page, - unsigned int len) +static void mpage_folio_done(struct mpage_da_data *mpd, struct folio *folio) { - struct address_space *mapping = page->mapping; - struct inode *inode = mapping->host; - handle_t *handle = NULL; - int ret = 0, err = 0; - int inline_data = ext4_has_inline_data(inode); - struct buffer_head *inode_bh = NULL; - loff_t size; - - ClearPageChecked(page); - - if (inline_data) { - BUG_ON(page->index != 0); - BUG_ON(len > ext4_get_max_inline_size(inode)); - inode_bh = 
ext4_journalled_write_inline_data(inode, len, page); - if (inode_bh == NULL) - goto out; - } - /* - * We need to release the page lock before we start the - * journal, so grab a reference so the page won't disappear - * out from under us. - */ - get_page(page); - unlock_page(page); - - handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, - ext4_writepage_trans_blocks(inode)); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - put_page(page); - goto out_no_pagelock; - } - BUG_ON(!ext4_handle_valid(handle)); - - lock_page(page); - put_page(page); - size = i_size_read(inode); - if (page->mapping != mapping || page_offset(page) > size) { - /* The page got truncated from under us */ - ext4_journal_stop(handle); - ret = 0; - goto out; - } - - if (inline_data) { - ret = ext4_mark_inode_dirty(handle, inode); - } else { - struct buffer_head *page_bufs = page_buffers(page); - - if (page->index == size >> PAGE_SHIFT) - len = size & ~PAGE_MASK; - else - len = PAGE_SIZE; - - ret = ext4_walk_page_buffers(handle, inode, page_bufs, 0, len, - NULL, do_journal_get_write_access); - - err = ext4_walk_page_buffers(handle, inode, page_bufs, 0, len, - NULL, write_end_fn); - } - if (ret == 0) - ret = err; - err = ext4_jbd2_inode_add_write(handle, inode, page_offset(page), len); - if (ret == 0) - ret = err; - EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid; - err = ext4_journal_stop(handle); - if (!ret) - ret = err; - - ext4_set_inode_state(inode, EXT4_STATE_JDATA); -out: - unlock_page(page); -out_no_pagelock: - brelse(inode_bh); - return ret; + mpd->first_page += folio_nr_pages(folio); + folio_unlock(folio); } -/* - * Note that we don't need to start a transaction unless we're journaling data - * because we should have holes filled from ext4_page_mkwrite(). 
We even don't - * need to file the inode to the transaction's list in ordered mode because if - * we are writing back data added by write(), the inode is already there and if - * we are writing back data modified via mmap(), no one guarantees in which - * transaction the data will hit the disk. In case we are journaling data, we - * cannot start transaction directly because transaction start ranks above page - * lock so we have to do some magic. - * - * This function can get called via... - * - ext4_writepages after taking page lock (have journal handle) - * - journal_submit_inode_data_buffers (no journal handle) - * - shrink_page_list via the kswapd/direct reclaim (no journal handle) - * - grab_page_cache when doing write_begin (have journal handle) - * - * We don't do any block allocation in this function. If we have page with - * multiple blocks we need to write those buffer_heads that are mapped. This - * is important for mmaped based write. So if we do with blocksize 1K - * truncate(f, 1024); - * a = mmap(f, 0, 4096); - * a[0] = 'a'; - * truncate(f, 4096); - * we have in the page first buffer_head mapped via page_mkwrite call back - * but other buffer_heads would be unmapped but dirty (dirty done via the - * do_wp_page). So writepage should write the first block. If we modify - * the mmap area beyond 1024 we will again get a page_fault and the - * page_mkwrite callback will do the block allocation and mark the - * buffer_heads mapped. - * - * We redirty the page if we have any buffer_heads that is either delay or - * unwritten in the page. - * - * We can get recursively called as show below. - * - * ext4_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() -> - * ext4_writepage() - * - * But since we don't do any block allocation we should not deadlock. - * Page also have the dirty flag cleared so we don't get recurive page_lock. 
- */ -static int ext4_writepage(struct page *page, - struct writeback_control *wbc) +static int mpage_submit_folio(struct mpage_da_data *mpd, struct folio *folio) { - struct folio *folio = page_folio(page); - int ret = 0; - loff_t size; - unsigned int len; - struct buffer_head *page_bufs = NULL; - struct inode *inode = page->mapping->host; - struct ext4_io_submit io_submit; - - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) { - folio_invalidate(folio, 0, folio_size(folio)); - folio_unlock(folio); - return -EIO; - } - - trace_ext4_writepage(page); - size = i_size_read(inode); - if (page->index == size >> PAGE_SHIFT && - !ext4_verity_in_progress(inode)) - len = size & ~PAGE_MASK; - else - len = PAGE_SIZE; - - /* Should never happen but for bugs in other kernel subsystems */ - if (!page_has_buffers(page)) { - ext4_warning_inode(inode, - "page %lu does not have buffers attached", page->index); - ClearPageDirty(page); - unlock_page(page); - return 0; - } - - page_bufs = page_buffers(page); - /* - * We cannot do block allocation or other extent handling in this - * function. If there are buffers needing that, we have to redirty - * the page. But we may reach here when we do a journal commit via - * journal_submit_inode_data_buffers() and in that case we must write - * allocated buffers to achieve data=ordered mode guarantees. - * - * Also, if there is only one buffer per page (the fs block - * size == the page size), if one buffer needs block - * allocation or needs to modify the extent tree to clear the - * unwritten flag, we know that the page can't be written at - * all, so we might as well refuse the write immediately. - * Unfortunately if the block size != page size, we can't as - * easily detect this case using ext4_walk_page_buffers(), but - * for the extremely common case, this is an optimization that - * skips a useless round trip through ext4_bio_write_page(). 
- */ - if (ext4_walk_page_buffers(NULL, inode, page_bufs, 0, len, NULL, - ext4_bh_delay_or_unwritten)) { - redirty_page_for_writepage(wbc, page); - if ((current->flags & PF_MEMALLOC) || - (inode->i_sb->s_blocksize == PAGE_SIZE)) { - /* - * For memory cleaning there's no point in writing only - * some buffers. So just bail out. Warn if we came here - * from direct reclaim. - */ - WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) - == PF_MEMALLOC); - unlock_page(page); - return 0; - } - } - - if (PageChecked(page) && ext4_should_journal_data(inode)) - /* - * It's mmapped pagecache. Add buffers and journal it. There - * doesn't seem much point in redirtying the page here. - */ - return __ext4_journalled_writepage(page, len); - - ext4_io_submit_init(&io_submit, wbc); - io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS); - if (!io_submit.io_end) { - redirty_page_for_writepage(wbc, page); - unlock_page(page); - return -ENOMEM; - } - ret = ext4_bio_write_page(&io_submit, page, len); - ext4_io_submit(&io_submit); - /* Drop io_end reference we got from init */ - ext4_put_io_end_defer(io_submit.io_end); - return ret; -} - -static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page) -{ - int len; + size_t len; loff_t size; int err; - BUG_ON(page->index != mpd->first_page); - clear_page_dirty_for_io(page); + BUG_ON(folio->index != mpd->first_page); + folio_clear_dirty_for_io(folio); /* * We have to be very careful here! Nothing protects writeback path * against i_size changes and the page can be writeably mapped into * page tables. So an application can be growing i_size and writing - * data through mmap while writeback runs. clear_page_dirty_for_io() + * data through mmap while writeback runs. folio_clear_dirty_for_io() * write-protects our page in page tables and the page cannot get - * written to again until we release page lock. 
So only after - * clear_page_dirty_for_io() we are safe to sample i_size for - * ext4_bio_write_page() to zero-out tail of the written page. We rely - * on the barrier provided by TestClearPageDirty in - * clear_page_dirty_for_io() to make sure i_size is really sampled only + * written to again until we release folio lock. So only after + * folio_clear_dirty_for_io() we are safe to sample i_size for + * ext4_bio_write_folio() to zero-out tail of the written page. We rely + * on the barrier provided by folio_test_clear_dirty() in + * folio_clear_dirty_for_io() to make sure i_size is really sampled only * after page tables are updated. */ size = i_size_read(mpd->inode); - if (page->index == size >> PAGE_SHIFT && + len = folio_size(folio); + if (folio_pos(folio) + len > size && !ext4_verity_in_progress(mpd->inode)) len = size & ~PAGE_MASK; - else - len = PAGE_SIZE; - err = ext4_bio_write_page(&mpd->io_submit, page, len); + err = ext4_bio_write_folio(&mpd->io_submit, folio, len); if (!err) mpd->wbc->nr_to_write--; - mpd->first_page++; return err; } @@ -2240,9 +1999,10 @@ static int mpage_process_page_bufs(struct mpage_da_data *mpd, } while (lblk++, (bh = bh->b_this_page) != head); /* So far everything mapped? Submit the page for IO. */ if (mpd->map.m_len == 0) { - err = mpage_submit_page(mpd, head->b_page); + err = mpage_submit_folio(mpd, head->b_folio); if (err < 0) return err; + mpage_folio_done(mpd, head->b_folio); } if (lblk >= blocks) { mpd->scanned_until_end = 1; @@ -2252,21 +2012,22 @@ static int mpage_process_page_bufs(struct mpage_da_data *mpd, } /* - * mpage_process_page - update page buffers corresponding to changed extent and - * may submit fully mapped page for IO - * - * @mpd - description of extent to map, on return next extent to map - * @m_lblk - logical block mapping. - * @m_pblk - corresponding physical mapping. 
- * @map_bh - determines on return whether this page requires any further + * mpage_process_folio - update folio buffers corresponding to changed extent + * and may submit fully mapped page for IO + * @mpd: description of extent to map, on return next extent to map + * @folio: Contains these buffers. + * @m_lblk: logical block mapping. + * @m_pblk: corresponding physical mapping. + * @map_bh: determines on return whether this page requires any further * mapping or not. - * Scan given page buffers corresponding to changed extent and update buffer + * + * Scan given folio buffers corresponding to changed extent and update buffer * state according to new extent state. * We map delalloc buffers to their physical location, clear unwritten bits. - * If the given page is not fully mapped, we update @map to the next extent in - * the given page that needs mapping & return @map_bh as true. + * If the given folio is not fully mapped, we update @mpd to the next extent in + * the given folio that needs mapping & return @map_bh as true. */ -static int mpage_process_page(struct mpage_da_data *mpd, struct page *page, +static int mpage_process_folio(struct mpage_da_data *mpd, struct folio *folio, ext4_lblk_t *m_lblk, ext4_fsblk_t *m_pblk, bool *map_bh) { @@ -2279,14 +2040,14 @@ static int mpage_process_page(struct mpage_da_data *mpd, struct page *page, ssize_t io_end_size = 0; struct ext4_io_end_vec *io_end_vec = ext4_last_io_end_vec(io_end); - bh = head = page_buffers(page); + bh = head = folio_buffers(folio); do { if (lblk < mpd->map.m_lblk) continue; if (lblk >= mpd->map.m_lblk + mpd->map.m_len) { /* * Buffer after end of mapped extent. - * Find next buffer in the page to map. + * Find next buffer in the folio to map. 
*/ mpd->map.m_len = 0; mpd->map.m_flags = 0; @@ -2359,9 +2120,9 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd) if (nr == 0) break; for (i = 0; i < nr; i++) { - struct page *page = &fbatch.folios[i]->page; + struct folio *folio = fbatch.folios[i]; - err = mpage_process_page(mpd, page, &lblk, &pblock, + err = mpage_process_folio(mpd, folio, &lblk, &pblock, &map_bh); /* * If map_bh is true, means page may require further bh @@ -2371,9 +2132,10 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd) if (err < 0 || map_bh) goto out; /* Page fully mapped - let IO run! */ - err = mpage_submit_page(mpd, page); + err = mpage_submit_folio(mpd, folio); if (err < 0) goto out; + mpage_folio_done(mpd, folio); } folio_batch_release(&fbatch); } @@ -2559,17 +2321,45 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode) MAX_WRITEPAGES_EXTENT_LEN + bpp - 1, bpp); } -/* Return true if the page needs to be written as part of transaction commit */ -static bool ext4_page_nomap_can_writeout(struct page *page) +static int ext4_journal_page_buffers(handle_t *handle, struct page *page, + int len) { - struct buffer_head *bh, *head; + struct buffer_head *page_bufs = page_buffers(page); + struct inode *inode = page->mapping->host; + int ret, err; - bh = head = page_buffers(page); - do { - if (buffer_dirty(bh) && buffer_mapped(bh) && !buffer_delay(bh)) - return true; - } while ((bh = bh->b_this_page) != head); - return false; + ret = ext4_walk_page_buffers(handle, inode, page_bufs, 0, len, + NULL, do_journal_get_write_access); + err = ext4_walk_page_buffers(handle, inode, page_bufs, 0, len, + NULL, write_end_fn); + if (ret == 0) + ret = err; + err = ext4_jbd2_inode_add_write(handle, inode, page_offset(page), len); + if (ret == 0) + ret = err; + EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid; + + return ret; +} + +static int mpage_journal_page_buffers(handle_t *handle, + struct mpage_da_data *mpd, + struct page *page) +{ + struct 
inode *inode = mpd->inode; + loff_t size = i_size_read(inode); + int len; + + ClearPageChecked(page); + mpd->wbc->nr_to_write--; + + if (page->index == size >> PAGE_SHIFT && + !ext4_verity_in_progress(inode)) + len = size & ~PAGE_MASK; + else + len = PAGE_SIZE; + + return ext4_journal_page_buffers(handle, page, len); } /* @@ -2597,7 +2387,6 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) struct address_space *mapping = mpd->inode->i_mapping; struct folio_batch fbatch; unsigned int nr_folios; - long left = mpd->wbc->nr_to_write; pgoff_t index = mpd->first_page; pgoff_t end = mpd->last_page; xa_mark_t tag; @@ -2605,14 +2394,23 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) int blkbits = mpd->inode->i_blkbits; ext4_lblk_t lblk; struct buffer_head *head; + handle_t *handle = NULL; + int bpp = ext4_journal_blocks_per_page(mpd->inode); if (mpd->wbc->sync_mode == WB_SYNC_ALL || mpd->wbc->tagged_writepages) tag = PAGECACHE_TAG_TOWRITE; else tag = PAGECACHE_TAG_DIRTY; - folio_batch_init(&fbatch); + mpd->map.m_len = 0; mpd->next_page = index; + if (ext4_should_journal_data(mpd->inode)) { + handle = ext4_journal_start(mpd->inode, EXT4_HT_WRITE_PAGE, + bpp); + if (IS_ERR(handle)) + return PTR_ERR(handle); + } + folio_batch_init(&fbatch); while (index <= end) { nr_folios = filemap_get_folios_tag(mapping, &index, end, tag, &fbatch); @@ -2630,13 +2428,22 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) * newly appeared dirty pages, but have not synced all * of the old dirty pages. */ - if (mpd->wbc->sync_mode == WB_SYNC_NONE && left <= 0) + if (mpd->wbc->sync_mode == WB_SYNC_NONE && + mpd->wbc->nr_to_write <= + mpd->map.m_len >> (PAGE_SHIFT - blkbits)) goto out; /* If we can't merge this page, we are done. 
*/ if (mpd->map.m_len > 0 && mpd->next_page != folio->index) goto out; + if (handle) { + err = ext4_journal_ensure_credits(handle, bpp, + 0); + if (err < 0) + goto out; + } + folio_lock(folio); /* * If the page is no longer dirty, or its mapping no @@ -2676,18 +2483,28 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) mpd->first_page = folio->index; mpd->next_page = folio->index + folio_nr_pages(folio); /* - * Writeout for transaction commit where we cannot - * modify metadata is simple. Just submit the page. + * Writeout when we cannot modify metadata is simple. + * Just submit the page. For data=journal mode we + * first handle writeout of the page for checkpoint and + * only after that handle delayed page dirtying. This + * makes sure current data is checkpointed to the final + * location before possibly journalling it again which + * is desirable when the page is frequently dirtied + * through a pin. */ if (!mpd->can_map) { - if (ext4_page_nomap_can_writeout(&folio->page)) { - err = mpage_submit_page(mpd, &folio->page); + err = mpage_submit_folio(mpd, folio); + if (err < 0) + goto out; + /* Pending dirtying of journalled data? 
*/ + if (folio_test_checked(folio)) { + err = mpage_journal_page_buffers(handle, + mpd, &folio->page); if (err < 0) goto out; - } else { - folio_unlock(folio); - mpd->first_page += folio_nr_pages(folio); + mpd->journalled_more_data = 1; } + mpage_folio_done(mpd, folio); } else { /* Add all dirty buffers to mpd */ lblk = ((ext4_lblk_t)folio->index) << @@ -2699,24 +2516,21 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) goto out; err = 0; } - left -= folio_nr_pages(folio); } folio_batch_release(&fbatch); cond_resched(); } mpd->scanned_until_end = 1; + if (handle) + ext4_journal_stop(handle); return 0; out: folio_batch_release(&fbatch); + if (handle) + ext4_journal_stop(handle); return err; } -static int ext4_writepage_cb(struct folio *folio, struct writeback_control *wbc, - void *data) -{ - return ext4_writepage(&folio->page, wbc); -} - static int ext4_do_writepages(struct mpage_da_data *mpd) { struct writeback_control *wbc = mpd->wbc; @@ -2742,13 +2556,6 @@ static int ext4_do_writepages(struct mpage_da_data *mpd) if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) goto out_writepages; - if (ext4_should_journal_data(inode)) { - blk_start_plug(&plug); - ret = write_cache_pages(mapping, wbc, ext4_writepage_cb, NULL); - blk_finish_plug(&plug); - goto out_writepages; - } - /* * If the filesystem has aborted, it is read-only, so return * right away instead of dumping stack traces later on that @@ -2783,6 +2590,26 @@ static int ext4_do_writepages(struct mpage_da_data *mpd) ext4_journal_stop(handle); } + /* + * data=journal mode does not do delalloc so we just need to writeout / + * journal already mapped buffers. On the other hand we need to commit + * transaction to make data stable. We expect all the data to be + * already in the journal (the only exception are DMA pinned pages + * dirtied behind our back) so we commit transaction here and run the + * writeback loop to checkpoint them. 
The checkpointing is not actually + * necessary to make data persistent *but* quite a few places (extent + * shifting operations, fsverity, ...) depend on being able to drop + * pagecache pages after calling filemap_write_and_wait() and for that + * checkpointing needs to happen. + */ + if (ext4_should_journal_data(inode)) { + mpd->can_map = 0; + if (wbc->sync_mode == WB_SYNC_ALL) + ext4_fc_commit(sbi->s_journal, + EXT4_I(inode)->i_datasync_tid); + } + mpd->journalled_more_data = 0; + if (ext4_should_dioread_nolock(inode)) { /* * We may need to convert up to one extent per block in @@ -2962,6 +2789,13 @@ static int ext4_writepages(struct address_space *mapping, percpu_down_read(&EXT4_SB(sb)->s_writepages_rwsem); ret = ext4_do_writepages(&mpd); + /* + * For data=journal writeback we could have come across pages marked + * for delayed dirtying (PageChecked) which were just added to the + * running transaction. Try once more to get them to stable storage. + */ + if (!ret && mpd.journalled_more_data) + ret = ext4_do_writepages(&mpd); percpu_up_read(&EXT4_SB(sb)->s_writepages_rwsem); return ret; @@ -3043,7 +2877,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, struct page **pagep, void **fsdata) { int ret, retries = 0; - struct page *page; + struct folio *folio; pgoff_t index; struct inode *inode = mapping->host; @@ -3070,22 +2904,22 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, } retry: - page = grab_cache_page_write_begin(mapping, index); - if (!page) + folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN, + mapping_gfp_mask(mapping)); + if (!folio) return -ENOMEM; - /* In case writeback began while the page was unlocked */ - wait_for_stable_page(page); + /* In case writeback began while the folio was unlocked */ + folio_wait_stable(folio); #ifdef CONFIG_FS_ENCRYPTION - ret = ext4_block_write_begin(page, pos, len, - ext4_da_get_block_prep); + ret = ext4_block_write_begin(folio, pos, len, 
ext4_da_get_block_prep); #else - ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep); + ret = __block_write_begin(&folio->page, pos, len, ext4_da_get_block_prep); #endif if (ret < 0) { - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); /* * block_write_begin may have instantiated a few blocks * outside i_size. Trim these off again. Don't need @@ -3100,7 +2934,7 @@ retry: return ret; } - *pagep = page; + *pagep = &folio->page; return ret; } @@ -3159,9 +2993,8 @@ static int ext4_da_write_end(struct file *file, * i_disksize since writeback will push i_disksize upto i_size * eventually. If the end of the current write is > i_size and * inside an allocated block (ext4_da_should_update_i_disksize() - * check), we need to update i_disksize here as neither - * ext4_writepage() nor certain ext4_writepages() paths not - * allocating blocks update i_disksize. + * check), we need to update i_disksize here as certain + * ext4_writepages() paths not allocating blocks do not update i_disksize. * * Note that we defer inode dirtying to generic_write_end() / * ext4_da_write_inline_data_end().
@@ -3235,9 +3068,7 @@ int ext4_alloc_da_blocks(struct inode *inode) static sector_t ext4_bmap(struct address_space *mapping, sector_t block) { struct inode *inode = mapping->host; - journal_t *journal; sector_t ret = 0; - int err; inode_lock_shared(inode); /* @@ -3247,45 +3078,16 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) goto out; if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) && - test_opt(inode->i_sb, DELALLOC)) { + (test_opt(inode->i_sb, DELALLOC) || + ext4_should_journal_data(inode))) { /* - * With delalloc we want to sync the file - * so that we can make sure we allocate - * blocks for file + * With delalloc or journalled data we want to sync the file so + * that we can make sure we allocate blocks for file and data + * is in place for the user to see it */ filemap_write_and_wait(mapping); } - if (EXT4_JOURNAL(inode) && - ext4_test_inode_state(inode, EXT4_STATE_JDATA)) { - /* - * This is a REALLY heavyweight approach, but the use of - * bmap on dirty files is expected to be extremely rare: - * only if we run lilo or swapon on a freshly made file - * do we expect this to happen. - * - * (bmap requires CAP_SYS_RAWIO so this does not - * represent an unprivileged user DOS attack --- we'd be - * in trouble if mortal users could trigger this path at - * will.) - * - * NB. EXT4_STATE_JDATA is not set on files other than - * regular files. If somebody wants to bmap a directory - * or symlink and gets confused because the buffer - * hasn't yet been flushed to disk, they deserve - * everything they get. 
- */ - - ext4_clear_inode_state(inode, EXT4_STATE_JDATA); - journal = EXT4_JOURNAL(inode); - jbd2_journal_lock_updates(journal); - err = jbd2_journal_flush(journal, 0); - jbd2_journal_unlock_updates(journal); - - if (err) - goto out; - } - ret = iomap_bmap(mapping, block, &ext4_iomap_ops); out: @@ -3295,17 +3097,16 @@ out: static int ext4_read_folio(struct file *file, struct folio *folio) { - struct page *page = &folio->page; int ret = -EAGAIN; - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; - trace_ext4_readpage(page); + trace_ext4_readpage(&folio->page); if (ext4_has_inline_data(inode)) - ret = ext4_readpage_inline(inode, page); + ret = ext4_readpage_inline(inode, folio); if (ret == -EAGAIN) - return ext4_mpage_readpages(inode, NULL, page); + return ext4_mpage_readpages(inode, NULL, folio); return ret; } @@ -3686,24 +3487,26 @@ const struct iomap_ops ext4_iomap_report_ops = { }; /* - * Whenever the folio is being dirtied, corresponding buffers should already - * be attached to the transaction (we take care of this in ext4_page_mkwrite() - * and ext4_write_begin()). However we cannot move buffers to dirty transaction - * lists here because ->dirty_folio is called under VFS locks and the folio - * is not necessarily locked. - * - * We cannot just dirty the folio and leave attached buffers clean, because the - * buffers' dirty state is "definitive". We cannot just set the buffers dirty - * or jbddirty because all the journalling code will explode. - * - * So what we do is to mark the folio "pending dirty" and next time writepage - * is called, propagate that into the buffers appropriately. + * For data=journal mode, folio should be marked dirty only when it was + * writeably mapped. When that happens, it was already attached to the + * transaction and marked as jbddirty (we take care of this in + * ext4_page_mkwrite()). 
On transaction commit, we writeprotect page mappings + * so we should have nothing to do here, except for the case when someone + * had the page pinned and dirtied the page through this pin (e.g. by doing + * direct IO to it). In that case we'd need to attach buffers here to the + * transaction but we cannot due to lock ordering. We cannot just dirty the + * folio and leave attached buffers clean, because the buffers' dirty state is + * "definitive". We cannot just set the buffers dirty or jbddirty because all + * the journalling code will explode. So what we do is to mark the folio + * "pending dirty" and next time ext4_writepages() is called, attach buffers + * to the transaction appropriately. */ static bool ext4_journalled_dirty_folio(struct address_space *mapping, struct folio *folio) { WARN_ON_ONCE(!folio_buffers(folio)); - folio_set_checked(folio); + if (folio_maybe_dma_pinned(folio)) + folio_set_checked(folio); return filemap_dirty_folio(mapping, folio); } @@ -3809,23 +3612,26 @@ static int __ext4_block_zero_page_range(handle_t *handle, ext4_lblk_t iblock; struct inode *inode = mapping->host; struct buffer_head *bh; - struct page *page; + struct folio *folio; int err = 0; - page = find_or_create_page(mapping, from >> PAGE_SHIFT, - mapping_gfp_constraint(mapping, ~__GFP_FS)); - if (!page) + folio = __filemap_get_folio(mapping, from >> PAGE_SHIFT, + FGP_LOCK | FGP_ACCESSED | FGP_CREAT, + mapping_gfp_constraint(mapping, ~__GFP_FS)); + if (!folio) return -ENOMEM; blocksize = inode->i_sb->s_blocksize; iblock = index << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits); - if (!page_has_buffers(page)) - create_empty_buffers(page, blocksize, 0); + bh = folio_buffers(folio); + if (!bh) { + create_empty_buffers(&folio->page, blocksize, 0); + bh = folio_buffers(folio); + } /* Find the buffer that contains "offset" */ - bh = page_buffers(page); pos = blocksize; while (offset >= pos) { bh = bh->b_this_page; @@ -3847,7 +3653,7 @@ static int 
__ext4_block_zero_page_range(handle_t *handle, } /* Ok, it's mapped. Make sure it's up-to-date */ - if (PageUptodate(page)) + if (folio_test_uptodate(folio)) set_buffer_uptodate(bh); if (!buffer_uptodate(bh)) { @@ -3857,7 +3663,7 @@ static int __ext4_block_zero_page_range(handle_t *handle, if (fscrypt_inode_uses_fs_layer_crypto(inode)) { /* We expect the key to be set. */ BUG_ON(!fscrypt_has_encryption_key(inode)); - err = fscrypt_decrypt_pagecache_blocks(page_folio(page), + err = fscrypt_decrypt_pagecache_blocks(folio, blocksize, bh_offset(bh)); if (err) { @@ -3873,11 +3679,11 @@ static int __ext4_block_zero_page_range(handle_t *handle, if (err) goto unlock; } - zero_user(page, offset, length); + folio_zero_range(folio, offset, length); BUFFER_TRACE(bh, "zeroed end of block"); if (ext4_should_journal_data(inode)) { - err = ext4_handle_dirty_metadata(handle, inode, bh); + err = ext4_dirty_journalled_data(handle, bh); } else { err = 0; mark_buffer_dirty(bh); @@ -3887,8 +3693,8 @@ static int __ext4_block_zero_page_range(handle_t *handle, } unlock: - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); return err; } @@ -5385,7 +5191,7 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode) * If the folio is fully truncated, we don't need to wait for any commit * (and we even should not as __ext4_journalled_invalidate_folio() may * strip all buffers from the folio but keep the folio dirty which can then - * confuse e.g. concurrent ext4_writepage() seeing dirty folio without + * confuse e.g. concurrent ext4_writepages() seeing dirty folio without * buffers). Also we don't need to wait for any commit if all buffers in * the folio remain valid. This is most beneficial for the common case of * blocksize == PAGESIZE. 
@@ -6212,7 +6018,7 @@ static int ext4_bh_unmapped(handle_t *handle, struct inode *inode, vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; - struct page *page = vmf->page; + struct folio *folio = page_folio(vmf->page); loff_t size; unsigned long len; int err; @@ -6256,19 +6062,18 @@ vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf) goto out_ret; } - lock_page(page); + folio_lock(folio); size = i_size_read(inode); /* Page got truncated from under us? */ - if (page->mapping != mapping || page_offset(page) > size) { - unlock_page(page); + if (folio->mapping != mapping || folio_pos(folio) > size) { + folio_unlock(folio); ret = VM_FAULT_NOPAGE; goto out; } - if (page->index == size >> PAGE_SHIFT) - len = size & ~PAGE_MASK; - else - len = PAGE_SIZE; + len = folio_size(folio); + if (folio_pos(folio) + len > size) + len = size - folio_pos(folio); /* * Return if we have all the buffers mapped. This avoids the need to do * journal_start/journal_stop which can block and take a long time @@ -6276,17 +6081,17 @@ vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf) * This cannot be done for data journalling, as we have to add the * inode to the transaction's list to writeprotect pages on commit. */ - if (page_has_buffers(page)) { - if (!ext4_walk_page_buffers(NULL, inode, page_buffers(page), + if (folio_buffers(folio)) { + if (!ext4_walk_page_buffers(NULL, inode, folio_buffers(folio), 0, len, NULL, ext4_bh_unmapped)) { /* Wait so that we don't change page under IO */ - wait_for_stable_page(page); + folio_wait_stable(folio); ret = VM_FAULT_LOCKED; goto out; } } - unlock_page(page); + folio_unlock(folio); /* OK, we need to fill the hole... 
*/ if (ext4_should_dioread_nolock(inode)) get_block = ext4_get_block_unwritten; @@ -6307,36 +6112,25 @@ retry_alloc: if (!ext4_should_journal_data(inode)) { err = block_page_mkwrite(vma, vmf, get_block); } else { - lock_page(page); + folio_lock(folio); size = i_size_read(inode); /* Page got truncated from under us? */ - if (page->mapping != mapping || page_offset(page) > size) { + if (folio->mapping != mapping || folio_pos(folio) > size) { ret = VM_FAULT_NOPAGE; goto out_error; } - if (page->index == size >> PAGE_SHIFT) - len = size & ~PAGE_MASK; - else - len = PAGE_SIZE; + len = folio_size(folio); + if (folio_pos(folio) + len > size) + len = size - folio_pos(folio); - err = __block_write_begin(page, 0, len, ext4_get_block); + err = __block_write_begin(&folio->page, 0, len, ext4_get_block); if (!err) { ret = VM_FAULT_SIGBUS; - if (ext4_walk_page_buffers(handle, inode, - page_buffers(page), 0, len, NULL, - do_journal_get_write_access)) + if (ext4_journal_page_buffers(handle, &folio->page, len)) goto out_error; - if (ext4_walk_page_buffers(handle, inode, - page_buffers(page), 0, len, NULL, - write_end_fn)) - goto out_error; - if (ext4_jbd2_inode_add_write(handle, inode, - page_offset(page), len)) - goto out_error; - ext4_set_inode_state(inode, EXT4_STATE_JDATA); } else { - unlock_page(page); + folio_unlock(folio); } } ext4_journal_stop(handle); @@ -6349,7 +6143,7 @@ out: sb_end_pagefault(inode->i_sb); return ret; out_error: - unlock_page(page); + folio_unlock(folio); ext4_journal_stop(handle); goto out; } |