diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-03-12 13:25:53 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-03-12 13:25:53 -0700 |
commit | f153fbe1ea11939e2514ba4b3b62bbd946e2892c (patch) | |
tree | c8a482f07398b4288b72ace1fe94f026b862920f /fs | |
parent | d453cc5a278ddf8fd4f0a89815c5da2c6650bbea (diff) | |
parent | a1bafc3109d713ed83f73d61ba5cb1e6fd80fdbc (diff) |
Merge tag 'erofs-for-6.9-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs
Pull erofs updates from Gao Xiang:
"In this cycle, we introduce compressed inode support over fscache
because many native EROFS images are explicitly compressed; this lets
EROFS over fscache be used more widely, even without Dragonfly
Nydus [1].
Apart from that, there are some folio conversions for compressed
inodes available as well as a lockdep false positive fix.
Summary:
- Some folio conversions for compressed inodes;
- Add compressed inode support over fscache;
- Fix lockdep false positives of erofs_pseudo_mnt"
Link: https://nydus.dev [1]
* tag 'erofs-for-6.9-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs:
erofs: support compressed inodes over fscache
erofs: make iov_iter describe target buffers over fscache
erofs: fix lockdep false positives on initializing erofs_pseudo_mnt
erofs: refine managed cache operations to folios
erofs: convert z_erofs_submissionqueue_endio() to folios
erofs: convert z_erofs_fill_bio_vec() to folios
erofs: get rid of `justfound` debugging tag
erofs: convert z_erofs_do_read_page() to folios
erofs: convert z_erofs_onlinepage_.* to folios
Diffstat (limited to 'fs')
-rw-r--r-- | fs/erofs/compress.h | 7 | ||||
-rw-r--r-- | fs/erofs/decompressor_deflate.c | 3 | ||||
-rw-r--r-- | fs/erofs/decompressor_lzma.c | 3 | ||||
-rw-r--r-- | fs/erofs/fscache.c | 297 | ||||
-rw-r--r-- | fs/erofs/inode.c | 14 | ||||
-rw-r--r-- | fs/erofs/internal.h | 9 | ||||
-rw-r--r-- | fs/erofs/super.c | 30 | ||||
-rw-r--r-- | fs/erofs/utils.c | 2 | ||||
-rw-r--r-- | fs/erofs/zdata.c | 287 |
9 files changed, 335 insertions, 317 deletions
diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h index 7cc5841577b2..333587ba6183 100644 --- a/fs/erofs/compress.h +++ b/fs/erofs/compress.h @@ -81,13 +81,6 @@ static inline bool z_erofs_put_shortlivedpage(struct page **pagepool, return true; } -#define MNGD_MAPPING(sbi) ((sbi)->managed_cache->i_mapping) -static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi, - struct page *page) -{ - return page->mapping == MNGD_MAPPING(sbi); -} - int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf, unsigned int padbufsize); extern const struct z_erofs_decompressor erofs_decompressors[]; diff --git a/fs/erofs/decompressor_deflate.c b/fs/erofs/decompressor_deflate.c index b98872058abe..81e65c453ef0 100644 --- a/fs/erofs/decompressor_deflate.c +++ b/fs/erofs/decompressor_deflate.c @@ -212,9 +212,6 @@ again: if (rq->out[no] != rq->in[j]) continue; - - DBG_BUGON(erofs_page_is_managed(EROFS_SB(sb), - rq->in[j])); tmppage = erofs_allocpage(pgpl, rq->gfp); if (!tmppage) { err = -ENOMEM; diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c index 6ca357d83cfa..4b28dc130c9f 100644 --- a/fs/erofs/decompressor_lzma.c +++ b/fs/erofs/decompressor_lzma.c @@ -258,9 +258,6 @@ again: if (rq->out[no] != rq->in[j]) continue; - - DBG_BUGON(erofs_page_is_managed(EROFS_SB(rq->sb), - rq->in[j])); tmppage = erofs_allocpage(pgpl, rq->gfp); if (!tmppage) { err = -ENOMEM; diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c index 89a7c2453aae..8aff1a724805 100644 --- a/fs/erofs/fscache.c +++ b/fs/erofs/fscache.c @@ -3,6 +3,7 @@ * Copyright (C) 2022, Alibaba Cloud * Copyright (C) 2022, Bytedance Inc. All rights reserved. 
*/ +#include <linux/pseudo_fs.h> #include <linux/fscache.h> #include "internal.h" @@ -12,9 +13,27 @@ static LIST_HEAD(erofs_domain_list); static LIST_HEAD(erofs_domain_cookies_list); static struct vfsmount *erofs_pseudo_mnt; -struct erofs_fscache_request { - struct erofs_fscache_request *primary; - struct netfs_cache_resources cache_resources; +static int erofs_anon_init_fs_context(struct fs_context *fc) +{ + return init_pseudo(fc, EROFS_SUPER_MAGIC) ? 0 : -ENOMEM; +} + +static struct file_system_type erofs_anon_fs_type = { + .owner = THIS_MODULE, + .name = "pseudo_erofs", + .init_fs_context = erofs_anon_init_fs_context, + .kill_sb = kill_anon_super, +}; + +struct erofs_fscache_io { + struct netfs_cache_resources cres; + struct iov_iter iter; + netfs_io_terminated_t end_io; + void *private; + refcount_t ref; +}; + +struct erofs_fscache_rq { struct address_space *mapping; /* The mapping being accessed */ loff_t start; /* Start position */ size_t len; /* Length of the request */ @@ -23,44 +42,17 @@ struct erofs_fscache_request { refcount_t ref; }; -static struct erofs_fscache_request *erofs_fscache_req_alloc(struct address_space *mapping, - loff_t start, size_t len) +static bool erofs_fscache_io_put(struct erofs_fscache_io *io) { - struct erofs_fscache_request *req; - - req = kzalloc(sizeof(struct erofs_fscache_request), GFP_KERNEL); - if (!req) - return ERR_PTR(-ENOMEM); - - req->mapping = mapping; - req->start = start; - req->len = len; - refcount_set(&req->ref, 1); - - return req; + if (!refcount_dec_and_test(&io->ref)) + return false; + if (io->cres.ops) + io->cres.ops->end_operation(&io->cres); + kfree(io); + return true; } -static struct erofs_fscache_request *erofs_fscache_req_chain(struct erofs_fscache_request *primary, - size_t len) -{ - struct erofs_fscache_request *req; - - /* use primary request for the first submission */ - if (!primary->submitted) { - refcount_inc(&primary->ref); - return primary; - } - - req = erofs_fscache_req_alloc(primary->mapping, 
- primary->start + primary->submitted, len); - if (!IS_ERR(req)) { - req->primary = primary; - refcount_inc(&primary->ref); - } - return req; -} - -static void erofs_fscache_req_complete(struct erofs_fscache_request *req) +static void erofs_fscache_req_complete(struct erofs_fscache_rq *req) { struct folio *folio; bool failed = req->error; @@ -80,120 +72,196 @@ static void erofs_fscache_req_complete(struct erofs_fscache_request *req) rcu_read_unlock(); } -static void erofs_fscache_req_put(struct erofs_fscache_request *req) +static void erofs_fscache_req_put(struct erofs_fscache_rq *req) { - if (refcount_dec_and_test(&req->ref)) { - if (req->cache_resources.ops) - req->cache_resources.ops->end_operation(&req->cache_resources); - if (!req->primary) - erofs_fscache_req_complete(req); - else - erofs_fscache_req_put(req->primary); - kfree(req); - } + if (!refcount_dec_and_test(&req->ref)) + return; + erofs_fscache_req_complete(req); + kfree(req); } -static void erofs_fscache_subreq_complete(void *priv, +static struct erofs_fscache_rq *erofs_fscache_req_alloc(struct address_space *mapping, + loff_t start, size_t len) +{ + struct erofs_fscache_rq *req = kzalloc(sizeof(*req), GFP_KERNEL); + + if (!req) + return NULL; + req->mapping = mapping; + req->start = start; + req->len = len; + refcount_set(&req->ref, 1); + return req; +} + +static void erofs_fscache_req_io_put(struct erofs_fscache_io *io) +{ + struct erofs_fscache_rq *req = io->private; + + if (erofs_fscache_io_put(io)) + erofs_fscache_req_put(req); +} + +static void erofs_fscache_req_end_io(void *priv, ssize_t transferred_or_error, bool was_async) { - struct erofs_fscache_request *req = priv; + struct erofs_fscache_io *io = priv; + struct erofs_fscache_rq *req = io->private; - if (IS_ERR_VALUE(transferred_or_error)) { - if (req->primary) - req->primary->error = transferred_or_error; - else - req->error = transferred_or_error; - } - erofs_fscache_req_put(req); + if (IS_ERR_VALUE(transferred_or_error)) + req->error = 
transferred_or_error; + erofs_fscache_req_io_put(io); +} + +static struct erofs_fscache_io *erofs_fscache_req_io_alloc(struct erofs_fscache_rq *req) +{ + struct erofs_fscache_io *io = kzalloc(sizeof(*io), GFP_KERNEL); + + if (!io) + return NULL; + io->end_io = erofs_fscache_req_end_io; + io->private = req; + refcount_inc(&req->ref); + refcount_set(&io->ref, 1); + return io; } /* - * Read data from fscache (cookie, pstart, len), and fill the read data into - * page cache described by (req->mapping, lstart, len). @pstart describeis the - * start physical address in the cache file. + * Read data from fscache described by cookie at pstart physical address + * offset, and fill the read data into buffer described by io->iter. */ -static int erofs_fscache_read_folios_async(struct fscache_cookie *cookie, - struct erofs_fscache_request *req, loff_t pstart, size_t len) +static int erofs_fscache_read_io_async(struct fscache_cookie *cookie, + loff_t pstart, struct erofs_fscache_io *io) { enum netfs_io_source source; - struct super_block *sb = req->mapping->host->i_sb; - struct netfs_cache_resources *cres = &req->cache_resources; - struct iov_iter iter; - loff_t lstart = req->start + req->submitted; - size_t done = 0; + struct netfs_cache_resources *cres = &io->cres; + struct iov_iter *iter = &io->iter; int ret; - DBG_BUGON(len > req->len - req->submitted); - ret = fscache_begin_read_operation(cres, cookie); if (ret) return ret; - while (done < len) { - loff_t sstart = pstart + done; - size_t slen = len - done; + while (iov_iter_count(iter)) { + size_t orig_count = iov_iter_count(iter), len = orig_count; unsigned long flags = 1 << NETFS_SREQ_ONDEMAND; source = cres->ops->prepare_ondemand_read(cres, - sstart, &slen, LLONG_MAX, &flags, 0); - if (WARN_ON(slen == 0)) + pstart, &len, LLONG_MAX, &flags, 0); + if (WARN_ON(len == 0)) source = NETFS_INVALID_READ; if (source != NETFS_READ_FROM_CACHE) { - erofs_err(sb, "failed to fscache prepare_read (source %d)", source); + 
erofs_err(NULL, "prepare_read failed (source %d)", source); return -EIO; } - refcount_inc(&req->ref); - iov_iter_xarray(&iter, ITER_DEST, &req->mapping->i_pages, - lstart + done, slen); - - ret = fscache_read(cres, sstart, &iter, NETFS_READ_HOLE_FAIL, - erofs_fscache_subreq_complete, req); + iov_iter_truncate(iter, len); + refcount_inc(&io->ref); + ret = fscache_read(cres, pstart, iter, NETFS_READ_HOLE_FAIL, + io->end_io, io); if (ret == -EIOCBQUEUED) ret = 0; if (ret) { - erofs_err(sb, "failed to fscache_read (ret %d)", ret); + erofs_err(NULL, "fscache_read failed (ret %d)", ret); return ret; } + if (WARN_ON(iov_iter_count(iter))) + return -EIO; - done += slen; + iov_iter_reexpand(iter, orig_count - len); + pstart += len; } - DBG_BUGON(done != len); return 0; } -static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio) +struct erofs_fscache_bio { + struct erofs_fscache_io io; + struct bio bio; /* w/o bdev to share bio_add_page/endio() */ + struct bio_vec bvecs[BIO_MAX_VECS]; +}; + +static void erofs_fscache_bio_endio(void *priv, + ssize_t transferred_or_error, bool was_async) +{ + struct erofs_fscache_bio *io = priv; + + if (IS_ERR_VALUE(transferred_or_error)) + io->bio.bi_status = errno_to_blk_status(transferred_or_error); + io->bio.bi_end_io(&io->bio); + BUILD_BUG_ON(offsetof(struct erofs_fscache_bio, io) != 0); + erofs_fscache_io_put(&io->io); +} + +struct bio *erofs_fscache_bio_alloc(struct erofs_map_dev *mdev) { + struct erofs_fscache_bio *io; + + io = kmalloc(sizeof(*io), GFP_KERNEL | __GFP_NOFAIL); + bio_init(&io->bio, NULL, io->bvecs, BIO_MAX_VECS, REQ_OP_READ); + io->io.private = mdev->m_fscache->cookie; + io->io.end_io = erofs_fscache_bio_endio; + refcount_set(&io->io.ref, 1); + return &io->bio; +} + +void erofs_fscache_submit_bio(struct bio *bio) +{ + struct erofs_fscache_bio *io = container_of(bio, + struct erofs_fscache_bio, bio); int ret; + + iov_iter_bvec(&io->io.iter, ITER_DEST, io->bvecs, bio->bi_vcnt, + 
bio->bi_iter.bi_size); + ret = erofs_fscache_read_io_async(io->io.private, + bio->bi_iter.bi_sector << 9, &io->io); + erofs_fscache_io_put(&io->io); + if (!ret) + return; + bio->bi_status = errno_to_blk_status(ret); + bio->bi_end_io(bio); +} + +static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio) +{ struct erofs_fscache *ctx = folio->mapping->host->i_private; - struct erofs_fscache_request *req; + int ret = -ENOMEM; + struct erofs_fscache_rq *req; + struct erofs_fscache_io *io; req = erofs_fscache_req_alloc(folio->mapping, folio_pos(folio), folio_size(folio)); - if (IS_ERR(req)) { + if (!req) { folio_unlock(folio); - return PTR_ERR(req); + return ret; } - ret = erofs_fscache_read_folios_async(ctx->cookie, req, - folio_pos(folio), folio_size(folio)); + io = erofs_fscache_req_io_alloc(req); + if (!io) { + req->error = ret; + goto out; + } + iov_iter_xarray(&io->iter, ITER_DEST, &folio->mapping->i_pages, + folio_pos(folio), folio_size(folio)); + + ret = erofs_fscache_read_io_async(ctx->cookie, folio_pos(folio), io); if (ret) req->error = ret; + erofs_fscache_req_io_put(io); +out: erofs_fscache_req_put(req); return ret; } -static int erofs_fscache_data_read_slice(struct erofs_fscache_request *primary) +static int erofs_fscache_data_read_slice(struct erofs_fscache_rq *req) { - struct address_space *mapping = primary->mapping; + struct address_space *mapping = req->mapping; struct inode *inode = mapping->host; struct super_block *sb = inode->i_sb; - struct erofs_fscache_request *req; + struct erofs_fscache_io *io; struct erofs_map_blocks map; struct erofs_map_dev mdev; - struct iov_iter iter; - loff_t pos = primary->start + primary->submitted; + loff_t pos = req->start + req->submitted; size_t count; int ret; @@ -204,6 +272,7 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_request *primary) if (map.m_flags & EROFS_MAP_META) { struct erofs_buf buf = __EROFS_BUF_INITIALIZER; + struct iov_iter iter; erofs_blk_t blknr; size_t 
offset, size; void *src; @@ -224,15 +293,17 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_request *primary) } iov_iter_zero(PAGE_SIZE - size, &iter); erofs_put_metabuf(&buf); - primary->submitted += PAGE_SIZE; + req->submitted += PAGE_SIZE; return 0; } - count = primary->len - primary->submitted; + count = req->len - req->submitted; if (!(map.m_flags & EROFS_MAP_MAPPED)) { + struct iov_iter iter; + iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, count); iov_iter_zero(count, &iter); - primary->submitted += count; + req->submitted += count; return 0; } @@ -247,18 +318,19 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_request *primary) if (ret) return ret; - req = erofs_fscache_req_chain(primary, count); - if (IS_ERR(req)) - return PTR_ERR(req); + io = erofs_fscache_req_io_alloc(req); + if (!io) + return -ENOMEM; + iov_iter_xarray(&io->iter, ITER_DEST, &mapping->i_pages, pos, count); + ret = erofs_fscache_read_io_async(mdev.m_fscache->cookie, + mdev.m_pa + (pos - map.m_la), io); + erofs_fscache_req_io_put(io); - ret = erofs_fscache_read_folios_async(mdev.m_fscache->cookie, - req, mdev.m_pa + (pos - map.m_la), count); - erofs_fscache_req_put(req); - primary->submitted += count; + req->submitted += count; return ret; } -static int erofs_fscache_data_read(struct erofs_fscache_request *req) +static int erofs_fscache_data_read(struct erofs_fscache_rq *req) { int ret; @@ -267,20 +339,19 @@ static int erofs_fscache_data_read(struct erofs_fscache_request *req) if (ret) req->error = ret; } while (!ret && req->submitted < req->len); - return ret; } static int erofs_fscache_read_folio(struct file *file, struct folio *folio) { - struct erofs_fscache_request *req; + struct erofs_fscache_rq *req; int ret; req = erofs_fscache_req_alloc(folio->mapping, folio_pos(folio), folio_size(folio)); - if (IS_ERR(req)) { + if (!req) { folio_unlock(folio); - return PTR_ERR(req); + return -ENOMEM; } ret = erofs_fscache_data_read(req); @@ -290,14 
+361,14 @@ static int erofs_fscache_read_folio(struct file *file, struct folio *folio) static void erofs_fscache_readahead(struct readahead_control *rac) { - struct erofs_fscache_request *req; + struct erofs_fscache_rq *req; if (!readahead_count(rac)) return; req = erofs_fscache_req_alloc(rac->mapping, readahead_pos(rac), readahead_length(rac)); - if (IS_ERR(req)) + if (!req) return; /* The request completion will drop refs on the folios. */ @@ -381,7 +452,7 @@ static int erofs_fscache_init_domain(struct super_block *sb) goto out; if (!erofs_pseudo_mnt) { - struct vfsmount *mnt = kern_mount(&erofs_fs_type); + struct vfsmount *mnt = kern_mount(&erofs_anon_fs_type); if (IS_ERR(mnt)) { err = PTR_ERR(mnt); goto out; diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index 36e638e8b53a..0eb0e6f933c3 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c @@ -259,14 +259,12 @@ static int erofs_fill_inode(struct inode *inode) if (erofs_inode_is_data_compressed(vi->datalayout)) { #ifdef CONFIG_EROFS_FS_ZIP - if (!erofs_is_fscache_mode(inode->i_sb)) { - DO_ONCE_LITE_IF(inode->i_sb->s_blocksize != PAGE_SIZE, - erofs_info, inode->i_sb, - "EXPERIMENTAL EROFS subpage compressed block support in use. Use at your own risk!"); - inode->i_mapping->a_ops = &z_erofs_aops; - err = 0; - goto out_unlock; - } + DO_ONCE_LITE_IF(inode->i_blkbits != PAGE_SHIFT, + erofs_info, inode->i_sb, + "EXPERIMENTAL EROFS subpage compressed block support in use. 
Use at your own risk!"); + inode->i_mapping->a_ops = &z_erofs_aops; + err = 0; + goto out_unlock; #endif err = -EOPNOTSUPP; goto out_unlock; diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index 0f0706325b7b..39c67119f43b 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -385,7 +385,6 @@ struct erofs_map_dev { unsigned int m_deviceid; }; -extern struct file_system_type erofs_fs_type; extern const struct super_operations erofs_sops; extern const struct address_space_operations erofs_raw_access_aops; @@ -467,8 +466,8 @@ int __init erofs_init_shrinker(void); void erofs_exit_shrinker(void); int __init z_erofs_init_zip_subsystem(void); void z_erofs_exit_zip_subsystem(void); -int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi, - struct erofs_workgroup *egrp); +int erofs_try_to_free_all_cached_folios(struct erofs_sb_info *sbi, + struct erofs_workgroup *egrp); int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map, int flags); void *erofs_get_pcpubuf(unsigned int requiredpages); @@ -513,6 +512,8 @@ void erofs_fscache_unregister_fs(struct super_block *sb); struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb, char *name, unsigned int flags); void erofs_fscache_unregister_cookie(struct erofs_fscache *fscache); +struct bio *erofs_fscache_bio_alloc(struct erofs_map_dev *mdev); +void erofs_fscache_submit_bio(struct bio *bio); #else static inline int erofs_fscache_register_fs(struct super_block *sb) { @@ -530,6 +531,8 @@ struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb, static inline void erofs_fscache_unregister_cookie(struct erofs_fscache *fscache) { } +static inline struct bio *erofs_fscache_bio_alloc(struct erofs_map_dev *mdev) { return NULL; } +static inline void erofs_fscache_submit_bio(struct bio *bio) {} #endif #define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 9b4b66dcdd4f..6fbb1fba2d31 100644 --- 
a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -579,13 +579,6 @@ static const struct export_operations erofs_export_ops = { .get_parent = erofs_get_parent, }; -static int erofs_fc_fill_pseudo_super(struct super_block *sb, struct fs_context *fc) -{ - static const struct tree_descr empty_descr = {""}; - - return simple_fill_super(sb, EROFS_SUPER_MAGIC, &empty_descr); -} - static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) { struct inode *inode; @@ -712,11 +705,6 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) return 0; } -static int erofs_fc_anon_get_tree(struct fs_context *fc) -{ - return get_tree_nodev(fc, erofs_fc_fill_pseudo_super); -} - static int erofs_fc_get_tree(struct fs_context *fc) { struct erofs_fs_context *ctx = fc->fs_private; @@ -789,20 +777,10 @@ static const struct fs_context_operations erofs_context_ops = { .free = erofs_fc_free, }; -static const struct fs_context_operations erofs_anon_context_ops = { - .get_tree = erofs_fc_anon_get_tree, -}; - static int erofs_init_fs_context(struct fs_context *fc) { struct erofs_fs_context *ctx; - /* pseudo mount for anon inodes */ - if (fc->sb_flags & SB_KERNMOUNT) { - fc->ops = &erofs_anon_context_ops; - return 0; - } - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; @@ -824,12 +802,6 @@ static void erofs_kill_sb(struct super_block *sb) { struct erofs_sb_info *sbi; - /* pseudo mount for anon inodes */ - if (sb->s_flags & SB_KERNMOUNT) { - kill_anon_super(sb); - return; - } - if (erofs_is_fscache_mode(sb)) kill_anon_super(sb); else @@ -868,7 +840,7 @@ static void erofs_put_super(struct super_block *sb) erofs_fscache_unregister_fs(sb); } -struct file_system_type erofs_fs_type = { +static struct file_system_type erofs_fs_type = { .owner = THIS_MODULE, .name = "erofs", .init_fs_context = erofs_init_fs_context, diff --git a/fs/erofs/utils.c b/fs/erofs/utils.c index e146d09151af..518bdd69c823 100644 --- a/fs/erofs/utils.c +++ 
b/fs/erofs/utils.c @@ -129,7 +129,7 @@ static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi, * the XArray. Otherwise some cached pages could be still attached to * the orphan old workgroup when the new one is available in the tree. */ - if (erofs_try_to_free_all_cached_pages(sbi, grp)) + if (erofs_try_to_free_all_cached_folios(sbi, grp)) goto out; /* diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index ff0aa72b0db3..3216b920d369 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -19,7 +19,10 @@ typedef void *z_erofs_next_pcluster_t; struct z_erofs_bvec { - struct page *page; + union { + struct page *page; + struct folio *folio; + }; int offset; unsigned int end; }; @@ -116,47 +119,46 @@ static inline unsigned int z_erofs_pclusterpages(struct z_erofs_pcluster *pcl) return PAGE_ALIGN(pcl->pclustersize) >> PAGE_SHIFT; } +#define MNGD_MAPPING(sbi) ((sbi)->managed_cache->i_mapping) +static bool erofs_folio_is_managed(struct erofs_sb_info *sbi, struct folio *fo) +{ + return fo->mapping == MNGD_MAPPING(sbi); +} + /* - * bit 30: I/O error occurred on this page - * bit 0 - 29: remaining parts to complete this page + * bit 30: I/O error occurred on this folio + * bit 0 - 29: remaining parts to complete this folio */ -#define Z_EROFS_PAGE_EIO (1 << 30) +#define Z_EROFS_FOLIO_EIO (1 << 30) -static inline void z_erofs_onlinepage_init(struct page *page) +static void z_erofs_onlinefolio_init(struct folio *folio) { union { atomic_t o; - unsigned long v; + void *v; } u = { .o = ATOMIC_INIT(1) }; - set_page_private(page, u.v); - smp_wmb(); - SetPagePrivate(page); + folio->private = u.v; /* valid only if file-backed folio is locked */ } -static inline void z_erofs_onlinepage_split(struct page *page) +static void z_erofs_onlinefolio_split(struct folio *folio) { - atomic_inc((atomic_t *)&page->private); + atomic_inc((atomic_t *)&folio->private); } -static void z_erofs_onlinepage_endio(struct page *page, int err) +static void z_erofs_onlinefolio_end(struct folio 
*folio, int err) { int orig, v; - DBG_BUGON(!PagePrivate(page)); - do { - orig = atomic_read((atomic_t *)&page->private); - v = (orig - 1) | (err ? Z_EROFS_PAGE_EIO : 0); - } while (atomic_cmpxchg((atomic_t *)&page->private, orig, v) != orig); - - if (!(v & ~Z_EROFS_PAGE_EIO)) { - set_page_private(page, 0); - ClearPagePrivate(page); - if (!(v & Z_EROFS_PAGE_EIO)) - SetPageUptodate(page); - unlock_page(page); - } + orig = atomic_read((atomic_t *)&folio->private); + v = (orig - 1) | (err ? Z_EROFS_FOLIO_EIO : 0); + } while (atomic_cmpxchg((atomic_t *)&folio->private, orig, v) != orig); + + if (v & ~Z_EROFS_FOLIO_EIO) + return; + folio->private = 0; + folio_end_read(folio, !(v & Z_EROFS_FOLIO_EIO)); } #define Z_EROFS_ONSTACK_PAGES 32 @@ -572,17 +574,13 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe) for (i = 0; i < pclusterpages; ++i) { struct page *page, *newpage; - void *t; /* mark pages just found for debugging */ /* Inaccurate check w/o locking to avoid unneeded lookups */ if (READ_ONCE(pcl->compressed_bvecs[i].page)) continue; page = find_get_page(mc, pcl->obj.index + i); - if (page) { - t = (void *)((unsigned long)page | 1); - newpage = NULL; - } else { + if (!page) { /* I/O is needed, no possible to decompress directly */ standalone = false; if (!shouldalloc) @@ -596,11 +594,10 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe) if (!newpage) continue; set_page_private(newpage, Z_EROFS_PREALLOCATED_PAGE); - t = (void *)((unsigned long)newpage | 1); } spin_lock(&pcl->obj.lockref.lock); if (!pcl->compressed_bvecs[i].page) { - pcl->compressed_bvecs[i].page = t; + pcl->compressed_bvecs[i].page = page ? 
page : newpage; spin_unlock(&pcl->obj.lockref.lock); continue; } @@ -620,9 +617,9 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe) fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE; } -/* called by erofs_shrinker to get rid of all compressed_pages */ -int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi, - struct erofs_workgroup *grp) +/* called by erofs_shrinker to get rid of all cached compressed bvecs */ +int erofs_try_to_free_all_cached_folios(struct erofs_sb_info *sbi, + struct erofs_workgroup *grp) { struct z_erofs_pcluster *const pcl = container_of(grp, struct z_erofs_pcluster, obj); @@ -630,27 +627,22 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi, int i; DBG_BUGON(z_erofs_is_inline_pcluster(pcl)); - /* - * refcount of workgroup is now freezed as 0, - * therefore no need to worry about available decompression users. - */ + /* There is no actice user since the pcluster is now freezed */ for (i = 0; i < pclusterpages; ++i) { - struct page *page = pcl->compressed_bvecs[i].page; + struct folio *folio = pcl->compressed_bvecs[i].folio; - if (!page) + if (!folio) continue; - /* block other users from reclaiming or migrating the page */ - if (!trylock_page(page)) + /* Avoid reclaiming or migrating this folio */ + if (!folio_trylock(folio)) return -EBUSY; - if (!erofs_page_is_managed(sbi, page)) + if (!erofs_folio_is_managed(sbi, folio)) continue; - - /* barrier is implied in the following 'unlock_page' */ - WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL); - detach_page_private(page); - unlock_page(page); + pcl->compressed_bvecs[i].folio = NULL; + folio_detach_private(folio); + folio_unlock(folio); } return 0; } @@ -667,20 +659,17 @@ static bool z_erofs_cache_release_folio(struct folio *folio, gfp_t gfp) ret = false; spin_lock(&pcl->obj.lockref.lock); - if (pcl->obj.lockref.count > 0) - goto out; - - DBG_BUGON(z_erofs_is_inline_pcluster(pcl)); - for (i = 0; i < pclusterpages; ++i) { - if 
(pcl->compressed_bvecs[i].page == &folio->page) { - WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL); - ret = true; - break; + if (pcl->obj.lockref.count <= 0) { + DBG_BUGON(z_erofs_is_inline_pcluster(pcl)); + for (i = 0; i < pclusterpages; ++i) { + if (pcl->compressed_bvecs[i].folio == folio) { + pcl->compressed_bvecs[i].folio = NULL; + folio_detach_private(folio); + ret = true; + break; + } } } - if (ret) - folio_detach_private(folio); -out: spin_unlock(&pcl->obj.lockref.lock); return ret; } @@ -962,20 +951,20 @@ static int z_erofs_read_fragment(struct super_block *sb, struct page *page, return 0; } -static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe, - struct page *page, bool ra) +static int z_erofs_scan_folio(struct z_erofs_decompress_frontend *fe, + struct folio *folio, bool ra) { struct inode *const inode = fe->inode; struct erofs_map_blocks *const map = &fe->map; - const loff_t offset = page_offset(page); - const unsigned int bs = i_blocksize(inode); + const loff_t offset = folio_pos(folio); + const unsigned int bs = i_blocksize(inode), fs = folio_size(folio); bool tight = true, exclusive; unsigned int cur, end, len, split; int err = 0; - z_erofs_onlinepage_init(page); + z_erofs_onlinefolio_init(folio); split = 0; - end = PAGE_SIZE; + end = fs; repeat: if (offset + end - 1 < map->m_la || offset + end - 1 >= map->m_la + map->m_llen) { @@ -992,7 +981,7 @@ repeat: ++split; if (!(map->m_flags & EROFS_MAP_MAPPED)) { - zero_user_segment(page, cur, end); + folio_zero_segment(folio, cur, end); tight = false; goto next_part; } @@ -1001,8 +990,8 @@ repeat: erofs_off_t fpos = offset + cur - map->m_la; len = min_t(unsigned int, map->m_llen - fpos, end - cur); - err = z_erofs_read_fragment(inode->i_sb, page, cur, cur + len, - EROFS_I(inode)->z_fragmentoff + fpos); + err = z_erofs_read_fragment(inode->i_sb, &folio->page, cur, + cur + len, EROFS_I(inode)->z_fragmentoff + fpos); if (err) goto out; tight = false; @@ -1017,25 +1006,25 @@ repeat: } /* - * 
Ensure the current partial page belongs to this submit chain rather + * Ensure the current partial folio belongs to this submit chain rather * than other concurrent submit chains or the noio(bypass) chain since - * those chains are handled asynchronously thus the page cannot be used + * those chains are handled asynchronously thus the folio cannot be used * for inplace I/O or bvpage (should be processed in a strict order.) */ tight &= (fe->mode > Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE); - exclusive = (!cur && ((split <= 1) || (tight && bs == PAGE_SIZE))); + exclusive = (!cur && ((split <= 1) || (tight && bs == fs))); if (cur) tight &= (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED); err = z_erofs_attach_page(fe, &((struct z_erofs_bvec) { - .page = page, + .page = &folio->page, .offset = offset - map->m_la, .end = end, }), exclusive); if (err) goto out; - z_erofs_onlinepage_split(page); + z_erofs_onlinefolio_split(folio); if (fe->pcl->pageofs_out != (map->m_la & ~PAGE_MASK)) fe->pcl->multibases = true; if (fe->pcl->length < offset + end - map->m_la) { @@ -1056,7 +1045,7 @@ next_part: goto repeat; out: - z_erofs_onlinepage_endio(page, err); + z_erofs_onlinefolio_end(folio, err); return err; } @@ -1159,7 +1148,7 @@ static void z_erofs_fill_other_copies(struct z_erofs_decompress_backend *be, cur += len; } kunmap_local(dst); - z_erofs_onlinepage_endio(bvi->bvec.page, err); + z_erofs_onlinefolio_end(page_folio(bvi->bvec.page), err); list_del(p); kfree(bvi); } @@ -1210,7 +1199,7 @@ static int z_erofs_parse_in_bvecs(struct z_erofs_decompress_backend *be, be->compressed_pages[i] = page; if (z_erofs_is_inline_pcluster(pcl) || - erofs_page_is_managed(EROFS_SB(be->sb), page)) { + erofs_folio_is_managed(EROFS_SB(be->sb), page_folio(page))) { if (!PageUptodate(page)) err = -EIO; continue; @@ -1295,7 +1284,8 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be, /* consider shortlived pages added when decompressing */ page = be->compressed_pages[i]; - if (!page || 
erofs_page_is_managed(sbi, page)) + if (!page || + erofs_folio_is_managed(sbi, page_folio(page))) continue; (void)z_erofs_put_shortlivedpage(be->pagepool, page); WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL); @@ -1316,7 +1306,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be, /* recycle all individual short-lived pages */ if (z_erofs_put_shortlivedpage(be->pagepool, page)) continue; - z_erofs_onlinepage_endio(page, err); + z_erofs_onlinefolio_end(page_folio(page), err); } if (be->decompressed_pages != be->onstack_pages) @@ -1430,38 +1420,34 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec, struct z_erofs_bvec zbv; struct address_space *mapping; struct page *page; - int justfound, bs = i_blocksize(f->inode); + int bs = i_blocksize(f->inode); - /* Except for inplace pages, the entire page can be used for I/Os */ + /* Except for inplace folios, the entire folio can be used for I/Os */ bvec->bv_offset = 0; bvec->bv_len = PAGE_SIZE; repeat: spin_lock(&pcl->obj.lockref.lock); zbv = pcl->compressed_bvecs[nr]; - page = zbv.page; - justfound = (unsigned long)page & 1UL; - page = (struct page *)((unsigned long)page & ~1UL); - pcl->compressed_bvecs[nr].page = page; spin_unlock(&pcl->obj.lockref.lock); - if (!page) - goto out_allocpage; + if (!zbv.folio) + goto out_allocfolio; - bvec->bv_page = page; - DBG_BUGON(z_erofs_is_shortlived_page(page)); + bvec->bv_page = &zbv.folio->page; + DBG_BUGON(z_erofs_is_shortlived_page(bvec->bv_page)); /* - * Handle preallocated cached pages. We tried to allocate such pages + * Handle preallocated cached folios. We tried to allocate such folios * without triggering direct reclaim. If allocation failed, inplace - * file-backed pages will be used instead. + * file-backed folios will be used instead. 
*/ - if (page->private == Z_EROFS_PREALLOCATED_PAGE) { - set_page_private(page, 0); + if (zbv.folio->private == (void *)Z_EROFS_PREALLOCATED_PAGE) { + zbv.folio->private = 0; tocache = true; goto out_tocache; } - mapping = READ_ONCE(page->mapping); + mapping = READ_ONCE(zbv.folio->mapping); /* - * File-backed pages for inplace I/Os are all locked steady, + * File-backed folios for inplace I/Os are all locked steady, * therefore it is impossible for `mapping` to be NULL. */ if (mapping && mapping != mc) { @@ -1471,26 +1457,21 @@ repeat: return; } - lock_page(page); - /* only true if page reclaim goes wrong, should never happen */ - DBG_BUGON(justfound && PagePrivate(page)); - - /* the cached page is still in managed cache */ - if (page->mapping == mc) { + folio_lock(zbv.folio); + if (zbv.folio->mapping == mc) { /* - * The cached page is still available but without a valid - * `->private` pcluster hint. Let's reconnect them. + * The cached folio is still in managed cache but without + * a valid `->private` pcluster hint. Let's reconnect them. */ - if (!PagePrivate(page)) { - DBG_BUGON(!justfound); - /* compressed_bvecs[] already takes a ref */ - attach_page_private(page, pcl); - put_page(page); + if (!folio_test_private(zbv.folio)) { + folio_attach_private(zbv.folio, pcl); + /* compressed_bvecs[] already takes a ref before */ + folio_put(zbv.folio); } /* no need to submit if it is already up-to-date */ - if (PageUptodate(page)) { - unlock_page(page); + if (folio_test_uptodate(zbv.folio)) { + folio_unlock(zbv.folio); bvec->bv_page = NULL; } return; @@ -1500,34 +1481,32 @@ repeat: * It has been truncated, so it's unsafe to reuse this one. Let's * allocate a new page for compressed data. 
*/ - DBG_BUGON(page->mapping); - DBG_BUGON(!justfound); - + DBG_BUGON(zbv.folio->mapping); tocache = true; - unlock_page(page); - put_page(page); -out_allocpage: + folio_unlock(zbv.folio); + folio_put(zbv.folio); +out_allocfolio: page = erofs_allocpage(&f->pagepool, gfp | __GFP_NOFAIL); spin_lock(&pcl->obj.lockref.lock); - if (pcl->compressed_bvecs[nr].page) { + if (pcl->compressed_bvecs[nr].folio) { erofs_pagepool_add(&f->pagepool, page); spin_unlock(&pcl->obj.lockref.lock); cond_resched(); goto repeat; } - pcl->compressed_bvecs[nr].page = page; + pcl->compressed_bvecs[nr].folio = zbv.folio = page_folio(page); spin_unlock(&pcl->obj.lockref.lock); bvec->bv_page = page; out_tocache: if (!tocache || bs != PAGE_SIZE || - add_to_page_cache_lru(page, mc, pcl->obj.index + nr, gfp)) { - /* turn into a temporary shortlived page (1 ref) */ - set_page_private(page, Z_EROFS_SHORTLIVED_PAGE); + filemap_add_folio(mc, zbv.folio, pcl->obj.index + nr, gfp)) { + /* turn into a temporary shortlived folio (1 ref) */ + zbv.folio->private = (void *)Z_EROFS_SHORTLIVED_PAGE; return; } - attach_page_private(page, pcl); + folio_attach_private(zbv.folio, pcl); /* drop a refcount added by allocpage (then 2 refs in total here) */ - put_page(page); + folio_put(zbv.folio); } static struct z_erofs_decompressqueue *jobqueue_init(struct super_block *sb, @@ -1582,28 +1561,29 @@ static void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl, qtail[JQ_BYPASS] = &pcl->next; } -static void z_erofs_submissionqueue_endio(struct bio *bio) +static void z_erofs_endio(struct bio *bio) { struct z_erofs_decompressqueue *q = bio->bi_private; blk_status_t err = bio->bi_status; - struct bio_vec *bvec; - struct bvec_iter_all iter_all; + struct folio_iter fi; - bio_for_each_segment_all(bvec, bio, iter_all) { - struct page *page = bvec->bv_page; + bio_for_each_folio_all(fi, bio) { + struct folio *folio = fi.folio; - DBG_BUGON(PageUptodate(page)); - DBG_BUGON(z_erofs_page_is_invalidated(page)); - if 
(erofs_page_is_managed(EROFS_SB(q->sb), page)) { - if (!err) - SetPageUptodate(page); - unlock_page(page); - } + DBG_BUGON(folio_test_uptodate(folio)); + DBG_BUGON(z_erofs_page_is_invalidated(&folio->page)); + if (!erofs_folio_is_managed(EROFS_SB(q->sb), folio)) + continue; + + if (!err) + folio_mark_uptodate(folio); + folio_unlock(folio); } if (err) q->eio = true; z_erofs_decompress_kickoff(q, -1); - bio_put(bio); + if (bio->bi_bdev) + bio_put(bio); } static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f, @@ -1617,7 +1597,6 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f, z_erofs_next_pcluster_t owned_head = f->owned_head; /* bio is NULL initially, so no need to initialize last_{index,bdev} */ erofs_off_t last_pa; - struct block_device *last_bdev; unsigned int nr_bios = 0; struct bio *bio = NULL; unsigned long pflags; @@ -1664,9 +1643,13 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f, continue; if (bio && (cur != last_pa || - last_bdev != mdev.m_bdev)) { -submit_bio_retry: - submit_bio(bio); + bio->bi_bdev != mdev.m_bdev)) { +io_retry: + if (!erofs_is_fscache_mode(sb)) + submit_bio(bio); + else + erofs_fscache_submit_bio(bio); + if (memstall) { psi_memstall_leave(&pflags); memstall = 0; @@ -1681,15 +1664,16 @@ submit_bio_retry: } if (!bio) { - bio = bio_alloc(mdev.m_bdev, BIO_MAX_VECS, - REQ_OP_READ, GFP_NOIO); - bio->bi_end_io = z_erofs_submissionqueue_endio; + bio = erofs_is_fscache_mode(sb) ? 
+ erofs_fscache_bio_alloc(&mdev) : + bio_alloc(mdev.m_bdev, BIO_MAX_VECS, + REQ_OP_READ, GFP_NOIO); + bio->bi_end_io = z_erofs_endio; bio->bi_iter.bi_sector = cur >> 9; bio->bi_private = q[JQ_SUBMIT]; if (readahead) bio->bi_opf |= REQ_RAHEAD; ++nr_bios; - last_bdev = mdev.m_bdev; } if (cur + bvec.bv_len > end) @@ -1697,7 +1681,7 @@ submit_bio_retry: DBG_BUGON(bvec.bv_len < sb->s_blocksize); if (!bio_add_page(bio, bvec.bv_page, bvec.bv_len, bvec.bv_offset)) - goto submit_bio_retry; + goto io_retry; last_pa = cur + bvec.bv_len; bypass = false; @@ -1710,7 +1694,10 @@ submit_bio_retry: } while (owned_head != Z_EROFS_PCLUSTER_TAIL); if (bio) { - submit_bio(bio); + if (!erofs_is_fscache_mode(sb)) + submit_bio(bio); + else + erofs_fscache_submit_bio(bio); if (memstall) psi_memstall_leave(&pflags); } @@ -1795,7 +1782,7 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f, if (PageUptodate(page)) unlock_page(page); else - (void)z_erofs_do_read_page(f, page, !!rac); + z_erofs_scan_folio(f, page_folio(page), !!rac); put_page(page); } @@ -1816,7 +1803,7 @@ static int z_erofs_read_folio(struct file *file, struct folio *folio) f.headoffset = (erofs_off_t)folio->index << PAGE_SHIFT; z_erofs_pcluster_readmore(&f, NULL, true); - err = z_erofs_do_read_page(&f, &folio->page, false); + err = z_erofs_scan_folio(&f, folio, false); z_erofs_pcluster_readmore(&f, NULL, false); z_erofs_pcluster_end(&f); @@ -1857,7 +1844,7 @@ static void z_erofs_readahead(struct readahead_control *rac) folio = head; head = folio_get_private(folio); - err = z_erofs_do_read_page(&f, &folio->page, true); + err = z_erofs_scan_folio(&f, folio, true); if (err && err != -EINTR) erofs_err(inode->i_sb, "readahead error at folio %lu @ nid %llu", folio->index, EROFS_I(inode)->nid); |