summaryrefslogtreecommitdiff
path: root/fs/erofs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/erofs')
-rw-r--r--fs/erofs/compress.h23
-rw-r--r--fs/erofs/data.c36
-rw-r--r--fs/erofs/decompressor.c7
-rw-r--r--fs/erofs/erofs_fs.h3
-rw-r--r--fs/erofs/fileio.c13
-rw-r--r--fs/erofs/fscache.c10
-rw-r--r--fs/erofs/internal.h15
-rw-r--r--fs/erofs/super.c110
-rw-r--r--fs/erofs/xattr.c4
-rw-r--r--fs/erofs/zdata.c252
-rw-r--r--fs/erofs/zmap.c125
-rw-r--r--fs/erofs/zutil.c13
12 files changed, 245 insertions, 366 deletions
diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h
index 7bfe251680ec..65ff39401020 100644
--- a/fs/erofs/compress.h
+++ b/fs/erofs/compress.h
@@ -29,29 +29,8 @@ struct z_erofs_decompressor {
char *name;
};
-/* some special page->private (unsigned long, see below) */
#define Z_EROFS_SHORTLIVED_PAGE (-1UL << 2)
-#define Z_EROFS_PREALLOCATED_PAGE (-2UL << 2)
-
-/*
- * For all pages in a pcluster, page->private should be one of
- * Type Last 2bits page->private
- * short-lived page 00 Z_EROFS_SHORTLIVED_PAGE
- * preallocated page (tryalloc) 00 Z_EROFS_PREALLOCATED_PAGE
- * cached/managed page 00 pointer to z_erofs_pcluster
- * online page (file-backed, 01/10/11 sub-index << 2 | count
- * some pages can be used for inplace I/O)
- *
- * page->mapping should be one of
- * Type page->mapping
- * short-lived page NULL
- * preallocated page NULL
- * cached/managed page non-NULL or NULL (invalidated/truncated page)
- * online page non-NULL
- *
- * For all managed pages, PG_private should be set with 1 extra refcount,
- * which is used for page reclaim / migration.
- */
+#define Z_EROFS_PREALLOCATED_FOLIO ((void *)(-2UL << 2))
/*
* Currently, short-lived pages are pages directly from buddy system
diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index 1c49f8962021..0cd6b5c4df98 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -56,10 +56,10 @@ void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb)
buf->file = NULL;
if (erofs_is_fileio_mode(sbi)) {
- buf->file = sbi->fdev; /* some fs like FUSE needs it */
+ buf->file = sbi->dif0.file; /* some fs like FUSE needs it */
buf->mapping = buf->file->f_mapping;
} else if (erofs_is_fscache_mode(sb))
- buf->mapping = sbi->s_fscache->inode->i_mapping;
+ buf->mapping = sbi->dif0.fscache->inode->i_mapping;
else
buf->mapping = sb->s_bdev->bd_mapping;
}
@@ -179,19 +179,13 @@ out:
}
static void erofs_fill_from_devinfo(struct erofs_map_dev *map,
- struct erofs_device_info *dif)
+ struct super_block *sb, struct erofs_device_info *dif)
{
+ map->m_sb = sb;
+ map->m_dif = dif;
map->m_bdev = NULL;
- map->m_fp = NULL;
- if (dif->file) {
- if (S_ISBLK(file_inode(dif->file)->i_mode))
- map->m_bdev = file_bdev(dif->file);
- else
- map->m_fp = dif->file;
- }
- map->m_daxdev = dif->dax_dev;
- map->m_dax_part_off = dif->dax_part_off;
- map->m_fscache = dif->fscache;
+ if (dif->file && S_ISBLK(file_inode(dif->file)->i_mode))
+ map->m_bdev = file_bdev(dif->file);
}
int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
@@ -201,12 +195,8 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
erofs_off_t startoff, length;
int id;
- map->m_bdev = sb->s_bdev;
- map->m_daxdev = EROFS_SB(sb)->dax_dev;
- map->m_dax_part_off = EROFS_SB(sb)->dax_part_off;
- map->m_fscache = EROFS_SB(sb)->s_fscache;
- map->m_fp = EROFS_SB(sb)->fdev;
-
+ erofs_fill_from_devinfo(map, sb, &EROFS_SB(sb)->dif0);
+ map->m_bdev = sb->s_bdev; /* use s_bdev for the primary device */
if (map->m_deviceid) {
down_read(&devs->rwsem);
dif = idr_find(&devs->tree, map->m_deviceid - 1);
@@ -219,7 +209,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
up_read(&devs->rwsem);
return 0;
}
- erofs_fill_from_devinfo(map, dif);
+ erofs_fill_from_devinfo(map, sb, dif);
up_read(&devs->rwsem);
} else if (devs->extra_devices && !devs->flatdev) {
down_read(&devs->rwsem);
@@ -232,7 +222,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
if (map->m_pa >= startoff &&
map->m_pa < startoff + length) {
map->m_pa -= startoff;
- erofs_fill_from_devinfo(map, dif);
+ erofs_fill_from_devinfo(map, sb, dif);
break;
}
}
@@ -302,7 +292,7 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
iomap->offset = map.m_la;
if (flags & IOMAP_DAX)
- iomap->dax_dev = mdev.m_daxdev;
+ iomap->dax_dev = mdev.m_dif->dax_dev;
else
iomap->bdev = mdev.m_bdev;
iomap->length = map.m_llen;
@@ -331,7 +321,7 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
iomap->type = IOMAP_MAPPED;
iomap->addr = mdev.m_pa;
if (flags & IOMAP_DAX)
- iomap->addr += mdev.m_dax_part_off;
+ iomap->addr += mdev.m_dif->dax_part_off;
}
return 0;
}
diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index eb318c7ddd80..2b123b070a42 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -7,14 +7,7 @@
#include "compress.h"
#include <linux/lz4.h>
-#ifndef LZ4_DISTANCE_MAX /* history window size */
-#define LZ4_DISTANCE_MAX 65535 /* set to maximum value by default */
-#endif
-
#define LZ4_MAX_DISTANCE_PAGES (DIV_ROUND_UP(LZ4_DISTANCE_MAX, PAGE_SIZE) + 1)
-#ifndef LZ4_DECOMPRESS_INPLACE_MARGIN
-#define LZ4_DECOMPRESS_INPLACE_MARGIN(srcsize) (((srcsize) >> 8) + 32)
-#endif
struct z_erofs_lz4_decompress_ctx {
struct z_erofs_decompress_req *rq;
diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h
index c8f2ae845bd2..199395ed1c1f 100644
--- a/fs/erofs/erofs_fs.h
+++ b/fs/erofs/erofs_fs.h
@@ -9,6 +9,7 @@
#ifndef __EROFS_FS_H
#define __EROFS_FS_H
+/* to allow for x86 boot sectors and other oddities. */
#define EROFS_SUPER_OFFSET 1024
#define EROFS_FEATURE_COMPAT_SB_CHKSUM 0x00000001
@@ -54,7 +55,7 @@ struct erofs_deviceslot {
/* erofs on-disk super block (currently 128 bytes) */
struct erofs_super_block {
__le32 magic; /* file system magic number */
- __le32 checksum; /* crc32c(super_block) */
+ __le32 checksum; /* crc32c to avoid unexpected on-disk overlap */
__le32 feature_compat;
__u8 blkszbits; /* filesystem block size in bit shift */
__u8 sb_extslots; /* superblock size = 128 + sb_extslots * 16 */
diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c
index 3af96b1e2c2a..0ffd1c63beeb 100644
--- a/fs/erofs/fileio.c
+++ b/fs/erofs/fileio.c
@@ -6,9 +6,10 @@
#include <trace/events/erofs.h>
struct erofs_fileio_rq {
- struct bio_vec bvecs[BIO_MAX_VECS];
+ struct bio_vec bvecs[16];
struct bio bio;
struct kiocb iocb;
+ struct super_block *sb;
};
struct erofs_fileio {
@@ -52,8 +53,9 @@ static void erofs_fileio_rq_submit(struct erofs_fileio_rq *rq)
rq->iocb.ki_pos = rq->bio.bi_iter.bi_sector << SECTOR_SHIFT;
rq->iocb.ki_ioprio = get_current_ioprio();
rq->iocb.ki_complete = erofs_fileio_ki_complete;
- rq->iocb.ki_flags = (rq->iocb.ki_filp->f_mode & FMODE_CAN_ODIRECT) ?
- IOCB_DIRECT : 0;
+ if (test_opt(&EROFS_SB(rq->sb)->opt, DIRECT_IO) &&
+ rq->iocb.ki_filp->f_mode & FMODE_CAN_ODIRECT)
+ rq->iocb.ki_flags = IOCB_DIRECT;
iov_iter_bvec(&iter, ITER_DEST, rq->bvecs, rq->bio.bi_vcnt,
rq->bio.bi_iter.bi_size);
ret = vfs_iocb_iter_read(rq->iocb.ki_filp, &rq->iocb, &iter);
@@ -66,8 +68,9 @@ static struct erofs_fileio_rq *erofs_fileio_rq_alloc(struct erofs_map_dev *mdev)
struct erofs_fileio_rq *rq = kzalloc(sizeof(*rq),
GFP_KERNEL | __GFP_NOFAIL);
- bio_init(&rq->bio, NULL, rq->bvecs, BIO_MAX_VECS, REQ_OP_READ);
- rq->iocb.ki_filp = mdev->m_fp;
+ bio_init(&rq->bio, NULL, rq->bvecs, ARRAY_SIZE(rq->bvecs), REQ_OP_READ);
+ rq->iocb.ki_filp = mdev->m_dif->file;
+ rq->sb = mdev->m_sb;
return rq;
}
diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c
index fda16eedafb5..ce3d8737df85 100644
--- a/fs/erofs/fscache.c
+++ b/fs/erofs/fscache.c
@@ -198,7 +198,7 @@ struct bio *erofs_fscache_bio_alloc(struct erofs_map_dev *mdev)
io = kmalloc(sizeof(*io), GFP_KERNEL | __GFP_NOFAIL);
bio_init(&io->bio, NULL, io->bvecs, BIO_MAX_VECS, REQ_OP_READ);
- io->io.private = mdev->m_fscache->cookie;
+ io->io.private = mdev->m_dif->fscache->cookie;
io->io.end_io = erofs_fscache_bio_endio;
refcount_set(&io->io.ref, 1);
return &io->bio;
@@ -316,7 +316,7 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_rq *req)
if (!io)
return -ENOMEM;
iov_iter_xarray(&io->iter, ITER_DEST, &mapping->i_pages, pos, count);
- ret = erofs_fscache_read_io_async(mdev.m_fscache->cookie,
+ ret = erofs_fscache_read_io_async(mdev.m_dif->fscache->cookie,
mdev.m_pa + (pos - map.m_la), io);
erofs_fscache_req_io_put(io);
@@ -657,7 +657,7 @@ int erofs_fscache_register_fs(struct super_block *sb)
if (IS_ERR(fscache))
return PTR_ERR(fscache);
- sbi->s_fscache = fscache;
+ sbi->dif0.fscache = fscache;
return 0;
}
@@ -665,14 +665,14 @@ void erofs_fscache_unregister_fs(struct super_block *sb)
{
struct erofs_sb_info *sbi = EROFS_SB(sb);
- erofs_fscache_unregister_cookie(sbi->s_fscache);
+ erofs_fscache_unregister_cookie(sbi->dif0.fscache);
if (sbi->domain)
erofs_fscache_domain_put(sbi->domain);
else
fscache_relinquish_volume(sbi->volume, NULL, false);
- sbi->s_fscache = NULL;
+ sbi->dif0.fscache = NULL;
sbi->volume = NULL;
sbi->domain = NULL;
}
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 1c847c30a918..686d835eb533 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -107,6 +107,7 @@ struct erofs_xattr_prefix_item {
};
struct erofs_sb_info {
+ struct erofs_device_info dif0;
struct erofs_mount_opts opt; /* options */
#ifdef CONFIG_EROFS_FS_ZIP
/* list for all registered superblocks, mainly for shrinker */
@@ -124,13 +125,9 @@ struct erofs_sb_info {
struct erofs_sb_lz4_info lz4;
#endif /* CONFIG_EROFS_FS_ZIP */
- struct file *fdev;
struct inode *packed_inode;
struct erofs_dev_context *devs;
- struct dax_device *dax_dev;
- u64 dax_part_off;
u64 total_blocks;
- u32 primarydevice_blocks;
u32 meta_blkaddr;
#ifdef CONFIG_EROFS_FS_XATTR
@@ -166,7 +163,6 @@ struct erofs_sb_info {
/* fscache support */
struct fscache_volume *volume;
- struct erofs_fscache *s_fscache;
struct erofs_domain *domain;
char *fsid;
char *domain_id;
@@ -180,6 +176,7 @@ struct erofs_sb_info {
#define EROFS_MOUNT_POSIX_ACL 0x00000020
#define EROFS_MOUNT_DAX_ALWAYS 0x00000040
#define EROFS_MOUNT_DAX_NEVER 0x00000080
+#define EROFS_MOUNT_DIRECT_IO 0x00000100
#define clear_opt(opt, option) ((opt)->mount_opt &= ~EROFS_MOUNT_##option)
#define set_opt(opt, option) ((opt)->mount_opt |= EROFS_MOUNT_##option)
@@ -187,7 +184,7 @@ struct erofs_sb_info {
static inline bool erofs_is_fileio_mode(struct erofs_sb_info *sbi)
{
- return IS_ENABLED(CONFIG_EROFS_FS_BACKED_BY_FILE) && sbi->fdev;
+ return IS_ENABLED(CONFIG_EROFS_FS_BACKED_BY_FILE) && sbi->dif0.file;
}
static inline bool erofs_is_fscache_mode(struct super_block *sb)
@@ -357,11 +354,9 @@ enum {
};
struct erofs_map_dev {
- struct erofs_fscache *m_fscache;
+ struct super_block *m_sb;
+ struct erofs_device_info *m_dif;
struct block_device *m_bdev;
- struct dax_device *m_daxdev;
- struct file *m_fp;
- u64 m_dax_part_off;
erofs_off_t m_pa;
unsigned int m_deviceid;
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index c235a8e4315e..827b62665649 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -39,29 +39,21 @@ void _erofs_printk(struct super_block *sb, const char *fmt, ...)
static int erofs_superblock_csum_verify(struct super_block *sb, void *sbdata)
{
- size_t len = 1 << EROFS_SB(sb)->blkszbits;
- struct erofs_super_block *dsb;
- u32 expected_crc, crc;
+ struct erofs_super_block *dsb = sbdata + EROFS_SUPER_OFFSET;
+ u32 len = 1 << EROFS_SB(sb)->blkszbits, crc;
if (len > EROFS_SUPER_OFFSET)
len -= EROFS_SUPER_OFFSET;
+ len -= offsetof(struct erofs_super_block, checksum) +
+ sizeof(dsb->checksum);
- dsb = kmemdup(sbdata + EROFS_SUPER_OFFSET, len, GFP_KERNEL);
- if (!dsb)
- return -ENOMEM;
-
- expected_crc = le32_to_cpu(dsb->checksum);
- dsb->checksum = 0;
- /* to allow for x86 boot sectors and other oddities. */
- crc = crc32c(~0, dsb, len);
- kfree(dsb);
-
- if (crc != expected_crc) {
- erofs_err(sb, "invalid checksum 0x%08x, 0x%08x expected",
- crc, expected_crc);
- return -EBADMSG;
- }
- return 0;
+ /* skip .magic(pre-verified) and .checksum(0) fields */
+ crc = crc32c(0x5045B54A, (&dsb->checksum) + 1, len);
+ if (crc == le32_to_cpu(dsb->checksum))
+ return 0;
+ erofs_err(sb, "invalid checksum 0x%08x, 0x%08x expected",
+ crc, le32_to_cpu(dsb->checksum));
+ return -EBADMSG;
}
static void erofs_inode_init_once(void *ptr)
@@ -203,7 +195,7 @@ static int erofs_scan_devices(struct super_block *sb,
struct erofs_device_info *dif;
int id, err = 0;
- sbi->total_blocks = sbi->primarydevice_blocks;
+ sbi->total_blocks = sbi->dif0.blocks;
if (!erofs_sb_has_device_table(sbi))
ondisk_extradevs = 0;
else
@@ -307,7 +299,7 @@ static int erofs_read_superblock(struct super_block *sb)
sbi->sb_size);
goto out;
}
- sbi->primarydevice_blocks = le32_to_cpu(dsb->blocks);
+ sbi->dif0.blocks = le32_to_cpu(dsb->blocks);
sbi->meta_blkaddr = le32_to_cpu(dsb->meta_blkaddr);
#ifdef CONFIG_EROFS_FS_XATTR
sbi->xattr_blkaddr = le32_to_cpu(dsb->xattr_blkaddr);
@@ -364,14 +356,8 @@ static void erofs_default_options(struct erofs_sb_info *sbi)
}
enum {
- Opt_user_xattr,
- Opt_acl,
- Opt_cache_strategy,
- Opt_dax,
- Opt_dax_enum,
- Opt_device,
- Opt_fsid,
- Opt_domain_id,
+ Opt_user_xattr, Opt_acl, Opt_cache_strategy, Opt_dax, Opt_dax_enum,
+ Opt_device, Opt_fsid, Opt_domain_id, Opt_directio,
Opt_err
};
@@ -398,6 +384,7 @@ static const struct fs_parameter_spec erofs_fs_parameters[] = {
fsparam_string("device", Opt_device),
fsparam_string("fsid", Opt_fsid),
fsparam_string("domain_id", Opt_domain_id),
+ fsparam_flag_no("directio", Opt_directio),
{}
};
@@ -511,8 +498,16 @@ static int erofs_fc_parse_param(struct fs_context *fc,
errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name);
break;
#endif
- default:
- return -ENOPARAM;
+ case Opt_directio:
+#ifdef CONFIG_EROFS_FS_BACKED_BY_FILE
+ if (result.boolean)
+ set_opt(&sbi->opt, DIRECT_IO);
+ else
+ clear_opt(&sbi->opt, DIRECT_IO);
+#else
+ errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name);
+#endif
+ break;
}
return 0;
}
@@ -602,9 +597,8 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
return -EINVAL;
}
- sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev,
- &sbi->dax_part_off,
- NULL, NULL);
+ sbi->dif0.dax_dev = fs_dax_get_by_bdev(sb->s_bdev,
+ &sbi->dif0.dax_part_off, NULL, NULL);
}
err = erofs_read_superblock(sb);
@@ -627,7 +621,7 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
}
if (test_opt(&sbi->opt, DAX_ALWAYS)) {
- if (!sbi->dax_dev) {
+ if (!sbi->dif0.dax_dev) {
errorfc(fc, "DAX unsupported by block device. Turning off DAX.");
clear_opt(&sbi->opt, DAX_ALWAYS);
} else if (sbi->blkszbits != PAGE_SHIFT) {
@@ -703,16 +697,18 @@ static int erofs_fc_get_tree(struct fs_context *fc)
GET_TREE_BDEV_QUIET_LOOKUP : 0);
#ifdef CONFIG_EROFS_FS_BACKED_BY_FILE
if (ret == -ENOTBLK) {
+ struct file *file;
+
if (!fc->source)
return invalf(fc, "No source specified");
- sbi->fdev = filp_open(fc->source, O_RDONLY | O_LARGEFILE, 0);
- if (IS_ERR(sbi->fdev))
- return PTR_ERR(sbi->fdev);
+ file = filp_open(fc->source, O_RDONLY | O_LARGEFILE, 0);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+ sbi->dif0.file = file;
- if (S_ISREG(file_inode(sbi->fdev)->i_mode) &&
- sbi->fdev->f_mapping->a_ops->read_folio)
+ if (S_ISREG(file_inode(sbi->dif0.file)->i_mode) &&
+ sbi->dif0.file->f_mapping->a_ops->read_folio)
return get_tree_nodev(fc, erofs_fc_fill_super);
- fput(sbi->fdev);
}
#endif
return ret;
@@ -763,19 +759,24 @@ static void erofs_free_dev_context(struct erofs_dev_context *devs)
kfree(devs);
}
-static void erofs_fc_free(struct fs_context *fc)
+static void erofs_sb_free(struct erofs_sb_info *sbi)
{
- struct erofs_sb_info *sbi = fc->s_fs_info;
-
- if (!sbi)
- return;
-
erofs_free_dev_context(sbi->devs);
kfree(sbi->fsid);
kfree(sbi->domain_id);
+ if (sbi->dif0.file)
+ fput(sbi->dif0.file);
kfree(sbi);
}
+static void erofs_fc_free(struct fs_context *fc)
+{
+ struct erofs_sb_info *sbi = fc->s_fs_info;
+
+ if (sbi) /* free here if an error occurs before transferring to sb */
+ erofs_sb_free(sbi);
+}
+
static const struct fs_context_operations erofs_context_ops = {
.parse_param = erofs_fc_parse_param,
.get_tree = erofs_fc_get_tree,
@@ -809,19 +810,14 @@ static void erofs_kill_sb(struct super_block *sb)
{
struct erofs_sb_info *sbi = EROFS_SB(sb);
- if ((IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && sbi->fsid) || sbi->fdev)
+ if ((IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && sbi->fsid) ||
+ sbi->dif0.file)
kill_anon_super(sb);
else
kill_block_super(sb);
-
- erofs_free_dev_context(sbi->devs);
- fs_put_dax(sbi->dax_dev, NULL);
+ fs_put_dax(sbi->dif0.dax_dev, NULL);
erofs_fscache_unregister_fs(sb);
- kfree(sbi->fsid);
- kfree(sbi->domain_id);
- if (sbi->fdev)
- fput(sbi->fdev);
- kfree(sbi);
+ erofs_sb_free(sbi);
sb->s_fs_info = NULL;
}
@@ -947,6 +943,8 @@ static int erofs_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, ",dax=always");
if (test_opt(opt, DAX_NEVER))
seq_puts(seq, ",dax=never");
+ if (erofs_is_fileio_mode(sbi) && test_opt(opt, DIRECT_IO))
+ seq_puts(seq, ",directio");
#ifdef CONFIG_EROFS_FS_ONDEMAND
if (sbi->fsid)
seq_printf(seq, ",fsid=%s", sbi->fsid);
diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c
index a90d7d649739..df2777e05661 100644
--- a/fs/erofs/xattr.c
+++ b/fs/erofs/xattr.c
@@ -407,7 +407,7 @@ int erofs_getxattr(struct inode *inode, int index, const char *name,
}
it.index = index;
- it.name = (struct qstr)QSTR_INIT(name, strlen(name));
+ it.name = QSTR(name);
if (it.name.len > EROFS_NAME_LEN)
return -ERANGE;
@@ -478,7 +478,7 @@ int erofs_xattr_prefixes_init(struct super_block *sb)
if (!sbi->xattr_prefix_count)
return 0;
- pfs = kzalloc(sbi->xattr_prefix_count * sizeof(*pfs), GFP_KERNEL);
+ pfs = kcalloc(sbi->xattr_prefix_count, sizeof(*pfs), GFP_KERNEL);
if (!pfs)
return -ENOMEM;
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 01f147505487..d771e06db738 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -12,12 +12,6 @@
#define Z_EROFS_PCLUSTER_MAX_PAGES (Z_EROFS_PCLUSTER_MAX_SIZE / PAGE_SIZE)
#define Z_EROFS_INLINE_BVECS 2
-/*
- * let's leave a type here in case of introducing
- * another tagged pointer later.
- */
-typedef void *z_erofs_next_pcluster_t;
-
struct z_erofs_bvec {
struct page *page;
int offset;
@@ -48,7 +42,7 @@ struct z_erofs_pcluster {
struct lockref lockref;
/* A: point to next chained pcluster or TAILs */
- z_erofs_next_pcluster_t next;
+ struct z_erofs_pcluster *next;
/* I: start block address of this pcluster */
erofs_off_t index;
@@ -94,12 +88,11 @@ struct z_erofs_pcluster {
/* the end of a chain of pclusters */
#define Z_EROFS_PCLUSTER_TAIL ((void *) 0x700 + POISON_POINTER_DELTA)
-#define Z_EROFS_PCLUSTER_NIL (NULL)
struct z_erofs_decompressqueue {
struct super_block *sb;
+ struct z_erofs_pcluster *head;
atomic_t pending_bios;
- z_erofs_next_pcluster_t head;
union {
struct completion done;
@@ -320,7 +313,7 @@ static void erofs_destroy_percpu_workers(void)
static struct kthread_worker *erofs_init_percpu_worker(int cpu)
{
struct kthread_worker *worker =
- kthread_create_worker_on_cpu(cpu, 0, "erofs_worker/%u", cpu);
+ kthread_run_worker_on_cpu(cpu, 0, "erofs_worker/%u");
if (IS_ERR(worker))
return worker;
@@ -462,39 +455,32 @@ err_decompressor:
}
enum z_erofs_pclustermode {
+ /* It has previously been linked into another processing chain */
Z_EROFS_PCLUSTER_INFLIGHT,
/*
- * a weak form of Z_EROFS_PCLUSTER_FOLLOWED, the difference is that it
- * could be dispatched into bypass queue later due to uptodated managed
- * pages. All related online pages cannot be reused for inplace I/O (or
- * bvpage) since it can be directly decoded without I/O submission.
+ * A weaker form of Z_EROFS_PCLUSTER_FOLLOWED; the difference is that it
+ * may be dispatched to the bypass queue later due to uptodated managed
+ * folios. All file-backed folios related to this pcluster cannot be
+ * reused for in-place I/O (or bvpage) since the pcluster may be decoded
+ * in a separate queue (and thus out of order).
*/
Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE,
/*
- * The pcluster was just linked to a decompression chain by us. It can
- * also be linked with the remaining pclusters, which means if the
- * processing page is the tail page of a pcluster, this pcluster can
- * safely use the whole page (since the previous pcluster is within the
- * same chain) for in-place I/O, as illustrated below:
- * ___________________________________________________
- * | tail (partial) page | head (partial) page |
- * | (of the current pcl) | (of the previous pcl) |
- * |___PCLUSTER_FOLLOWED___|_____PCLUSTER_FOLLOWED_____|
- *
- * [ (*) the page above can be used as inplace I/O. ]
+ * The pcluster has just been linked to our processing chain.
+ * File-backed folios (except for the head page) related to it can be
+ * used for in-place I/O (or bvpage).
*/
Z_EROFS_PCLUSTER_FOLLOWED,
};
-struct z_erofs_decompress_frontend {
+struct z_erofs_frontend {
struct inode *const inode;
struct erofs_map_blocks map;
struct z_erofs_bvec_iter biter;
struct page *pagepool;
struct page *candidate_bvpage;
- struct z_erofs_pcluster *pcl;
- z_erofs_next_pcluster_t owned_head;
+ struct z_erofs_pcluster *pcl, *head;
enum z_erofs_pclustermode mode;
erofs_off_t headoffset;
@@ -503,11 +489,11 @@ struct z_erofs_decompress_frontend {
unsigned int icur;
};
-#define DECOMPRESS_FRONTEND_INIT(__i) { \
- .inode = __i, .owned_head = Z_EROFS_PCLUSTER_TAIL, \
- .mode = Z_EROFS_PCLUSTER_FOLLOWED }
+#define Z_EROFS_DEFINE_FRONTEND(fe, i, ho) struct z_erofs_frontend fe = { \
+ .inode = i, .head = Z_EROFS_PCLUSTER_TAIL, \
+ .mode = Z_EROFS_PCLUSTER_FOLLOWED, .headoffset = ho }
-static bool z_erofs_should_alloc_cache(struct z_erofs_decompress_frontend *fe)
+static bool z_erofs_should_alloc_cache(struct z_erofs_frontend *fe)
{
unsigned int cachestrategy = EROFS_I_SB(fe->inode)->opt.cache_strategy;
@@ -524,19 +510,17 @@ static bool z_erofs_should_alloc_cache(struct z_erofs_decompress_frontend *fe)
return false;
}
-static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe)
+static void z_erofs_bind_cache(struct z_erofs_frontend *fe)
{
struct address_space *mc = MNGD_MAPPING(EROFS_I_SB(fe->inode));
struct z_erofs_pcluster *pcl = fe->pcl;
unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
bool shouldalloc = z_erofs_should_alloc_cache(fe);
- bool standalone = true;
- /*
- * optimistic allocation without direct reclaim since inplace I/O
- * can be used if low memory otherwise.
- */
+ bool may_bypass = true;
+ /* Optimistic allocation, as in-place I/O can be used as a fallback */
gfp_t gfp = (mapping_gfp_mask(mc) & ~__GFP_DIRECT_RECLAIM) |
__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
+ struct folio *folio, *newfolio;
unsigned int i;
if (i_blocksize(fe->inode) != PAGE_SIZE ||
@@ -544,47 +528,42 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe)
return;
for (i = 0; i < pclusterpages; ++i) {
- struct page *page, *newpage;
-
/* Inaccurate check w/o locking to avoid unneeded lookups */
if (READ_ONCE(pcl->compressed_bvecs[i].page))
continue;
- page = find_get_page(mc, pcl->index + i);
- if (!page) {
- /* I/O is needed, no possible to decompress directly */
- standalone = false;
+ folio = filemap_get_folio(mc, pcl->index + i);
+ if (IS_ERR(folio)) {
+ may_bypass = false;
if (!shouldalloc)
continue;
/*
- * Try cached I/O if allocation succeeds or fallback to
- * in-place I/O instead to avoid any direct reclaim.
+ * Allocate a managed folio for cached I/O, or it may be
+ * then filled with a file-backed folio for in-place I/O
*/
- newpage = erofs_allocpage(&fe->pagepool, gfp);
- if (!newpage)
+ newfolio = filemap_alloc_folio(gfp, 0);
+ if (!newfolio)
continue;
- set_page_private(newpage, Z_EROFS_PREALLOCATED_PAGE);
+ newfolio->private = Z_EROFS_PREALLOCATED_FOLIO;
+ folio = NULL;
}
spin_lock(&pcl->lockref.lock);
if (!pcl->compressed_bvecs[i].page) {
- pcl->compressed_bvecs[i].page = page ? page : newpage;
+ pcl->compressed_bvecs[i].page =
+ folio_page(folio ?: newfolio, 0);
spin_unlock(&pcl->lockref.lock);
continue;
}
spin_unlock(&pcl->lockref.lock);
-
- if (page)
- put_page(page);
- else if (newpage)
- erofs_pagepool_add(&fe->pagepool, newpage);
+ folio_put(folio ?: newfolio);
}
/*
- * don't do inplace I/O if all compressed pages are available in
- * managed cache since it can be moved to the bypass queue instead.
+ * Don't perform in-place I/O if all compressed pages are available in
+ * the managed cache, as the pcluster can be moved to the bypass queue.
*/
- if (standalone)
+ if (may_bypass)
fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE;
}
@@ -681,7 +660,7 @@ int erofs_init_managed_cache(struct super_block *sb)
}
/* callers must be with pcluster lock held */
-static int z_erofs_attach_page(struct z_erofs_decompress_frontend *fe,
+static int z_erofs_attach_page(struct z_erofs_frontend *fe,
struct z_erofs_bvec *bvec, bool exclusive)
{
struct z_erofs_pcluster *pcl = fe->pcl;
@@ -727,7 +706,7 @@ static bool z_erofs_get_pcluster(struct z_erofs_pcluster *pcl)
return true;
}
-static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe)
+static int z_erofs_register_pcluster(struct z_erofs_frontend *fe)
{
struct erofs_map_blocks *map = &fe->map;
struct super_block *sb = fe->inode->i_sb;
@@ -747,14 +726,11 @@ static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe)
if (IS_ERR(pcl))
return PTR_ERR(pcl);
- spin_lock_init(&pcl->lockref.lock);
- pcl->lockref.count = 1; /* one ref for this request */
+ lockref_init(&pcl->lockref); /* one ref for this request */
pcl->algorithmformat = map->m_algorithmformat;
pcl->length = 0;
pcl->partial = true;
-
- /* new pclusters should be claimed as type 1, primary and followed */
- pcl->next = fe->owned_head;
+ pcl->next = fe->head;
pcl->pageofs_out = map->m_la & ~PAGE_MASK;
fe->mode = Z_EROFS_PCLUSTER_FOLLOWED;
@@ -790,8 +766,7 @@ static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe)
goto err_out;
}
}
- fe->owned_head = &pcl->next;
- fe->pcl = pcl;
+ fe->head = fe->pcl = pcl;
return 0;
err_out:
@@ -800,7 +775,7 @@ err_out:
return err;
}
-static int z_erofs_pcluster_begin(struct z_erofs_decompress_frontend *fe)
+static int z_erofs_pcluster_begin(struct z_erofs_frontend *fe)
{
struct erofs_map_blocks *map = &fe->map;
struct super_block *sb = fe->inode->i_sb;
@@ -810,7 +785,7 @@ static int z_erofs_pcluster_begin(struct z_erofs_decompress_frontend *fe)
DBG_BUGON(fe->pcl);
/* must be Z_EROFS_PCLUSTER_TAIL or pointed to previous pcluster */
- DBG_BUGON(fe->owned_head == Z_EROFS_PCLUSTER_NIL);
+ DBG_BUGON(!fe->head);
if (!(map->m_flags & EROFS_MAP_META)) {
while (1) {
@@ -838,10 +813,9 @@ static int z_erofs_pcluster_begin(struct z_erofs_decompress_frontend *fe)
if (ret == -EEXIST) {
mutex_lock(&fe->pcl->lock);
/* check if this pcluster hasn't been linked into any chain. */
- if (cmpxchg(&fe->pcl->next, Z_EROFS_PCLUSTER_NIL,
- fe->owned_head) == Z_EROFS_PCLUSTER_NIL) {
+ if (!cmpxchg(&fe->pcl->next, NULL, fe->head)) {
/* .. so it can be attached to our submission chain */
- fe->owned_head = &fe->pcl->next;
+ fe->head = fe->pcl;
fe->mode = Z_EROFS_PCLUSTER_FOLLOWED;
} else { /* otherwise, it belongs to an inflight chain */
fe->mode = Z_EROFS_PCLUSTER_INFLIGHT;
@@ -874,14 +848,9 @@ static int z_erofs_pcluster_begin(struct z_erofs_decompress_frontend *fe)
return 0;
}
-/*
- * keep in mind that no referenced pclusters will be freed
- * only after a RCU grace period.
- */
static void z_erofs_rcu_callback(struct rcu_head *head)
{
- z_erofs_free_pcluster(container_of(head,
- struct z_erofs_pcluster, rcu));
+ z_erofs_free_pcluster(container_of(head, struct z_erofs_pcluster, rcu));
}
static bool __erofs_try_to_release_pcluster(struct erofs_sb_info *sbi,
@@ -923,12 +892,10 @@ static bool erofs_try_to_release_pcluster(struct erofs_sb_info *sbi,
return free;
}
-unsigned long z_erofs_shrink_scan(struct erofs_sb_info *sbi,
- unsigned long nr_shrink)
+unsigned long z_erofs_shrink_scan(struct erofs_sb_info *sbi, unsigned long nr)
{
struct z_erofs_pcluster *pcl;
- unsigned int freed = 0;
- unsigned long index;
+ unsigned long index, freed = 0;
xa_lock(&sbi->managed_pslots);
xa_for_each(&sbi->managed_pslots, index, pcl) {
@@ -938,7 +905,7 @@ unsigned long z_erofs_shrink_scan(struct erofs_sb_info *sbi,
xa_unlock(&sbi->managed_pslots);
++freed;
- if (!--nr_shrink)
+ if (!--nr)
return freed;
xa_lock(&sbi->managed_pslots);
}
@@ -967,7 +934,7 @@ static void z_erofs_put_pcluster(struct erofs_sb_info *sbi,
call_rcu(&pcl->rcu, z_erofs_rcu_callback);
}
-static void z_erofs_pcluster_end(struct z_erofs_decompress_frontend *fe)
+static void z_erofs_pcluster_end(struct z_erofs_frontend *fe)
{
struct z_erofs_pcluster *pcl = fe->pcl;
@@ -980,13 +947,9 @@ static void z_erofs_pcluster_end(struct z_erofs_decompress_frontend *fe)
if (fe->candidate_bvpage)
fe->candidate_bvpage = NULL;
- /*
- * if all pending pages are added, don't hold its reference
- * any longer if the pcluster isn't hosted by ourselves.
- */
+ /* Drop refcount if it doesn't belong to our processing chain */
if (fe->mode < Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE)
z_erofs_put_pcluster(EROFS_I_SB(fe->inode), pcl, false);
-
fe->pcl = NULL;
}
@@ -1015,7 +978,7 @@ static int z_erofs_read_fragment(struct super_block *sb, struct folio *folio,
return 0;
}
-static int z_erofs_scan_folio(struct z_erofs_decompress_frontend *f,
+static int z_erofs_scan_folio(struct z_erofs_frontend *f,
struct folio *folio, bool ra)
{
struct inode *const inode = f->inode;
@@ -1130,7 +1093,7 @@ static bool z_erofs_page_is_invalidated(struct page *page)
return !page_folio(page)->mapping && !z_erofs_is_shortlived_page(page);
}
-struct z_erofs_decompress_backend {
+struct z_erofs_backend {
struct page *onstack_pages[Z_EROFS_ONSTACK_PAGES];
struct super_block *sb;
struct z_erofs_pcluster *pcl;
@@ -1150,7 +1113,7 @@ struct z_erofs_bvec_item {
struct list_head list;
};
-static void z_erofs_do_decompressed_bvec(struct z_erofs_decompress_backend *be,
+static void z_erofs_do_decompressed_bvec(struct z_erofs_backend *be,
struct z_erofs_bvec *bvec)
{
struct z_erofs_bvec_item *item;
@@ -1173,8 +1136,7 @@ static void z_erofs_do_decompressed_bvec(struct z_erofs_decompress_backend *be,
list_add(&item->list, &be->decompressed_secondary_bvecs);
}
-static void z_erofs_fill_other_copies(struct z_erofs_decompress_backend *be,
- int err)
+static void z_erofs_fill_other_copies(struct z_erofs_backend *be, int err)
{
unsigned int off0 = be->pcl->pageofs_out;
struct list_head *p, *n;
@@ -1215,7 +1177,7 @@ static void z_erofs_fill_other_copies(struct z_erofs_decompress_backend *be,
}
}
-static void z_erofs_parse_out_bvecs(struct z_erofs_decompress_backend *be)
+static void z_erofs_parse_out_bvecs(struct z_erofs_backend *be)
{
struct z_erofs_pcluster *pcl = be->pcl;
struct z_erofs_bvec_iter biter;
@@ -1240,8 +1202,7 @@ static void z_erofs_parse_out_bvecs(struct z_erofs_decompress_backend *be)
z_erofs_put_shortlivedpage(be->pagepool, old_bvpage);
}
-static int z_erofs_parse_in_bvecs(struct z_erofs_decompress_backend *be,
- bool *overlapped)
+static int z_erofs_parse_in_bvecs(struct z_erofs_backend *be, bool *overlapped)
{
struct z_erofs_pcluster *pcl = be->pcl;
unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
@@ -1276,8 +1237,7 @@ static int z_erofs_parse_in_bvecs(struct z_erofs_decompress_backend *be,
return err;
}
-static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
- int err)
+static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err)
{
struct erofs_sb_info *const sbi = EROFS_SB(be->sb);
struct z_erofs_pcluster *pcl = be->pcl;
@@ -1394,7 +1354,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
pcl->vcnt = 0;
/* pcluster lock MUST be taken before the following line */
- WRITE_ONCE(pcl->next, Z_EROFS_PCLUSTER_NIL);
+ WRITE_ONCE(pcl->next, NULL);
mutex_unlock(&pcl->lock);
if (z_erofs_is_inline_pcluster(pcl))
@@ -1407,21 +1367,19 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
static int z_erofs_decompress_queue(const struct z_erofs_decompressqueue *io,
struct page **pagepool)
{
- struct z_erofs_decompress_backend be = {
+ struct z_erofs_backend be = {
.sb = io->sb,
.pagepool = pagepool,
.decompressed_secondary_bvecs =
LIST_HEAD_INIT(be.decompressed_secondary_bvecs),
+ .pcl = io->head,
};
- z_erofs_next_pcluster_t owned = io->head;
+ struct z_erofs_pcluster *next;
int err = io->eio ? -EIO : 0;
- while (owned != Z_EROFS_PCLUSTER_TAIL) {
- DBG_BUGON(owned == Z_EROFS_PCLUSTER_NIL);
-
- be.pcl = container_of(owned, struct z_erofs_pcluster, next);
- owned = READ_ONCE(be.pcl->next);
-
+ for (; be.pcl != Z_EROFS_PCLUSTER_TAIL; be.pcl = next) {
+ DBG_BUGON(!be.pcl);
+ next = READ_ONCE(be.pcl->next);
err = z_erofs_decompress_pcluster(&be, err) ?: err;
}
return err;
@@ -1487,7 +1445,7 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
}
static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
- struct z_erofs_decompress_frontend *f,
+ struct z_erofs_frontend *f,
struct z_erofs_pcluster *pcl,
unsigned int nr,
struct address_space *mc)
@@ -1514,12 +1472,8 @@ repeat:
DBG_BUGON(z_erofs_is_shortlived_page(bvec->bv_page));
folio = page_folio(zbv.page);
- /*
- * Handle preallocated cached folios. We tried to allocate such folios
- * without triggering direct reclaim. If allocation failed, inplace
- * file-backed folios will be used instead.
- */
- if (folio->private == (void *)Z_EROFS_PREALLOCATED_PAGE) {
+ /* For preallocated managed folios, add them to page cache here */
+ if (folio->private == Z_EROFS_PREALLOCATED_FOLIO) {
tocache = true;
goto out_tocache;
}
@@ -1631,18 +1585,13 @@ enum {
NR_JOBQUEUES,
};
-static void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl,
- z_erofs_next_pcluster_t qtail[],
- z_erofs_next_pcluster_t owned_head)
+static void z_erofs_move_to_bypass_queue(struct z_erofs_pcluster *pcl,
+ struct z_erofs_pcluster *next,
+ struct z_erofs_pcluster **qtail[])
{
- z_erofs_next_pcluster_t *const submit_qtail = qtail[JQ_SUBMIT];
- z_erofs_next_pcluster_t *const bypass_qtail = qtail[JQ_BYPASS];
-
WRITE_ONCE(pcl->next, Z_EROFS_PCLUSTER_TAIL);
-
- WRITE_ONCE(*submit_qtail, owned_head);
- WRITE_ONCE(*bypass_qtail, &pcl->next);
-
+ WRITE_ONCE(*qtail[JQ_SUBMIT], next);
+ WRITE_ONCE(*qtail[JQ_BYPASS], pcl);
qtail[JQ_BYPASS] = &pcl->next;
}
@@ -1671,15 +1620,15 @@ static void z_erofs_endio(struct bio *bio)
bio_put(bio);
}
-static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
+static void z_erofs_submit_queue(struct z_erofs_frontend *f,
struct z_erofs_decompressqueue *fgq,
bool *force_fg, bool readahead)
{
struct super_block *sb = f->inode->i_sb;
struct address_space *mc = MNGD_MAPPING(EROFS_SB(sb));
- z_erofs_next_pcluster_t qtail[NR_JOBQUEUES];
+ struct z_erofs_pcluster **qtail[NR_JOBQUEUES];
struct z_erofs_decompressqueue *q[NR_JOBQUEUES];
- z_erofs_next_pcluster_t owned_head = f->owned_head;
+ struct z_erofs_pcluster *pcl, *next;
/* bio is NULL initially, so no need to initialize last_{index,bdev} */
erofs_off_t last_pa;
unsigned int nr_bios = 0;
@@ -1695,22 +1644,19 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
qtail[JQ_SUBMIT] = &q[JQ_SUBMIT]->head;
/* by default, all need io submission */
- q[JQ_SUBMIT]->head = owned_head;
+ q[JQ_SUBMIT]->head = next = f->head;
do {
struct erofs_map_dev mdev;
- struct z_erofs_pcluster *pcl;
erofs_off_t cur, end;
struct bio_vec bvec;
unsigned int i = 0;
bool bypass = true;
- DBG_BUGON(owned_head == Z_EROFS_PCLUSTER_NIL);
- pcl = container_of(owned_head, struct z_erofs_pcluster, next);
- owned_head = READ_ONCE(pcl->next);
-
+ pcl = next;
+ next = READ_ONCE(pcl->next);
if (z_erofs_is_inline_pcluster(pcl)) {
- move_to_bypass_jobqueue(pcl, qtail, owned_head);
+ z_erofs_move_to_bypass_queue(pcl, next, qtail);
continue;
}
@@ -1782,8 +1728,8 @@ drain_io:
if (!bypass)
qtail[JQ_SUBMIT] = &pcl->next;
else
- move_to_bypass_jobqueue(pcl, qtail, owned_head);
- } while (owned_head != Z_EROFS_PCLUSTER_TAIL);
+ z_erofs_move_to_bypass_queue(pcl, next, qtail);
+ } while (next != Z_EROFS_PCLUSTER_TAIL);
if (bio) {
if (erofs_is_fileio_mode(EROFS_SB(sb)))
@@ -1792,9 +1738,9 @@ drain_io:
erofs_fscache_submit_bio(bio);
else
submit_bio(bio);
- if (memstall)
- psi_memstall_leave(&pflags);
}
+ if (memstall)
+ psi_memstall_leave(&pflags);
/*
* although background is preferred, no one is pending for submission.
@@ -1807,17 +1753,16 @@ drain_io:
z_erofs_decompress_kickoff(q[JQ_SUBMIT], nr_bios);
}
-static int z_erofs_runqueue(struct z_erofs_decompress_frontend *f,
- unsigned int ra_folios)
+static int z_erofs_runqueue(struct z_erofs_frontend *f, unsigned int rapages)
{
struct z_erofs_decompressqueue io[NR_JOBQUEUES];
struct erofs_sb_info *sbi = EROFS_I_SB(f->inode);
- bool force_fg = z_erofs_is_sync_decompress(sbi, ra_folios);
+ bool force_fg = z_erofs_is_sync_decompress(sbi, rapages);
int err;
- if (f->owned_head == Z_EROFS_PCLUSTER_TAIL)
+ if (f->head == Z_EROFS_PCLUSTER_TAIL)
return 0;
- z_erofs_submit_queue(f, io, &force_fg, !!ra_folios);
+ z_erofs_submit_queue(f, io, &force_fg, !!rapages);
/* handle bypass queue (no i/o pclusters) immediately */
err = z_erofs_decompress_queue(&io[JQ_BYPASS], &f->pagepool);
@@ -1835,7 +1780,7 @@ static int z_erofs_runqueue(struct z_erofs_decompress_frontend *f,
* Since partial uptodate is still unimplemented for now, we have to use
* approximate readmore strategies as a start.
*/
-static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f,
+static void z_erofs_pcluster_readmore(struct z_erofs_frontend *f,
struct readahead_control *rac, bool backmost)
{
struct inode *inode = f->inode;
@@ -1890,12 +1835,10 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f,
static int z_erofs_read_folio(struct file *file, struct folio *folio)
{
struct inode *const inode = folio->mapping->host;
- struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
+ Z_EROFS_DEFINE_FRONTEND(f, inode, folio_pos(folio));
int err;
trace_erofs_read_folio(folio, false);
- f.headoffset = (erofs_off_t)folio->index << PAGE_SHIFT;
-
z_erofs_pcluster_readmore(&f, NULL, true);
err = z_erofs_scan_folio(&f, folio, false);
z_erofs_pcluster_readmore(&f, NULL, false);
@@ -1915,17 +1858,14 @@ static int z_erofs_read_folio(struct file *file, struct folio *folio)
static void z_erofs_readahead(struct readahead_control *rac)
{
struct inode *const inode = rac->mapping->host;
- struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
+ Z_EROFS_DEFINE_FRONTEND(f, inode, readahead_pos(rac));
struct folio *head = NULL, *folio;
- unsigned int nr_folios;
+ unsigned int nrpages = readahead_count(rac);
int err;
- f.headoffset = readahead_pos(rac);
-
z_erofs_pcluster_readmore(&f, rac, true);
- nr_folios = readahead_count(rac);
- trace_erofs_readpages(inode, readahead_index(rac), nr_folios, false);
-
+ nrpages = readahead_count(rac);
+ trace_erofs_readpages(inode, readahead_index(rac), nrpages, false);
while ((folio = readahead_folio(rac))) {
folio->private = head;
head = folio;
@@ -1944,7 +1884,7 @@ static void z_erofs_readahead(struct readahead_control *rac)
z_erofs_pcluster_readmore(&f, rac, false);
z_erofs_pcluster_end(&f);
- (void)z_erofs_runqueue(&f, nr_folios);
+ (void)z_erofs_runqueue(&f, nrpages);
erofs_put_metabuf(&f.map.buf);
erofs_release_pages(&f.pagepool);
}
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index 4535f2f0a014..689437e99a5a 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -97,17 +97,48 @@ static int get_compacted_la_distance(unsigned int lobits,
return d1;
}
-static int unpack_compacted_index(struct z_erofs_maprecorder *m,
- unsigned int amortizedshift,
- erofs_off_t pos, bool lookahead)
+static int z_erofs_load_compact_lcluster(struct z_erofs_maprecorder *m,
+ unsigned long lcn, bool lookahead)
{
- struct erofs_inode *const vi = EROFS_I(m->inode);
+ struct inode *const inode = m->inode;
+ struct erofs_inode *const vi = EROFS_I(inode);
+ const erofs_off_t ebase = sizeof(struct z_erofs_map_header) +
+ ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, 8);
const unsigned int lclusterbits = vi->z_logical_clusterbits;
+ const unsigned int totalidx = erofs_iblks(inode);
+ unsigned int compacted_4b_initial, compacted_2b, amortizedshift;
unsigned int vcnt, lo, lobits, encodebits, nblk, bytes;
- bool big_pcluster;
+ bool big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1;
+ erofs_off_t pos;
u8 *in, type;
int i;
+ if (lcn >= totalidx || lclusterbits > 14)
+ return -EINVAL;
+
+ m->lcn = lcn;
+ /* used to align to 32-byte (compacted_2b) alignment */
+ compacted_4b_initial = ((32 - ebase % 32) / 4) & 7;
+ compacted_2b = 0;
+ if ((vi->z_advise & Z_EROFS_ADVISE_COMPACTED_2B) &&
+ compacted_4b_initial < totalidx)
+ compacted_2b = rounddown(totalidx - compacted_4b_initial, 16);
+
+ pos = ebase;
+ amortizedshift = 2; /* compact_4b */
+ if (lcn >= compacted_4b_initial) {
+ pos += compacted_4b_initial * 4;
+ lcn -= compacted_4b_initial;
+ if (lcn < compacted_2b) {
+ amortizedshift = 1;
+ } else {
+ pos += compacted_2b * 2;
+ lcn -= compacted_2b;
+ }
+ }
+ pos += lcn * (1 << amortizedshift);
+
+ /* figure out the lcluster count in this pack */
if (1 << amortizedshift == 4 && lclusterbits <= 14)
vcnt = 2;
else if (1 << amortizedshift == 2 && lclusterbits <= 12)
@@ -122,7 +153,6 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m,
/* it doesn't equal to round_up(..) */
m->nextpackoff = round_down(pos, vcnt << amortizedshift) +
(vcnt << amortizedshift);
- big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1;
lobits = max(lclusterbits, ilog2(Z_EROFS_LI_D0_CBLKCNT) + 1U);
encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt;
bytes = pos & ((vcnt << amortizedshift) - 1);
@@ -207,53 +237,6 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m,
return 0;
}
-static int z_erofs_load_compact_lcluster(struct z_erofs_maprecorder *m,
- unsigned long lcn, bool lookahead)
-{
- struct inode *const inode = m->inode;
- struct erofs_inode *const vi = EROFS_I(inode);
- const erofs_off_t ebase = sizeof(struct z_erofs_map_header) +
- ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, 8);
- unsigned int totalidx = erofs_iblks(inode);
- unsigned int compacted_4b_initial, compacted_2b;
- unsigned int amortizedshift;
- erofs_off_t pos;
-
- if (lcn >= totalidx || vi->z_logical_clusterbits > 14)
- return -EINVAL;
-
- m->lcn = lcn;
- /* used to align to 32-byte (compacted_2b) alignment */
- compacted_4b_initial = (32 - ebase % 32) / 4;
- if (compacted_4b_initial == 32 / 4)
- compacted_4b_initial = 0;
-
- if ((vi->z_advise & Z_EROFS_ADVISE_COMPACTED_2B) &&
- compacted_4b_initial < totalidx)
- compacted_2b = rounddown(totalidx - compacted_4b_initial, 16);
- else
- compacted_2b = 0;
-
- pos = ebase;
- if (lcn < compacted_4b_initial) {
- amortizedshift = 2;
- goto out;
- }
- pos += compacted_4b_initial * 4;
- lcn -= compacted_4b_initial;
-
- if (lcn < compacted_2b) {
- amortizedshift = 1;
- goto out;
- }
- pos += compacted_2b * 2;
- lcn -= compacted_2b;
- amortizedshift = 2;
-out:
- pos += lcn * (1 << amortizedshift);
- return unpack_compacted_index(m, amortizedshift, pos, lookahead);
-}
-
static int z_erofs_load_lcluster_from_disk(struct z_erofs_maprecorder *m,
unsigned int lcn, bool lookahead)
{
@@ -311,27 +294,23 @@ err_bogus:
static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
unsigned int initial_lcn)
{
- struct super_block *sb = m->inode->i_sb;
- struct erofs_inode *const vi = EROFS_I(m->inode);
- struct erofs_map_blocks *const map = m->map;
- const unsigned int lclusterbits = vi->z_logical_clusterbits;
- unsigned long lcn;
+ struct inode *inode = m->inode;
+ struct super_block *sb = inode->i_sb;
+ struct erofs_inode *vi = EROFS_I(inode);
+ bool bigpcl1 = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1;
+ bool bigpcl2 = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2;
+ unsigned long lcn = m->lcn + 1;
int err;
- DBG_BUGON(m->type != Z_EROFS_LCLUSTER_TYPE_PLAIN &&
- m->type != Z_EROFS_LCLUSTER_TYPE_HEAD1 &&
- m->type != Z_EROFS_LCLUSTER_TYPE_HEAD2);
+ DBG_BUGON(m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD);
DBG_BUGON(m->type != m->headtype);
- if (m->headtype == Z_EROFS_LCLUSTER_TYPE_PLAIN ||
- ((m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD1) &&
- !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) ||
- ((m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2) &&
- !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2))) {
- map->m_plen = 1ULL << lclusterbits;
- return 0;
- }
- lcn = m->lcn + 1;
+ if ((m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD1 && !bigpcl1) ||
+ ((m->headtype == Z_EROFS_LCLUSTER_TYPE_PLAIN ||
+ m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2) && !bigpcl2) ||
+ (lcn << vi->z_logical_clusterbits) >= inode->i_size)
+ m->compressedblks = 1;
+
if (m->compressedblks)
goto out;
@@ -356,9 +335,9 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
case Z_EROFS_LCLUSTER_TYPE_HEAD2:
/*
* if the 1st NONHEAD lcluster is actually PLAIN or HEAD type
- * rather than CBLKCNT, it's a 1 lcluster-sized pcluster.
+ * rather than CBLKCNT, it's a 1 block-sized pcluster.
*/
- m->compressedblks = 1 << (lclusterbits - sb->s_blocksize_bits);
+ m->compressedblks = 1;
break;
case Z_EROFS_LCLUSTER_TYPE_NONHEAD:
if (m->delta[0] != 1)
@@ -373,7 +352,7 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
return -EFSCORRUPTED;
}
out:
- map->m_plen = erofs_pos(sb, m->compressedblks);
+ m->map->m_plen = erofs_pos(sb, m->compressedblks);
return 0;
err_bonus_cblkcnt:
erofs_err(sb, "bogus CBLKCNT @ lcn %lu of nid %llu", lcn, vi->nid);
diff --git a/fs/erofs/zutil.c b/fs/erofs/zutil.c
index 75704f58ecfa..55ff2ab5128e 100644
--- a/fs/erofs/zutil.c
+++ b/fs/erofs/zutil.c
@@ -87,8 +87,8 @@ int z_erofs_gbuf_growsize(unsigned int nrpages)
tmp_pages[j] = gbuf->pages[j];
do {
last = j;
- j = alloc_pages_bulk_array(GFP_KERNEL, nrpages,
- tmp_pages);
+ j = alloc_pages_bulk(GFP_KERNEL, nrpages,
+ tmp_pages);
if (last == j)
goto out;
} while (j != nrpages);
@@ -230,9 +230,10 @@ void erofs_shrinker_unregister(struct super_block *sb)
struct erofs_sb_info *const sbi = EROFS_SB(sb);
mutex_lock(&sbi->umount_mutex);
- /* clean up all remaining pclusters in memory */
- z_erofs_shrink_scan(sbi, ~0UL);
-
+ while (!xa_empty(&sbi->managed_pslots)) {
+ z_erofs_shrink_scan(sbi, ~0UL);
+ cond_resched();
+ }
spin_lock(&erofs_sb_list_lock);
list_del(&sbi->list);
spin_unlock(&erofs_sb_list_lock);
@@ -242,7 +243,7 @@ void erofs_shrinker_unregister(struct super_block *sb)
static unsigned long erofs_shrink_count(struct shrinker *shrink,
struct shrink_control *sc)
{
- return atomic_long_read(&erofs_global_shrink_cnt);
+ return atomic_long_read(&erofs_global_shrink_cnt) ?: SHRINK_EMPTY;
}
static unsigned long erofs_shrink_scan(struct shrinker *shrink,