-rw-r--r--  fs/iomap/buffered-io.c         |   4
-rw-r--r--  fs/iomap/direct-io.c           |  45
-rw-r--r--  fs/xfs/libxfs/xfs_attr_leaf.c  |  15
-rw-r--r--  fs/xfs/libxfs/xfs_ialloc.c     |   5
-rw-r--r--  fs/xfs/libxfs/xfs_shared.h     |   3
-rw-r--r--  fs/xfs/xfs_icache.c            |   6
-rw-r--r--  fs/xfs/xfs_iops.c              |   2
-rw-r--r--  fs/xfs/xfs_mount.c             |   8
-rw-r--r--  fs/xfs/xfs_super.c             |  28
-rw-r--r--  include/linux/huge_mm.h        |  28
-rw-r--r--  include/linux/pagemap.h        | 123
-rw-r--r--  mm/filemap.c                   |  36
-rw-r--r--  mm/huge_memory.c               |  65
-rw-r--r--  mm/readahead.c                 |  83
14 files changed, 366 insertions(+), 85 deletions(-)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index f420c53d86ac..d745f718bcde 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -2007,10 +2007,10 @@ iomap_writepages(struct address_space *mapping, struct writeback_control *wbc,
}
EXPORT_SYMBOL_GPL(iomap_writepages);
-static int __init iomap_init(void)
+static int __init iomap_buffered_init(void)
{
return bioset_init(&iomap_ioend_bioset, 4 * (PAGE_SIZE / SECTOR_SIZE),
offsetof(struct iomap_ioend, io_bio),
BIOSET_NEED_BVECS);
}
-fs_initcall(iomap_init);
+fs_initcall(iomap_buffered_init);
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index f3b43d223a46..c02b266bba52 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -11,6 +11,7 @@
#include <linux/iomap.h>
#include <linux/backing-dev.h>
#include <linux/uio.h>
+#include <linux/set_memory.h>
#include <linux/task_io_accounting_ops.h>
#include "trace.h"
@@ -27,6 +28,13 @@
#define IOMAP_DIO_WRITE (1U << 30)
#define IOMAP_DIO_DIRTY (1U << 31)
+/*
+ * Used for sub-block zeroing in iomap_dio_zero()
+ */
+#define IOMAP_ZERO_PAGE_SIZE (SZ_64K)
+#define IOMAP_ZERO_PAGE_ORDER (get_order(IOMAP_ZERO_PAGE_SIZE))
+static struct page *zero_page;
+
struct iomap_dio {
struct kiocb *iocb;
const struct iomap_dio_ops *dops;
@@ -232,13 +240,20 @@ release_bio:
}
EXPORT_SYMBOL_GPL(iomap_dio_bio_end_io);
-static void iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
+static int iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
loff_t pos, unsigned len)
{
struct inode *inode = file_inode(dio->iocb->ki_filp);
- struct page *page = ZERO_PAGE(0);
struct bio *bio;
+ if (!len)
+ return 0;
+ /*
+ * Max block size supported is 64k
+ */
+ if (WARN_ON_ONCE(len > IOMAP_ZERO_PAGE_SIZE))
+ return -EINVAL;
+
bio = iomap_dio_alloc_bio(iter, dio, 1, REQ_OP_WRITE | REQ_SYNC | REQ_IDLE);
fscrypt_set_bio_crypt_ctx(bio, inode, pos >> inode->i_blkbits,
GFP_KERNEL);
@@ -246,8 +261,9 @@ static void iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
bio->bi_private = dio;
bio->bi_end_io = iomap_dio_bio_end_io;
- __bio_add_page(bio, page, len, 0);
+ __bio_add_page(bio, zero_page, len, 0);
iomap_dio_submit_bio(iter, dio, bio, pos);
+ return 0;
}
/*
@@ -356,8 +372,10 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
if (need_zeroout) {
/* zero out from the start of the block to the write offset */
pad = pos & (fs_block_size - 1);
- if (pad)
- iomap_dio_zero(iter, dio, pos - pad, pad);
+
+ ret = iomap_dio_zero(iter, dio, pos - pad, pad);
+ if (ret)
+ goto out;
}
/*
@@ -431,7 +449,8 @@ zero_tail:
/* zero out from the end of the write to the end of the block */
pad = pos & (fs_block_size - 1);
if (pad)
- iomap_dio_zero(iter, dio, pos, fs_block_size - pad);
+ ret = iomap_dio_zero(iter, dio, pos,
+ fs_block_size - pad);
}
out:
/* Undo iter limitation to current extent */
@@ -753,3 +772,17 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
return iomap_dio_complete(dio);
}
EXPORT_SYMBOL_GPL(iomap_dio_rw);
+
+static int __init iomap_dio_init(void)
+{
+ zero_page = alloc_pages(GFP_KERNEL | __GFP_ZERO,
+ IOMAP_ZERO_PAGE_ORDER);
+
+ if (!zero_page)
+ return -ENOMEM;
+
+ set_memory_ro((unsigned long)page_address(zero_page),
+ 1U << IOMAP_ZERO_PAGE_ORDER);
+ return 0;
+}
+fs_initcall(iomap_dio_init);
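With block sizes above PAGE_SIZE, the head and tail pads of an unaligned direct write can themselves be larger than a page, which is why a dedicated 64k zero page replaces ZERO_PAGE(0) above. A minimal userspace sketch of the same mask arithmetic, assuming a hypothetical 16 KiB block on a 4 KiB-page system (both pads come out larger than one page):

#include <stdio.h>

int main(void)
{
	unsigned long fs_block_size = 16384;	/* assumed 16 KiB block */
	unsigned long pos = 5000;		/* unaligned write start */
	unsigned long end = pos + 2000;		/* unaligned write end */

	/* zero out from the start of the block to the write offset */
	unsigned long head_pad = pos & (fs_block_size - 1);
	/* zero out from the end of the write to the end of the block */
	unsigned long tail_pad = fs_block_size - (end & (fs_block_size - 1));

	printf("head pad: %lu bytes\n", head_pad);	/* 5000 */
	printf("tail pad: %lu bytes\n", tail_pad);	/* 9384 */
	return 0;
}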
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index b9e98950eb3d..09f4cb061a6e 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -1138,10 +1138,7 @@ xfs_attr3_leaf_to_shortform(
trace_xfs_attr_leaf_to_sf(args);
- tmpbuffer = kmalloc(args->geo->blksize, GFP_KERNEL | __GFP_NOFAIL);
- if (!tmpbuffer)
- return -ENOMEM;
-
+ tmpbuffer = kvmalloc(args->geo->blksize, GFP_KERNEL | __GFP_NOFAIL);
memcpy(tmpbuffer, bp->b_addr, args->geo->blksize);
leaf = (xfs_attr_leafblock_t *)tmpbuffer;
@@ -1205,7 +1202,7 @@ xfs_attr3_leaf_to_shortform(
error = 0;
out:
- kfree(tmpbuffer);
+ kvfree(tmpbuffer);
return error;
}
@@ -1613,7 +1610,7 @@ xfs_attr3_leaf_compact(
trace_xfs_attr_leaf_compact(args);
- tmpbuffer = kmalloc(args->geo->blksize, GFP_KERNEL | __GFP_NOFAIL);
+ tmpbuffer = kvmalloc(args->geo->blksize, GFP_KERNEL | __GFP_NOFAIL);
memcpy(tmpbuffer, bp->b_addr, args->geo->blksize);
memset(bp->b_addr, 0, args->geo->blksize);
leaf_src = (xfs_attr_leafblock_t *)tmpbuffer;
@@ -1651,7 +1648,7 @@ xfs_attr3_leaf_compact(
*/
xfs_trans_log_buf(trans, bp, 0, args->geo->blksize - 1);
- kfree(tmpbuffer);
+ kvfree(tmpbuffer);
}
/*
@@ -2330,7 +2327,7 @@ xfs_attr3_leaf_unbalance(
struct xfs_attr_leafblock *tmp_leaf;
struct xfs_attr3_icleaf_hdr tmphdr;
- tmp_leaf = kzalloc(state->args->geo->blksize,
+ tmp_leaf = kvzalloc(state->args->geo->blksize,
GFP_KERNEL | __GFP_NOFAIL);
/*
@@ -2371,7 +2368,7 @@ xfs_attr3_leaf_unbalance(
}
memcpy(save_leaf, tmp_leaf, state->args->geo->blksize);
savehdr = tmphdr; /* struct copy */
- kfree(tmp_leaf);
+ kvfree(tmp_leaf);
}
xfs_attr3_leaf_hdr_to_disk(state->args->geo, save_leaf, &savehdr);
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 0af5b7a33d05..1921b689888b 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -3033,6 +3033,11 @@ xfs_ialloc_setup_geometry(
igeo->ialloc_align = mp->m_dalign;
else
igeo->ialloc_align = 0;
+
+ if (mp->m_sb.sb_blocksize > PAGE_SIZE)
+ igeo->min_folio_order = mp->m_sb.sb_blocklog - PAGE_SHIFT;
+ else
+ igeo->min_folio_order = 0;
}
/* Compute the location of the root directory inode that is laid out by mkfs. */
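min_folio_order is simply the gap between the block log and the page shift, so every folio in the mapping spans at least one filesystem block. A standalone sketch with assumed example geometry (16 KiB blocks on 4 KiB pages):

#include <stdio.h>

#define EXAMPLE_PAGE_SHIFT 12	/* assuming 4 KiB pages */

int main(void)
{
	unsigned int sb_blocklog = 14;			/* 16 KiB blocks */
	unsigned int sb_blocksize = 1U << sb_blocklog;
	unsigned int min_folio_order = 0;

	if (sb_blocksize > (1U << EXAMPLE_PAGE_SHIFT))
		min_folio_order = sb_blocklog - EXAMPLE_PAGE_SHIFT;

	/* order-2 folios: 4 pages, exactly one filesystem block */
	printf("min_folio_order = %u\n", min_folio_order);
	return 0;
}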
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index 2f7413afbf46..33b84a3a83ff 100644
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -224,6 +224,9 @@ struct xfs_ino_geometry {
/* precomputed value for di_flags2 */
uint64_t new_diflags2;
+ /* minimum folio order of a page cache allocation */
+ unsigned int min_folio_order;
+
};
#endif /* __XFS_SHARED_H__ */
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index cf629302d48e..0fcf235e5023 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -88,7 +88,8 @@ xfs_inode_alloc(
/* VFS doesn't initialise i_mode! */
VFS_I(ip)->i_mode = 0;
- mapping_set_large_folios(VFS_I(ip)->i_mapping);
+ mapping_set_folio_min_order(VFS_I(ip)->i_mapping,
+ M_IGEO(mp)->min_folio_order);
XFS_STATS_INC(mp, vn_active);
ASSERT(atomic_read(&ip->i_pincount) == 0);
@@ -325,7 +326,8 @@ xfs_reinit_inode(
inode->i_uid = uid;
inode->i_gid = gid;
inode->i_state = state;
- mapping_set_large_folios(inode->i_mapping);
+ mapping_set_folio_min_order(inode->i_mapping,
+ M_IGEO(mp)->min_folio_order);
return error;
}
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 1cdc8034f54d..6483b4c4cf35 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -567,7 +567,7 @@ xfs_stat_blksize(
return 1U << mp->m_allocsize_log;
}
- return PAGE_SIZE;
+ return max_t(uint32_t, PAGE_SIZE, mp->m_sb.sb_blocksize);
}
STATIC int
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 09eef1721ef4..c6933440f806 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -132,11 +132,15 @@ xfs_sb_validate_fsb_count(
xfs_sb_t *sbp,
uint64_t nblocks)
{
- ASSERT(PAGE_SHIFT >= sbp->sb_blocklog);
+ uint64_t max_bytes;
+
ASSERT(sbp->sb_blocklog >= BBSHIFT);
+ if (check_shl_overflow(nblocks, sbp->sb_blocklog, &max_bytes))
+ return -EFBIG;
+
/* Limited by ULONG_MAX of page cache index */
- if (nblocks >> (PAGE_SHIFT - sbp->sb_blocklog) > ULONG_MAX)
+ if (max_bytes >> PAGE_SHIFT > ULONG_MAX)
return -EFBIG;
return 0;
}
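The removed assert and shift only worked while sb_blocklog <= PAGE_SHIFT; with larger blocks, (PAGE_SHIFT - sb_blocklog) would be negative, so the check now computes the byte count first and relies on check_shl_overflow(). A userspace analog of that helper, assuming the 64-bit case and a hypothetical oversized block count:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

/* Rough stand-in for the kernel's check_shl_overflow() on a u64. */
static bool shl_overflows_u64(uint64_t v, unsigned int shift, uint64_t *res)
{
	if (shift >= 64)
		return true;
	if (shift && (v >> (64 - shift)))
		return true;
	*res = v << shift;
	return false;
}

int main(void)
{
	uint64_t max_bytes;
	uint64_t nblocks = UINT64_MAX / 2;	/* absurd block count */
	unsigned int blocklog = 16;		/* 64 KiB blocks */

	if (shl_overflows_u64(nblocks, blocklog, &max_bytes))
		printf("-EFBIG: byte count overflows 64 bits\n");
	else
		printf("max_bytes = %llu\n", (unsigned long long)max_bytes);
	return 0;
}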
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 27e9f749c4c7..e8cc7900911e 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1638,16 +1638,28 @@ xfs_fs_fill_super(
goto out_free_sb;
}
- /*
- * Until this is fixed only page-sized or smaller data blocks work.
- */
if (mp->m_sb.sb_blocksize > PAGE_SIZE) {
- xfs_warn(mp,
- "File system with blocksize %d bytes. "
- "Only pagesize (%ld) or less will currently work.",
+ size_t max_folio_size = mapping_max_folio_size_supported();
+
+ if (!xfs_has_crc(mp)) {
+ xfs_warn(mp,
+"V4 Filesystem with blocksize %d bytes. Only pagesize (%ld) or less is supported.",
mp->m_sb.sb_blocksize, PAGE_SIZE);
- error = -ENOSYS;
- goto out_free_sb;
+ error = -ENOSYS;
+ goto out_free_sb;
+ }
+
+ if (mp->m_sb.sb_blocksize > max_folio_size) {
+ xfs_warn(mp,
+"block size (%u bytes) not supported; Only block size (%zu) or less is supported",
+ mp->m_sb.sb_blocksize, max_folio_size);
+ error = -ENOSYS;
+ goto out_free_sb;
+ }
+
+ xfs_warn(mp,
+"EXPERIMENTAL: V5 Filesystem with Large Block Size (%d bytes) enabled.",
+ mp->m_sb.sb_blocksize);
}
/* Ensure this filesystem fits in the page cache limits */
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index e25d9ebfdf89..7a570e0437c9 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -96,6 +96,8 @@ extern struct kobj_attribute thpsize_shmem_enabled_attr;
#define thp_vma_allowable_order(vma, vm_flags, tva_flags, order) \
(!!thp_vma_allowable_orders(vma, vm_flags, tva_flags, BIT(order)))
+#define split_folio(f) split_folio_to_list(f, NULL)
+
#ifdef CONFIG_PGTABLE_HAS_HUGE_LEAVES
#define HPAGE_PMD_SHIFT PMD_SHIFT
#define HPAGE_PUD_SHIFT PUD_SHIFT
@@ -317,9 +319,24 @@ unsigned long thp_get_unmapped_area_vmflags(struct file *filp, unsigned long add
bool can_split_folio(struct folio *folio, int *pextra_pins);
int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
unsigned int new_order);
+int min_order_for_split(struct folio *folio);
+int split_folio_to_list(struct folio *folio, struct list_head *list);
static inline int split_huge_page(struct page *page)
{
- return split_huge_page_to_list_to_order(page, NULL, 0);
+ struct folio *folio = page_folio(page);
+ int ret = min_order_for_split(folio);
+
+ if (ret < 0)
+ return ret;
+
+ /*
+ * split_huge_page() locks the page before splitting and
+ * expects the same page that has been split to be locked when
+ * returned. split_folio(page_folio(page)) cannot be used here
+ * because it converts the page to a folio and passes the head
+ * page to be split.
+ */
+ return split_huge_page_to_list_to_order(page, NULL, ret);
}
void deferred_split_folio(struct folio *folio);
@@ -484,6 +501,12 @@ static inline int split_huge_page(struct page *page)
{
return 0;
}
+
+static inline int split_folio_to_list(struct folio *folio, struct list_head *list)
+{
+ return 0;
+}
+
static inline void deferred_split_folio(struct folio *folio) {}
#define split_huge_pmd(__vma, __pmd, __address) \
do { } while (0)
@@ -598,7 +621,4 @@ static inline int split_folio_to_order(struct folio *folio, int new_order)
return split_folio_to_list_to_order(folio, NULL, new_order);
}
-#define split_folio_to_list(f, l) split_folio_to_list_to_order(f, l, 0)
-#define split_folio(f) split_folio_to_order(f, 0)
-
#endif /* _LINUX_HUGE_MM_H */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index d9c7edb6422b..55b254d951da 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -204,14 +204,21 @@ enum mapping_flags {
AS_EXITING = 4, /* final truncate in progress */
/* writeback related tags are not used */
AS_NO_WRITEBACK_TAGS = 5,
- AS_LARGE_FOLIO_SUPPORT = 6,
- AS_RELEASE_ALWAYS, /* Call ->release_folio(), even if no private data */
- AS_STABLE_WRITES, /* must wait for writeback before modifying
+ AS_RELEASE_ALWAYS = 6, /* Call ->release_folio(), even if no private data */
+ AS_STABLE_WRITES = 7, /* must wait for writeback before modifying
folio contents */
- AS_INACCESSIBLE, /* Do not attempt direct R/W access to the mapping,
- including to move the mapping */
+ AS_INACCESSIBLE = 8, /* Do not attempt direct R/W access to the mapping */
+ /* Bits 16-25 are used for FOLIO_ORDER */
+ AS_FOLIO_ORDER_BITS = 5,
+ AS_FOLIO_ORDER_MIN = 16,
+ AS_FOLIO_ORDER_MAX = AS_FOLIO_ORDER_MIN + AS_FOLIO_ORDER_BITS,
};
+#define AS_FOLIO_ORDER_BITS_MASK ((1u << AS_FOLIO_ORDER_BITS) - 1)
+#define AS_FOLIO_ORDER_MIN_MASK (AS_FOLIO_ORDER_BITS_MASK << AS_FOLIO_ORDER_MIN)
+#define AS_FOLIO_ORDER_MAX_MASK (AS_FOLIO_ORDER_BITS_MASK << AS_FOLIO_ORDER_MAX)
+#define AS_FOLIO_ORDER_MASK (AS_FOLIO_ORDER_MIN_MASK | AS_FOLIO_ORDER_MAX_MASK)
+
/**
* mapping_set_error - record a writeback error in the address_space
* @mapping: the mapping in which an error should be set
@@ -367,9 +374,64 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
#define MAX_XAS_ORDER (XA_CHUNK_SHIFT * 2 - 1)
#define MAX_PAGECACHE_ORDER min(MAX_XAS_ORDER, PREFERRED_MAX_PAGECACHE_ORDER)
+/*
+ * mapping_max_folio_size_supported() - Check the max folio size supported
+ *
+ * The filesystem should call this function at mount time if there is a
+ * requirement on the folio mapping size in the page cache.
+ */
+static inline size_t mapping_max_folio_size_supported(void)
+{
+ if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+ return 1U << (PAGE_SHIFT + MAX_PAGECACHE_ORDER);
+ return PAGE_SIZE;
+}
+
+/*
+ * mapping_set_folio_order_range() - Set the orders supported by a file.
+ * @mapping: The address space of the file.
+ * @min: Minimum folio order (between 0-MAX_PAGECACHE_ORDER inclusive).
+ * @max: Maximum folio order (between @min-MAX_PAGECACHE_ORDER inclusive).
+ *
+ * The filesystem should call this function in its inode constructor to
+ * indicate which base size (min) and maximum size (max) of folio the VFS
+ * can use to cache the contents of the file. This should only be used
+ * if the filesystem needs special handling of folio sizes (i.e., there
+ * is something the core cannot know).
+ * Do not tune it based on, e.g., i_size.
+ *
+ * Context: This should not be called while the inode is active as it
+ * is non-atomic.
+ */
+static inline void mapping_set_folio_order_range(struct address_space *mapping,
+ unsigned int min,
+ unsigned int max)
+{
+ if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+ return;
+
+ if (min > MAX_PAGECACHE_ORDER)
+ min = MAX_PAGECACHE_ORDER;
+
+ if (max > MAX_PAGECACHE_ORDER)
+ max = MAX_PAGECACHE_ORDER;
+
+ if (max < min)
+ max = min;
+
+ mapping->flags = (mapping->flags & ~AS_FOLIO_ORDER_MASK) |
+ (min << AS_FOLIO_ORDER_MIN) | (max << AS_FOLIO_ORDER_MAX);
+}
+
+static inline void mapping_set_folio_min_order(struct address_space *mapping,
+ unsigned int min)
+{
+ mapping_set_folio_order_range(mapping, min, MAX_PAGECACHE_ORDER);
+}
+
/**
* mapping_set_large_folios() - Indicate the file supports large folios.
- * @mapping: The file.
+ * @mapping: The address space of the file.
*
* The filesystem should call this function in its inode constructor to
* indicate that the VFS can use large folios to cache the contents of
@@ -380,7 +442,43 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
*/
static inline void mapping_set_large_folios(struct address_space *mapping)
{
- __set_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
+ mapping_set_folio_order_range(mapping, 0, MAX_PAGECACHE_ORDER);
+}
+
+static inline unsigned int
+mapping_max_folio_order(const struct address_space *mapping)
+{
+ if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+ return 0;
+ return (mapping->flags & AS_FOLIO_ORDER_MAX_MASK) >> AS_FOLIO_ORDER_MAX;
+}
+
+static inline unsigned int
+mapping_min_folio_order(const struct address_space *mapping)
+{
+ if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+ return 0;
+ return (mapping->flags & AS_FOLIO_ORDER_MIN_MASK) >> AS_FOLIO_ORDER_MIN;
+}
+
+static inline unsigned long
+mapping_min_folio_nrpages(struct address_space *mapping)
+{
+ return 1UL << mapping_min_folio_order(mapping);
+}
+
+/**
+ * mapping_align_index() - Align index for this mapping.
+ * @mapping: The address_space.
+ * @index: The page index.
+ *
+ * The index of a folio must be naturally aligned. If you are adding a
+ * new folio to the page cache and need to know what index to give it,
+ * call this function.
+ */
+static inline pgoff_t mapping_align_index(struct address_space *mapping,
+ pgoff_t index)
+{
+ return round_down(index, mapping_min_folio_nrpages(mapping));
}
/*
@@ -389,20 +487,17 @@ static inline void mapping_set_large_folios(struct address_space *mapping)
*/
static inline bool mapping_large_folio_support(struct address_space *mapping)
{
- /* AS_LARGE_FOLIO_SUPPORT is only reasonable for pagecache folios */
+ /* AS_FOLIO_ORDER is only reasonable for pagecache folios */
VM_WARN_ONCE((unsigned long)mapping & PAGE_MAPPING_ANON,
"Anonymous mapping always supports large folio");
- return IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
- test_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
+ return mapping_max_folio_order(mapping) > 0;
}
/* Return the maximum folio size for this pagecache mapping, in bytes. */
-static inline size_t mapping_max_folio_size(struct address_space *mapping)
+static inline size_t mapping_max_folio_size(const struct address_space *mapping)
{
- if (mapping_large_folio_support(mapping))
- return PAGE_SIZE << MAX_PAGECACHE_ORDER;
- return PAGE_SIZE;
+ return PAGE_SIZE << mapping_max_folio_order(mapping);
}
static inline int filemap_nr_thps(struct address_space *mapping)
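The min and max orders are packed into mapping->flags as two 5-bit fields, bits 16-20 and 21-25 respectively. A standalone demo of the encoding and decoding, reusing the mask definitions above with illustrative orders (min 2, max 10):

#include <stdio.h>

#define AS_FOLIO_ORDER_BITS	5
#define AS_FOLIO_ORDER_MIN	16
#define AS_FOLIO_ORDER_MAX	(AS_FOLIO_ORDER_MIN + AS_FOLIO_ORDER_BITS)
#define AS_FOLIO_ORDER_BITS_MASK ((1u << AS_FOLIO_ORDER_BITS) - 1)
#define AS_FOLIO_ORDER_MIN_MASK (AS_FOLIO_ORDER_BITS_MASK << AS_FOLIO_ORDER_MIN)
#define AS_FOLIO_ORDER_MAX_MASK (AS_FOLIO_ORDER_BITS_MASK << AS_FOLIO_ORDER_MAX)
#define AS_FOLIO_ORDER_MASK (AS_FOLIO_ORDER_MIN_MASK | AS_FOLIO_ORDER_MAX_MASK)

int main(void)
{
	unsigned long flags = 0;
	unsigned long min = 2, max = 10;	/* illustrative orders */

	/* what mapping_set_folio_order_range() stores */
	flags = (flags & ~AS_FOLIO_ORDER_MASK) |
		(min << AS_FOLIO_ORDER_MIN) | (max << AS_FOLIO_ORDER_MAX);

	/* what mapping_min/max_folio_order() read back */
	printf("min order: %lu\n",
	       (flags & AS_FOLIO_ORDER_MIN_MASK) >> AS_FOLIO_ORDER_MIN);
	printf("max order: %lu\n",
	       (flags & AS_FOLIO_ORDER_MAX_MASK) >> AS_FOLIO_ORDER_MAX);
	return 0;
}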
diff --git a/mm/filemap.c b/mm/filemap.c
index d62150418b91..d32210927453 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -859,6 +859,8 @@ noinline int __filemap_add_folio(struct address_space *mapping,
VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
VM_BUG_ON_FOLIO(folio_test_swapbacked(folio), folio);
+ VM_BUG_ON_FOLIO(folio_order(folio) < mapping_min_folio_order(mapping),
+ folio);
mapping_set_update(&xas, mapping);
VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
@@ -1919,8 +1921,10 @@ repeat:
folio_wait_stable(folio);
no_page:
if (!folio && (fgp_flags & FGP_CREAT)) {
- unsigned order = FGF_GET_ORDER(fgp_flags);
+ unsigned int min_order = mapping_min_folio_order(mapping);
+ unsigned int order = max(min_order, FGF_GET_ORDER(fgp_flags));
int err;
+ index = mapping_align_index(mapping, index);
if ((fgp_flags & FGP_WRITE) && mapping_can_writeback(mapping))
gfp |= __GFP_WRITE;
@@ -1933,10 +1937,8 @@ no_page:
if (WARN_ON_ONCE(!(fgp_flags & (FGP_LOCK | FGP_FOR_MMAP))))
fgp_flags |= FGP_LOCK;
- if (!mapping_large_folio_support(mapping))
- order = 0;
- if (order > MAX_PAGECACHE_ORDER)
- order = MAX_PAGECACHE_ORDER;
+ if (order > mapping_max_folio_order(mapping))
+ order = mapping_max_folio_order(mapping);
/* If we're not aligned, allocate a smaller folio */
if (index & ((1UL << order) - 1))
order = __ffs(index);
@@ -1945,7 +1947,7 @@ no_page:
gfp_t alloc_gfp = gfp;
err = -ENOMEM;
- if (order > 0)
+ if (order > min_order)
alloc_gfp |= __GFP_NORETRY | __GFP_NOWARN;
folio = filemap_alloc_folio(alloc_gfp, order);
if (!folio)
@@ -1960,7 +1962,7 @@ no_page:
break;
folio_put(folio);
folio = NULL;
- } while (order-- > 0);
+ } while (order-- > min_order);
if (err == -EEXIST)
goto repeat;
@@ -2449,13 +2451,15 @@ unlock_mapping:
}
static int filemap_create_folio(struct file *file,
- struct address_space *mapping, pgoff_t index,
+ struct address_space *mapping, loff_t pos,
struct folio_batch *fbatch)
{
struct folio *folio;
int error;
+ unsigned int min_order = mapping_min_folio_order(mapping);
+ pgoff_t index;
- folio = filemap_alloc_folio(mapping_gfp_mask(mapping), 0);
+ folio = filemap_alloc_folio(mapping_gfp_mask(mapping), min_order);
if (!folio)
return -ENOMEM;
@@ -2473,6 +2477,7 @@ static int filemap_create_folio(struct file *file,
* well to keep locking rules simple.
*/
filemap_invalidate_lock_shared(mapping);
+ index = (pos >> (PAGE_SHIFT + min_order)) << min_order;
error = filemap_add_folio(mapping, folio, index,
mapping_gfp_constraint(mapping, GFP_KERNEL));
if (error == -EEXIST)
@@ -2533,8 +2538,7 @@ retry:
if (!folio_batch_count(fbatch)) {
if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_WAITQ))
return -EAGAIN;
- err = filemap_create_folio(filp, mapping,
- iocb->ki_pos >> PAGE_SHIFT, fbatch);
+ err = filemap_create_folio(filp, mapping, iocb->ki_pos, fbatch);
if (err == AOP_TRUNCATED_PAGE)
goto retry;
return err;
@@ -3604,7 +3608,7 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
struct vm_area_struct *vma = vmf->vma;
struct file *file = vma->vm_file;
struct address_space *mapping = file->f_mapping;
- pgoff_t last_pgoff = start_pgoff;
+ pgoff_t file_end, last_pgoff = start_pgoff;
unsigned long addr;
XA_STATE(xas, &mapping->i_pages, start_pgoff);
struct folio *folio;
@@ -3630,6 +3634,10 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
goto out;
}
+ file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE) - 1;
+ if (end_pgoff > file_end)
+ end_pgoff = file_end;
+
folio_type = mm_counter_file(folio);
do {
unsigned long end;
@@ -3750,9 +3758,11 @@ static struct folio *do_read_cache_folio(struct address_space *mapping,
repeat:
folio = filemap_get_folio(mapping, index);
if (IS_ERR(folio)) {
- folio = filemap_alloc_folio(gfp, 0);
+ folio = filemap_alloc_folio(gfp,
+ mapping_min_folio_order(mapping));
if (!folio)
return ERR_PTR(-ENOMEM);
+ index = mapping_align_index(mapping, index);
err = filemap_add_folio(mapping, folio, index, gfp);
if (unlikely(err)) {
folio_put(folio);
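The new index computation in filemap_create_folio() derives a min-order-aligned page index straight from the byte position: (pos >> (PAGE_SHIFT + min_order)) << min_order is the page index of pos rounded down to a folio boundary. A quick standalone check with made-up values:

#include <assert.h>
#include <stdio.h>

#define EXAMPLE_PAGE_SHIFT 12	/* assuming 4 KiB pages */

int main(void)
{
	unsigned long long pos = 41ULL * 4096 + 123;	/* inside page 41 */
	unsigned int min_order = 2;			/* 4-page folios */

	unsigned long index =
		(unsigned long)((pos >> (EXAMPLE_PAGE_SHIFT + min_order))
				<< min_order);
	unsigned long nrpages = 1UL << min_order;
	/* same result as mapping_align_index() on the raw page index */
	unsigned long aligned = (unsigned long)(pos >> EXAMPLE_PAGE_SHIFT)
				& ~(nrpages - 1);

	assert(index == aligned);
	printf("index = %lu (page 41 rounded down to 40)\n", index);
	return 0;
}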
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f4be468e06a4..9931ff1d9a9d 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3082,6 +3082,9 @@ bool can_split_folio(struct folio *folio, int *pextra_pins)
* released, or if some unexpected race happened (e.g., anon VMA disappeared,
* truncation).
*
+ * Callers should ensure that the order respects the address space mapping
+ * min-order if one is set for non-anonymous folios.
+ *
* Returns -EINVAL when trying to split to an order that is incompatible
* with the folio. Splitting to order 0 is compatible with all folios.
*/
@@ -3163,6 +3166,7 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
mapping = NULL;
anon_vma_lock_write(anon_vma);
} else {
+ unsigned int min_order;
gfp_t gfp;
mapping = folio->mapping;
@@ -3173,6 +3177,14 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
goto out;
}
+ min_order = mapping_min_folio_order(folio->mapping);
+ if (new_order < min_order) {
+ VM_WARN_ONCE(1, "Cannot split mapped folio below min-order: %u",
+ min_order);
+ ret = -EINVAL;
+ goto out;
+ }
+
gfp = current_gfp_context(mapping_gfp_mask(mapping) &
GFP_RECLAIM_MASK);
@@ -3285,6 +3297,30 @@ out:
return ret;
}
+int min_order_for_split(struct folio *folio)
+{
+ if (folio_test_anon(folio))
+ return 0;
+
+ if (!folio->mapping) {
+ if (folio_test_pmd_mappable(folio))
+ count_vm_event(THP_SPLIT_PAGE_FAILED);
+ return -EBUSY;
+ }
+
+ return mapping_min_folio_order(folio->mapping);
+}
+
+int split_folio_to_list(struct folio *folio, struct list_head *list)
+{
+ int ret = min_order_for_split(folio);
+
+ if (ret < 0)
+ return ret;
+
+ return split_huge_page_to_list_to_order(&folio->page, list, ret);
+}
+
void __folio_undo_large_rmappable(struct folio *folio)
{
struct deferred_split *ds_queue;
@@ -3515,6 +3551,8 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
struct vm_area_struct *vma = vma_lookup(mm, addr);
struct page *page;
struct folio *folio;
+ struct address_space *mapping;
+ unsigned int target_order = new_order;
if (!vma)
break;
@@ -3535,7 +3573,13 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
if (!is_transparent_hugepage(folio))
goto next;
- if (new_order >= folio_order(folio))
+ if (!folio_test_anon(folio)) {
+ mapping = folio->mapping;
+ target_order = max(new_order,
+ mapping_min_folio_order(mapping));
+ }
+
+ if (target_order >= folio_order(folio))
goto next;
total++;
@@ -3551,9 +3595,14 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
if (!folio_trylock(folio))
goto next;
- if (!split_folio_to_order(folio, new_order))
+ if (!folio_test_anon(folio) && folio->mapping != mapping)
+ goto unlock;
+
+ if (!split_folio_to_order(folio, target_order))
split++;
+unlock:
+
folio_unlock(folio);
next:
folio_put(folio);
@@ -3578,6 +3627,8 @@ static int split_huge_pages_in_file(const char *file_path, pgoff_t off_start,
pgoff_t index;
int nr_pages = 1;
unsigned long total = 0, split = 0;
+ unsigned int min_order;
+ unsigned int target_order;
file = getname_kernel(file_path);
if (IS_ERR(file))
@@ -3591,6 +3642,8 @@ static int split_huge_pages_in_file(const char *file_path, pgoff_t off_start,
file_path, off_start, off_end);
mapping = candidate->f_mapping;
+ min_order = mapping_min_folio_order(mapping);
+ target_order = max(new_order, min_order);
for (index = off_start; index < off_end; index += nr_pages) {
struct folio *folio = filemap_get_folio(mapping, index);
@@ -3605,15 +3658,19 @@ static int split_huge_pages_in_file(const char *file_path, pgoff_t off_start,
total++;
nr_pages = folio_nr_pages(folio);
- if (new_order >= folio_order(folio))
+ if (target_order >= folio_order(folio))
goto next;
if (!folio_trylock(folio))
goto next;
- if (!split_folio_to_order(folio, new_order))
+ if (folio->mapping != mapping)
+ goto unlock;
+
+ if (!split_folio_to_order(folio, target_order))
split++;
+unlock:
folio_unlock(folio);
next:
folio_put(folio);
diff --git a/mm/readahead.c b/mm/readahead.c
index 517c0be7ce66..2078c42777a6 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -206,9 +206,10 @@ void page_cache_ra_unbounded(struct readahead_control *ractl,
unsigned long nr_to_read, unsigned long lookahead_size)
{
struct address_space *mapping = ractl->mapping;
- unsigned long index = readahead_index(ractl);
+ unsigned long ra_folio_index, index = readahead_index(ractl);
gfp_t gfp_mask = readahead_gfp_mask(mapping);
- unsigned long i;
+ unsigned long mark, i = 0;
+ unsigned int min_nrpages = mapping_min_folio_nrpages(mapping);
/*
* Partway through the readahead operation, we will have added
@@ -223,10 +224,24 @@ void page_cache_ra_unbounded(struct readahead_control *ractl,
unsigned int nofs = memalloc_nofs_save();
filemap_invalidate_lock_shared(mapping);
+ index = mapping_align_index(mapping, index);
+
+ /*
+ * As the iterator `i` advances in steps of min_nrpages, round up the
+ * start of the lookahead region (nr_to_read - lookahead_size from the
+ * original index) to min_nrpages, so that `mark`, the index at which
+ * the readahead flag is set, lands on a folio boundary.
+ */
+ ra_folio_index = round_up(readahead_index(ractl) + nr_to_read - lookahead_size,
+ min_nrpages);
+ mark = ra_folio_index - index;
+ nr_to_read += readahead_index(ractl) - index;
+ ractl->_index = index;
+
/*
* Preallocate as many pages as we will need.
*/
- for (i = 0; i < nr_to_read; i++) {
+ while (i < nr_to_read) {
struct folio *folio = xa_load(&mapping->i_pages, index + i);
int ret;
@@ -240,12 +255,13 @@ void page_cache_ra_unbounded(struct readahead_control *ractl,
* not worth getting one just for that.
*/
read_pages(ractl);
- ractl->_index++;
- i = ractl->_index + ractl->_nr_pages - index - 1;
+ ractl->_index += min_nrpages;
+ i = ractl->_index + ractl->_nr_pages - index;
continue;
}
- folio = filemap_alloc_folio(gfp_mask, 0);
+ folio = filemap_alloc_folio(gfp_mask,
+ mapping_min_folio_order(mapping));
if (!folio)
break;
@@ -255,14 +271,15 @@ void page_cache_ra_unbounded(struct readahead_control *ractl,
if (ret == -ENOMEM)
break;
read_pages(ractl);
- ractl->_index++;
- i = ractl->_index + ractl->_nr_pages - index - 1;
+ ractl->_index += min_nrpages;
+ i = ractl->_index + ractl->_nr_pages - index;
continue;
}
- if (i == nr_to_read - lookahead_size)
+ if (i == mark)
folio_set_readahead(folio);
ractl->_workingset |= folio_test_workingset(folio);
- ractl->_nr_pages++;
+ ractl->_nr_pages += min_nrpages;
+ i += min_nrpages;
}
/*
@@ -438,26 +455,41 @@ void page_cache_ra_order(struct readahead_control *ractl,
struct address_space *mapping = ractl->mapping;
pgoff_t start = readahead_index(ractl);
pgoff_t index = start;
+ unsigned int min_order = mapping_min_folio_order(mapping);
pgoff_t limit = (i_size_read(mapping->host) - 1) >> PAGE_SHIFT;
pgoff_t mark = index + ra->size - ra->async_size;
unsigned int nofs;
int err = 0;
gfp_t gfp = readahead_gfp_mask(mapping);
+ unsigned int min_ra_size = max(4, mapping_min_folio_nrpages(mapping));
- if (!mapping_large_folio_support(mapping) || ra->size < 4)
+ /*
+ * Fall back when ra->size < min_ra_size, as each folio has to
+ * cover at least min_nrpages pages anyway.
+ */
+ if (!mapping_large_folio_support(mapping) || ra->size < min_ra_size)
goto fallback;
limit = min(limit, index + ra->size - 1);
- if (new_order < MAX_PAGECACHE_ORDER)
+ if (new_order < mapping_max_folio_order(mapping))
new_order += 2;
- new_order = min_t(unsigned int, MAX_PAGECACHE_ORDER, new_order);
+ new_order = min(mapping_max_folio_order(mapping), new_order);
new_order = min_t(unsigned int, new_order, ilog2(ra->size));
+ new_order = max(new_order, min_order);
/* See comment in page_cache_ra_unbounded() */
nofs = memalloc_nofs_save();
filemap_invalidate_lock_shared(mapping);
+ /*
+ * If new_order is greater than min_order and index is already
+ * aligned to new_order, this is a no-op: an index aligned to
+ * new_order is also aligned to min_order.
+ */
+ ractl->_index = mapping_align_index(mapping, index);
+ index = readahead_index(ractl);
+
while (index <= limit) {
unsigned int order = new_order;
@@ -465,7 +497,7 @@ void page_cache_ra_order(struct readahead_control *ractl,
if (index & ((1UL << order) - 1))
order = __ffs(index);
/* Don't allocate pages past EOF */
- while (index + (1UL << order) - 1 > limit)
+ while (order > min_order && index + (1UL << order) - 1 > limit)
order--;
err = ra_alloc_folio(ractl, index, mark, order, gfp);
if (err)
@@ -703,8 +735,15 @@ void readahead_expand(struct readahead_control *ractl,
struct file_ra_state *ra = ractl->ra;
pgoff_t new_index, new_nr_pages;
gfp_t gfp_mask = readahead_gfp_mask(mapping);
+ unsigned long min_nrpages = mapping_min_folio_nrpages(mapping);
+ unsigned int min_order = mapping_min_folio_order(mapping);
new_index = new_start / PAGE_SIZE;
+ /*
+ * Readahead code should have aligned the ractl->_index to
+ * min_nrpages before calling readahead aops.
+ */
+ VM_BUG_ON(!IS_ALIGNED(ractl->_index, min_nrpages));
/* Expand the leading edge downwards */
while (ractl->_index > new_index) {
@@ -714,9 +753,11 @@ void readahead_expand(struct readahead_control *ractl,
if (folio && !xa_is_value(folio))
return; /* Folio apparently present */
- folio = filemap_alloc_folio(gfp_mask, 0);
+ folio = filemap_alloc_folio(gfp_mask, min_order);
if (!folio)
return;
+
+ index = mapping_align_index(mapping, index);
if (filemap_add_folio(mapping, folio, index, gfp_mask) < 0) {
folio_put(folio);
return;
@@ -726,7 +767,7 @@ void readahead_expand(struct readahead_control *ractl,
ractl->_workingset = true;
psi_memstall_enter(&ractl->_pflags);
}
- ractl->_nr_pages++;
+ ractl->_nr_pages += min_nrpages;
ractl->_index = folio->index;
}
@@ -741,9 +782,11 @@ void readahead_expand(struct readahead_control *ractl,
if (folio && !xa_is_value(folio))
return; /* Folio apparently present */
- folio = filemap_alloc_folio(gfp_mask, 0);
+ folio = filemap_alloc_folio(gfp_mask, min_order);
if (!folio)
return;
+
+ index = mapping_align_index(mapping, index);
if (filemap_add_folio(mapping, folio, index, gfp_mask) < 0) {
folio_put(folio);
return;
@@ -753,10 +796,10 @@ void readahead_expand(struct readahead_control *ractl,
ractl->_workingset = true;
psi_memstall_enter(&ractl->_pflags);
}
- ractl->_nr_pages++;
+ ractl->_nr_pages += min_nrpages;
if (ra) {
- ra->size++;
- ra->async_size++;
+ ra->size += min_nrpages;
+ ra->async_size += min_nrpages;
}
}
}
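Since the readahead flag is now set on whole min-order folios, page_cache_ra_unbounded() aligns its start index down and rounds the async-region boundary up before the allocation loop. A standalone sketch of that bookkeeping with hypothetical readahead geometry (start index 37, 32 pages to read, 8 of lookahead, order-2 minimum folios):

#include <stdio.h>

/* y must be a power of two here, as min_nrpages always is */
#define round_up(x, y)	 (((x) + (y) - 1) & ~((y) - 1))
#define round_down(x, y) ((x) & ~((y) - 1))

int main(void)
{
	unsigned long ra_index = 37;	/* readahead_index(ractl) */
	unsigned long nr_to_read = 32, lookahead_size = 8;
	unsigned long min_nrpages = 4;	/* order-2 minimum folios */

	/* mapping_align_index(): 37 -> 36 */
	unsigned long index = round_down(ra_index, min_nrpages);
	/* async region starts at 37 + 32 - 8 = 61, rounded up to 64 */
	unsigned long ra_folio_index =
		round_up(ra_index + nr_to_read - lookahead_size, min_nrpages);
	unsigned long mark = ra_folio_index - index;	/* 28 */

	/* widen nr_to_read to cover the pages gained by aligning down */
	nr_to_read += ra_index - index;			/* 33 */

	printf("index=%lu mark=%lu nr_to_read=%lu\n",
	       index, mark, nr_to_read);
	return 0;
}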