summaryrefslogtreecommitdiff
path: root/mm/readahead.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/readahead.c')
-rw-r--r--mm/readahead.c108
1 files changed, 100 insertions, 8 deletions
diff --git a/mm/readahead.c b/mm/readahead.c
index f61943fd1741..21e5f9161cf2 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -262,7 +262,7 @@ static void read_pages(struct readahead_control *rac, struct list_head *pages,
blk_finish_plug(&plug);
- BUG_ON(!list_empty(pages));
+ BUG_ON(pages && !list_empty(pages));
BUG_ON(readahead_count(rac));
out:
@@ -361,7 +361,7 @@ EXPORT_SYMBOL_GPL(page_cache_ra_unbounded);
* behaviour which would occur if page allocations are causing VM writeback.
* We really don't want to intermingle reads and writes like that.
*/
-void do_page_cache_ra(struct readahead_control *ractl,
+static void do_page_cache_ra(struct readahead_control *ractl,
unsigned long nr_to_read, unsigned long lookahead_size)
{
struct inode *inode = ractl->mapping->host;
@@ -546,10 +546,102 @@ static int try_context_readahead(struct address_space *mapping,
}
/*
+ * There are some parts of the kernel which assume that PMD entries
+ * are exactly HPAGE_PMD_ORDER. Those should be fixed, but until then,
+ * limit the maximum allocation order to PMD size. I'm not aware of any
+ * assumptions about maximum order if THP are disabled, but 8 seems like
+ * a good order (that's 1MB if you're using 4kB pages)
+ */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER
+#else
+#define MAX_PAGECACHE_ORDER 8
+#endif
+
+static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index,
+ pgoff_t mark, unsigned int order, gfp_t gfp)
+{
+ int err;
+ struct folio *folio = filemap_alloc_folio(gfp, order);
+
+ if (!folio)
+ return -ENOMEM;
+ if (mark - index < (1UL << order))
+ folio_set_readahead(folio);
+ err = filemap_add_folio(ractl->mapping, folio, index, gfp);
+ if (err)
+ folio_put(folio);
+ else
+ ractl->_nr_pages += 1UL << order;
+ return err;
+}
+
+void page_cache_ra_order(struct readahead_control *ractl,
+ struct file_ra_state *ra, unsigned int new_order)
+{
+ struct address_space *mapping = ractl->mapping;
+ pgoff_t index = readahead_index(ractl);
+ pgoff_t limit = (i_size_read(mapping->host) - 1) >> PAGE_SHIFT;
+ pgoff_t mark = index + ra->size - ra->async_size;
+ int err = 0;
+ gfp_t gfp = readahead_gfp_mask(mapping);
+
+ if (!mapping_large_folio_support(mapping) || ra->size < 4)
+ goto fallback;
+
+ limit = min(limit, index + ra->size - 1);
+
+ if (new_order < MAX_PAGECACHE_ORDER) {
+ new_order += 2;
+ if (new_order > MAX_PAGECACHE_ORDER)
+ new_order = MAX_PAGECACHE_ORDER;
+ while ((1 << new_order) > ra->size)
+ new_order--;
+ }
+
+ while (index <= limit) {
+ unsigned int order = new_order;
+
+ /* Align with smaller pages if needed */
+ if (index & ((1UL << order) - 1)) {
+ order = __ffs(index);
+ if (order == 1)
+ order = 0;
+ }
+ /* Don't allocate pages past EOF */
+ while (index + (1UL << order) - 1 > limit) {
+ if (--order == 1)
+ order = 0;
+ }
+ err = ra_alloc_folio(ractl, index, mark, order, gfp);
+ if (err)
+ break;
+ index += 1UL << order;
+ }
+
+ if (index > limit) {
+ ra->size += index - limit - 1;
+ ra->async_size += index - limit - 1;
+ }
+
+ read_pages(ractl, NULL, false);
+
+ /*
+ * If there were already pages in the page cache, then we may have
+ * left some gaps. Let the regular readahead code take care of this
+ * situation.
+ */
+ if (!err)
+ return;
+fallback:
+ do_page_cache_ra(ractl, ra->size, ra->async_size);
+}
+
+/*
* A minimal readahead algorithm for trivial sequential/random reads.
*/
static void ondemand_readahead(struct readahead_control *ractl,
- bool hit_readahead_marker, unsigned long req_size)
+ struct folio *folio, unsigned long req_size)
{
struct backing_dev_info *bdi = inode_to_bdi(ractl->mapping->host);
struct file_ra_state *ra = ractl->ra;
@@ -584,12 +676,12 @@ static void ondemand_readahead(struct readahead_control *ractl,
}
/*
- * Hit a marked page without valid readahead state.
+ * Hit a marked folio without valid readahead state.
* E.g. interleaved reads.
* Query the pagecache for async_size, which normally equals to
* readahead size. Ramp it up and use it as the new readahead size.
*/
- if (hit_readahead_marker) {
+ if (folio) {
pgoff_t start;
rcu_read_lock();
@@ -662,7 +754,7 @@ readit:
}
ractl->_index = ra->start;
- do_page_cache_ra(ractl, ra->size, ra->async_size);
+ page_cache_ra_order(ractl, ra, folio ? folio_order(folio) : 0);
}
void page_cache_sync_ra(struct readahead_control *ractl,
@@ -690,7 +782,7 @@ void page_cache_sync_ra(struct readahead_control *ractl,
}
/* do read-ahead */
- ondemand_readahead(ractl, false, req_count);
+ ondemand_readahead(ractl, NULL, req_count);
}
EXPORT_SYMBOL_GPL(page_cache_sync_ra);
@@ -713,7 +805,7 @@ void page_cache_async_ra(struct readahead_control *ractl,
return;
/* do read-ahead */
- ondemand_readahead(ractl, true, req_count);
+ ondemand_readahead(ractl, folio, req_count);
}
EXPORT_SYMBOL_GPL(page_cache_async_ra);