summaryrefslogtreecommitdiff
path: root/include/linux/netfs.h
diff options
context:
space:
mode:
authorDavid Howells <dhowells@redhat.com>2024-07-02 00:40:22 +0100
committerChristian Brauner <brauner@kernel.org>2024-09-12 12:20:41 +0200
commitee4cdf7ba857a894ad1650d6ab77669cbbfa329e (patch)
tree8258e3b756adf109085d66a8b63cd08db03abad0 /include/linux/netfs.h
parent2e45b922977c07bb339d76fd45e68f9b907fef7d (diff)
netfs: Speed up buffered reading
Improve the efficiency of buffered reads in a number of ways: (1) Overhaul the algorithm in general so that it's a lot more compact and split the read submission code between buffered and unbuffered versions. The unbuffered version can be vastly simplified. (2) Read-result collection is handed off to a work queue rather than being done in the I/O thread. Multiple subrequests can be processes simultaneously. (3) When a subrequest is collected, any folios it fully spans are collected and "spare" data on either side is donated to either the previous or the next subrequest in the sequence. Notes: (*) Readahead expansion is massively slows down fio, presumably because it causes a load of extra allocations, both folio and xarray, up front before RPC requests can be transmitted. (*) RDMA with cifs does appear to work, both with SIW and RXE. (*) PG_private_2-based reading and copy-to-cache is split out into its own file and altered to use folio_queue. Note that the copy to the cache now creates a new write transaction against the cache and adds the folios to be copied into it. This allows it to use part of the writeback I/O code. Signed-off-by: David Howells <dhowells@redhat.com> cc: Jeff Layton <jlayton@kernel.org> cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20240814203850.2240469-20-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner <brauner@kernel.org>
Diffstat (limited to 'include/linux/netfs.h')
-rw-r--r--include/linux/netfs.h26
1 files changed, 18 insertions, 8 deletions
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index bd0e3d147822..c0f0c9c87d86 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -178,36 +178,43 @@ struct netfs_io_subrequest {
unsigned long long start; /* Where to start the I/O */
size_t len; /* Size of the I/O */
size_t transferred; /* Amount of data transferred */
+ size_t consumed; /* Amount of read data consumed */
+ size_t prev_donated; /* Amount of data donated from previous subreq */
+ size_t next_donated; /* Amount of data donated from next subreq */
refcount_t ref;
short error; /* 0 or error that occurred */
unsigned short debug_index; /* Index in list (for debugging output) */
unsigned int nr_segs; /* Number of segs in io_iter */
enum netfs_io_source source; /* Where to read from/write to */
unsigned char stream_nr; /* I/O stream this belongs to */
+ unsigned char curr_folioq_slot; /* Folio currently being read */
+ unsigned char curr_folio_order; /* Order of folio */
+ struct folio_queue *curr_folioq; /* Queue segment in which current folio resides */
unsigned long flags;
#define NETFS_SREQ_COPY_TO_CACHE 0 /* Set if should copy the data to the cache */
#define NETFS_SREQ_CLEAR_TAIL 1 /* Set if the rest of the read should be cleared */
-#define NETFS_SREQ_SHORT_IO 2 /* Set if the I/O was short */
#define NETFS_SREQ_SEEK_DATA_READ 3 /* Set if ->read() should SEEK_DATA first */
#define NETFS_SREQ_NO_PROGRESS 4 /* Set if we didn't manage to read any data */
#define NETFS_SREQ_ONDEMAND 5 /* Set if it's from on-demand read mode */
#define NETFS_SREQ_BOUNDARY 6 /* Set if ends on hard boundary (eg. ceph object) */
+#define NETFS_SREQ_HIT_EOF 7 /* Set if short due to EOF */
#define NETFS_SREQ_IN_PROGRESS 8 /* Unlocked when the subrequest completes */
#define NETFS_SREQ_NEED_RETRY 9 /* Set if the filesystem requests a retry */
#define NETFS_SREQ_RETRYING 10 /* Set if we're retrying */
#define NETFS_SREQ_FAILED 11 /* Set if the subreq failed unretryably */
-#define NETFS_SREQ_HIT_EOF 12 /* Set if we hit the EOF */
};
enum netfs_io_origin {
NETFS_READAHEAD, /* This read was triggered by readahead */
NETFS_READPAGE, /* This read is a synchronous read */
+ NETFS_READ_GAPS, /* This read is a synchronous read to fill gaps */
NETFS_READ_FOR_WRITE, /* This read is to prepare a write */
NETFS_DIO_READ, /* This is a direct I/O read */
NETFS_WRITEBACK, /* This write was triggered by writepages */
NETFS_WRITETHROUGH, /* This write was made by netfs_perform_write() */
NETFS_UNBUFFERED_WRITE, /* This is an unbuffered write */
NETFS_DIO_WRITE, /* This is a direct I/O write */
+ NETFS_PGPRIV2_COPY_TO_CACHE, /* [DEPRECATED] This is writing read data to the cache */
nr__netfs_io_origin
} __mode(byte);
@@ -224,6 +231,7 @@ struct netfs_io_request {
struct address_space *mapping; /* The mapping being accessed */
struct kiocb *iocb; /* AIO completion vector */
struct netfs_cache_resources cache_resources;
+ struct readahead_control *ractl; /* Readahead descriptor */
struct list_head proc_link; /* Link in netfs_iorequests */
struct list_head subrequests; /* Contributory I/O operations */
struct netfs_io_stream io_streams[2]; /* Streams of parallel I/O operations */
@@ -244,12 +252,10 @@ struct netfs_io_request {
unsigned int nr_group_rel; /* Number of refs to release on ->group */
spinlock_t lock; /* Lock for queuing subreqs */
atomic_t nr_outstanding; /* Number of ops in progress */
- atomic_t nr_copy_ops; /* Number of copy-to-cache ops in progress */
- size_t upper_len; /* Length can be extended to here */
unsigned long long submitted; /* Amount submitted for I/O so far */
unsigned long long len; /* Length of the request */
size_t transferred; /* Amount to be indicated as transferred */
- short error; /* 0 or error that occurred */
+ long error; /* 0 or error that occurred */
enum netfs_io_origin origin; /* Origin of the request */
bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */
u8 buffer_head_slot; /* First slot in ->buffer */
@@ -260,9 +266,9 @@ struct netfs_io_request {
unsigned long long collected_to; /* Point we've collected to */
unsigned long long cleaned_to; /* Position we've cleaned folios to */
pgoff_t no_unlock_folio; /* Don't unlock this folio after read */
+ size_t prev_donated; /* Fallback for subreq->prev_donated */
refcount_t ref;
unsigned long flags;
-#define NETFS_RREQ_INCOMPLETE_IO 0 /* Some ioreqs terminated short or with error */
#define NETFS_RREQ_COPY_TO_CACHE 1 /* Need to write to the cache */
#define NETFS_RREQ_NO_UNLOCK_FOLIO 2 /* Don't unlock no_unlock_folio on completion */
#define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */
@@ -274,6 +280,7 @@ struct netfs_io_request {
#define NETFS_RREQ_PAUSE 11 /* Pause subrequest generation */
#define NETFS_RREQ_USE_IO_ITER 12 /* Use ->io_iter rather than ->i_pages */
#define NETFS_RREQ_ALL_QUEUED 13 /* All subreqs are now queued */
+#define NETFS_RREQ_NEED_RETRY 14 /* Need to try retrying */
#define NETFS_RREQ_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark
* write to cache on read */
const struct netfs_request_ops *netfs_ops;
@@ -292,7 +299,7 @@ struct netfs_request_ops {
/* Read request handling */
void (*expand_readahead)(struct netfs_io_request *rreq);
- bool (*clamp_length)(struct netfs_io_subrequest *subreq);
+ int (*prepare_read)(struct netfs_io_subrequest *subreq);
void (*issue_read)(struct netfs_io_subrequest *subreq);
bool (*is_still_valid)(struct netfs_io_request *rreq);
int (*check_write_begin)(struct file *file, loff_t pos, unsigned len,
@@ -422,7 +429,10 @@ bool netfs_release_folio(struct folio *folio, gfp_t gfp);
vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group);
/* (Sub)request management API. */
-void netfs_subreq_terminated(struct netfs_io_subrequest *, ssize_t, bool);
+void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq,
+ bool was_async);
+void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq,
+ int error, bool was_async);
void netfs_get_subrequest(struct netfs_io_subrequest *subreq,
enum netfs_sreq_ref_trace what);
void netfs_put_subrequest(struct netfs_io_subrequest *subreq,