author		David Howells <dhowells@redhat.com>	2024-07-02 00:40:22 +0100
committer	Christian Brauner <brauner@kernel.org>	2024-09-12 12:20:41 +0200
commit		ee4cdf7ba857a894ad1650d6ab77669cbbfa329e (patch)
tree		8258e3b756adf109085d66a8b63cd08db03abad0 /include/linux/netfs.h
parent		2e45b922977c07bb339d76fd45e68f9b907fef7d (diff)
netfs: Speed up buffered reading
Improve the efficiency of buffered reads in a number of ways:
(1) Overhaul the algorithm in general so that it's a lot more compact and
split the read submission code between buffered and unbuffered
versions. The unbuffered version can be vastly simplified.
(2) Read-result collection is handed off to a work queue rather than being
    done in the I/O thread. Multiple subrequests can be processed
    simultaneously (see the sketch after this list).
(3) When a subrequest is collected, any folios it fully spans are
collected and "spare" data on either side is donated to either the
previous or the next subrequest in the sequence.
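
To picture what (2) and (3) mean in practice, here is a minimal C sketch
(not the actual netfs code) of the pattern: the I/O completion path merely
queues a work item, and the collector running on the workqueue handles the
folios a subrequest fully spans while donating the partial-folio tail
onward. The `my_*` structures and helpers are hypothetical stand-ins,
simplified to forward donation only; just the workqueue and list APIs are
real kernel interfaces.

```c
#include <linux/workqueue.h>
#include <linux/list.h>
#include <linux/mm.h>

struct my_request {
	struct work_struct	collect_work;	/* Collection runs here, not in the I/O thread */
	struct list_head	subrequests;
};

struct my_subreq {
	struct list_head	link;
	unsigned long long	start;		/* File position of this subrequest */
	size_t			len;		/* Bytes this subrequest covers */
	size_t			prev_donated;	/* Bytes donated by the previous subreq */
};

/* Called from the I/O completion path: don't collect inline, just punt. */
static void my_subreq_done(struct my_request *rreq)
{
	queue_work(system_unbound_wq, &rreq->collect_work);
}

static void my_collect_worker(struct work_struct *work)
{
	struct my_request *rreq = container_of(work, struct my_request, collect_work);
	struct my_subreq *subreq;

	list_for_each_entry(subreq, &rreq->subrequests, link) {
		/* Fold in whatever the previous subreq handed over. */
		unsigned long long fstart = subreq->start - subreq->prev_donated;
		size_t flen  = subreq->len + subreq->prev_donated;
		size_t spare = (fstart + flen) % PAGE_SIZE;	/* Tail not filling a folio */

		/* Unlock the folios fully inside [fstart, fstart + flen - spare)
		 * here (elided), then donate the spare tail onward so that
		 * exactly one subrequest is responsible for finishing each folio.
		 */
		if (spare && !list_is_last(&subreq->link, &rreq->subrequests)) {
			struct my_subreq *next = list_next_entry(subreq, link);

			next->prev_donated += spare;
		}
	}
}
```

Punting to `system_unbound_wq` is what allows several subrequests to be
collected in parallel with the thread that issued the I/O.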
Notes:
(*) Readahead expansion massively slows down fio, presumably because it
    causes a load of extra allocations, both folio and xarray, up front
    before RPC requests can be transmitted.
(*) RDMA with cifs does appear to work, both with SIW and RXE.
(*) PG_private_2-based reading and copy-to-cache is split out into its own
file and altered to use folio_queue. Note that the copy to the cache
now creates a new write transaction against the cache and adds the
folios to be copied into it. This allows it to use part of the
writeback I/O code.
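
As a rough illustration of the last note: under the reworked scheme the
read path no longer copies to the cache inline under PG_private_2, but
instead opens a write request against the cache and appends the folios to
be copied onto a folio_queue, letting the writeback I/O code issue the
store. In the hedged sketch below, everything prefixed `my_` is a
hypothetical stand-in; only `struct folio_queue` and its `folioq_*()`
helpers come from this patch series.

```c
#include <linux/folio_queue.h>
#include <linux/slab.h>

struct my_cache_write {			/* Hypothetical write-to-cache transaction */
	struct folio_queue *tail;	/* Last segment of the folio queue */
};

/* Append a folio destined for the cache, chaining on a new queue
 * segment when the current one is full.
 */
static int my_add_folio_to_cache_copy(struct my_cache_write *wreq,
				      struct folio *folio)
{
	struct folio_queue *fq = wreq->tail;

	if (folioq_full(fq)) {
		struct folio_queue *next;

		next = kmalloc(sizeof(*next), GFP_KERNEL);
		if (!next)
			return -ENOMEM;
		folioq_init(next);
		next->prev = fq;
		fq->next = next;
		wreq->tail = fq = next;
	}

	/* Takes a slot; the writeback machinery issues the actual store. */
	folioq_append(fq, folio);
	return 0;
}
```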
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Link: https://lore.kernel.org/r/20240814203850.2240469-20-dhowells@redhat.com/ # v2
Signed-off-by: Christian Brauner <brauner@kernel.org>
Diffstat (limited to 'include/linux/netfs.h')
-rw-r--r--	include/linux/netfs.h	26
1 file changed, 18 insertions(+), 8 deletions(-)
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index bd0e3d147822..c0f0c9c87d86 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -178,36 +178,43 @@ struct netfs_io_subrequest {
 	unsigned long long	start;		/* Where to start the I/O */
 	size_t			len;		/* Size of the I/O */
 	size_t			transferred;	/* Amount of data transferred */
+	size_t			consumed;	/* Amount of read data consumed */
+	size_t			prev_donated;	/* Amount of data donated from previous subreq */
+	size_t			next_donated;	/* Amount of data donated from next subreq */
 	refcount_t		ref;
 	short			error;		/* 0 or error that occurred */
 	unsigned short		debug_index;	/* Index in list (for debugging output) */
 	unsigned int		nr_segs;	/* Number of segs in io_iter */
 	enum netfs_io_source	source;		/* Where to read from/write to */
 	unsigned char		stream_nr;	/* I/O stream this belongs to */
+	unsigned char		curr_folioq_slot; /* Folio currently being read */
+	unsigned char		curr_folio_order; /* Order of folio */
+	struct folio_queue	*curr_folioq;	/* Queue segment in which current folio resides */
 	unsigned long		flags;
 #define NETFS_SREQ_COPY_TO_CACHE	0	/* Set if should copy the data to the cache */
 #define NETFS_SREQ_CLEAR_TAIL		1	/* Set if the rest of the read should be cleared */
-#define NETFS_SREQ_SHORT_IO		2	/* Set if the I/O was short */
 #define NETFS_SREQ_SEEK_DATA_READ	3	/* Set if ->read() should SEEK_DATA first */
 #define NETFS_SREQ_NO_PROGRESS		4	/* Set if we didn't manage to read any data */
 #define NETFS_SREQ_ONDEMAND		5	/* Set if it's from on-demand read mode */
 #define NETFS_SREQ_BOUNDARY		6	/* Set if ends on hard boundary (eg. ceph object) */
+#define NETFS_SREQ_HIT_EOF		7	/* Set if short due to EOF */
 #define NETFS_SREQ_IN_PROGRESS		8	/* Unlocked when the subrequest completes */
 #define NETFS_SREQ_NEED_RETRY		9	/* Set if the filesystem requests a retry */
 #define NETFS_SREQ_RETRYING		10	/* Set if we're retrying */
 #define NETFS_SREQ_FAILED		11	/* Set if the subreq failed unretryably */
-#define NETFS_SREQ_HIT_EOF		12	/* Set if we hit the EOF */
 };
 
 enum netfs_io_origin {
 	NETFS_READAHEAD,		/* This read was triggered by readahead */
 	NETFS_READPAGE,			/* This read is a synchronous read */
+	NETFS_READ_GAPS,		/* This read is a synchronous read to fill gaps */
 	NETFS_READ_FOR_WRITE,		/* This read is to prepare a write */
 	NETFS_DIO_READ,			/* This is a direct I/O read */
 	NETFS_WRITEBACK,		/* This write was triggered by writepages */
 	NETFS_WRITETHROUGH,		/* This write was made by netfs_perform_write() */
 	NETFS_UNBUFFERED_WRITE,		/* This is an unbuffered write */
 	NETFS_DIO_WRITE,		/* This is a direct I/O write */
+	NETFS_PGPRIV2_COPY_TO_CACHE,	/* [DEPRECATED] This is writing read data to the cache */
 	nr__netfs_io_origin
 } __mode(byte);
@@ -224,6 +231,7 @@ struct netfs_io_request {
 	struct address_space	*mapping;	/* The mapping being accessed */
 	struct kiocb		*iocb;		/* AIO completion vector */
 	struct netfs_cache_resources cache_resources;
+	struct readahead_control *ractl;	/* Readahead descriptor */
 	struct list_head	proc_link;	/* Link in netfs_iorequests */
 	struct list_head	subrequests;	/* Contributory I/O operations */
 	struct netfs_io_stream	io_streams[2];	/* Streams of parallel I/O operations */
@@ -244,12 +252,10 @@ struct netfs_io_request {
 	unsigned int		nr_group_rel;	/* Number of refs to release on ->group */
 	spinlock_t		lock;		/* Lock for queuing subreqs */
 	atomic_t		nr_outstanding;	/* Number of ops in progress */
-	atomic_t		nr_copy_ops;	/* Number of copy-to-cache ops in progress */
-	size_t			upper_len;	/* Length can be extended to here */
 	unsigned long long	submitted;	/* Amount submitted for I/O so far */
 	unsigned long long	len;		/* Length of the request */
 	size_t			transferred;	/* Amount to be indicated as transferred */
-	short			error;		/* 0 or error that occurred */
+	long			error;		/* 0 or error that occurred */
 	enum netfs_io_origin	origin;		/* Origin of the request */
 	bool			direct_bv_unpin; /* T if direct_bv[] must be unpinned */
 	u8			buffer_head_slot; /* First slot in ->buffer */
@@ -260,9 +266,9 @@ struct netfs_io_request {
 	unsigned long long	collected_to;	/* Point we've collected to */
 	unsigned long long	cleaned_to;	/* Position we've cleaned folios to */
 	pgoff_t			no_unlock_folio; /* Don't unlock this folio after read */
+	size_t			prev_donated;	/* Fallback for subreq->prev_donated */
 	refcount_t		ref;
 	unsigned long		flags;
-#define NETFS_RREQ_INCOMPLETE_IO	0	/* Some ioreqs terminated short or with error */
 #define NETFS_RREQ_COPY_TO_CACHE	1	/* Need to write to the cache */
 #define NETFS_RREQ_NO_UNLOCK_FOLIO	2	/* Don't unlock no_unlock_folio on completion */
 #define NETFS_RREQ_DONT_UNLOCK_FOLIOS	3	/* Don't unlock the folios on completion */
@@ -274,6 +280,7 @@ struct netfs_io_request {
 #define NETFS_RREQ_PAUSE		11	/* Pause subrequest generation */
 #define NETFS_RREQ_USE_IO_ITER		12	/* Use ->io_iter rather than ->i_pages */
 #define NETFS_RREQ_ALL_QUEUED		13	/* All subreqs are now queued */
+#define NETFS_RREQ_NEED_RETRY		14	/* Need to try retrying */
 #define NETFS_RREQ_USE_PGPRIV2		31	/* [DEPRECATED] Use PG_private_2 to mark
 						 * write to cache on read */
 	const struct netfs_request_ops *netfs_ops;
@@ -292,7 +299,7 @@ struct netfs_request_ops {
 
 	/* Read request handling */
 	void (*expand_readahead)(struct netfs_io_request *rreq);
-	bool (*clamp_length)(struct netfs_io_subrequest *subreq);
+	int (*prepare_read)(struct netfs_io_subrequest *subreq);
 	void (*issue_read)(struct netfs_io_subrequest *subreq);
 	bool (*is_still_valid)(struct netfs_io_request *rreq);
 	int (*check_write_begin)(struct file *file, loff_t pos, unsigned len,
@@ -422,7 +429,10 @@ bool netfs_release_folio(struct folio *folio, gfp_t gfp);
 vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group);
 
 /* (Sub)request management API. */
-void netfs_subreq_terminated(struct netfs_io_subrequest *, ssize_t, bool);
+void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq,
+				bool was_async);
+void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq,
+				  int error, bool was_async);
 void netfs_get_subrequest(struct netfs_io_subrequest *subreq,
 			  enum netfs_sreq_ref_trace what);
 void netfs_put_subrequest(struct netfs_io_subrequest *subreq,
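
To make the API changes above concrete, here is a hedged sketch of what a
filesystem conversion might look like: `->clamp_length()` becomes
`->prepare_read()`, and the completion path calls
`netfs_read_subreq_terminated()` instead of the removed
`netfs_subreq_terminated()`. The `myfs_*` names, the RPC size limit and
the transport call are all hypothetical; only the netfs entry points are
from this patch.

```c
#include <linux/netfs.h>
#include <linux/minmax.h>

#define MYFS_MAX_RPC_SIZE	(256 * 1024)	/* Hypothetical per-RPC payload limit */

/* Hypothetical transport call: reads subreq->len bytes at subreq->start
 * and invokes @done with a byte count or a negative error.
 */
void myfs_send_read(struct netfs_io_subrequest *subreq,
		    void (*done)(struct netfs_io_subrequest *, ssize_t));

/* Replaces ->clamp_length(): trim the subrequest to one RPC; return 0
 * to proceed or a negative error to fail it.
 */
static int myfs_prepare_read(struct netfs_io_subrequest *subreq)
{
	subreq->len = min_t(size_t, subreq->len, MYFS_MAX_RPC_SIZE);
	return 0;
}

/* Completion handler: just report the result; folio unlocking and
 * donation now happen in the netfs collector, off a work queue.
 */
static void myfs_read_done(struct netfs_io_subrequest *subreq, ssize_t ret)
{
	int error = 0;

	if (ret < 0)
		error = ret;
	else
		subreq->transferred += ret;

	netfs_read_subreq_terminated(subreq, error, false);
}

static void myfs_issue_read(struct netfs_io_subrequest *subreq)
{
	myfs_send_read(subreq, myfs_read_done);
}

static const struct netfs_request_ops myfs_req_ops = {
	.prepare_read	= myfs_prepare_read,
	.issue_read	= myfs_issue_read,
};
```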