|  |  |  |
|---|---|---|
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-08-28 11:59:52 -0700 |
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-08-28 11:59:52 -0700 |
| commit | 6016fc9162245c5b109305841f76cca59c20a273 (patch) | |
| tree | 175dc76b2f3e5daa20e21f6ce35484beafe0d966 /include/linux/fs.h | |
| parent | dd2c0198a8365dcc3bb6aed22313d56088e3af55 (diff) | |
| parent | 377698d4abe2cd118dd866d5ef19e2f1aa6b9758 (diff) | |
Merge tag 'iomap-6.6-merge-3' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull iomap updates from Darrick Wong:
 "We've got some big changes for this release -- I'm very happy to be
  landing willy's work to enable large folios for the page cache for
  general read and write IOs when the fs can make contiguous space
  allocations, and Ritesh's work to track sub-folio dirty state to
  eliminate the write amplification problems inherent in using large
  folios.

  As a bonus, io_uring can now process write completions in the caller's
  context instead of bouncing through a workqueue, which should reduce
  io latency dramatically. IOWs, XFS should see a nice performance bump
  for both IO paths.

  Summary:

   - Make large writes to the page cache fill sparse parts of the cache
     with large folios, then use large memcpy calls for the large folio.

   - Track the per-block dirty state of each large folio so that a
     buffered write to a single byte on a large folio does not result in
     a (potentially) multi-megabyte writeback IO (a standalone sketch of
     this idea follows the summary).

   - Allow some directio completions to be performed in the initiating
     task's context instead of punting through a workqueue. This will
     reduce latency for some io_uring requests"
* tag 'iomap-6.6-merge-3' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (26 commits)
  iomap: support IOCB_DIO_CALLER_COMP
  io_uring/rw: add write support for IOCB_DIO_CALLER_COMP
  fs: add IOCB flags related to passing back dio completions
  iomap: add IOMAP_DIO_INLINE_COMP
  iomap: only set iocb->private for polled bio
  iomap: treat a write through cache the same as FUA
  iomap: use an unsigned type for IOMAP_DIO_* defines
  iomap: cleanup up iomap_dio_bio_end_io()
  iomap: Add per-block dirty state tracking to improve performance
  iomap: Allocate ifs in ->write_begin() early
  iomap: Refactor iomap_write_delalloc_punch() function out
  iomap: Use iomap_punch_t typedef
  iomap: Fix possible overflow condition in iomap_write_delalloc_scan
  iomap: Add some uptodate state handling helpers for ifs state bitmap
  iomap: Drop ifs argument from iomap_set_range_uptodate()
  iomap: Rename iomap_page to iomap_folio_state and others
  iomap: Copy larger chunks from userspace
  iomap: Create large folios in the buffered write path
  filemap: Allow __filemap_get_folio to allocate large folios
  filemap: Add fgf_t typedef
  ...
Diffstat (limited to 'include/linux/fs.h')
|  |  |  |
|---|---|---|
| -rw-r--r-- | include/linux/fs.h | 35 |

1 file changed, 33 insertions, 2 deletions
```diff
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4e270f3ed58e..dda08d973639 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -338,6 +338,20 @@ enum rw_hint {
 #define IOCB_NOIO		(1 << 20)
 /* can use bio alloc cache */
 #define IOCB_ALLOC_CACHE	(1 << 21)
+/*
+ * IOCB_DIO_CALLER_COMP can be set by the iocb owner, to indicate that the
+ * iocb completion can be passed back to the owner for execution from a safe
+ * context rather than needing to be punted through a workqueue. If this
+ * flag is set, the bio completion handling may set iocb->dio_complete to a
+ * handler function and iocb->private to context information for that handler.
+ * The issuer should call the handler with that context information from task
+ * context to complete the processing of the iocb. Note that while this
+ * provides a task context for the dio_complete() callback, it should only be
+ * used on the completion side for non-IO generating completions. It's fine to
+ * call blocking functions from this callback, but they should not wait for
+ * unrelated IO (like cache flushing, new IO generation, etc).
+ */
+#define IOCB_DIO_CALLER_COMP	(1 << 22)
 
 /* for use in trace events */
 #define TRACE_IOCB_STRINGS \
@@ -351,7 +365,8 @@ enum rw_hint {
 	{ IOCB_WRITE,		"WRITE" }, \
 	{ IOCB_WAITQ,		"WAITQ" }, \
 	{ IOCB_NOIO,		"NOIO" }, \
-	{ IOCB_ALLOC_CACHE,	"ALLOC_CACHE" }
+	{ IOCB_ALLOC_CACHE,	"ALLOC_CACHE" }, \
+	{ IOCB_DIO_CALLER_COMP,	"CALLER_COMP" }
 
 struct kiocb {
 	struct file		*ki_filp;
@@ -360,7 +375,23 @@ struct kiocb {
 	void			*private;
 	int			ki_flags;
 	u16			ki_ioprio; /* See linux/ioprio.h */
-	struct wait_page_queue	*ki_waitq; /* for async buffered IO */
+	union {
+		/*
+		 * Only used for async buffered reads, where it denotes the
+		 * page waitqueue associated with completing the read. Valid
+		 * IFF IOCB_WAITQ is set.
+		 */
+		struct wait_page_queue	*ki_waitq;
+		/*
+		 * Can be used for O_DIRECT IO, where the completion handling
+		 * is punted back to the issuer of the IO. May only be set
+		 * if IOCB_DIO_CALLER_COMP is set by the issuer, and the issuer
+		 * must then check for presence of this handler when ki_complete
+		 * is invoked. The data passed in to this handler must be
+		 * assigned to ->private when dio_complete is assigned.
+		 */
+		ssize_t (*dio_complete)(void *data);
+	};
 };
 
 static inline bool is_sync_kiocb(struct kiocb *kiocb)
```
