diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-09-16 13:29:00 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-09-16 13:29:00 +0200 |
commit | 3a4d319a8fb5a9bbdf5b31ef32841eb286b1dcc2 (patch) | |
tree | a9d890edc874d231729308c164de5be310d33651 /include/uapi | |
parent | 69a3a0a45a2f72412c2ba31761cc9193bb746fef (diff) | |
parent | 7cc2a6eadcd7a5aa36ac63e6659f5c6138c7f4d2 (diff) |
Merge tag 'for-6.12/io_uring-20240913' of git://git.kernel.dk/linux
Pull io_uring updates from Jens Axboe:
- NAPI fixes and cleanups (Pavel, Olivier)
- Add support for absolute timeouts (Pavel)
- Fixes for io-wq/sqpoll affinities (Felix)
- Efficiency improvements for dealing with huge pages (Chenliang)
- Support for a minwait mode, where the application essentially has two
timouts - one smaller one that defines the batch timeout, and the
overall large one similar to what we had before. This enables
efficient use of batching based on count + timeout, while still
working well with periods of less intensive workloads
- Use ITER_UBUF for single segment sends
- Add support for incremental buffer consumption. Right now each
operation will always consume a full buffer. With incremental
consumption, a recv/read operation only consumes the part of the
buffer that it needs to satisfy the operation
- Add support for GCOV for io_uring, to help retain a high coverage of
test to code ratio
- Fix regression with ocfs2, where an odd -EOPNOTSUPP wasn't correctly
converted to a blocking retry
- Add support for cloning registered buffers from one ring to another
- Misc cleanups (Anuj, me)
* tag 'for-6.12/io_uring-20240913' of git://git.kernel.dk/linux: (35 commits)
io_uring: add IORING_REGISTER_COPY_BUFFERS method
io_uring/register: provide helper to get io_ring_ctx from 'fd'
io_uring/rsrc: add reference count to struct io_mapped_ubuf
io_uring/rsrc: clear 'slot' entry upfront
io_uring/io-wq: inherit cpuset of cgroup in io worker
io_uring/io-wq: do not allow pinning outside of cpuset
io_uring/rw: drop -EOPNOTSUPP check in __io_complete_rw_common()
io_uring/rw: treat -EOPNOTSUPP for IOCB_NOWAIT like -EAGAIN
io_uring/sqpoll: do not allow pinning outside of cpuset
io_uring/eventfd: move refs to refcount_t
io_uring: remove unused rsrc_put_fn
io_uring: add new line after variable declaration
io_uring: add GCOV_PROFILE_URING Kconfig option
io_uring/kbuf: add support for incremental buffer consumption
io_uring/kbuf: pass in 'len' argument for buffer commit
Revert "io_uring: Require zeroed sqe->len on provided-buffers send"
io_uring/kbuf: move io_ring_head_to_buf() to kbuf.h
io_uring/kbuf: add io_kbuf_commit() helper
io_uring/kbuf: shrink nr_iovs/mode in struct buf_sel_arg
io_uring: wire up min batch wake timeout
...
Diffstat (limited to 'include/uapi')
-rw-r--r-- | include/uapi/linux/io_uring.h | 42 |
1 files changed, 41 insertions, 1 deletions
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index adc2524fd8e3..9dc5bb428c8a 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -440,11 +440,21 @@ struct io_uring_cqe { * IORING_CQE_F_SOCK_NONEMPTY If set, more data to read after socket recv * IORING_CQE_F_NOTIF Set for notification CQEs. Can be used to distinct * them from sends. + * IORING_CQE_F_BUF_MORE If set, the buffer ID set in the completion will get + * more completions. In other words, the buffer is being + * partially consumed, and will be used by the kernel for + * more completions. This is only set for buffers used via + * the incremental buffer consumption, as provided by + * a ring buffer setup with IOU_PBUF_RING_INC. For any + * other provided buffer type, all completions with a + * buffer passed back is automatically returned to the + * application. */ #define IORING_CQE_F_BUFFER (1U << 0) #define IORING_CQE_F_MORE (1U << 1) #define IORING_CQE_F_SOCK_NONEMPTY (1U << 2) #define IORING_CQE_F_NOTIF (1U << 3) +#define IORING_CQE_F_BUF_MORE (1U << 4) #define IORING_CQE_BUFFER_SHIFT 16 @@ -507,6 +517,7 @@ struct io_cqring_offsets { #define IORING_ENTER_SQ_WAIT (1U << 2) #define IORING_ENTER_EXT_ARG (1U << 3) #define IORING_ENTER_REGISTERED_RING (1U << 4) +#define IORING_ENTER_ABS_TIMER (1U << 5) /* * Passed in for io_uring_setup(2). Copied back with updated info on success @@ -542,6 +553,7 @@ struct io_uring_params { #define IORING_FEAT_LINKED_FILE (1U << 12) #define IORING_FEAT_REG_REG_RING (1U << 13) #define IORING_FEAT_RECVSEND_BUNDLE (1U << 14) +#define IORING_FEAT_MIN_TIMEOUT (1U << 15) /* * io_uring_register(2) opcodes and arguments @@ -595,6 +607,11 @@ enum io_uring_register_op { IORING_REGISTER_NAPI = 27, IORING_UNREGISTER_NAPI = 28, + IORING_REGISTER_CLOCK = 29, + + /* copy registered buffers from source ring to current ring */ + IORING_REGISTER_COPY_BUFFERS = 30, + /* this goes last */ IORING_REGISTER_LAST, @@ -675,6 +692,21 @@ struct io_uring_restriction { __u32 resv2[3]; }; +struct io_uring_clock_register { + __u32 clockid; + __u32 __resv[3]; +}; + +enum { + IORING_REGISTER_SRC_REGISTERED = 1, +}; + +struct io_uring_copy_buffers { + __u32 src_fd; + __u32 flags; + __u32 pad[6]; +}; + struct io_uring_buf { __u64 addr; __u32 len; @@ -707,9 +739,17 @@ struct io_uring_buf_ring { * mmap(2) with the offset set as: * IORING_OFF_PBUF_RING | (bgid << IORING_OFF_PBUF_SHIFT) * to get a virtual mapping for the ring. + * IOU_PBUF_RING_INC: If set, buffers consumed from this buffer ring can be + * consumed incrementally. Normally one (or more) buffers + * are fully consumed. With incremental consumptions, it's + * feasible to register big ranges of buffers, and each + * use of it will consume only as much as it needs. This + * requires that both the kernel and application keep + * track of where the current read/recv index is at. */ enum io_uring_register_pbuf_ring_flags { IOU_PBUF_RING_MMAP = 1, + IOU_PBUF_RING_INC = 2, }; /* argument for IORING_(UN)REGISTER_PBUF_RING */ @@ -758,7 +798,7 @@ enum io_uring_register_restriction_op { struct io_uring_getevents_arg { __u64 sigmask; __u32 sigmask_sz; - __u32 pad; + __u32 min_wait_usec; __u64 ts; }; |