author    | Linus Torvalds <torvalds@linux-foundation.org> | 2023-11-01 11:09:19 -1000
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-11-01 11:09:19 -1000
commit    | ffa059b262ba72571e7fefe7fa2b4ebb6776b277 (patch)
tree      | d837908365664bc1a7192f89609546a60cff8e37 /io_uring/kbuf.c
parent    | ca995ce438cc641c47d4b8e4abeb1878a3d07c5f (diff)
parent    | 6ce4a93dbb5bd93bc2bdf14da63f9360a4dcd6a1 (diff)
Merge tag 'for-6.7/io_uring-2023-10-30' of git://git.kernel.dk/linux
Pull io_uring updates from Jens Axboe:
"This contains the core io_uring updates, of which there are not many,
and adds support for using WAITID through io_uring and hence not
needing to block on these kinds of events.
Outside of that, tweaks to the legacy provided buffer handling and
some cleanups related to cancelations for uring_cmd support"
* tag 'for-6.7/io_uring-2023-10-30' of git://git.kernel.dk/linux:
io_uring/poll: use IOU_F_TWQ_LAZY_WAKE for wakeups
io_uring/kbuf: Use slab for struct io_buffer objects
io_uring/kbuf: Allow the full buffer id space for provided buffers
io_uring/kbuf: Fix check of BID wrapping in provided buffers
io_uring/rsrc: cleanup io_pin_pages()
io_uring: cancelable uring_cmd
io_uring: retain top 8bits of uring_cmd flags for kernel internal use
io_uring: add IORING_OP_WAITID support
exit: add internal include file with helpers
exit: add kernel_waitid_prepare() helper
exit: move core of do_wait() into helper
exit: abstract out should_wake helper for child_wait_callback()
io_uring/rw: add support for IORING_OP_READ_MULTISHOT
io_uring/rw: mark readv/writev as vectored in the opcode definition
io_uring/rw: split io_read() into a helper
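
As a point of reference for the IORING_OP_WAITID addition listed above, the sketch below shows how userspace might reap a child through the ring instead of blocking in waitid(2). It assumes a liburing release that ships the io_uring_prep_waitid() helper (paired with this kernel series); it is illustrative only and not part of this pull.

/*
 * Hedged sketch: wait for a child via IORING_OP_WAITID instead of a
 * blocking waitid(2) call. Assumes liburing provides io_uring_prep_waitid().
 * Build with: cc waitid_demo.c -luring
 */
#include <liburing.h>
#include <sys/wait.h>
#include <unistd.h>
#include <stdio.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	siginfo_t info = { 0 };
	pid_t pid;

	if (io_uring_queue_init(4, &ring, 0))
		return 1;

	pid = fork();
	if (pid == 0)
		_exit(42);		/* child exits immediately */

	sqe = io_uring_get_sqe(&ring);
	/* queue the wait; the result arrives as a CQE, no blocking syscall */
	io_uring_prep_waitid(sqe, P_PID, pid, &info, WEXITED, 0);
	io_uring_submit(&ring);

	if (io_uring_wait_cqe(&ring, &cqe))
		return 1;
	printf("waitid res=%d, child exit status=%d\n", cqe->res, info.si_status);
	io_uring_cqe_seen(&ring, cqe);
	io_uring_queue_exit(&ring);
	return 0;
}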
Diffstat (limited to 'io_uring/kbuf.c')
-rw-r--r-- | io_uring/kbuf.c | 58
1 file changed, 33 insertions(+), 25 deletions(-)
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index 9123138aa9f4..fea06810b43d 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -19,12 +19,17 @@
 #define BGID_ARRAY 64
 
+/* BIDs are addressed by a 16-bit field in a CQE */
+#define MAX_BIDS_PER_BGID (1 << 16)
+
+struct kmem_cache *io_buf_cachep;
+
 struct io_provide_buf {
 	struct file *file;
 	__u64 addr;
 	__u32 len;
 	__u32 bgid;
-	__u16 nbufs;
+	__u32 nbufs;
 	__u16 bid;
 };
@@ -255,6 +260,8 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx,
 void io_destroy_buffers(struct io_ring_ctx *ctx)
 {
 	struct io_buffer_list *bl;
+	struct list_head *item, *tmp;
+	struct io_buffer *buf;
 	unsigned long index;
 	int i;
@@ -270,12 +277,9 @@ void io_destroy_buffers(struct io_ring_ctx *ctx)
 		kfree(bl);
 	}
 
-	while (!list_empty(&ctx->io_buffers_pages)) {
-		struct page *page;
-
-		page = list_first_entry(&ctx->io_buffers_pages, struct page, lru);
-		list_del_init(&page->lru);
-		__free_page(page);
+	list_for_each_safe(item, tmp, &ctx->io_buffers_cache) {
+		buf = list_entry(item, struct io_buffer, list);
+		kmem_cache_free(io_buf_cachep, buf);
 	}
 }
@@ -289,7 +293,7 @@ int io_remove_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		return -EINVAL;
 
 	tmp = READ_ONCE(sqe->fd);
-	if (!tmp || tmp > USHRT_MAX)
+	if (!tmp || tmp > MAX_BIDS_PER_BGID)
 		return -EINVAL;
 
 	memset(p, 0, sizeof(*p));
@@ -332,7 +336,7 @@ int io_provide_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		return -EINVAL;
 
 	tmp = READ_ONCE(sqe->fd);
-	if (!tmp || tmp > USHRT_MAX)
+	if (!tmp || tmp > MAX_BIDS_PER_BGID)
 		return -E2BIG;
 	p->nbufs = tmp;
 	p->addr = READ_ONCE(sqe->addr);
@@ -352,17 +356,18 @@ int io_provide_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	tmp = READ_ONCE(sqe->off);
 	if (tmp > USHRT_MAX)
 		return -E2BIG;
-	if (tmp + p->nbufs >= USHRT_MAX)
+	if (tmp + p->nbufs > MAX_BIDS_PER_BGID)
 		return -EINVAL;
 	p->bid = tmp;
 	return 0;
 }
 
+#define IO_BUFFER_ALLOC_BATCH 64
+
 static int io_refill_buffer_cache(struct io_ring_ctx *ctx)
 {
-	struct io_buffer *buf;
-	struct page *page;
-	int bufs_in_page;
+	struct io_buffer *bufs[IO_BUFFER_ALLOC_BATCH];
+	int allocated;
 
 	/*
 	 * Completions that don't happen inline (eg not under uring_lock) will
@@ -382,22 +387,25 @@ static int io_refill_buffer_cache(struct io_ring_ctx *ctx)
 	/*
 	 * No free buffers and no completion entries either. Allocate a new
-	 * page worth of buffer entries and add those to our freelist.
+	 * batch of buffer entries and add those to our freelist.
 	 */
-	page = alloc_page(GFP_KERNEL_ACCOUNT);
-	if (!page)
-		return -ENOMEM;
-
-	list_add(&page->lru, &ctx->io_buffers_pages);
-	buf = page_address(page);
-	bufs_in_page = PAGE_SIZE / sizeof(*buf);
-	while (bufs_in_page) {
-		list_add_tail(&buf->list, &ctx->io_buffers_cache);
-		buf++;
-		bufs_in_page--;
+	allocated = kmem_cache_alloc_bulk(io_buf_cachep, GFP_KERNEL_ACCOUNT,
+					  ARRAY_SIZE(bufs), (void **) bufs);
+	if (unlikely(!allocated)) {
+		/*
+		 * Bulk alloc is all-or-nothing. If we fail to get a batch,
+		 * retry single alloc to be on the safe side.
+		 */
+		bufs[0] = kmem_cache_alloc(io_buf_cachep, GFP_KERNEL);
+		if (!bufs[0])
+			return -ENOMEM;
+		allocated = 1;
 	}
+
+	while (allocated)
+		list_add_tail(&bufs[--allocated]->list, &ctx->io_buffers_cache);
+
 	return 0;
 }
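
For context on the provided-buffer limits relaxed above: with nbufs widened to __u32 and the USHRT_MAX checks replaced by MAX_BIDS_PER_BGID, one legacy buffer group can now cover the full 16-bit BID space (65536 buffers starting at bid 0). The userspace sketch below exercises that limit through liburing's io_uring_prep_provide_buffers() helper; the group id, buffer size, and error handling are illustrative assumptions, not part of this diff.

/*
 * Hedged sketch: hand a full 16-bit BID space of legacy provided buffers
 * to the kernel in one SQE. Assumes liburing is installed.
 * Build with: cc provide_bufs_demo.c -luring
 */
#include <liburing.h>
#include <stdlib.h>
#include <stdio.h>

#define NR_BUFS  65536		/* full BID space: bid 0..65535 */
#define BUF_SIZE 1024
#define BGID     7		/* arbitrary buffer group id */

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	void *base;

	if (io_uring_queue_init(8, &ring, 0))
		return 1;
	base = malloc((size_t)NR_BUFS * BUF_SIZE);
	if (!base)
		return 1;

	sqe = io_uring_get_sqe(&ring);
	/* one SQE provides all 65536 buffers to group BGID, bids starting at 0 */
	io_uring_prep_provide_buffers(sqe, base, BUF_SIZE, NR_BUFS, BGID, 0);

	if (io_uring_submit_and_wait(&ring, 1) < 0)
		return 1;
	if (io_uring_wait_cqe(&ring, &cqe))
		return 1;
	printf("provide_buffers result: %d\n", cqe->res);
	io_uring_cqe_seen(&ring, cqe);
	io_uring_queue_exit(&ring);
	return 0;
}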