summaryrefslogtreecommitdiff
path: root/io_uring/kbuf.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-11-01 11:09:19 -1000
committerLinus Torvalds <torvalds@linux-foundation.org>2023-11-01 11:09:19 -1000
commitffa059b262ba72571e7fefe7fa2b4ebb6776b277 (patch)
treed837908365664bc1a7192f89609546a60cff8e37 /io_uring/kbuf.c
parentca995ce438cc641c47d4b8e4abeb1878a3d07c5f (diff)
parent6ce4a93dbb5bd93bc2bdf14da63f9360a4dcd6a1 (diff)
Merge tag 'for-6.7/io_uring-2023-10-30' of git://git.kernel.dk/linux
Pull io_uring updates from Jens Axboe: "This contains the core io_uring updates, of which there are not many, and adds support for using WAITID through io_uring and hence not needing to block on these kinds of events. Outside of that, tweaks to the legacy provided buffer handling and some cleanups related to cancelations for uring_cmd support" * tag 'for-6.7/io_uring-2023-10-30' of git://git.kernel.dk/linux: io_uring/poll: use IOU_F_TWQ_LAZY_WAKE for wakeups io_uring/kbuf: Use slab for struct io_buffer objects io_uring/kbuf: Allow the full buffer id space for provided buffers io_uring/kbuf: Fix check of BID wrapping in provided buffers io_uring/rsrc: cleanup io_pin_pages() io_uring: cancelable uring_cmd io_uring: retain top 8bits of uring_cmd flags for kernel internal use io_uring: add IORING_OP_WAITID support exit: add internal include file with helpers exit: add kernel_waitid_prepare() helper exit: move core of do_wait() into helper exit: abstract out should_wake helper for child_wait_callback() io_uring/rw: add support for IORING_OP_READ_MULTISHOT io_uring/rw: mark readv/writev as vectored in the opcode definition io_uring/rw: split io_read() into a helper
Diffstat (limited to 'io_uring/kbuf.c')
-rw-r--r--io_uring/kbuf.c58
1 files changed, 33 insertions, 25 deletions
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index 9123138aa9f4..fea06810b43d 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -19,12 +19,17 @@
#define BGID_ARRAY 64
+/* BIDs are addressed by a 16-bit field in a CQE */
+#define MAX_BIDS_PER_BGID (1 << 16)
+
+struct kmem_cache *io_buf_cachep;
+
struct io_provide_buf {
struct file *file;
__u64 addr;
__u32 len;
__u32 bgid;
- __u16 nbufs;
+ __u32 nbufs;
__u16 bid;
};
@@ -255,6 +260,8 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx,
void io_destroy_buffers(struct io_ring_ctx *ctx)
{
struct io_buffer_list *bl;
+ struct list_head *item, *tmp;
+ struct io_buffer *buf;
unsigned long index;
int i;
@@ -270,12 +277,9 @@ void io_destroy_buffers(struct io_ring_ctx *ctx)
kfree(bl);
}
- while (!list_empty(&ctx->io_buffers_pages)) {
- struct page *page;
-
- page = list_first_entry(&ctx->io_buffers_pages, struct page, lru);
- list_del_init(&page->lru);
- __free_page(page);
+ list_for_each_safe(item, tmp, &ctx->io_buffers_cache) {
+ buf = list_entry(item, struct io_buffer, list);
+ kmem_cache_free(io_buf_cachep, buf);
}
}
@@ -289,7 +293,7 @@ int io_remove_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return -EINVAL;
tmp = READ_ONCE(sqe->fd);
- if (!tmp || tmp > USHRT_MAX)
+ if (!tmp || tmp > MAX_BIDS_PER_BGID)
return -EINVAL;
memset(p, 0, sizeof(*p));
@@ -332,7 +336,7 @@ int io_provide_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
return -EINVAL;
tmp = READ_ONCE(sqe->fd);
- if (!tmp || tmp > USHRT_MAX)
+ if (!tmp || tmp > MAX_BIDS_PER_BGID)
return -E2BIG;
p->nbufs = tmp;
p->addr = READ_ONCE(sqe->addr);
@@ -352,17 +356,18 @@ int io_provide_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
tmp = READ_ONCE(sqe->off);
if (tmp > USHRT_MAX)
return -E2BIG;
- if (tmp + p->nbufs >= USHRT_MAX)
+ if (tmp + p->nbufs > MAX_BIDS_PER_BGID)
return -EINVAL;
p->bid = tmp;
return 0;
}
+#define IO_BUFFER_ALLOC_BATCH 64
+
static int io_refill_buffer_cache(struct io_ring_ctx *ctx)
{
- struct io_buffer *buf;
- struct page *page;
- int bufs_in_page;
+ struct io_buffer *bufs[IO_BUFFER_ALLOC_BATCH];
+ int allocated;
/*
* Completions that don't happen inline (eg not under uring_lock) will
@@ -382,22 +387,25 @@ static int io_refill_buffer_cache(struct io_ring_ctx *ctx)
/*
* No free buffers and no completion entries either. Allocate a new
- * page worth of buffer entries and add those to our freelist.
+ * batch of buffer entries and add those to our freelist.
*/
- page = alloc_page(GFP_KERNEL_ACCOUNT);
- if (!page)
- return -ENOMEM;
-
- list_add(&page->lru, &ctx->io_buffers_pages);
- buf = page_address(page);
- bufs_in_page = PAGE_SIZE / sizeof(*buf);
- while (bufs_in_page) {
- list_add_tail(&buf->list, &ctx->io_buffers_cache);
- buf++;
- bufs_in_page--;
+ allocated = kmem_cache_alloc_bulk(io_buf_cachep, GFP_KERNEL_ACCOUNT,
+ ARRAY_SIZE(bufs), (void **) bufs);
+ if (unlikely(!allocated)) {
+ /*
+ * Bulk alloc is all-or-nothing. If we fail to get a batch,
+ * retry single alloc to be on the safe side.
+ */
+ bufs[0] = kmem_cache_alloc(io_buf_cachep, GFP_KERNEL);
+ if (!bufs[0])
+ return -ENOMEM;
+ allocated = 1;
}
+ while (allocated)
+ list_add_tail(&bufs[--allocated]->list, &ctx->io_buffers_cache);
+
return 0;
}