From 7500194a630b11236761df35fef300009d7d3f6f Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 30 Nov 2022 15:21:59 +0000 Subject: io_uring: reshuffle issue_flags Reshuffle issue flags to keep normal flags separate from the uring_cmd ctx-setup like flags. Shift the second type to the second byte so it's easier to add new ones in the future. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/d6e4696c883943082d248716f4cd568f37b17a74.1669821213.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 0ded9e271523..29e519752da4 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -9,16 +9,15 @@ enum io_uring_cmd_flags { IO_URING_F_COMPLETE_DEFER = 1, IO_URING_F_UNLOCKED = 2, + /* the request is executed from poll, it should not be freed */ + IO_URING_F_MULTISHOT = 4, /* int's last bit, sign checks are usually faster than a bit test */ IO_URING_F_NONBLOCK = INT_MIN, /* ctx state flags, for URING_CMD */ - IO_URING_F_SQE128 = 4, - IO_URING_F_CQE32 = 8, - IO_URING_F_IOPOLL = 16, - - /* the request is executed from poll, it should not be freed */ - IO_URING_F_MULTISHOT = 32, + IO_URING_F_SQE128 = (1 << 8), + IO_URING_F_CQE32 = (1 << 9), + IO_URING_F_IOPOLL = (1 << 10), }; struct io_uring_cmd { -- cgit v1.3.1 From e6aeb2721d3bad8379c43644d0380908e93b0187 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 7 Dec 2022 03:53:30 +0000 Subject: io_uring: complete all requests in task context This patch adds ctx->task_complete flag. If set, we'll complete all requests in the context of the original task. Note, this extends to completion CQE posting only but not io_kiocb cleanup / free, e.g. io-wq may free the requests in the free calllback. This flag will be used later for optimisations purposes. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/21ece72953f76bb2e77659a72a14326227ab6460.1670384893.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring.h | 2 ++ include/linux/io_uring_types.h | 2 ++ io_uring/io_uring.c | 14 +++++++++++--- 3 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 29e519752da4..934e5dd4ccc0 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -11,6 +11,8 @@ enum io_uring_cmd_flags { IO_URING_F_UNLOCKED = 2, /* the request is executed from poll, it should not be freed */ IO_URING_F_MULTISHOT = 4, + /* executed by io-wq */ + IO_URING_F_IOWQ = 8, /* int's last bit, sign checks are usually faster than a bit test */ IO_URING_F_NONBLOCK = INT_MIN, diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index accdfecee953..6be1e1359c89 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -208,6 +208,8 @@ struct io_ring_ctx { unsigned int drain_disabled: 1; unsigned int has_evfd: 1; unsigned int syscall_iopoll: 1; + /* all CQEs should be posted only by the submitter task */ + unsigned int task_complete: 1; } ____cacheline_aligned_in_smp; /* submission data */ diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index ba5ef1e675c1..13d56472dd49 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -921,8 +921,11 @@ static void __io_req_complete_post(struct io_kiocb *req) void io_req_complete_post(struct io_kiocb *req, unsigned issue_flags) { - if (!(issue_flags & IO_URING_F_UNLOCKED) || - !(req->ctx->flags & IORING_SETUP_IOPOLL)) { + if (req->ctx->task_complete && (issue_flags & IO_URING_F_IOWQ)) { + req->io_task_work.func = io_req_task_complete; + io_req_task_work_add(req); + } else if (!(issue_flags & IO_URING_F_UNLOCKED) || + !(req->ctx->flags & IORING_SETUP_IOPOLL)) { __io_req_complete_post(req); } else { struct io_ring_ctx *ctx = req->ctx; @@ -1830,7 +1833,7 @@ void io_wq_submit_work(struct io_wq_work *work) { struct io_kiocb *req = container_of(work, struct io_kiocb, work); const struct io_op_def *def = &io_op_defs[req->opcode]; - unsigned int issue_flags = IO_URING_F_UNLOCKED; + unsigned int issue_flags = IO_URING_F_UNLOCKED | IO_URING_F_IOWQ; bool needs_poll = false; int ret = 0, err = -ECANCELED; @@ -3490,6 +3493,11 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p, if (!ctx) return -ENOMEM; + if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) && + !(ctx->flags & IORING_SETUP_IOPOLL) && + !(ctx->flags & IORING_SETUP_SQPOLL)) + ctx->task_complete = true; + /* * When SETUP_IOPOLL and SETUP_SQPOLL are both enabled, user * space applications don't need to do io completion events -- cgit v1.3.1 From d34b1b0b6779d4f5ee877b53cad90eef0f1cbe34 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 7 Dec 2022 03:53:32 +0000 Subject: io_uring: use tw for putting rsrc Use task_work for completing rsrc removals, it'll be needed later for spinlock optimisations. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/cbba5d53a11ee6fc2194dacea262c1d733c8b529.1670384893.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 1 + io_uring/io_uring.c | 1 + io_uring/rsrc.c | 19 +++++++++++++++++-- io_uring/rsrc.h | 1 + 4 files changed, 20 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 6be1e1359c89..dcd8a563ab52 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -328,6 +328,7 @@ struct io_ring_ctx { struct io_rsrc_data *buf_data; struct delayed_work rsrc_put_work; + struct callback_head rsrc_put_tw; struct llist_head rsrc_put_llist; struct list_head rsrc_ref_list; spinlock_t rsrc_ref_lock; diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 13d56472dd49..a29cbf973287 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -326,6 +326,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) spin_lock_init(&ctx->rsrc_ref_lock); INIT_LIST_HEAD(&ctx->rsrc_ref_list); INIT_DELAYED_WORK(&ctx->rsrc_put_work, io_rsrc_put_work); + init_task_work(&ctx->rsrc_put_tw, io_rsrc_put_tw); init_llist_head(&ctx->rsrc_put_llist); init_llist_head(&ctx->work_llist); INIT_LIST_HEAD(&ctx->tctx_list); diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index d25309400a45..18de10c68a15 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -204,6 +204,14 @@ void io_rsrc_put_work(struct work_struct *work) } } +void io_rsrc_put_tw(struct callback_head *cb) +{ + struct io_ring_ctx *ctx = container_of(cb, struct io_ring_ctx, + rsrc_put_tw); + + io_rsrc_put_work(&ctx->rsrc_put_work.work); +} + void io_wait_rsrc_data(struct io_rsrc_data *data) { if (data && !atomic_dec_and_test(&data->refs)) @@ -242,8 +250,15 @@ static __cold void io_rsrc_node_ref_zero(struct percpu_ref *ref) } spin_unlock_irqrestore(&ctx->rsrc_ref_lock, flags); - if (first_add) - mod_delayed_work(system_wq, &ctx->rsrc_put_work, delay); + if (!first_add) + return; + + if (ctx->submitter_task) { + if (!task_work_add(ctx->submitter_task, &ctx->rsrc_put_tw, + ctx->notify_method)) + return; + } + mod_delayed_work(system_wq, &ctx->rsrc_put_work, delay); } static struct io_rsrc_node *io_rsrc_node_alloc(void) diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h index 81445a477622..2b8743645efc 100644 --- a/io_uring/rsrc.h +++ b/io_uring/rsrc.h @@ -53,6 +53,7 @@ struct io_mapped_ubuf { struct bio_vec bvec[]; }; +void io_rsrc_put_tw(struct callback_head *cb); void io_rsrc_put_work(struct work_struct *work); void io_rsrc_refs_refill(struct io_ring_ctx *ctx); void io_wait_rsrc_data(struct io_rsrc_data *data); -- cgit v1.3.1