author | Jens Axboe <axboe@kernel.dk> | 2024-02-02 10:20:05 -0700
---|---|---
committer | Jens Axboe <axboe@kernel.dk> | 2024-02-08 13:27:06 -0700
commit | af5d68f8892f8ee8f137648b79ceb2abc153a19b | |
tree | d76f98bfd630ee568e19f16aa2646de2b64dc1df /io_uring/sqpoll.c | |
parent | 2708af1adc11700c6c3ce4109e3b133079a36a78 | |
io_uring/sqpoll: manage task_work privately
Decouple from task_work running, and cap the number of entries we process
at a time. If we exceed that number, push the remaining entries to a retry
list that we'll process first next time.
We cap the number of entries to process at 8, which is a fairly arbitrary
value; we just want enough per-ctx batching here, while not processing
endlessly.
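To illustrate the idea, here is a minimal userspace sketch of the capped,
retry-list pattern (the `tw_node`, `run_capped()` and `run_budgeted()` names
are invented for this example; the real implementation is `io_sq_tw()` in the
diff below, operating on llist-based task_work):

```c
#include <stddef.h>

/* Hypothetical stand-in for a task_work entry; the kernel uses llist_node. */
struct tw_node {
	struct tw_node *next;
	void (*run)(struct tw_node *node);
};

/*
 * Run at most max_entries nodes from *list, leaving any unprocessed tail
 * in *list. Returns how many entries were run.
 */
static unsigned int run_capped(struct tw_node **list, unsigned int max_entries)
{
	unsigned int count = 0;

	while (*list && count < max_entries) {
		struct tw_node *node = *list;

		*list = node->next;
		node->run(node);
		count++;
	}
	return count;
}

/*
 * Drain the retry list first. Only if budget remains is newly queued work
 * pulled in and processed under the same cap; whatever is left over stays
 * in *retry_list for the next invocation.
 */
static unsigned int run_budgeted(struct tw_node **retry_list,
				 struct tw_node **newly_queued,
				 unsigned int max_entries)
{
	unsigned int count = run_capped(retry_list, max_entries);

	if (count >= max_entries)
		return count;

	*retry_list = *newly_queued;
	*newly_queued = NULL;
	return count + run_capped(retry_list, max_entries - count);
}
```

As the in-tree comment below notes, newly queued task_work is not run until
the retry list has been fully processed, so work deferred in one iteration is
never starved by later submissions.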
Since we already run PF_IO_WORKER related task_work manually, as the task
never exits to userspace, with this change we no longer need to add an actual
task_work item to the per-process list.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'io_uring/sqpoll.c')
-rw-r--r-- | io_uring/sqpoll.c | 29 |
1 file changed, 28 insertions(+), 1 deletion(-)
```diff
diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c
index 65b5dbe3c850..28bf0e085d31 100644
--- a/io_uring/sqpoll.c
+++ b/io_uring/sqpoll.c
@@ -18,6 +18,7 @@
 #include "sqpoll.h"
 
 #define IORING_SQPOLL_CAP_ENTRIES_VALUE 8
+#define IORING_TW_CAP_ENTRIES_VALUE 8
 
 enum {
 	IO_SQ_THREAD_SHOULD_STOP = 0,
@@ -219,8 +220,31 @@ static bool io_sqd_handle_event(struct io_sq_data *sqd)
 	return did_sig || test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
 }
 
+/*
+ * Run task_work, processing the retry_list first. The retry_list holds
+ * entries that we passed on in the previous run, if we had more task_work
+ * than we were asked to process. Newly queued task_work isn't run until the
+ * retry list has been fully processed.
+ */
+static unsigned int io_sq_tw(struct llist_node **retry_list, int max_entries)
+{
+	struct io_uring_task *tctx = current->io_uring;
+	unsigned int count = 0;
+
+	if (*retry_list) {
+		*retry_list = io_handle_tw_list(*retry_list, &count, max_entries);
+		if (count >= max_entries)
+			return count;
+		max_entries -= count;
+	}
+
+	*retry_list = tctx_task_work_run(tctx, max_entries, &count);
+	return count;
+}
+
 static int io_sq_thread(void *data)
 {
+	struct llist_node *retry_list = NULL;
 	struct io_sq_data *sqd = data;
 	struct io_ring_ctx *ctx;
 	unsigned long timeout = 0;
@@ -257,7 +281,7 @@ static int io_sq_thread(void *data)
 			if (!sqt_spin && (ret > 0 || !wq_list_empty(&ctx->iopoll_list)))
 				sqt_spin = true;
 		}
-		if (io_run_task_work())
+		if (io_sq_tw(&retry_list, IORING_TW_CAP_ENTRIES_VALUE))
 			sqt_spin = true;
 
 		if (sqt_spin || !time_after(jiffies, timeout)) {
@@ -312,6 +336,9 @@ static int io_sq_thread(void *data)
 			timeout = jiffies + sqd->sq_thread_idle;
 	}
 
+	if (retry_list)
+		io_sq_tw(&retry_list, UINT_MAX);
+
 	io_uring_cancel_generic(true, sqd);
 	sqd->thread = NULL;
 	list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
```
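For context (not part of this patch), a minimal liburing program like the
sketch below creates an SQPOLL ring, so its submissions are picked up by the
io_sq_thread() loop modified above. It assumes liburing is installed; note
that unprivileged SQPOLL requires a reasonably recent kernel.

```c
/* Build with: gcc sqpoll_nop.c -luring */
#include <liburing.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	struct io_uring_params p;
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int ret;

	memset(&p, 0, sizeof(p));
	p.flags = IORING_SETUP_SQPOLL;
	p.sq_thread_idle = 2000;	/* ms before the SQPOLL thread idles */

	ret = io_uring_queue_init_params(8, &ring, &p);
	if (ret < 0) {
		fprintf(stderr, "queue_init: %s\n", strerror(-ret));
		return 1;
	}

	/* A no-op request; the SQPOLL kernel thread submits and completes it. */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_nop(sqe);
	io_uring_submit(&ring);

	ret = io_uring_wait_cqe(&ring, &cqe);
	if (!ret) {
		printf("nop completed, res=%d\n", cqe->res);
		io_uring_cqe_seen(&ring, cqe);
	}

	io_uring_queue_exit(&ring);
	return 0;
}
```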