summaryrefslogtreecommitdiff
path: root/fs/io-wq.c
diff options
context:
space:
mode:
authorPavel Begunkov <asml.silence@gmail.com>2021-05-23 15:48:39 +0100
committerJens Axboe <axboe@kernel.dk>2021-05-25 19:39:58 -0600
commit17a91051fe63b40ec651b80097c9fff5b093fdc5 (patch)
tree2ff23057d8bb5cbbed0b123eb9a8b3dd803c1586 /fs/io-wq.c
parentba5ef6dc8a827a904794210a227cdb94828e8ae7 (diff)
io_uring/io-wq: close io-wq full-stop gap
There is an old problem with io-wq cancellation where requests should be killed and are in io-wq but are not discoverable, e.g. in @next_hashed or @linked vars of io_worker_handle_work(). It adds some unreliability to individual request canellation, but also may potentially get __io_uring_cancel() stuck. For instance: 1) An __io_uring_cancel()'s cancellation round have not found any request but there are some as desribed. 2) __io_uring_cancel() goes to sleep 3) Then workers wake up and try to execute those hidden requests that happen to be unbound. As we already cancel all requests of io-wq there, set IO_WQ_BIT_EXIT in advance, so preventing 3) from executing unbound requests. The workers will initially break looping because of getting a signal as they are threads of the dying/exec()'ing user task. Cc: stable@vger.kernel.org Signed-off-by: Pavel Begunkov <asml.silence@gmail.com> Link: https://lore.kernel.org/r/abfcf8c54cb9e8f7bfbad7e9a0cc5433cc70bdc2.1621781238.git.asml.silence@gmail.com Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'fs/io-wq.c')
-rw-r--r--fs/io-wq.c20
1 files changed, 9 insertions, 11 deletions
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 5361a9b4b47b..de9b7ba3ba01 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -979,13 +979,16 @@ static bool io_task_work_match(struct callback_head *cb, void *data)
return cwd->wqe->wq == data;
}
+void io_wq_exit_start(struct io_wq *wq)
+{
+ set_bit(IO_WQ_BIT_EXIT, &wq->state);
+}
+
static void io_wq_exit_workers(struct io_wq *wq)
{
struct callback_head *cb;
int node;
- set_bit(IO_WQ_BIT_EXIT, &wq->state);
-
if (!wq->task)
return;
@@ -1020,8 +1023,6 @@ static void io_wq_destroy(struct io_wq *wq)
cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
- io_wq_exit_workers(wq);
-
for_each_node(node) {
struct io_wqe *wqe = wq->wqes[node];
struct io_cb_cancel_data match = {
@@ -1036,16 +1037,13 @@ static void io_wq_destroy(struct io_wq *wq)
kfree(wq);
}
-void io_wq_put(struct io_wq *wq)
-{
- if (refcount_dec_and_test(&wq->refs))
- io_wq_destroy(wq);
-}
-
void io_wq_put_and_exit(struct io_wq *wq)
{
+ WARN_ON_ONCE(!test_bit(IO_WQ_BIT_EXIT, &wq->state));
+
io_wq_exit_workers(wq);
- io_wq_put(wq);
+ if (refcount_dec_and_test(&wq->refs))
+ io_wq_destroy(wq);
}
static bool io_wq_worker_affinity(struct io_worker *worker, void *data)