diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-09-06 09:26:07 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-09-06 09:26:07 -0700 |
commit | 60f8fbaa954452104a1914e21c5cc109f7bf276a (patch) | |
tree | 8212a8c7d3efc455028f95a4520424db85d9e50f /fs/io_uring.c | |
parent | 20fbb11fe4ea99e02d77824613f1438bea456683 (diff) | |
parent | 2fc2a7a62eb58650e71b4550cf6fa6cc0a75b2d2 (diff) |
Merge tag 'for-5.15/io_uring-2021-09-04' of git://git.kernel.dk/linux-block
Pull io_uring fixes from Jens Axboe:
"As sometimes happens, two reports came in around the merge window open
that led to some fixes. Hence this one is a bit bigger than usual
followup fixes, but most of it will be going towards stable, outside
of the fixes that are addressing regressions from this merge window.
In detail:
- postgres is a heavy user of signals between tasks, and if we're
unlucky this can interfere with io-wq worker creation. Make sure
we're resilient against unrelated signal handling. This set of
changes also includes hardening against allocation failures, which
could previously had led to stalls.
- Some use cases that end up having a mix of bounded and unbounded
work would have starvation issues related to that. Split the
pending work lists to handle that better.
- Completion trace int -> unsigned -> long fix
- Fix issue with REGISTER_IOWQ_MAX_WORKERS and SQPOLL
- Fix regression with hash wait lock in this merge window
- Fix retry issued on block devices (Ming)
- Fix regression with links in this merge window (Pavel)
- Fix race with multi-shot poll and completions (Xiaoguang)
- Ensure regular file IO doesn't inadvertently skip completion
batching (Pavel)
- Ensure submissions are flushed after running task_work (Pavel)"
* tag 'for-5.15/io_uring-2021-09-04' of git://git.kernel.dk/linux-block:
io_uring: io_uring_complete() trace should take an integer
io_uring: fix possible poll event lost in multi shot mode
io_uring: prolong tctx_task_work() with flushing
io_uring: don't disable kiocb_done() CQE batching
io_uring: ensure IORING_REGISTER_IOWQ_MAX_WORKERS works with SQPOLL
io-wq: make worker creation resilient against signals
io-wq: get rid of FIXED worker flag
io-wq: only exit on fatal signals
io-wq: split bounded and unbounded work into separate lists
io-wq: fix queue stalling race
io_uring: don't submit half-prepared drain request
io_uring: fix queueing half-created requests
io-wq: ensure that hash wait lock is IRQ disabling
io_uring: retry in case of short read on block device
io_uring: IORING_OP_WRITE needs hash_reg_file set
io-wq: fix race between adding work and activating a free worker
Diffstat (limited to 'fs/io_uring.c')
-rw-r--r-- | fs/io_uring.c | 76 |
1 files changed, 66 insertions, 10 deletions
diff --git a/fs/io_uring.c b/fs/io_uring.c index 6f35b1285865..d816c09c88a5 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1021,6 +1021,7 @@ static const struct io_op_def io_op_defs[] = { }, [IORING_OP_WRITE] = { .needs_file = 1, + .hash_reg_file = 1, .unbound_nonreg_file = 1, .pollout = 1, .plug = 1, @@ -1851,6 +1852,17 @@ static void io_req_complete_failed(struct io_kiocb *req, long res) io_req_complete_post(req, res, 0); } +static void io_req_complete_fail_submit(struct io_kiocb *req) +{ + /* + * We don't submit, fail them all, for that replace hardlinks with + * normal links. Extra REQ_F_LINK is tolerated. + */ + req->flags &= ~REQ_F_HARDLINK; + req->flags |= REQ_F_LINK; + io_req_complete_failed(req, req->result); +} + /* * Don't initialise the fields below on every allocation, but do that in * advance and keep them valid across allocations. @@ -2119,6 +2131,9 @@ static void tctx_task_work(struct callback_head *cb) while (1) { struct io_wq_work_node *node; + if (!tctx->task_list.first && locked && ctx->submit_state.compl_nr) + io_submit_flush_completions(ctx); + spin_lock_irq(&tctx->task_lock); node = tctx->task_list.first; INIT_WQ_LIST(&tctx->task_list); @@ -2673,7 +2688,7 @@ static void __io_complete_rw(struct io_kiocb *req, long res, long res2, { if (__io_complete_rw_common(req, res)) return; - __io_req_complete(req, 0, req->result, io_put_rw_kbuf(req)); + __io_req_complete(req, issue_flags, req->result, io_put_rw_kbuf(req)); } static void io_complete_rw(struct kiocb *kiocb, long res, long res2) @@ -3410,6 +3425,12 @@ static inline int io_iter_do_read(struct io_kiocb *req, struct iov_iter *iter) return -EINVAL; } +static bool need_read_all(struct io_kiocb *req) +{ + return req->flags & REQ_F_ISREG || + S_ISBLK(file_inode(req->file)->i_mode); +} + static int io_read(struct io_kiocb *req, unsigned int issue_flags) { struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; @@ -3464,7 +3485,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags) } else if (ret == -EIOCBQUEUED) { goto out_free; } else if (ret <= 0 || ret == io_size || !force_nonblock || - (req->flags & REQ_F_NOWAIT) || !(req->flags & REQ_F_ISREG)) { + (req->flags & REQ_F_NOWAIT) || !need_read_all(req)) { /* read all, failed, already did sync or don't want to retry */ goto done; } @@ -5249,7 +5270,7 @@ static void io_poll_remove_double(struct io_kiocb *req) } } -static bool io_poll_complete(struct io_kiocb *req, __poll_t mask) +static bool __io_poll_complete(struct io_kiocb *req, __poll_t mask) __must_hold(&req->ctx->completion_lock) { struct io_ring_ctx *ctx = req->ctx; @@ -5271,10 +5292,19 @@ static bool io_poll_complete(struct io_kiocb *req, __poll_t mask) if (flags & IORING_CQE_F_MORE) ctx->cq_extra++; - io_commit_cqring(ctx); return !(flags & IORING_CQE_F_MORE); } +static inline bool io_poll_complete(struct io_kiocb *req, __poll_t mask) + __must_hold(&req->ctx->completion_lock) +{ + bool done; + + done = __io_poll_complete(req, mask); + io_commit_cqring(req->ctx); + return done; +} + static void io_poll_task_func(struct io_kiocb *req, bool *locked) { struct io_ring_ctx *ctx = req->ctx; @@ -5285,7 +5315,7 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked) } else { bool done; - done = io_poll_complete(req, req->result); + done = __io_poll_complete(req, req->result); if (done) { io_poll_remove_double(req); hash_del(&req->hash_node); @@ -5293,6 +5323,7 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked) req->result = 0; add_wait_queue(req->poll.head, &req->poll.wait); } + io_commit_cqring(ctx); spin_unlock(&ctx->completion_lock); io_cqring_ev_posted(ctx); @@ -6398,6 +6429,11 @@ static bool io_drain_req(struct io_kiocb *req) int ret; u32 seq; + if (req->flags & REQ_F_FAIL) { + io_req_complete_fail_submit(req); + return true; + } + /* * If we need to drain a request in the middle of a link, drain the * head request and the next request/link after the current link. @@ -6914,7 +6950,7 @@ static inline void io_queue_sqe(struct io_kiocb *req) if (likely(!(req->flags & (REQ_F_FORCE_ASYNC | REQ_F_FAIL)))) { __io_queue_sqe(req); } else if (req->flags & REQ_F_FAIL) { - io_req_complete_failed(req, req->result); + io_req_complete_fail_submit(req); } else { int ret = io_req_prep_async(req); @@ -10498,26 +10534,46 @@ static int io_unregister_iowq_aff(struct io_ring_ctx *ctx) static int io_register_iowq_max_workers(struct io_ring_ctx *ctx, void __user *arg) { - struct io_uring_task *tctx = current->io_uring; + struct io_uring_task *tctx = NULL; + struct io_sq_data *sqd = NULL; __u32 new_count[2]; int i, ret; - if (!tctx || !tctx->io_wq) - return -EINVAL; if (copy_from_user(new_count, arg, sizeof(new_count))) return -EFAULT; for (i = 0; i < ARRAY_SIZE(new_count); i++) if (new_count[i] > INT_MAX) return -EINVAL; + if (ctx->flags & IORING_SETUP_SQPOLL) { + sqd = ctx->sq_data; + if (sqd) { + mutex_lock(&sqd->lock); + tctx = sqd->thread->io_uring; + } + } else { + tctx = current->io_uring; + } + + ret = -EINVAL; + if (!tctx || !tctx->io_wq) + goto err; + ret = io_wq_max_workers(tctx->io_wq, new_count); if (ret) - return ret; + goto err; + + if (sqd) + mutex_unlock(&sqd->lock); if (copy_to_user(arg, new_count, sizeof(new_count))) return -EFAULT; return 0; +err: + if (sqd) + mutex_unlock(&sqd->lock); + return ret; } static bool io_register_op_must_quiesce(int op) |