diff options
author | Jakub Kicinski <kuba@kernel.org> | 2022-11-29 13:04:52 -0800 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2022-11-29 13:04:52 -0800 |
commit | f2bb566f5c977ff010baaa9e5e14d9a75b06e5f2 (patch) | |
tree | 6359cc9169bd06bfb8b757a534c82886df605b71 /io_uring | |
parent | 7a945ce0c19bbdf821d5f7ce1515e7fb8e444465 (diff) | |
parent | 01f856ae6d0ca5ad0505b79bf2d22d7ca439b2a1 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
tools/lib/bpf/ringbuf.c
927cbb478adf ("libbpf: Handle size overflow for ringbuf mmap")
b486d19a0ab0 ("libbpf: checkpatch: Fixed code alignments in ringbuf.c")
https://lore.kernel.org/all/20221121122707.44d1446a@canb.auug.org.au/
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'io_uring')
-rw-r--r-- | io_uring/filetable.c | 2 | ||||
-rw-r--r-- | io_uring/io_uring.c | 2 | ||||
-rw-r--r-- | io_uring/io_uring.h | 13 | ||||
-rw-r--r-- | io_uring/net.c | 23 | ||||
-rw-r--r-- | io_uring/poll.c | 59 |
5 files changed, 71 insertions, 28 deletions
diff --git a/io_uring/filetable.c b/io_uring/filetable.c index 7b473259f3f4..68dfc6936aa7 100644 --- a/io_uring/filetable.c +++ b/io_uring/filetable.c @@ -101,8 +101,6 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file, err: if (needs_switch) io_rsrc_node_switch(ctx, ctx->file_data); - if (ret) - fput(file); return ret; } diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 4a1e482747cc..8840cf3e20f2 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1768,7 +1768,7 @@ int io_poll_issue(struct io_kiocb *req, bool *locked) io_tw_lock(req->ctx, locked); if (unlikely(req->task->flags & PF_EXITING)) return -EFAULT; - return io_issue_sqe(req, IO_URING_F_NONBLOCK); + return io_issue_sqe(req, IO_URING_F_NONBLOCK|IO_URING_F_MULTISHOT); } struct io_wq_work *io_wq_free_work(struct io_wq_work *work) diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index e99a79f2df9b..50bc3af44953 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -17,8 +17,8 @@ enum { IOU_ISSUE_SKIP_COMPLETE = -EIOCBQUEUED, /* - * Intended only when both REQ_F_POLLED and REQ_F_APOLL_MULTISHOT - * are set to indicate to the poll runner that multishot should be + * Intended only when both IO_URING_F_MULTISHOT is passed + * to indicate to the poll runner that multishot should be * removed and the result is set on req->cqe.res. */ IOU_STOP_MULTISHOT = -ECANCELED, @@ -238,9 +238,14 @@ static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx) static inline int io_run_task_work(void) { + /* + * Always check-and-clear the task_work notification signal. With how + * signaling works for task_work, we can find it set with nothing to + * run. We need to clear it for that case, like get_signal() does. + */ + if (test_thread_flag(TIF_NOTIFY_SIGNAL)) + clear_notify_signal(); if (task_work_pending(current)) { - if (test_thread_flag(TIF_NOTIFY_SIGNAL)) - clear_notify_signal(); __set_current_state(TASK_RUNNING); task_work_run(); return 1; diff --git a/io_uring/net.c b/io_uring/net.c index 15dea91625e2..ab83da7e80f0 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -67,8 +67,6 @@ struct io_sr_msg { struct io_kiocb *notif; }; -#define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED) - int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown); @@ -591,7 +589,8 @@ static inline void io_recv_prep_retry(struct io_kiocb *req) * again (for multishot). */ static inline bool io_recv_finish(struct io_kiocb *req, int *ret, - unsigned int cflags, bool mshot_finished) + unsigned int cflags, bool mshot_finished, + unsigned issue_flags) { if (!(req->flags & REQ_F_APOLL_MULTISHOT)) { io_req_set_res(req, *ret, cflags); @@ -614,7 +613,7 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret, io_req_set_res(req, *ret, cflags); - if (req->flags & REQ_F_POLLED) + if (issue_flags & IO_URING_F_MULTISHOT) *ret = IOU_STOP_MULTISHOT; else *ret = IOU_OK; @@ -773,8 +772,7 @@ retry_multishot: if (ret < min_ret) { if (ret == -EAGAIN && force_nonblock) { ret = io_setup_async_msg(req, kmsg, issue_flags); - if (ret == -EAGAIN && (req->flags & IO_APOLL_MULTI_POLLED) == - IO_APOLL_MULTI_POLLED) { + if (ret == -EAGAIN && (issue_flags & IO_URING_F_MULTISHOT)) { io_kbuf_recycle(req, issue_flags); return IOU_ISSUE_SKIP_COMPLETE; } @@ -803,7 +801,7 @@ retry_multishot: if (kmsg->msg.msg_inq) cflags |= IORING_CQE_F_SOCK_NONEMPTY; - if (!io_recv_finish(req, &ret, cflags, mshot_finished)) + if (!io_recv_finish(req, &ret, cflags, mshot_finished, issue_flags)) goto retry_multishot; if (mshot_finished) { @@ -869,7 +867,7 @@ retry_multishot: ret = sock_recvmsg(sock, &msg, flags); if (ret < min_ret) { if (ret == -EAGAIN && force_nonblock) { - if ((req->flags & IO_APOLL_MULTI_POLLED) == IO_APOLL_MULTI_POLLED) { + if (issue_flags & IO_URING_F_MULTISHOT) { io_kbuf_recycle(req, issue_flags); return IOU_ISSUE_SKIP_COMPLETE; } @@ -902,7 +900,7 @@ out_free: if (msg.msg_inq) cflags |= IORING_CQE_F_SOCK_NONEMPTY; - if (!io_recv_finish(req, &ret, cflags, ret <= 0)) + if (!io_recv_finish(req, &ret, cflags, ret <= 0, issue_flags)) goto retry_multishot; return ret; @@ -1289,8 +1287,7 @@ retry: * return EAGAIN to arm the poll infra since it * has already been done */ - if ((req->flags & IO_APOLL_MULTI_POLLED) == - IO_APOLL_MULTI_POLLED) + if (issue_flags & IO_URING_F_MULTISHOT) ret = IOU_ISSUE_SKIP_COMPLETE; return ret; } @@ -1315,9 +1312,7 @@ retry: goto retry; io_req_set_res(req, ret, 0); - if (req->flags & REQ_F_POLLED) - return IOU_STOP_MULTISHOT; - return IOU_OK; + return (issue_flags & IO_URING_F_MULTISHOT) ? IOU_STOP_MULTISHOT : IOU_OK; } int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) diff --git a/io_uring/poll.c b/io_uring/poll.c index f500506984ec..d9bf1767867e 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -40,7 +40,14 @@ struct io_poll_table { }; #define IO_POLL_CANCEL_FLAG BIT(31) -#define IO_POLL_REF_MASK GENMASK(30, 0) +#define IO_POLL_RETRY_FLAG BIT(30) +#define IO_POLL_REF_MASK GENMASK(29, 0) + +/* + * We usually have 1-2 refs taken, 128 is more than enough and we want to + * maximise the margin between this amount and the moment when it overflows. + */ +#define IO_POLL_REF_BIAS 128 #define IO_WQE_F_DOUBLE 1 @@ -58,6 +65,21 @@ static inline bool wqe_is_double(struct wait_queue_entry *wqe) return priv & IO_WQE_F_DOUBLE; } +static bool io_poll_get_ownership_slowpath(struct io_kiocb *req) +{ + int v; + + /* + * poll_refs are already elevated and we don't have much hope for + * grabbing the ownership. Instead of incrementing set a retry flag + * to notify the loop that there might have been some change. + */ + v = atomic_fetch_or(IO_POLL_RETRY_FLAG, &req->poll_refs); + if (v & IO_POLL_REF_MASK) + return false; + return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK); +} + /* * If refs part of ->poll_refs (see IO_POLL_REF_MASK) is 0, it's free. We can * bump it and acquire ownership. It's disallowed to modify requests while not @@ -66,6 +88,8 @@ static inline bool wqe_is_double(struct wait_queue_entry *wqe) */ static inline bool io_poll_get_ownership(struct io_kiocb *req) { + if (unlikely(atomic_read(&req->poll_refs) >= IO_POLL_REF_BIAS)) + return io_poll_get_ownership_slowpath(req); return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK); } @@ -228,6 +252,23 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked) return IOU_POLL_DONE; if (v & IO_POLL_CANCEL_FLAG) return -ECANCELED; + /* + * cqe.res contains only events of the first wake up + * and all others are be lost. Redo vfs_poll() to get + * up to date state. + */ + if ((v & IO_POLL_REF_MASK) != 1) + req->cqe.res = 0; + if (v & IO_POLL_RETRY_FLAG) { + req->cqe.res = 0; + /* + * We won't find new events that came in between + * vfs_poll and the ref put unless we clear the flag + * in advance. + */ + atomic_andnot(IO_POLL_RETRY_FLAG, &req->poll_refs); + v &= ~IO_POLL_RETRY_FLAG; + } /* the mask was stashed in __io_poll_execute */ if (!req->cqe.res) { @@ -239,6 +280,8 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked) continue; if (req->apoll_events & EPOLLONESHOT) return IOU_POLL_DONE; + if (io_is_uring_fops(req->file)) + return IOU_POLL_DONE; /* multishot, just fill a CQE and proceed */ if (!(req->flags & REQ_F_APOLL_MULTISHOT)) { @@ -258,11 +301,15 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked) return ret; } + /* force the next iteration to vfs_poll() */ + req->cqe.res = 0; + /* * Release all references, retry if someone tried to restart * task_work while we were executing it. */ - } while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs)); + } while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs) & + IO_POLL_REF_MASK); return IOU_POLL_NO_ACTION; } @@ -506,7 +553,6 @@ static int __io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags) { struct io_ring_ctx *ctx = req->ctx; - int v; INIT_HLIST_NODE(&req->hash_node); req->work.cancel_seq = atomic_read(&ctx->cancel_seq); @@ -574,11 +620,10 @@ static int __io_arm_poll_handler(struct io_kiocb *req, if (ipt->owning) { /* - * Release ownership. If someone tried to queue a tw while it was - * locked, kick it off for them. + * Try to release ownership. If we see a change of state, e.g. + * poll was waken up, queue up a tw, it'll deal with it. */ - v = atomic_dec_return(&req->poll_refs); - if (unlikely(v & IO_POLL_REF_MASK)) + if (atomic_cmpxchg(&req->poll_refs, 1, 0) != 1) __io_poll_execute(req, 0); } return 0; |