author	Linus Torvalds <torvalds@linux-foundation.org>	2022-09-02 16:37:01 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2022-09-02 16:37:01 -0700
commit	cec53f4c8df0b3f45796127a31c10b86ec125f55 (patch)
tree	f1765cb76b1115c0b0ad58c52003b0db2024686a /io_uring/net.c
parent	1551f8f21e007e608fff00cf27caac8504283b43 (diff)
parent	916d72c10a4ca80ea51f1421e774cb765b53f28f (diff)
Merge tag 'io_uring-6.0-2022-09-02' of git://git.kernel.dk/linux-block
Pull io_uring fixes from Jens Axboe:

 - A single fix for over-eager retries for networking (Pavel)

 - Revert the notification slot support for zerocopy sends.

   It turns out that even after more than a year of development and
   testing, there's not full agreement on whether just using plain
   ordered notifications is Good Enough to avoid the complexity of
   using the notification slots. Because of that, we decided that it's
   best left to a future final decision.

   We can always bring back this feature, but we can't really change
   it or remove it once we've released 6.0 with it enabled. The
   reverts leave the usual CQE notifications as the primary interface
   for knowing when data was sent, and when it was acked. (Pavel)

* tag 'io_uring-6.0-2022-09-02' of git://git.kernel.dk/linux-block:
  selftests/net: return back io_uring zc send tests
  io_uring/net: simplify zerocopy send user API
  io_uring/notif: remove notif registration
  Revert "io_uring: rename IORING_OP_FILES_UPDATE"
  Revert "io_uring: add zc notification flush requests"
  selftests/net: temporarily disable io_uring zc test
  io_uring/net: fix overexcessive retries
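For context, the user-facing API these reverts leave behind needs no slot registration at all: one zerocopy send yields an ordinary completion CQE and, later, a notification CQE. A minimal sketch of the submission side, assuming liburing >= 2.3 (io_uring_prep_send_zc) and a hypothetical connected socket sockfd; this is an illustration, not part of the patch:

/* Sketch only: one zerocopy send with the post-revert API. No
 * notification slot index is passed; the kernel pairs the later
 * notification CQE with this request's own user_data. */
#include <liburing.h>

static int submit_zc_send(struct io_uring *ring, int sockfd,
			  const void *buf, size_t len)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);

	if (!sqe)
		return -1;
	io_uring_prep_send_zc(sqe, sockfd, buf, len, 0, 0);
	sqe->user_data = 42;	/* both CQEs carry this value */
	return io_uring_submit(ring);
}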
Diffstat (limited to 'io_uring/net.c')
-rw-r--r--	io_uring/net.c	59
1 file changed, 35 insertions, 24 deletions
diff --git a/io_uring/net.c b/io_uring/net.c
index 0af8a02df580..7047c1342541 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -65,12 +65,12 @@ struct io_sendzc {
 	struct file			*file;
 	void __user			*buf;
 	size_t				len;
-	u16				slot_idx;
 	unsigned			msg_flags;
 	unsigned			flags;
 	unsigned			addr_len;
 	void __user			*addr;
 	size_t				done_io;
+	struct io_kiocb			*notif;
 };
 
 #define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED)
@@ -879,17 +879,31 @@ out_free:
 	return ret;
 }
 
+void io_sendzc_cleanup(struct io_kiocb *req)
+{
+	struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc);
+
+	zc->notif->flags |= REQ_F_CQE_SKIP;
+	io_notif_flush(zc->notif);
+	zc->notif = NULL;
+}
+
 int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc);
 	struct io_ring_ctx *ctx = req->ctx;
+	struct io_kiocb *notif;
 
-	if (READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3))
+	if (READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3) ||
+	    READ_ONCE(sqe->__pad3[0]))
+		return -EINVAL;
+	/* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
+	if (req->flags & REQ_F_CQE_SKIP)
 		return -EINVAL;
 
 	zc->flags = READ_ONCE(sqe->ioprio);
 	if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST |
-			  IORING_RECVSEND_FIXED_BUF | IORING_RECVSEND_NOTIF_FLUSH))
+			  IORING_RECVSEND_FIXED_BUF))
 		return -EINVAL;
 	if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
 		unsigned idx = READ_ONCE(sqe->buf_index);
@@ -900,11 +914,17 @@ int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		req->imu = READ_ONCE(ctx->user_bufs[idx]);
 		io_req_set_rsrc_node(req, ctx, 0);
 	}
+	notif = zc->notif = io_alloc_notif(ctx);
+	if (!notif)
+		return -ENOMEM;
+	notif->cqe.user_data = req->cqe.user_data;
+	notif->cqe.res = 0;
+	notif->cqe.flags = IORING_CQE_F_NOTIF;
+	req->flags |= REQ_F_NEED_CLEANUP;
 
 	zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
 	zc->len = READ_ONCE(sqe->len);
 	zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
-	zc->slot_idx = READ_ONCE(sqe->notification_idx);
 	if (zc->msg_flags & MSG_DONTWAIT)
 		req->flags |= REQ_F_NOWAIT;
 
@@ -956,7 +976,7 @@ static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb,
 	shinfo->nr_frags = frag;
 	from->bvec += bi.bi_idx;
 	from->nr_segs -= bi.bi_idx;
-	from->count = bi.bi_size;
+	from->count -= copied;
 	from->iov_offset = bi.bi_bvec_done;
 
 	skb->data_len += copied;
@@ -976,33 +996,20 @@ static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb,
 int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct sockaddr_storage __address, *addr = NULL;
-	struct io_ring_ctx *ctx = req->ctx;
 	struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc);
-	struct io_notif_slot *notif_slot;
-	struct io_kiocb *notif;
 	struct msghdr msg;
 	struct iovec iov;
 	struct socket *sock;
-	unsigned msg_flags;
+	unsigned msg_flags, cflags;
 	int ret, min_ret = 0;
 
 	if (!(req->flags & REQ_F_POLLED) &&
 	    (zc->flags & IORING_RECVSEND_POLL_FIRST))
 		return -EAGAIN;
-
-	if (issue_flags & IO_URING_F_UNLOCKED)
-		return -EAGAIN;
 	sock = sock_from_file(req->file);
 	if (unlikely(!sock))
 		return -ENOTSOCK;
 
-	notif_slot = io_get_notif_slot(ctx, zc->slot_idx);
-	if (!notif_slot)
-		return -EINVAL;
-	notif = io_get_notif(ctx, notif_slot);
-	if (!notif)
-		return -ENOMEM;
-
 	msg.msg_name = NULL;
 	msg.msg_control = NULL;
 	msg.msg_controllen = 0;
@@ -1033,7 +1040,7 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
 				       &msg.msg_iter);
 		if (unlikely(ret))
 			return ret;
-		ret = io_notif_account_mem(notif, zc->len);
+		ret = io_notif_account_mem(zc->notif, zc->len);
 		if (unlikely(ret))
 			return ret;
 	}
@@ -1045,7 +1052,7 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
 		min_ret = iov_iter_count(&msg.msg_iter);
 
 	msg.msg_flags = msg_flags;
-	msg.msg_ubuf = &io_notif_to_data(notif)->uarg;
+	msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg;
 	msg.sg_from_iter = io_sg_from_iter;
 	ret = sock_sendmsg(sock, &msg);
 
@@ -1060,18 +1067,22 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
 			req->flags |= REQ_F_PARTIAL_IO;
 			return io_setup_async_addr(req, addr, issue_flags);
 		}
+		if (ret < 0 && !zc->done_io)
+			zc->notif->flags |= REQ_F_CQE_SKIP;
 		if (ret == -ERESTARTSYS)
 			ret = -EINTR;
 		req_set_fail(req);
-	} else if (zc->flags & IORING_RECVSEND_NOTIF_FLUSH) {
-		io_notif_slot_flush_submit(notif_slot, 0);
 	}
 
 	if (ret >= 0)
 		ret += zc->done_io;
 	else if (zc->done_io)
 		ret = zc->done_io;
-	io_req_set_res(req, ret, 0);
+
+	io_notif_flush(zc->notif);
+	req->flags &= ~REQ_F_NEED_CLEANUP;
+	cflags = ret >= 0 ? IORING_CQE_F_MORE : 0;
+	io_req_set_res(req, ret, cflags);
 	return IOU_OK;
 }
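As the cflags handling at the end of io_sendzc() above shows, a successful send now posts IORING_CQE_F_MORE on its completion CQE, and the flushed notification later posts a second CQE with IORING_CQE_F_NOTIF once the kernel is done with the buffer. A hedged sketch of the reaping side, under the same liburing assumptions as the snippet after the commit message:

/* Sketch only: reap both CQEs of a single zerocopy send. The first
 * CQE reports bytes sent (or -errno); IORING_CQE_F_MORE on it
 * promises a second, IORING_CQE_F_NOTIF CQE that hands the buffer
 * back to userspace. */
static int reap_zc_send(struct io_uring *ring)
{
	struct io_uring_cqe *cqe;
	unsigned more;
	int res;

	if (io_uring_wait_cqe(ring, &cqe))
		return -1;
	res = cqe->res;
	more = cqe->flags & IORING_CQE_F_MORE;
	io_uring_cqe_seen(ring, cqe);

	if (more) {
		if (io_uring_wait_cqe(ring, &cqe))
			return -1;
		/* expect cqe->flags & IORING_CQE_F_NOTIF set here */
		io_uring_cqe_seen(ring, cqe);
	}
	return res;
}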