From 807abcb0883439af5ead73f3308310453b97b624 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Fri, 17 Jul 2020 17:09:27 -0600
Subject: io_uring: ensure double poll additions work with both request types

The double poll additions were centered around doing POLL_ADD on file
descriptors that use more than one waitqueue (typically one for read,
one for write) when being polled. However, it can also end up being
triggered for when we use poll triggered retry. For that case, we cannot
safely use req->io, as that could be used by the request type itself.

Add a second io_poll_iocb pointer in the structure we allocate for poll
based retry, and ensure we use the right one from the two paths.

Fixes: 18bceab101ad ("io_uring: allow POLL_ADD with double poll_wait() users")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 47 ++++++++++++++++++++++++++---------------------
 1 file changed, 26 insertions(+), 21 deletions(-)

(limited to 'fs/io_uring.c')

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 74bc4a04befa..53232ac3da17 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -605,6 +605,7 @@ enum {
 
 struct async_poll {
 	struct io_poll_iocb	poll;
+	struct io_poll_iocb	*double_poll;
 	struct io_wq_work	work;
 };
 
@@ -4159,9 +4160,9 @@ static bool io_poll_rewait(struct io_kiocb *req, struct io_poll_iocb *poll)
 	return false;
 }
 
-static void io_poll_remove_double(struct io_kiocb *req)
+static void io_poll_remove_double(struct io_kiocb *req, void *data)
 {
-	struct io_poll_iocb *poll = (struct io_poll_iocb *) req->io;
+	struct io_poll_iocb *poll = data;
 
 	lockdep_assert_held(&req->ctx->completion_lock);
 
@@ -4181,7 +4182,7 @@ static void io_poll_complete(struct io_kiocb *req, __poll_t mask, int error)
 {
 	struct io_ring_ctx *ctx = req->ctx;
 
-	io_poll_remove_double(req);
+	io_poll_remove_double(req, req->io);
 	req->poll.done = true;
 	io_cqring_fill_event(req, error ? error : mangle_poll(mask));
 	io_commit_cqring(ctx);
@@ -4224,21 +4225,21 @@ static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode,
 			       int sync, void *key)
 {
 	struct io_kiocb *req = wait->private;
-	struct io_poll_iocb *poll = (struct io_poll_iocb *) req->io;
+	struct io_poll_iocb *poll = req->apoll->double_poll;
 	__poll_t mask = key_to_poll(key);
 
 	/* for instances that support it check for an event match first: */
 	if (mask && !(mask & poll->events))
 		return 0;
 
-	if (req->poll.head) {
+	if (poll && poll->head) {
 		bool done;
 
-		spin_lock(&req->poll.head->lock);
-		done = list_empty(&req->poll.wait.entry);
+		spin_lock(&poll->head->lock);
+		done = list_empty(&poll->wait.entry);
 		if (!done)
-			list_del_init(&req->poll.wait.entry);
-		spin_unlock(&req->poll.head->lock);
+			list_del_init(&poll->wait.entry);
+		spin_unlock(&poll->head->lock);
 		if (!done)
 			__io_async_wake(req, poll, mask, io_poll_task_func);
 	}
@@ -4258,7 +4259,8 @@ static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events,
 }
 
 static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
-			    struct wait_queue_head *head)
+			    struct wait_queue_head *head,
+			    struct io_poll_iocb **poll_ptr)
 {
 	struct io_kiocb *req = pt->req;
 
@@ -4269,7 +4271,7 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
 	 */
 	if (unlikely(poll->head)) {
 		/* already have a 2nd entry, fail a third attempt */
-		if (req->io) {
+		if (*poll_ptr) {
 			pt->error = -EINVAL;
 			return;
 		}
@@ -4281,7 +4283,7 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
 		io_init_poll_iocb(poll, req->poll.events, io_poll_double_wake);
 		refcount_inc(&req->refs);
 		poll->wait.private = req;
-		req->io = (void *) poll;
+		*poll_ptr = poll;
 	}
 
 	pt->error = 0;
@@ -4293,8 +4295,9 @@ static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
 			       struct poll_table_struct *p)
 {
 	struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
+	struct async_poll *apoll = pt->req->apoll;
 
-	__io_queue_proc(&pt->req->apoll->poll, pt, head);
+	__io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll);
 }
 
 static void io_sq_thread_drop_mm(struct io_ring_ctx *ctx)
@@ -4344,11 +4347,13 @@ static void io_async_task_func(struct callback_head *cb)
 		}
 	}
 
+	io_poll_remove_double(req, apoll->double_poll);
 	spin_unlock_irq(&ctx->completion_lock);
 
 	/* restore ->work in case we need to retry again */
 	if (req->flags & REQ_F_WORK_INITIALIZED)
 		memcpy(&req->work, &apoll->work, sizeof(req->work));
+	kfree(apoll->double_poll);
 	kfree(apoll);
 
 	if (!canceled) {
@@ -4436,7 +4441,6 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
 	struct async_poll *apoll;
 	struct io_poll_table ipt;
 	__poll_t mask, ret;
-	bool had_io;
 
 	if (!req->file || !file_can_poll(req->file))
 		return false;
@@ -4448,11 +4452,11 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
 	apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
 	if (unlikely(!apoll))
 		return false;
+	apoll->double_poll = NULL;
 
 	req->flags |= REQ_F_POLLED;
 	if (req->flags & REQ_F_WORK_INITIALIZED)
 		memcpy(&apoll->work, &req->work, sizeof(req->work));
-	had_io = req->io != NULL;
 
 	io_get_req_task(req);
 	req->apoll = apoll;
@@ -4470,13 +4474,11 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
 	ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask,
 					io_async_wake);
 	if (ret) {
-		ipt.error = 0;
-		/* only remove double add if we did it here */
-		if (!had_io)
-			io_poll_remove_double(req);
+		io_poll_remove_double(req, apoll->double_poll);
 		spin_unlock_irq(&ctx->completion_lock);
 		if (req->flags & REQ_F_WORK_INITIALIZED)
 			memcpy(&req->work, &apoll->work, sizeof(req->work));
+		kfree(apoll->double_poll);
 		kfree(apoll);
 		return false;
 	}
@@ -4507,11 +4509,13 @@ static bool io_poll_remove_one(struct io_kiocb *req)
 	bool do_complete;
 
 	if (req->opcode == IORING_OP_POLL_ADD) {
-		io_poll_remove_double(req);
+		io_poll_remove_double(req, req->io);
 		do_complete = __io_poll_remove_one(req, &req->poll);
 	} else {
 		struct async_poll *apoll = req->apoll;
 
+		io_poll_remove_double(req, apoll->double_poll);
+
 		/* non-poll requests have submit ref still */
 		do_complete = __io_poll_remove_one(req, &apoll->poll);
 		if (do_complete) {
@@ -4524,6 +4528,7 @@ static bool io_poll_remove_one(struct io_kiocb *req)
 			if (req->flags & REQ_F_WORK_INITIALIZED)
 				memcpy(&req->work, &apoll->work,
 				       sizeof(req->work));
+			kfree(apoll->double_poll);
 			kfree(apoll);
 		}
 	}
@@ -4624,7 +4629,7 @@ static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
 {
 	struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
 
-	__io_queue_proc(&pt->req->poll, pt, head);
+	__io_queue_proc(&pt->req->poll, pt, head, (struct io_poll_iocb **) &pt->req->io);
 }
 
 static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
-- 
cgit v1.2.3-70-g09d2


From 61710e437f2807e26a3402543bdbb7217a9c8620 Mon Sep 17 00:00:00 2001
From: Daniele Albano <d.albano@gmail.com>
Date: Sat, 18 Jul 2020 14:15:16 -0600
Subject: io_uring: always allow drain/link/hardlink/async sqe flags

We currently filter these for timeout_remove/async_cancel/files_update,
but we only should be filtering for fixed file and buffer select. This
also causes a second read of sqe->flags, which isn't needed.

Just check req->flags for the relevant bits. This then allows these
commands to be used in links, for example, like everything else.

Signed-off-by: Daniele Albano <d.albano@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'fs/io_uring.c')

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 53232ac3da17..d99802ac166f 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -4737,7 +4737,9 @@ static int io_timeout_remove_prep(struct io_kiocb *req,
 {
 	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
-	if (sqe->flags || sqe->ioprio || sqe->buf_index || sqe->len)
+	if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
+		return -EINVAL;
+	if (sqe->ioprio || sqe->buf_index || sqe->len)
 		return -EINVAL;
 
 	req->timeout.addr = READ_ONCE(sqe->addr);
@@ -4915,8 +4917,9 @@ static int io_async_cancel_prep(struct io_kiocb *req,
 {
 	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
-	if (sqe->flags || sqe->ioprio || sqe->off || sqe->len ||
-	    sqe->cancel_flags)
+	if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
+		return -EINVAL;
+	if (sqe->ioprio || sqe->off || sqe->len || sqe->cancel_flags)
 		return -EINVAL;
 
 	req->cancel.addr = READ_ONCE(sqe->addr);
@@ -4934,7 +4937,9 @@ static int io_async_cancel(struct io_kiocb *req)
 static int io_files_update_prep(struct io_kiocb *req,
 				const struct io_uring_sqe *sqe)
 {
-	if (sqe->flags || sqe->ioprio || sqe->rw_flags)
+	if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
+		return -EINVAL;
+	if (sqe->ioprio || sqe->rw_flags)
 		return -EINVAL;
 
 	req->files_update.offset = READ_ONCE(sqe->off);
-- 
cgit v1.2.3-70-g09d2


From 3e863ea3bb1a2203ae648eb272db0ce6a1a2072c Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 23 Jul 2020 20:17:20 +0300
Subject: io_uring: missed req_init_async() for IOSQE_ASYNC

IOSQE_ASYNC branch of io_queue_sqe() is another place where an
unitialised req->work can be accessed (i.e. prior io_req_init_async()).
Nothing really bad though, it just looses IO_WQ_WORK_CONCURRENT flag.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs/io_uring.c')

diff --git a/fs/io_uring.c b/fs/io_uring.c
index d99802ac166f..32b0064f806e 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5730,6 +5730,7 @@ fail_req:
 		 * Never try inline submit of IOSQE_ASYNC is set, go straight
 		 * to async execution.
 		 */
+		io_req_init_async(req);
 		req->work.flags |= IO_WQ_WORK_CONCURRENT;
 		io_queue_async_work(req);
 	} else {
-- 
cgit v1.2.3-70-g09d2


From d5e16d8e23825304c6a9945116cc6b6f8d51f28c Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Fri, 24 Jul 2020 20:07:20 +0300
Subject: io_uring: fix ->work corruption with poll_add

req->work might be already initialised by the time it gets into
__io_arm_poll_handler(), which will corrupt it by using fields that are
in an union with req->work. Luckily, the only side effect is missing
put_creds(). Clean req->work before going there.

Suggested-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'fs/io_uring.c')

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 32b0064f806e..98e8079e67e7 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -4658,6 +4658,10 @@ static int io_poll_add(struct io_kiocb *req)
 	struct io_poll_table ipt;
 	__poll_t mask;
 
+	/* ->work is in union with hash_node and others */
+	io_req_work_drop_env(req);
+	req->flags &= ~REQ_F_WORK_INITIALIZED;
+
 	INIT_HLIST_NODE(&req->hash_node);
 	INIT_LIST_HEAD(&req->list);
 	ipt.pt._qproc = io_poll_queue_proc;
-- 
cgit v1.2.3-70-g09d2


From 4ae6dbd683860b9edc254ea8acf5e04b5ae242e5 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Fri, 24 Jul 2020 20:07:21 +0300
Subject: io_uring: fix lockup in io_fail_links()

io_fail_links() doesn't consider REQ_F_COMP_LOCKED leading to nested
spin_lock(completion_lock) and lockup.

[  197.680409] rcu: INFO: rcu_preempt detected expedited stalls on
	CPUs/tasks: { 6-... } 18239 jiffies s: 1421 root: 0x40/.
[  197.680411] rcu: blocking rcu_node structures:
[  197.680412] Task dump for CPU 6:
[  197.680413] link-timeout    R  running task        0  1669
	1 0x8000008a
[  197.680414] Call Trace:
[  197.680420]  ? io_req_find_next+0xa0/0x200
[  197.680422]  ? io_put_req_find_next+0x2a/0x50
[  197.680423]  ? io_poll_task_func+0xcf/0x140
[  197.680425]  ? task_work_run+0x67/0xa0
[  197.680426]  ? do_exit+0x35d/0xb70
[  197.680429]  ? syscall_trace_enter+0x187/0x2c0
[  197.680430]  ? do_group_exit+0x43/0xa0
[  197.680448]  ? __x64_sys_exit_group+0x18/0x20
[  197.680450]  ? do_syscall_64+0x52/0xa0
[  197.680452]  ? entry_SYSCALL_64_after_hwframe+0x44/0xa9

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs/io_uring.c')

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 98e8079e67e7..493e5047e67c 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -4199,10 +4199,9 @@ static void io_poll_task_handler(struct io_kiocb *req, struct io_kiocb **nxt)
 
 	hash_del(&req->hash_node);
 	io_poll_complete(req, req->result, 0);
-	req->flags |= REQ_F_COMP_LOCKED;
-	io_put_req_find_next(req, nxt);
 	spin_unlock_irq(&ctx->completion_lock);
 
+	io_put_req_find_next(req, nxt);
 	io_cqring_ev_posted(ctx);
 }
 
-- 
cgit v1.2.3-70-g09d2