Diffstat (limited to 'fs/io_uring.c')
-rw-r--r--	fs/io_uring.c	93
1 file changed, 70 insertions, 23 deletions
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 0cac361bf6b8..bf548af0426c 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1279,8 +1279,17 @@ static void io_prep_async_link(struct io_kiocb *req)
 {
 	struct io_kiocb *cur;
 
-	io_for_each_link(cur, req)
-		io_prep_async_work(cur);
+	if (req->flags & REQ_F_LINK_TIMEOUT) {
+		struct io_ring_ctx *ctx = req->ctx;
+
+		spin_lock_irq(&ctx->completion_lock);
+		io_for_each_link(cur, req)
+			io_prep_async_work(cur);
+		spin_unlock_irq(&ctx->completion_lock);
+	} else {
+		io_for_each_link(cur, req)
+			io_prep_async_work(cur);
+	}
 }
 
 static void io_queue_async_work(struct io_kiocb *req)
@@ -1294,6 +1303,17 @@ static void io_queue_async_work(struct io_kiocb *req)
 
 	/* init ->work of the whole link before punting */
 	io_prep_async_link(req);
+
+	/*
+	 * Not expected to happen, but if we do have a bug where this _can_
+	 * happen, catch it here and ensure the request is marked as
+	 * canceled. That will make io-wq go through the usual work cancel
+	 * procedure rather than attempt to run this request (or create a new
+	 * worker for it).
+	 */
+	if (WARN_ON_ONCE(!same_thread_group(req->task, current)))
+		req->work.flags |= IO_WQ_WORK_CANCEL;
+
 	trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req,
 					&req->work, req->flags);
 	io_wq_enqueue(tctx->io_wq, &req->work);
@@ -1939,9 +1959,13 @@ static void tctx_task_work(struct callback_head *cb)
 			node = next;
 		}
 		if (wq_list_empty(&tctx->task_list)) {
+			spin_lock_irq(&tctx->task_lock);
 			clear_bit(0, &tctx->task_state);
-			if (wq_list_empty(&tctx->task_list))
+			if (wq_list_empty(&tctx->task_list)) {
+				spin_unlock_irq(&tctx->task_lock);
 				break;
+			}
+			spin_unlock_irq(&tctx->task_lock);
 			/* another tctx_task_work() is enqueued, yield */
 			if (test_and_set_bit(0, &tctx->task_state))
 				break;
@@ -2036,6 +2060,12 @@ static void io_req_task_queue(struct io_kiocb *req)
 	io_req_task_work_add(req);
 }
 
+static void io_req_task_queue_reissue(struct io_kiocb *req)
+{
+	req->io_task_work.func = io_queue_async_work;
+	io_req_task_work_add(req);
+}
+
 static inline void io_queue_next(struct io_kiocb *req)
 {
 	struct io_kiocb *nxt = io_req_find_next(req);
@@ -2205,7 +2235,7 @@ static inline bool io_run_task_work(void)
  * Find and free completed poll iocbs
  */
 static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
-			       struct list_head *done)
+			       struct list_head *done, bool resubmit)
 {
 	struct req_batch rb;
 	struct io_kiocb *req;
@@ -2220,11 +2250,11 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
 		req = list_first_entry(done, struct io_kiocb, inflight_entry);
 		list_del(&req->inflight_entry);
 
-		if (READ_ONCE(req->result) == -EAGAIN &&
+		if (READ_ONCE(req->result) == -EAGAIN && resubmit &&
 		    !(req->flags & REQ_F_DONT_REISSUE)) {
 			req->iopoll_completed = 0;
 			req_ref_get(req);
-			io_queue_async_work(req);
+			io_req_task_queue_reissue(req);
 			continue;
 		}
 
@@ -2244,7 +2274,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
 }
 
 static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
-			long min)
+			long min, bool resubmit)
 {
 	struct io_kiocb *req, *tmp;
 	LIST_HEAD(done);
@@ -2287,7 +2317,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
 	}
 
 	if (!list_empty(&done))
-		io_iopoll_complete(ctx, nr_events, &done);
+		io_iopoll_complete(ctx, nr_events, &done, resubmit);
 
 	return ret;
 }
@@ -2305,7 +2335,7 @@ static void io_iopoll_try_reap_events(struct io_ring_ctx *ctx)
 	while (!list_empty(&ctx->iopoll_list)) {
 		unsigned int nr_events = 0;
 
-		io_do_iopoll(ctx, &nr_events, 0);
+		io_do_iopoll(ctx, &nr_events, 0, false);
 
 		/* let it sleep and repeat later if can't complete a request */
 		if (nr_events == 0)
@@ -2367,7 +2397,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
 			    list_empty(&ctx->iopoll_list))
 				break;
 		}
-		ret = io_do_iopoll(ctx, &nr_events, min);
+		ret = io_do_iopoll(ctx, &nr_events, min, true);
 	} while (!ret && nr_events < min && !need_resched());
 out:
 	mutex_unlock(&ctx->uring_lock);
@@ -2417,6 +2447,12 @@ static bool io_rw_should_reissue(struct io_kiocb *req)
 	 */
 	if (percpu_ref_is_dying(&ctx->refs))
 		return false;
+	/*
+	 * Play it safe and assume not safe to re-import and reissue if we're
+	 * not in the original thread group (or in task context).
+	 */
+	if (!same_thread_group(req->task, current) || !in_task())
+		return false;
 	return true;
 }
 #else
@@ -2747,7 +2783,7 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret,
 		req->flags &= ~REQ_F_REISSUE;
 		if (io_resubmit_prep(req)) {
 			req_ref_get(req);
-			io_queue_async_work(req);
+			io_req_task_queue_reissue(req);
 		} else {
 			int cflags = 0;
 
@@ -4802,6 +4838,7 @@ IO_NETOP_FN(recv);
 struct io_poll_table {
 	struct poll_table_struct pt;
 	struct io_kiocb *req;
+	int nr_entries;
 	int error;
 };
 
@@ -4902,7 +4939,6 @@ static bool io_poll_complete(struct io_kiocb *req, __poll_t mask)
 	if (req->poll.events & EPOLLONESHOT)
 		flags = 0;
 	if (!io_cqring_fill_event(ctx, req->user_data, error, flags)) {
-		io_poll_remove_waitqs(req);
 		req->poll.done = true;
 		flags = 0;
 	}
@@ -4925,6 +4961,7 @@ static void io_poll_task_func(struct io_kiocb *req)
 
 		done = io_poll_complete(req, req->result);
 		if (done) {
+			io_poll_remove_double(req);
 			hash_del(&req->hash_node);
 		} else {
 			req->result = 0;
@@ -4995,11 +5032,11 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
 	struct io_kiocb *req = pt->req;
 
 	/*
-	 * If poll->head is already set, it's because the file being polled
-	 * uses multiple waitqueues for poll handling (eg one for read, one
-	 * for write). Setup a separate io_poll_iocb if this happens.
+	 * The file being polled uses multiple waitqueues for poll handling
+	 * (e.g. one for read, one for write). Setup a separate io_poll_iocb
+	 * if this happens.
 	 */
-	if (unlikely(poll->head)) {
+	if (unlikely(pt->nr_entries)) {
 		struct io_poll_iocb *poll_one = poll;
 
 		/* already have a 2nd entry, fail a third attempt */
@@ -5027,7 +5064,7 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
 		*poll_ptr = poll;
 	}
 
-	pt->error = 0;
+	pt->nr_entries++;
 	poll->head = head;
 
 	if (poll->events & EPOLLEXCLUSIVE)
@@ -5104,11 +5141,16 @@ static __poll_t __io_arm_poll_handler(struct io_kiocb *req,
 
 	ipt->pt._key = mask;
 	ipt->req = req;
-	ipt->error = -EINVAL;
+	ipt->error = 0;
+	ipt->nr_entries = 0;
 
 	mask = vfs_poll(req->file, &ipt->pt) & poll->events;
+	if (unlikely(!ipt->nr_entries) && !ipt->error)
+		ipt->error = -EINVAL;
 
 	spin_lock_irq(&ctx->completion_lock);
+	if (ipt->error || (mask && (poll->events & EPOLLONESHOT)))
+		io_poll_remove_double(req);
 	if (likely(poll->head)) {
 		spin_lock(&poll->head->lock);
 		if (unlikely(list_empty(&poll->wait.entry))) {
@@ -5179,7 +5221,6 @@ static int io_arm_poll_handler(struct io_kiocb *req)
 	ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask,
 					io_async_wake);
 	if (ret || ipt.error) {
-		io_poll_remove_double(req);
 		spin_unlock_irq(&ctx->completion_lock);
 		if (ret)
 			return IO_APOLL_READY;
@@ -6792,7 +6833,7 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
 
 		mutex_lock(&ctx->uring_lock);
 		if (!list_empty(&ctx->iopoll_list))
-			io_do_iopoll(ctx, &nr_events, 0);
+			io_do_iopoll(ctx, &nr_events, 0, true);
 
 		/*
 		 * Don't submit if refs are dying, good for io_uring_register(),
@@ -7899,15 +7940,19 @@ static struct io_wq *io_init_wq_offload(struct io_ring_ctx *ctx,
 	struct io_wq_data data;
 	unsigned int concurrency;
 
+	mutex_lock(&ctx->uring_lock);
 	hash = ctx->hash_map;
 	if (!hash) {
 		hash = kzalloc(sizeof(*hash), GFP_KERNEL);
-		if (!hash)
+		if (!hash) {
+			mutex_unlock(&ctx->uring_lock);
 			return ERR_PTR(-ENOMEM);
+		}
 		refcount_set(&hash->refs, 1);
 		init_waitqueue_head(&hash->wait);
 		ctx->hash_map = hash;
 	}
+	mutex_unlock(&ctx->uring_lock);
 
 	data.hash = hash;
 	data.task = task;
@@ -7981,9 +8026,11 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
 		f = fdget(p->wq_fd);
 		if (!f.file)
 			return -ENXIO;
-		fdput(f);
-		if (f.file->f_op != &io_uring_fops)
+		if (f.file->f_op != &io_uring_fops) {
+			fdput(f);
 			return -EINVAL;
+		}
+		fdput(f);
 	}
 	if (ctx->flags & IORING_SETUP_SQPOLL) {
 		struct task_struct *tsk;
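The tctx_task_work() hunk above closes a lost-wakeup window: the handler used to clear its "scheduled" bit without holding tctx->task_lock, so a request queued in that gap could sit on the list with no task work scheduled to run it. Below is a minimal userspace sketch of the invariant the patched code enforces, written in plain C with pthreads; it is not kernel code, and all names here (task_ctx, work_item, queue_work, run_work) are illustrative stand-ins rather than io_uring symbols.

/*
 * Userspace sketch of the tctx_task_work() fix: the worker only clears its
 * "scheduled" flag while holding the same lock the producer takes to queue
 * work, so no item can be queued in the window between "list looks empty"
 * and "flag cleared".  Illustrative only; none of these names exist in
 * io_uring, and the real code drains a lock-free wq_list instead.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct work_item {
	struct work_item *next;
	int payload;
};

struct task_ctx {
	pthread_mutex_t lock;	/* stands in for tctx->task_lock */
	struct work_item *head;	/* stands in for tctx->task_list */
	bool scheduled;		/* stands in for bit 0 of tctx->task_state */
};

/* Producer: add an item; report whether the worker must be (re)scheduled. */
static bool queue_work(struct task_ctx *tctx, struct work_item *item)
{
	bool need_schedule;

	pthread_mutex_lock(&tctx->lock);
	item->next = tctx->head;
	tctx->head = item;
	need_schedule = !tctx->scheduled;
	tctx->scheduled = true;
	pthread_mutex_unlock(&tctx->lock);
	return need_schedule;
}

/* Worker: drain items until the list is seen empty under the lock. */
static void run_work(struct task_ctx *tctx)
{
	for (;;) {
		struct work_item *list;

		pthread_mutex_lock(&tctx->lock);
		list = tctx->head;
		tctx->head = NULL;
		if (!list) {
			/*
			 * Clear "scheduled" while still holding the lock.  A
			 * concurrent queue_work() is serialized against this:
			 * either it ran first and we picked its item up above,
			 * or it runs after and sees scheduled == false, in
			 * which case it re-schedules the worker itself.
			 * Clearing the flag outside the lock is exactly the
			 * lost-wakeup window the kernel hunk removes.
			 */
			tctx->scheduled = false;
			pthread_mutex_unlock(&tctx->lock);
			return;
		}
		pthread_mutex_unlock(&tctx->lock);

		while (list) {
			struct work_item *item = list;

			list = list->next;
			printf("handled %d\n", item->payload);
			free(item);
		}
	}
}

int main(void)
{
	struct task_ctx tctx = { .lock = PTHREAD_MUTEX_INITIALIZER };
	struct work_item *item = calloc(1, sizeof(*item));

	if (!item)
		return 1;
	item->payload = 42;
	if (queue_work(&tctx, item))
		run_work(&tctx);	/* the kernel would use task_work_add() here */
	return 0;
}

The design point is the same as in the patch: the flag is only dropped while holding the lock the producer uses, so the producer either sees it still set (and its item will be drained) or sees it cleared and triggers a new run itself.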
