Diffstat (limited to 'fs')
-rw-r--r--   fs/block_dev.c |  12
-rw-r--r--   fs/io_uring.c  | 439
-rw-r--r--   fs/iomap.c     |  12
3 files changed, 230 insertions, 233 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c
index e9faa52bb489..78d3257435c0 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -336,12 +336,14 @@ static void blkdev_bio_end_io(struct bio *bio)
 	if (should_dirty) {
 		bio_check_pages_dirty(bio);
 	} else {
-		struct bio_vec *bvec;
-		int i;
-		struct bvec_iter_all iter_all;
+		if (!bio_flagged(bio, BIO_NO_PAGE_REF)) {
+			struct bvec_iter_all iter_all;
+			struct bio_vec *bvec;
+			int i;
 
-		bio_for_each_segment_all(bvec, bio, i, iter_all)
-			put_page(bvec->bv_page);
+			bio_for_each_segment_all(bvec, bio, i, iter_all)
+				put_page(bvec->bv_page);
+		}
 		bio_put(bio);
 	}
 }
diff --git a/fs/io_uring.c b/fs/io_uring.c
index c88088d92613..6aaa30580a2b 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -189,17 +189,28 @@ struct sqe_submit {
 	bool				needs_fixed_file;
 };
 
+/*
+ * First field must be the file pointer in all the
+ * iocb unions! See also 'struct kiocb' in <linux/fs.h>
+ */
 struct io_poll_iocb {
 	struct file			*file;
 	struct wait_queue_head		*head;
 	__poll_t			events;
-	bool				woken;
+	bool				done;
 	bool				canceled;
 	struct wait_queue_entry		wait;
 };
 
+/*
+ * NOTE! Each of the iocb union members has the file pointer
+ * as the first entry in their struct definition. So you can
+ * access the file pointer through any of the sub-structs,
+ * or directly as just 'ki_filp' in this struct.
+ */
 struct io_kiocb {
 	union {
+		struct file		*file;
 		struct kiocb		rw;
 		struct io_poll_iocb	poll;
 	};
@@ -214,6 +225,7 @@ struct io_kiocb {
 #define REQ_F_IOPOLL_COMPLETED	2	/* polled IO has completed */
 #define REQ_F_FIXED_FILE	4	/* ctx owns file */
 #define REQ_F_SEQ_PREV		8	/* sequential with previous */
+#define REQ_F_PREPPED		16	/* prep already done */
 	u64			user_data;
 	u64			error;
 
@@ -355,20 +367,25 @@ static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data,
 	}
 }
 
-static void io_cqring_add_event(struct io_ring_ctx *ctx, u64 ki_user_data,
+static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
+{
+	if (waitqueue_active(&ctx->wait))
+		wake_up(&ctx->wait);
+	if (waitqueue_active(&ctx->sqo_wait))
+		wake_up(&ctx->sqo_wait);
+}
+
+static void io_cqring_add_event(struct io_ring_ctx *ctx, u64 user_data,
 				long res, unsigned ev_flags)
 {
 	unsigned long flags;
 
 	spin_lock_irqsave(&ctx->completion_lock, flags);
-	io_cqring_fill_event(ctx, ki_user_data, res, ev_flags);
+	io_cqring_fill_event(ctx, user_data, res, ev_flags);
 	io_commit_cqring(ctx);
 	spin_unlock_irqrestore(&ctx->completion_lock, flags);
 
-	if (waitqueue_active(&ctx->wait))
-		wake_up(&ctx->wait);
-	if (waitqueue_active(&ctx->sqo_wait))
-		wake_up(&ctx->sqo_wait);
+	io_cqring_ev_posted(ctx);
 }
 
 static void io_ring_drop_ctx_refs(struct io_ring_ctx *ctx, unsigned refs)
@@ -382,13 +399,14 @@ static void io_ring_drop_ctx_refs(struct io_ring_ctx *ctx, unsigned refs)
 static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
 				   struct io_submit_state *state)
 {
+	gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
 	struct io_kiocb *req;
 
 	if (!percpu_ref_tryget(&ctx->refs))
 		return NULL;
 
 	if (!state) {
-		req = kmem_cache_alloc(req_cachep, __GFP_NOWARN);
+		req = kmem_cache_alloc(req_cachep, gfp);
 		if (unlikely(!req))
 			goto out;
 	} else if (!state->free_reqs) {
@@ -396,10 +414,18 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
 		int ret;
 
 		sz = min_t(size_t, state->ios_left, ARRAY_SIZE(state->reqs));
-		ret = kmem_cache_alloc_bulk(req_cachep, __GFP_NOWARN, sz,
-						state->reqs);
-		if (unlikely(ret <= 0))
-			goto out;
+		ret = kmem_cache_alloc_bulk(req_cachep, gfp, sz, state->reqs);
+
+		/*
+		 * Bulk alloc is all-or-nothing. If we fail to get a batch,
+		 * retry single alloc to be on the safe side.
+		 */
+		if (unlikely(ret <= 0)) {
+			state->reqs[0] = kmem_cache_alloc(req_cachep, gfp);
+			if (!state->reqs[0])
+				goto out;
+			ret = 1;
+		}
 		state->free_reqs = ret - 1;
 		state->cur_req = 1;
 		req = state->reqs[0];
@@ -411,7 +437,8 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
 
 	req->ctx = ctx;
 	req->flags = 0;
-	refcount_set(&req->refs, 0);
+	/* one is dropped after submission, the other at completion */
+	refcount_set(&req->refs, 2);
 	return req;
 out:
 	io_ring_drop_ctx_refs(ctx, 1);
@@ -429,10 +456,16 @@ static void io_free_req_many(struct io_ring_ctx *ctx, void **reqs, int *nr)
 
 static void io_free_req(struct io_kiocb *req)
 {
-	if (!refcount_read(&req->refs) || refcount_dec_and_test(&req->refs)) {
-		io_ring_drop_ctx_refs(req->ctx, 1);
-		kmem_cache_free(req_cachep, req);
-	}
+	if (req->file && !(req->flags & REQ_F_FIXED_FILE))
+		fput(req->file);
+	io_ring_drop_ctx_refs(req->ctx, 1);
+	kmem_cache_free(req_cachep, req);
+}
+
+static void io_put_req(struct io_kiocb *req)
+{
+	if (refcount_dec_and_test(&req->refs))
+		io_free_req(req);
 }
 
 /*
@@ -442,44 +475,34 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
 			       struct list_head *done)
 {
 	void *reqs[IO_IOPOLL_BATCH];
-	int file_count, to_free;
-	struct file *file = NULL;
 	struct io_kiocb *req;
+	int to_free;
 
-	file_count = to_free = 0;
+	to_free = 0;
 	while (!list_empty(done)) {
 		req = list_first_entry(done, struct io_kiocb, list);
 		list_del(&req->list);
 
 		io_cqring_fill_event(ctx, req->user_data, req->error, 0);
-
-		reqs[to_free++] = req;
 		(*nr_events)++;
 
-		/*
-		 * Batched puts of the same file, to avoid dirtying the
-		 * file usage count multiple times, if avoidable.
-		 */
-		if (!(req->flags & REQ_F_FIXED_FILE)) {
-			if (!file) {
-				file = req->rw.ki_filp;
-				file_count = 1;
-			} else if (file == req->rw.ki_filp) {
-				file_count++;
+		if (refcount_dec_and_test(&req->refs)) {
+			/* If we're not using fixed files, we have to pair the
+			 * completion part with the file put. Use regular
+			 * completions for those, only batch free for fixed
+			 * file.
+			 */
+			if (req->flags & REQ_F_FIXED_FILE) {
+				reqs[to_free++] = req;
+				if (to_free == ARRAY_SIZE(reqs))
+					io_free_req_many(ctx, reqs, &to_free);
 			} else {
-				fput_many(file, file_count);
-				file = req->rw.ki_filp;
-				file_count = 1;
+				io_free_req(req);
 			}
 		}
-
-		if (to_free == ARRAY_SIZE(reqs))
-			io_free_req_many(ctx, reqs, &to_free);
 	}
-	io_commit_cqring(ctx);
-	if (file)
-		fput_many(file, file_count);
+	io_commit_cqring(ctx);
 	io_free_req_many(ctx, reqs, &to_free);
 }
@@ -602,21 +625,14 @@ static void kiocb_end_write(struct kiocb *kiocb)
 	}
 }
 
-static void io_fput(struct io_kiocb *req)
-{
-	if (!(req->flags & REQ_F_FIXED_FILE))
-		fput(req->rw.ki_filp);
-}
-
 static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
 {
 	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
 
 	kiocb_end_write(kiocb);
 
-	io_fput(req);
 	io_cqring_add_event(req->ctx, req->user_data, res, 0);
-	io_free_req(req);
+	io_put_req(req);
 }
 
 static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
@@ -731,31 +747,18 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
 	const struct io_uring_sqe *sqe = s->sqe;
 	struct io_ring_ctx *ctx = req->ctx;
 	struct kiocb *kiocb = &req->rw;
-	unsigned ioprio, flags;
-	int fd, ret;
+	unsigned ioprio;
+	int ret;
 
+	if (!req->file)
+		return -EBADF;
 	/* For -EAGAIN retry, everything is already prepped */
-	if (kiocb->ki_filp)
+	if (req->flags & REQ_F_PREPPED)
 		return 0;
 
-	flags = READ_ONCE(sqe->flags);
-	fd = READ_ONCE(sqe->fd);
+	if (force_nonblock && !io_file_supports_async(req->file))
+		force_nonblock = false;
 
-	if (flags & IOSQE_FIXED_FILE) {
-		if (unlikely(!ctx->user_files ||
-		    (unsigned) fd >= ctx->nr_user_files))
-			return -EBADF;
-		kiocb->ki_filp = ctx->user_files[fd];
-		req->flags |= REQ_F_FIXED_FILE;
-	} else {
-		if (s->needs_fixed_file)
-			return -EBADF;
-		kiocb->ki_filp = io_file_get(state, fd);
-		if (unlikely(!kiocb->ki_filp))
-			return -EBADF;
-		if (force_nonblock && !io_file_supports_async(kiocb->ki_filp))
-			force_nonblock = false;
-	}
 	kiocb->ki_pos = READ_ONCE(sqe->off);
 	kiocb->ki_flags = iocb_flags(kiocb->ki_filp);
 	kiocb->ki_hint = ki_hint_validate(file_write_hint(kiocb->ki_filp));
@@ -764,7 +767,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
 	if (ioprio) {
 		ret = ioprio_check_cap(ioprio);
 		if (ret)
-			goto out_fput;
+			return ret;
 
 		kiocb->ki_ioprio = ioprio;
 	} else
@@ -772,38 +775,26 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
 
 	ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags));
 	if (unlikely(ret))
-		goto out_fput;
+		return ret;
 	if (force_nonblock) {
 		kiocb->ki_flags |= IOCB_NOWAIT;
 		req->flags |= REQ_F_FORCE_NONBLOCK;
 	}
 	if (ctx->flags & IORING_SETUP_IOPOLL) {
-		ret = -EOPNOTSUPP;
 		if (!(kiocb->ki_flags & IOCB_DIRECT) ||
 		    !kiocb->ki_filp->f_op->iopoll)
-			goto out_fput;
+			return -EOPNOTSUPP;
 
 		req->error = 0;
 		kiocb->ki_flags |= IOCB_HIPRI;
 		kiocb->ki_complete = io_complete_rw_iopoll;
 	} else {
-		if (kiocb->ki_flags & IOCB_HIPRI) {
-			ret = -EINVAL;
-			goto out_fput;
-		}
+		if (kiocb->ki_flags & IOCB_HIPRI)
+			return -EINVAL;
 		kiocb->ki_complete = io_complete_rw;
 	}
+
+	req->flags |= REQ_F_PREPPED;
 	return 0;
-out_fput:
-	if (!(flags & IOSQE_FIXED_FILE)) {
-		/*
-		 * in case of error, we didn't use this file reference. drop it.
-		 */
-		if (state)
-			state->used_refs--;
-		io_file_put(state, kiocb->ki_filp);
-	}
-	return ret;
 }
 
 static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret)
@@ -864,6 +855,9 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw,
 	iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len);
 	if (offset)
 		iov_iter_advance(iter, offset);
+
+	/* don't drop a reference to these pages */
+	iter->type |= ITER_BVEC_FLAG_NO_REF;
 	return 0;
 }
 
@@ -887,7 +881,7 @@ static int io_import_iovec(struct io_ring_ctx *ctx, int rw,
 	opcode = READ_ONCE(sqe->opcode);
 	if (opcode == IORING_OP_READ_FIXED ||
 	    opcode == IORING_OP_WRITE_FIXED) {
-		ssize_t ret = io_import_fixed(ctx, rw, sqe, iter);
+		int ret = io_import_fixed(ctx, rw, sqe, iter);
 		*iovec = NULL;
 		return ret;
 	}
@@ -945,31 +939,29 @@ static void io_async_list_note(int rw, struct io_kiocb *req, size_t len)
 	async_list->io_end = io_end;
 }
 
-static ssize_t io_read(struct io_kiocb *req, const struct sqe_submit *s,
-		       bool force_nonblock, struct io_submit_state *state)
+static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
+		   bool force_nonblock, struct io_submit_state *state)
 {
 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
 	struct kiocb *kiocb = &req->rw;
 	struct iov_iter iter;
 	struct file *file;
 	size_t iov_count;
-	ssize_t ret;
+	int ret;
 
 	ret = io_prep_rw(req, s, force_nonblock, state);
 	if (ret)
 		return ret;
 	file = kiocb->ki_filp;
 
-	ret = -EBADF;
 	if (unlikely(!(file->f_mode & FMODE_READ)))
-		goto out_fput;
-	ret = -EINVAL;
+		return -EBADF;
 	if (unlikely(!file->f_op->read_iter))
-		goto out_fput;
+		return -EINVAL;
 
 	ret = io_import_iovec(req->ctx, READ, s, &iovec, &iter);
 	if (ret)
-		goto out_fput;
+		return ret;
 
 	iov_count = iov_iter_count(&iter);
 	ret = rw_verify_area(READ, file, &kiocb->ki_pos, iov_count);
@@ -991,38 +983,32 @@ static ssize_t io_read(struct io_kiocb *req, const struct sqe_submit *s,
 		}
 	}
 	kfree(iovec);
-out_fput:
-	/* Hold on to the file for -EAGAIN */
-	if (unlikely(ret && ret != -EAGAIN))
-		io_fput(req);
 	return ret;
 }
 
-static ssize_t io_write(struct io_kiocb *req, const struct sqe_submit *s,
-			bool force_nonblock, struct io_submit_state *state)
+static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
+		    bool force_nonblock, struct io_submit_state *state)
 {
 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
 	struct kiocb *kiocb = &req->rw;
 	struct iov_iter iter;
 	struct file *file;
 	size_t iov_count;
-	ssize_t ret;
+	int ret;
 
 	ret = io_prep_rw(req, s, force_nonblock, state);
 	if (ret)
 		return ret;
 
-	ret = -EBADF;
 	file = kiocb->ki_filp;
 	if (unlikely(!(file->f_mode & FMODE_WRITE)))
-		goto out_fput;
-	ret = -EINVAL;
+		return -EBADF;
 	if (unlikely(!file->f_op->write_iter))
-		goto out_fput;
+		return -EINVAL;
 
 	ret = io_import_iovec(req->ctx, WRITE, s, &iovec, &iter);
 	if (ret)
-		goto out_fput;
+		return ret;
 
 	iov_count = iov_iter_count(&iter);
@@ -1054,10 +1040,6 @@ static ssize_t io_write(struct io_kiocb *req, const struct sqe_submit *s,
 	}
 out_free:
 	kfree(iovec);
-out_fput:
-	/* Hold on to the file for -EAGAIN */
-	if (unlikely(ret && ret != -EAGAIN))
-		io_fput(req);
 	return ret;
 }
 
@@ -1072,29 +1054,19 @@ static int io_nop(struct io_kiocb *req, u64 user_data)
 	if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
 
-	/*
-	 * Twilight zone - it's possible that someone issued an opcode that
-	 * has a file attached, then got -EAGAIN on submission, and changed
-	 * the sqe before we retried it from async context. Avoid dropping
-	 * a file reference for this malicious case, and flag the error.
-	 */
-	if (req->rw.ki_filp) {
-		err = -EBADF;
-		io_fput(req);
-	}
 	io_cqring_add_event(ctx, user_data, err, 0);
-	io_free_req(req);
+	io_put_req(req);
 	return 0;
 }
 
 static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_ring_ctx *ctx = req->ctx;
-	unsigned flags;
-	int fd;
 
-	/* Prep already done */
-	if (req->rw.ki_filp)
+	if (!req->file)
+		return -EBADF;
+	/* Prep already done (EAGAIN retry) */
+	if (req->flags & REQ_F_PREPPED)
 		return 0;
 
 	if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
@@ -1102,20 +1074,7 @@ static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index))
 		return -EINVAL;
 
-	fd = READ_ONCE(sqe->fd);
-	flags = READ_ONCE(sqe->flags);
-
-	if (flags & IOSQE_FIXED_FILE) {
-		if (unlikely(!ctx->user_files || fd >= ctx->nr_user_files))
-			return -EBADF;
-		req->rw.ki_filp = ctx->user_files[fd];
-		req->flags |= REQ_F_FIXED_FILE;
-	} else {
-		req->rw.ki_filp = fget(fd);
-		if (unlikely(!req->rw.ki_filp))
-			return -EBADF;
-	}
-
+	req->flags |= REQ_F_PREPPED;
 	return 0;
 }
 
@@ -1144,9 +1103,8 @@ static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 				end > 0 ? end : LLONG_MAX,
 				fsync_flags & IORING_FSYNC_DATASYNC);
 
-	io_fput(req);
 	io_cqring_add_event(req->ctx, sqe->user_data, ret, 0);
-	io_free_req(req);
+	io_put_req(req);
 	return 0;
 }
 
@@ -1204,15 +1162,16 @@ static int io_poll_remove(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	spin_unlock_irq(&ctx->completion_lock);
 
 	io_cqring_add_event(req->ctx, sqe->user_data, ret, 0);
-	io_free_req(req);
+	io_put_req(req);
 	return 0;
 }
 
-static void io_poll_complete(struct io_kiocb *req, __poll_t mask)
+static void io_poll_complete(struct io_ring_ctx *ctx, struct io_kiocb *req,
+			     __poll_t mask)
 {
-	io_cqring_add_event(req->ctx, req->user_data, mangle_poll(mask), 0);
-	io_fput(req);
-	io_free_req(req);
+	req->poll.done = true;
+	io_cqring_fill_event(ctx, req->user_data, mangle_poll(mask), 0);
+	io_commit_cqring(ctx);
 }
 
 static void io_poll_complete_work(struct work_struct *work)
@@ -1240,9 +1199,11 @@ static void io_poll_complete_work(struct work_struct *work)
 		return;
 	}
 	list_del_init(&req->list);
+	io_poll_complete(ctx, req, mask);
 	spin_unlock_irq(&ctx->completion_lock);
 
-	io_poll_complete(req, mask);
+	io_cqring_ev_posted(ctx);
+	io_put_req(req);
 }
 
 static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
@@ -1253,29 +1214,25 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 	struct io_kiocb *req = container_of(poll, struct io_kiocb, poll);
 	struct io_ring_ctx *ctx = req->ctx;
 	__poll_t mask = key_to_poll(key);
-
-	poll->woken = true;
+	unsigned long flags;
 
 	/* for instances that support it check for an event match first: */
-	if (mask) {
-		unsigned long flags;
+	if (mask && !(mask & poll->events))
+		return 0;
 
-		if (!(mask & poll->events))
-			return 0;
+	list_del_init(&poll->wait.entry);
 
-		/* try to complete the iocb inline if we can: */
-		if (spin_trylock_irqsave(&ctx->completion_lock, flags)) {
-			list_del(&req->list);
-			spin_unlock_irqrestore(&ctx->completion_lock, flags);
+	if (mask && spin_trylock_irqsave(&ctx->completion_lock, flags)) {
+		list_del(&req->list);
+		io_poll_complete(ctx, req, mask);
+		spin_unlock_irqrestore(&ctx->completion_lock, flags);
 
-			list_del_init(&poll->wait.entry);
-			io_poll_complete(req, mask);
-			return 1;
-		}
+		io_cqring_ev_posted(ctx);
+		io_put_req(req);
+	} else {
+		queue_work(ctx->sqo_wq, &req->work);
 	}
 
-	list_del_init(&poll->wait.entry);
-	queue_work(ctx->sqo_wq, &req->work);
 	return 1;
 }
 
@@ -1305,36 +1262,23 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	struct io_poll_iocb *poll = &req->poll;
 	struct io_ring_ctx *ctx = req->ctx;
 	struct io_poll_table ipt;
-	unsigned flags;
+	bool cancel = false;
 	__poll_t mask;
 	u16 events;
-	int fd;
 
 	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
 	if (sqe->addr || sqe->ioprio || sqe->off || sqe->len || sqe->buf_index)
 		return -EINVAL;
+	if (!poll->file)
+		return -EBADF;
 
 	INIT_WORK(&req->work, io_poll_complete_work);
 	events = READ_ONCE(sqe->poll_events);
 	poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP;
 
-	flags = READ_ONCE(sqe->flags);
-	fd = READ_ONCE(sqe->fd);
-
-	if (flags & IOSQE_FIXED_FILE) {
-		if (unlikely(!ctx->user_files || fd >= ctx->nr_user_files))
-			return -EBADF;
-		poll->file = ctx->user_files[fd];
-		req->flags |= REQ_F_FIXED_FILE;
-	} else {
-		poll->file = fget(fd);
-	}
-	if (unlikely(!poll->file))
-		return -EBADF;
-
 	poll->head = NULL;
-	poll->woken = false;
+	poll->done = false;
 	poll->canceled = false;
@@ -1346,56 +1290,44 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	INIT_LIST_HEAD(&poll->wait.entry);
 	init_waitqueue_func_entry(&poll->wait, io_poll_wake);
 
-	/* one for removal from waitqueue, one for this function */
-	refcount_set(&req->refs, 2);
-
 	mask = vfs_poll(poll->file, &ipt.pt) & poll->events;
-	if (unlikely(!poll->head)) {
-		/* we did not manage to set up a waitqueue, done */
-		goto out;
-	}
 
 	spin_lock_irq(&ctx->completion_lock);
-	spin_lock(&poll->head->lock);
-	if (poll->woken) {
-		/* wake_up context handles the rest */
-		mask = 0;
+	if (likely(poll->head)) {
+		spin_lock(&poll->head->lock);
+		if (unlikely(list_empty(&poll->wait.entry))) {
+			if (ipt.error)
+				cancel = true;
+			ipt.error = 0;
+			mask = 0;
+		}
+		if (mask || ipt.error)
+			list_del_init(&poll->wait.entry);
+		else if (cancel)
+			WRITE_ONCE(poll->canceled, true);
+		else if (!poll->done) /* actually waiting for an event */
+			list_add_tail(&req->list, &ctx->cancel_list);
+		spin_unlock(&poll->head->lock);
+	}
+	if (mask) { /* no async, we'd stolen it */
+		req->error = mangle_poll(mask);
 		ipt.error = 0;
-	} else if (mask || ipt.error) {
-		/* if we get an error or a mask we are done */
-		WARN_ON_ONCE(list_empty(&poll->wait.entry));
-		list_del_init(&poll->wait.entry);
-	} else {
-		/* actually waiting for an event */
-		list_add_tail(&req->list, &ctx->cancel_list);
+		io_poll_complete(ctx, req, mask);
 	}
-	spin_unlock(&poll->head->lock);
 	spin_unlock_irq(&ctx->completion_lock);
 
-out:
-	if (unlikely(ipt.error)) {
-		if (!(flags & IOSQE_FIXED_FILE))
-			fput(poll->file);
-		/*
-		 * Drop one of our refs to this req, __io_submit_sqe() will
-		 * drop the other one since we're returning an error.
-		 */
-		io_free_req(req);
-		return ipt.error;
+	if (mask) {
+		io_cqring_ev_posted(ctx);
+		io_put_req(req);
 	}
-
-	if (mask)
-		io_poll_complete(req, mask);
-	io_free_req(req);
-	return 0;
+	return ipt.error;
 }
 
 static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 			   const struct sqe_submit *s, bool force_nonblock,
 			   struct io_submit_state *state)
 {
-	ssize_t ret;
-	int opcode;
+	int ret, opcode;
 
 	if (unlikely(s->index >= ctx->sq_entries))
 		return -EINVAL;
@@ -1524,10 +1456,13 @@ restart:
 					break;
 				cond_resched();
 			} while (1);
+
+			/* drop submission reference */
+			io_put_req(req);
 		}
 		if (ret) {
 			io_cqring_add_event(ctx, sqe->user_data, ret, 0);
-			io_free_req(req);
+			io_put_req(req);
 		}
 
 		/* async context always use a copy of the sqe */
@@ -1614,11 +1549,55 @@ static bool io_add_to_prev_work(struct async_list *list, struct io_kiocb *req)
 	return ret;
 }
 
+static bool io_op_needs_file(const struct io_uring_sqe *sqe)
+{
+	int op = READ_ONCE(sqe->opcode);
+
+	switch (op) {
+	case IORING_OP_NOP:
+	case IORING_OP_POLL_REMOVE:
+		return false;
+	default:
+		return true;
+	}
+}
+
+static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s,
+			   struct io_submit_state *state, struct io_kiocb *req)
+{
+	unsigned flags;
+	int fd;
+
+	flags = READ_ONCE(s->sqe->flags);
+	fd = READ_ONCE(s->sqe->fd);
+
+	if (!io_op_needs_file(s->sqe)) {
+		req->file = NULL;
+		return 0;
+	}
+
+	if (flags & IOSQE_FIXED_FILE) {
+		if (unlikely(!ctx->user_files ||
+		    (unsigned) fd >= ctx->nr_user_files))
+			return -EBADF;
+		req->file = ctx->user_files[fd];
+		req->flags |= REQ_F_FIXED_FILE;
+	} else {
+		if (s->needs_fixed_file)
+			return -EBADF;
+		req->file = io_file_get(state, fd);
+		if (unlikely(!req->file))
+			return -EBADF;
+	}
+
+	return 0;
+}
+
 static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
 			 struct io_submit_state *state)
 {
 	struct io_kiocb *req;
-	ssize_t ret;
+	int ret;
 
 	/* enforce forwards compatibility on users */
 	if (unlikely(s->sqe->flags & ~IOSQE_FIXED_FILE))
@@ -1628,7 +1607,9 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
 	if (unlikely(!req))
 		return -EAGAIN;
 
-	req->rw.ki_filp = NULL;
+	ret = io_req_set_file(ctx, s, state, req);
+	if (unlikely(ret))
+		goto out;
 
 	ret = __io_submit_sqe(ctx, req, s, true, state);
 	if (ret == -EAGAIN) {
@@ -1649,11 +1630,23 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
 				INIT_WORK(&req->work, io_sq_wq_submit_work);
 				queue_work(ctx->sqo_wq, &req->work);
 			}
-			ret = 0;
+
+			/*
+			 * Queued up for async execution, worker will release
+			 * submit reference when the iocb is actually
+			 * submitted.
+			 */
+			return 0;
 		}
 	}
+
+out:
+	/* drop submission reference */
+	io_put_req(req);
+
+	/* and drop final reference, if we failed */
 	if (ret)
-		io_free_req(req);
+		io_put_req(req);
 
 	return ret;
 }
diff --git a/fs/iomap.c b/fs/iomap.c
index 97cb9d486a7d..abdd18e404f8 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -1589,12 +1589,14 @@ static void iomap_dio_bio_end_io(struct bio *bio)
 	if (should_dirty) {
 		bio_check_pages_dirty(bio);
 	} else {
-		struct bio_vec *bvec;
-		int i;
-		struct bvec_iter_all iter_all;
+		if (!bio_flagged(bio, BIO_NO_PAGE_REF)) {
+			struct bvec_iter_all iter_all;
+			struct bio_vec *bvec;
+			int i;
 
-		bio_for_each_segment_all(bvec, bio, i, iter_all)
-			put_page(bvec->bv_page);
+			bio_for_each_segment_all(bvec, bio, i, iter_all)
+				put_page(bvec->bv_page);
+		}
 		bio_put(bio);
 	}
 }
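The io_uring side of this diff replaces the old "refcount may be zero" request lifetime with a plain two-reference scheme: io_get_req() now starts every request with refcount_set(&req->refs, 2), the submission path drops one reference via io_put_req(), the completion path drops the other, and io_free_req() (which also puts any non-fixed file) only runs once the count reaches zero. The following is a minimal userspace sketch of that lifecycle, not kernel code; the names (get_req, put_req, complete_req) and the C11 atomic used as a stand-in for refcount_t are illustrative assumptions.

/* Standalone model of the two-reference request lifetime; illustration only. */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct request {
	atomic_int refs;	/* models req->refs */
	long user_data;
};

static struct request *get_req(long user_data)
{
	struct request *req = malloc(sizeof(*req));

	if (!req)
		return NULL;
	/* one ref is dropped after submission, the other at completion */
	atomic_init(&req->refs, 2);
	req->user_data = user_data;
	return req;
}

static void free_req(struct request *req)
{
	printf("freeing request %ld\n", req->user_data);
	free(req);
}

/* models io_put_req(): free only when the last reference goes away */
static void put_req(struct request *req)
{
	if (atomic_fetch_sub(&req->refs, 1) == 1)
		free_req(req);
}

static void complete_req(struct request *req, long res)
{
	printf("request %ld completed, res=%ld\n", req->user_data, res);
	put_req(req);		/* completion side drops its reference */
}

int main(void)
{
	struct request *req = get_req(42);

	if (!req)
		return 1;
	complete_req(req, 0);	/* completion may run from IRQ or a worker */
	put_req(req);		/* submission side drops its reference */
	return 0;
}

Starting at two means neither the submitter nor the completion side needs to know whether the other has already run; whichever drops the last reference performs the free, which is what lets the patch delete the per-opcode io_fput()/io_free_req() bookkeeping and the special "refcount not yet set" case in the old io_free_req().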
