summaryrefslogtreecommitdiff
path: root/fs/io_uring.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/io_uring.c')
-rw-r--r--fs/io_uring.c134
1 files changed, 60 insertions, 74 deletions
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 0b91b0631173..bb25e3997d41 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -619,6 +619,8 @@ struct io_kiocb {
bool needs_fixed_file;
u8 opcode;
+ u16 buf_index;
+
struct io_ring_ctx *ctx;
struct list_head list;
unsigned int flags;
@@ -680,8 +682,6 @@ struct io_op_def {
unsigned needs_mm : 1;
/* needs req->file assigned */
unsigned needs_file : 1;
- /* needs req->file assigned IFF fd is >= 0 */
- unsigned fd_non_neg : 1;
/* hash wq insertion if file is a regular file */
unsigned hash_reg_file : 1;
/* unbound wq insertion if file is a non-regular file */
@@ -784,8 +784,6 @@ static const struct io_op_def io_op_defs[] = {
.needs_file = 1,
},
[IORING_OP_OPENAT] = {
- .needs_file = 1,
- .fd_non_neg = 1,
.file_table = 1,
.needs_fs = 1,
},
@@ -799,8 +797,6 @@ static const struct io_op_def io_op_defs[] = {
},
[IORING_OP_STATX] = {
.needs_mm = 1,
- .needs_file = 1,
- .fd_non_neg = 1,
.needs_fs = 1,
.file_table = 1,
},
@@ -837,8 +833,6 @@ static const struct io_op_def io_op_defs[] = {
.buffer_select = 1,
},
[IORING_OP_OPENAT2] = {
- .needs_file = 1,
- .fd_non_neg = 1,
.file_table = 1,
.needs_fs = 1,
},
@@ -932,6 +926,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
goto err;
ctx->flags = p->flags;
+ init_waitqueue_head(&ctx->sqo_wait);
init_waitqueue_head(&ctx->cq_wait);
INIT_LIST_HEAD(&ctx->cq_overflow_list);
init_completion(&ctx->completions[0]);
@@ -1402,10 +1397,6 @@ static void io_free_req_many(struct io_ring_ctx *ctx, struct req_batch *rb)
for (i = 0; i < rb->to_free; i++) {
struct io_kiocb *req = rb->reqs[i];
- if (req->flags & REQ_F_FIXED_FILE) {
- req->file = NULL;
- percpu_ref_put(req->fixed_file_refs);
- }
if (req->flags & REQ_F_INFLIGHT)
inflight++;
__io_req_aux_free(req);
@@ -1678,7 +1669,7 @@ static inline bool io_req_multi_free(struct req_batch *rb, struct io_kiocb *req)
if ((req->flags & REQ_F_LINK_HEAD) || io_is_fallback_req(req))
return false;
- if (!(req->flags & REQ_F_FIXED_FILE) || req->io)
+ if (req->file || req->io)
rb->need_iter++;
rb->reqs[rb->to_free++] = req;
@@ -2112,9 +2103,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
req->rw.addr = READ_ONCE(sqe->addr);
req->rw.len = READ_ONCE(sqe->len);
- /* we own ->private, reuse it for the buffer index / buffer ID */
- req->rw.kiocb.private = (void *) (unsigned long)
- READ_ONCE(sqe->buf_index);
+ req->buf_index = READ_ONCE(sqe->buf_index);
return 0;
}
@@ -2157,7 +2146,7 @@ static ssize_t io_import_fixed(struct io_kiocb *req, int rw,
struct io_ring_ctx *ctx = req->ctx;
size_t len = req->rw.len;
struct io_mapped_ubuf *imu;
- unsigned index, buf_index;
+ u16 index, buf_index;
size_t offset;
u64 buf_addr;
@@ -2165,7 +2154,7 @@ static ssize_t io_import_fixed(struct io_kiocb *req, int rw,
if (unlikely(!ctx->user_bufs))
return -EFAULT;
- buf_index = (unsigned long) req->rw.kiocb.private;
+ buf_index = req->buf_index;
if (unlikely(buf_index >= ctx->nr_user_bufs))
return -EFAULT;
@@ -2281,10 +2270,10 @@ static void __user *io_rw_buffer_select(struct io_kiocb *req, size_t *len,
bool needs_lock)
{
struct io_buffer *kbuf;
- int bgid;
+ u16 bgid;
kbuf = (struct io_buffer *) (unsigned long) req->rw.addr;
- bgid = (int) (unsigned long) req->rw.kiocb.private;
+ bgid = req->buf_index;
kbuf = io_buffer_select(req, len, bgid, kbuf, needs_lock);
if (IS_ERR(kbuf))
return kbuf;
@@ -2375,7 +2364,7 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
}
/* buffer index only valid with fixed read/write, or buffer select */
- if (req->rw.kiocb.private && !(req->flags & REQ_F_BUFFER_SELECT))
+ if (req->buf_index && !(req->flags & REQ_F_BUFFER_SELECT))
return -EINVAL;
if (opcode == IORING_OP_READ || opcode == IORING_OP_WRITE) {
@@ -2775,16 +2764,16 @@ static int io_splice(struct io_kiocb *req, bool force_nonblock)
struct file *out = sp->file_out;
unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED;
loff_t *poff_in, *poff_out;
- long ret;
+ long ret = 0;
if (force_nonblock)
return -EAGAIN;
poff_in = (sp->off_in == -1) ? NULL : &sp->off_in;
poff_out = (sp->off_out == -1) ? NULL : &sp->off_out;
- ret = do_splice(in, poff_in, out, poff_out, sp->len, flags);
- if (force_nonblock && ret == -EAGAIN)
- return -EAGAIN;
+
+ if (sp->len)
+ ret = do_splice(in, poff_in, out, poff_out, sp->len, flags);
io_put_file(req, in, (sp->flags & SPLICE_F_FD_IN_FIXED));
req->flags &= ~REQ_F_NEED_CLEANUP;
@@ -4146,12 +4135,14 @@ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
req->result = mask;
init_task_work(&req->task_work, func);
/*
- * If this fails, then the task is exiting. Punt to one of the io-wq
- * threads to ensure the work gets run, we can't always rely on exit
- * cancelation taking care of this.
+ * If this fails, then the task is exiting. When a task exits, the
+ * work gets canceled, so just cancel this request as well instead
+ * of executing it. We can't safely execute it anyway, as we may not
+ * have the needed state needed for it anyway.
*/
ret = task_work_add(tsk, &req->task_work, true);
if (unlikely(ret)) {
+ WRITE_ONCE(poll->canceled, true);
tsk = io_wq_get_task(req->ctx->io_wq);
task_work_add(tsk, &req->task_work, true);
}
@@ -5022,12 +5013,13 @@ static int io_req_defer(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (!req_need_defer(req) && list_empty_careful(&ctx->defer_list))
return 0;
- if (!req->io && io_alloc_async_ctx(req))
- return -EAGAIN;
-
- ret = io_req_defer_prep(req, sqe);
- if (ret < 0)
- return ret;
+ if (!req->io) {
+ if (io_alloc_async_ctx(req))
+ return -EAGAIN;
+ ret = io_req_defer_prep(req, sqe);
+ if (ret < 0)
+ return ret;
+ }
spin_lock_irq(&ctx->completion_lock);
if (!req_need_defer(req) && list_empty(&ctx->defer_list)) {
@@ -5314,7 +5306,8 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret)
return ret;
- if (ctx->flags & IORING_SETUP_IOPOLL) {
+ /* If the op doesn't have a file, we're not polling for it */
+ if ((ctx->flags & IORING_SETUP_IOPOLL) && req->file) {
const bool in_async = io_wq_current_is_worker();
if (req->result == -EAGAIN)
@@ -5368,15 +5361,6 @@ static void io_wq_submit_work(struct io_wq_work **workptr)
io_steal_work(req, workptr);
}
-static int io_req_needs_file(struct io_kiocb *req, int fd)
-{
- if (!io_op_defs[req->opcode].needs_file)
- return 0;
- if ((fd == -1 || fd == AT_FDCWD) && io_op_defs[req->opcode].fd_non_neg)
- return 0;
- return 1;
-}
-
static inline struct file *io_file_from_index(struct io_ring_ctx *ctx,
int index)
{
@@ -5414,14 +5398,11 @@ static int io_file_get(struct io_submit_state *state, struct io_kiocb *req,
}
static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req,
- int fd, unsigned int flags)
+ int fd)
{
bool fixed;
- if (!io_req_needs_file(req, fd))
- return 0;
-
- fixed = (flags & IOSQE_FIXED_FILE);
+ fixed = (req->flags & REQ_F_FIXED_FILE) != 0;
if (unlikely(!fixed && req->needs_fixed_file))
return -EBADF;
@@ -5627,9 +5608,15 @@ fail_req:
io_double_put_req(req);
}
} else if (req->flags & REQ_F_FORCE_ASYNC) {
- ret = io_req_defer_prep(req, sqe);
- if (unlikely(ret < 0))
- goto fail_req;
+ if (!req->io) {
+ ret = -EAGAIN;
+ if (io_alloc_async_ctx(req))
+ goto fail_req;
+ ret = io_req_defer_prep(req, sqe);
+ if (unlikely(ret < 0))
+ goto fail_req;
+ }
+
/*
* Never try inline submit of IOSQE_ASYNC is set, go straight
* to async execution.
@@ -5798,7 +5785,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
struct io_submit_state *state, bool async)
{
unsigned int sqe_flags;
- int id, fd;
+ int id;
/*
* All io need record the previous position, if LINK vs DARIN,
@@ -5850,8 +5837,10 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
IOSQE_ASYNC | IOSQE_FIXED_FILE |
IOSQE_BUFFER_SELECT | IOSQE_IO_LINK);
- fd = READ_ONCE(sqe->fd);
- return io_req_set_file(state, req, fd, sqe_flags);
+ if (!io_op_defs[req->opcode].needs_file)
+ return 0;
+
+ return io_req_set_file(state, req, READ_ONCE(sqe->fd));
}
static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
@@ -6043,6 +6032,7 @@ static int io_sq_thread(void *data)
finish_wait(&ctx->sqo_wait, &wait);
ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP;
+ ret = 0;
continue;
}
finish_wait(&ctx->sqo_wait, &wait);
@@ -6856,7 +6846,6 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx,
{
int ret;
- init_waitqueue_head(&ctx->sqo_wait);
mmgrab(current->mm);
ctx->sqo_mm = current->mm;
@@ -7360,11 +7349,9 @@ static int io_uring_release(struct inode *inode, struct file *file)
static void io_uring_cancel_files(struct io_ring_ctx *ctx,
struct files_struct *files)
{
- struct io_kiocb *req;
- DEFINE_WAIT(wait);
-
while (!list_empty_careful(&ctx->inflight_list)) {
- struct io_kiocb *cancel_req = NULL;
+ struct io_kiocb *cancel_req = NULL, *req;
+ DEFINE_WAIT(wait);
spin_lock_irq(&ctx->inflight_lock);
list_for_each_entry(req, &ctx->inflight_list, inflight_entry) {
@@ -7404,6 +7391,7 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
*/
if (refcount_sub_and_test(2, &cancel_req->refs)) {
io_put_req(cancel_req);
+ finish_wait(&ctx->inflight_wait, &wait);
continue;
}
}
@@ -7411,8 +7399,8 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
io_wq_cancel_work(ctx->io_wq, &cancel_req->work);
io_put_req(cancel_req);
schedule();
+ finish_wait(&ctx->inflight_wait, &wait);
}
- finish_wait(&ctx->inflight_wait, &wait);
}
static int io_uring_flush(struct file *file, void *data)
@@ -7761,7 +7749,8 @@ err:
return ret;
}
-static int io_uring_create(unsigned entries, struct io_uring_params *p)
+static int io_uring_create(unsigned entries, struct io_uring_params *p,
+ struct io_uring_params __user *params)
{
struct user_struct *user = NULL;
struct io_ring_ctx *ctx;
@@ -7853,6 +7842,14 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p)
p->cq_off.overflow = offsetof(struct io_rings, cq_overflow);
p->cq_off.cqes = offsetof(struct io_rings, cqes);
+ p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP |
+ IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
+ IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL;
+
+ if (copy_to_user(params, p, sizeof(*p))) {
+ ret = -EFAULT;
+ goto err;
+ }
/*
* Install ring fd as the very last thing, so we don't risk someone
* having closed it before we finish setup
@@ -7861,9 +7858,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p)
if (ret < 0)
goto err;
- p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP |
- IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
- IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL;
trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags);
return ret;
err:
@@ -7879,7 +7873,6 @@ err:
static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
{
struct io_uring_params p;
- long ret;
int i;
if (copy_from_user(&p, params, sizeof(p)))
@@ -7894,14 +7887,7 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ))
return -EINVAL;
- ret = io_uring_create(entries, &p);
- if (ret < 0)
- return ret;
-
- if (copy_to_user(params, &p, sizeof(p)))
- return -EFAULT;
-
- return ret;
+ return io_uring_create(entries, &p, params);
}
SYSCALL_DEFINE2(io_uring_setup, u32, entries,