diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-05-13 12:23:17 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-05-13 12:23:17 -0700 |
commit | f4e8d80292859809ea135e9f4c43bae47e4f58bc (patch) | |
tree | dcf96c36c22e526b3ae9898e65429cb8f6a551fe | |
parent | ef31ea6c2774c015946d2ffa26795766f7caaa42 (diff) | |
parent | 3a93daea2fb27fcefa85662654ba583a5d0c7231 (diff) |
Merge tag 'vfs-6.10.rw' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull vfs rw iterator updates from Christian Brauner:
"The core fs signalfd, userfaultfd, and timerfd subsystems did still
use f_op->read() instead of f_op->read_iter(). Convert them over since
we should aim to get rid of f_op->read() at some point.
Aside from that io_uring and others want to mark files as FMODE_NOWAIT
so it can make use of per-IO nonblocking hints to enable more
efficient IO. Converting those users to f_op->read_iter() allows them
to be marked with FMODE_NOWAIT"
* tag 'vfs-6.10.rw' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
signalfd: convert to ->read_iter()
userfaultfd: convert to ->read_iter()
timerfd: convert to ->read_iter()
new helper: copy_to_iter_full()
-rw-r--r-- | fs/signalfd.c | 44 | ||||
-rw-r--r-- | fs/timerfd.c | 36 | ||||
-rw-r--r-- | fs/userfaultfd.c | 44 | ||||
-rw-r--r-- | include/linux/uio.h | 10 | ||||
-rw-r--r-- | include/net/udp.h | 9 |
5 files changed, 93 insertions, 50 deletions
diff --git a/fs/signalfd.c b/fs/signalfd.c index e20d1484c663..4a5614442dbf 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -68,8 +68,7 @@ static __poll_t signalfd_poll(struct file *file, poll_table *wait) /* * Copied from copy_siginfo_to_user() in kernel/signal.c */ -static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo, - kernel_siginfo_t const *kinfo) +static int signalfd_copyinfo(struct iov_iter *to, kernel_siginfo_t const *kinfo) { struct signalfd_siginfo new; @@ -146,10 +145,10 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo, break; } - if (copy_to_user(uinfo, &new, sizeof(struct signalfd_siginfo))) + if (!copy_to_iter_full(&new, sizeof(struct signalfd_siginfo), to)) return -EFAULT; - return sizeof(*uinfo); + return sizeof(struct signalfd_siginfo); } static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, kernel_siginfo_t *info, @@ -199,28 +198,27 @@ static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, kernel_siginfo_t *info * error code. The "count" parameter must be at least the size of a * "struct signalfd_siginfo". */ -static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count, - loff_t *ppos) +static ssize_t signalfd_read_iter(struct kiocb *iocb, struct iov_iter *to) { + struct file *file = iocb->ki_filp; struct signalfd_ctx *ctx = file->private_data; - struct signalfd_siginfo __user *siginfo; - int nonblock = file->f_flags & O_NONBLOCK; + size_t count = iov_iter_count(to); ssize_t ret, total = 0; kernel_siginfo_t info; + bool nonblock; count /= sizeof(struct signalfd_siginfo); if (!count) return -EINVAL; - siginfo = (struct signalfd_siginfo __user *) buf; + nonblock = file->f_flags & O_NONBLOCK || iocb->ki_flags & IOCB_NOWAIT; do { ret = signalfd_dequeue(ctx, &info, nonblock); if (unlikely(ret <= 0)) break; - ret = signalfd_copyinfo(siginfo, &info); + ret = signalfd_copyinfo(to, &info); if (ret < 0) break; - siginfo++; total += ret; nonblock = 1; } while (--count); @@ -246,7 +244,7 @@ static const struct file_operations signalfd_fops = { #endif .release = signalfd_release, .poll = signalfd_poll, - .read = signalfd_read, + .read_iter = signalfd_read_iter, .llseek = noop_llseek, }; @@ -265,20 +263,34 @@ static int do_signalfd4(int ufd, sigset_t *mask, int flags) signotset(mask); if (ufd == -1) { + struct file *file; + ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; ctx->sigmask = *mask; + ufd = get_unused_fd_flags(flags & O_CLOEXEC); + if (ufd < 0) { + kfree(ctx); + return ufd; + } + + file = anon_inode_getfile("[signalfd]", &signalfd_fops, ctx, + O_RDWR | (flags & O_NONBLOCK)); + if (IS_ERR(file)) { + put_unused_fd(ufd); + kfree(ctx); + return ufd; + } + file->f_mode |= FMODE_NOWAIT; + /* * When we call this, the initialization must be complete, since * anon_inode_getfd() will install the fd. */ - ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx, - O_RDWR | (flags & (O_CLOEXEC | O_NONBLOCK))); - if (ufd < 0) - kfree(ctx); + fd_install(ufd, file); } else { struct fd f = fdget(ufd); if (!f.file) diff --git a/fs/timerfd.c b/fs/timerfd.c index e9c96a0c79f1..4bf2f8bfec11 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -262,17 +262,18 @@ static __poll_t timerfd_poll(struct file *file, poll_table *wait) return events; } -static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, - loff_t *ppos) +static ssize_t timerfd_read_iter(struct kiocb *iocb, struct iov_iter *to) { + struct file *file = iocb->ki_filp; struct timerfd_ctx *ctx = file->private_data; ssize_t res; u64 ticks = 0; - if (count < sizeof(ticks)) + if (iov_iter_count(to) < sizeof(ticks)) return -EINVAL; + spin_lock_irq(&ctx->wqh.lock); - if (file->f_flags & O_NONBLOCK) + if (file->f_flags & O_NONBLOCK || iocb->ki_flags & IOCB_NOWAIT) res = -EAGAIN; else res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks); @@ -312,8 +313,11 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, ctx->ticks = 0; } spin_unlock_irq(&ctx->wqh.lock); - if (ticks) - res = put_user(ticks, (u64 __user *) buf) ? -EFAULT: sizeof(ticks); + if (ticks) { + res = copy_to_iter(&ticks, sizeof(ticks), to); + if (!res) + res = -EFAULT; + } return res; } @@ -384,7 +388,7 @@ static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg static const struct file_operations timerfd_fops = { .release = timerfd_release, .poll = timerfd_poll, - .read = timerfd_read, + .read_iter = timerfd_read_iter, .llseek = noop_llseek, .show_fdinfo = timerfd_show, .unlocked_ioctl = timerfd_ioctl, @@ -407,6 +411,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) { int ufd; struct timerfd_ctx *ctx; + struct file *file; /* Check the TFD_* constants for consistency. */ BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC); @@ -443,11 +448,22 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) ctx->moffs = ktime_mono_to_real(0); - ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, - O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS)); - if (ufd < 0) + ufd = get_unused_fd_flags(flags & TFD_SHARED_FCNTL_FLAGS); + if (ufd < 0) { + kfree(ctx); + return ufd; + } + + file = anon_inode_getfile("[timerfd]", &timerfd_fops, ctx, + O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS)); + if (IS_ERR(file)) { + put_unused_fd(ufd); kfree(ctx); + return PTR_ERR(file); + } + file->f_mode |= FMODE_NOWAIT; + fd_install(ufd, file); return ufd; } diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 292f5fd50104..2a564f813314 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -31,6 +31,7 @@ #include <linux/hugetlb.h> #include <linux/swapops.h> #include <linux/miscdevice.h> +#include <linux/uio.h> static int sysctl_unprivileged_userfaultfd __read_mostly; @@ -282,7 +283,7 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx, /* * Verify the pagetables are still not ok after having reigstered into * the fault_pending_wqh to avoid userland having to UFFDIO_WAKE any - * userfault that has already been resolved, if userfaultfd_read and + * userfault that has already been resolved, if userfaultfd_read_iter and * UFFDIO_COPY|ZEROPAGE are being run simultaneously on two different * threads. */ @@ -1181,34 +1182,34 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait, return ret; } -static ssize_t userfaultfd_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) +static ssize_t userfaultfd_read_iter(struct kiocb *iocb, struct iov_iter *to) { + struct file *file = iocb->ki_filp; struct userfaultfd_ctx *ctx = file->private_data; ssize_t _ret, ret = 0; struct uffd_msg msg; - int no_wait = file->f_flags & O_NONBLOCK; struct inode *inode = file_inode(file); + bool no_wait; if (!userfaultfd_is_initialized(ctx)) return -EINVAL; + no_wait = file->f_flags & O_NONBLOCK || iocb->ki_flags & IOCB_NOWAIT; for (;;) { - if (count < sizeof(msg)) + if (iov_iter_count(to) < sizeof(msg)) return ret ? ret : -EINVAL; _ret = userfaultfd_ctx_read(ctx, no_wait, &msg, inode); if (_ret < 0) return ret ? ret : _ret; - if (copy_to_user((__u64 __user *) buf, &msg, sizeof(msg))) + _ret = !copy_to_iter_full(&msg, sizeof(msg), to); + if (_ret) return ret ? ret : -EFAULT; ret += sizeof(msg); - buf += sizeof(msg); - count -= sizeof(msg); /* * Allow to read more than one fault at time but only * block if waiting for the very first one. */ - no_wait = O_NONBLOCK; + no_wait = true; } } @@ -2176,7 +2177,7 @@ static const struct file_operations userfaultfd_fops = { #endif .release = userfaultfd_release, .poll = userfaultfd_poll, - .read = userfaultfd_read, + .read_iter = userfaultfd_read_iter, .unlocked_ioctl = userfaultfd_ioctl, .compat_ioctl = compat_ptr_ioctl, .llseek = noop_llseek, @@ -2196,6 +2197,7 @@ static void init_once_userfaultfd_ctx(void *mem) static int new_userfaultfd(int flags) { struct userfaultfd_ctx *ctx; + struct file *file; int fd; BUG_ON(!current->mm); @@ -2219,16 +2221,26 @@ static int new_userfaultfd(int flags) init_rwsem(&ctx->map_changing_lock); atomic_set(&ctx->mmap_changing, 0); ctx->mm = current->mm; - /* prevent the mm struct to be freed */ - mmgrab(ctx->mm); + + fd = get_unused_fd_flags(flags & UFFD_SHARED_FCNTL_FLAGS); + if (fd < 0) + goto err_out; /* Create a new inode so that the LSM can block the creation. */ - fd = anon_inode_create_getfd("[userfaultfd]", &userfaultfd_fops, ctx, + file = anon_inode_create_getfile("[userfaultfd]", &userfaultfd_fops, ctx, O_RDONLY | (flags & UFFD_SHARED_FCNTL_FLAGS), NULL); - if (fd < 0) { - mmdrop(ctx->mm); - kmem_cache_free(userfaultfd_ctx_cachep, ctx); + if (IS_ERR(file)) { + put_unused_fd(fd); + fd = PTR_ERR(file); + goto err_out; } + /* prevent the mm struct to be freed */ + mmgrab(ctx->mm); + file->f_mode |= FMODE_NOWAIT; + fd_install(fd, file); + return fd; +err_out: + kmem_cache_free(userfaultfd_ctx_cachep, ctx); return fd; } diff --git a/include/linux/uio.h b/include/linux/uio.h index 00cebe2b70de..7020adedfa08 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -206,6 +206,16 @@ size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) } static __always_inline __must_check +bool copy_to_iter_full(const void *addr, size_t bytes, struct iov_iter *i) +{ + size_t copied = copy_to_iter(addr, bytes, i); + if (likely(copied == bytes)) + return true; + iov_iter_revert(i, copied); + return false; +} + +static __always_inline __must_check bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i) { size_t copied = copy_from_iter(addr, bytes, i); diff --git a/include/net/udp.h b/include/net/udp.h index 488a6d2babcc..c4e05b14b648 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -379,14 +379,7 @@ static inline bool udp_skb_is_linear(struct sk_buff *skb) static inline int copy_linear_skb(struct sk_buff *skb, int len, int off, struct iov_iter *to) { - int n; - - n = copy_to_iter(skb->data + off, len, to); - if (n == len) - return 0; - - iov_iter_revert(to, n); - return -EFAULT; + return copy_to_iter_full(skb->data + off, len, to) ? 0 : -EFAULT; } /* |