From cf587db2ee0261c74d04f61f39783db88a0b65e4 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 10 Mar 2023 16:03:23 -0600 Subject: kernel: Allow a kernel thread's name to be set in copy_process This patch allows kernel users to pass in the thread name so it can be set during creation instead of having to use set_task_comm after the thread is created. Signed-off-by: Mike Christie Acked-by: Michael S. Tsirkin Signed-off-by: Christian Brauner (Microsoft) --- include/linux/sched/task.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux/sched/task.h') diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 357e0068497c..32c9f01af0a6 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -23,6 +23,7 @@ struct kernel_clone_args { int __user *pidfd; int __user *child_tid; int __user *parent_tid; + const char *name; int exit_signal; unsigned long stack; unsigned long stack_size; @@ -91,7 +92,8 @@ extern void exit_itimers(struct task_struct *); extern pid_t kernel_clone(struct kernel_clone_args *kargs); struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node); struct task_struct *fork_idle(int); -extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); +extern pid_t kernel_thread(int (*fn)(void *), void *arg, const char *name, + unsigned long flags); extern pid_t user_mode_thread(int (*fn)(void *), void *arg, unsigned long flags); extern long kernel_wait4(pid_t, int __user *, int, struct rusage *); int kernel_wait(pid_t pid, int *stat); -- cgit v1.2.3-70-g09d2 From c81cc5819faf5dd77124f5086aa654482281ac37 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 10 Mar 2023 16:03:25 -0600 Subject: kernel: Make io_thread and kthread bit fields We only set args->io_thread/kthread to 0 or 1 then test if they are set, so make them bit fields. Signed-off-by: Mike Christie Acked-by: Michael S. Tsirkin Signed-off-by: Christian Brauner (Microsoft) --- include/linux/sched/task.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/sched/task.h') diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 32c9f01af0a6..268c77a42155 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -25,6 +25,8 @@ struct kernel_clone_args { int __user *parent_tid; const char *name; int exit_signal; + u32 kthread:1; + u32 io_thread:1; unsigned long stack; unsigned long stack_size; unsigned long tls; @@ -32,8 +34,6 @@ struct kernel_clone_args { /* Number of elements in *set_tid */ size_t set_tid_size; int cgroup; - int io_thread; - int kthread; int idle; int (*fn)(void *); void *fn_arg; -- cgit v1.2.3-70-g09d2 From 54e6842d0775ba76db65cbe21311c3ca466e663d Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 10 Mar 2023 16:03:26 -0600 Subject: fork/vm: Move common PF_IO_WORKER behavior to new flag This adds a new flag, PF_USER_WORKER, that's used for behavior common to to both PF_IO_WORKER and users like vhost which will use a new helper instead of create_io_thread because they require different behavior for operations like signal handling. The common behavior PF_USER_WORKER covers is the vm reclaim handling. Signed-off-by: Mike Christie Acked-by: Michael S. Tsirkin Signed-off-by: Christian Brauner (Microsoft) --- include/linux/sched.h | 2 +- include/linux/sched/task.h | 1 + kernel/fork.c | 3 +++ mm/vmscan.c | 4 ++-- 4 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux/sched/task.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 63d242164b1a..e1e605b1255b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1729,7 +1729,7 @@ extern struct pid *cad_pid; #define PF_MEMALLOC 0x00000800 /* Allocating memory */ #define PF_NPROC_EXCEEDED 0x00001000 /* set_user() noticed that RLIMIT_NPROC was exceeded */ #define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */ -#define PF__HOLE__00004000 0x00004000 +#define PF_USER_WORKER 0x00004000 /* Kernel thread cloned from userspace thread */ #define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ #define PF__HOLE__00010000 0x00010000 #define PF_KSWAPD 0x00020000 /* I am kswapd */ diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 268c77a42155..2950e83d5382 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -27,6 +27,7 @@ struct kernel_clone_args { int exit_signal; u32 kthread:1; u32 io_thread:1; + u32 user_worker:1; unsigned long stack; unsigned long stack_size; unsigned long tls; diff --git a/kernel/fork.c b/kernel/fork.c index 81dfb2532148..fb8ec192c199 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2103,6 +2103,8 @@ static __latent_entropy struct task_struct *copy_process( p->flags &= ~PF_KTHREAD; if (args->kthread) p->flags |= PF_KTHREAD; + if (args->user_worker) + p->flags |= PF_USER_WORKER; if (args->io_thread) { /* * Mark us an IO worker, and block any signal that isn't @@ -2630,6 +2632,7 @@ struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node) .fn = fn, .fn_arg = arg, .io_thread = 1, + .user_worker = 1, }; return copy_process(NULL, 0, node, &args); diff --git a/mm/vmscan.c b/mm/vmscan.c index 9c1c5e8b24b8..7ba6bfdd9a5f 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1151,12 +1151,12 @@ void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason) DEFINE_WAIT(wait); /* - * Do not throttle IO workers, kthreads other than kswapd or + * Do not throttle user workers, kthreads other than kswapd or * workqueues. They may be required for reclaim to make * forward progress (e.g. journalling workqueues or kthreads). */ if (!current_is_kswapd() && - current->flags & (PF_IO_WORKER|PF_KTHREAD)) { + current->flags & (PF_USER_WORKER|PF_KTHREAD)) { cond_resched(); return; } -- cgit v1.2.3-70-g09d2 From 11f3f500ec8a75c96087f3bed87aa2b1c5de7498 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 10 Mar 2023 16:03:27 -0600 Subject: fork: add kernel_clone_args flag to not dup/clone files Each vhost device gets a thread that is used to perform IO and management operations. Instead of a thread that is accessing a device, the thread is part of the device, so when it creates a thread using a helper based on copy_process we can't dup or clone the parent's files/FDS because it would do an extra increment on ourself. Later, when we do: Qemu process exits: do_exit -> exit_files -> put_files_struct -> close_files we would leak the device's resources because of that extra refcount on the fd or file_struct. This patch adds a no_files option so these worker threads can prevent taking an extra refcount on themselves. Signed-off-by: Mike Christie Acked-by: Christian Brauner Acked-by: Michael S. Tsirkin Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner (Microsoft) --- include/linux/sched/task.h | 1 + kernel/fork.c | 10 ++++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux/sched/task.h') diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 2950e83d5382..4f816048794f 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -28,6 +28,7 @@ struct kernel_clone_args { u32 kthread:1; u32 io_thread:1; u32 user_worker:1; + u32 no_files:1; unsigned long stack; unsigned long stack_size; unsigned long tls; diff --git a/kernel/fork.c b/kernel/fork.c index fb8ec192c199..f8d530f4406e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1627,7 +1627,8 @@ static int copy_fs(unsigned long clone_flags, struct task_struct *tsk) return 0; } -static int copy_files(unsigned long clone_flags, struct task_struct *tsk) +static int copy_files(unsigned long clone_flags, struct task_struct *tsk, + int no_files) { struct files_struct *oldf, *newf; int error = 0; @@ -1639,6 +1640,11 @@ static int copy_files(unsigned long clone_flags, struct task_struct *tsk) if (!oldf) goto out; + if (no_files) { + tsk->files = NULL; + goto out; + } + if (clone_flags & CLONE_FILES) { atomic_inc(&oldf->count); goto out; @@ -2259,7 +2265,7 @@ static __latent_entropy struct task_struct *copy_process( retval = copy_semundo(clone_flags, p); if (retval) goto bad_fork_cleanup_security; - retval = copy_files(clone_flags, p); + retval = copy_files(clone_flags, p, args->no_files); if (retval) goto bad_fork_cleanup_semundo; retval = copy_fs(clone_flags, p); -- cgit v1.2.3-70-g09d2 From 094717586bf71ac20ae3b240d2654d826634b21e Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 10 Mar 2023 16:03:28 -0600 Subject: fork: Add kernel_clone_args flag to ignore signals Since: commit 10ab825bdef8 ("change kernel threads to ignore signals instead of blocking them") kthreads have been ignoring signals by default, and the vhost layer has never had a need to change that. This patch adds an option flag, USER_WORKER_SIG_IGN, handled in copy_process() after copy_sighand() and copy_signals() so vhost_tasks added in the next patches can continue to ignore singals. Signed-off-by: Christian Brauner Signed-off-by: Mike Christie Acked-by: Michael S. Tsirkin Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner (Microsoft) --- include/linux/sched/task.h | 1 + kernel/fork.c | 3 +++ 2 files changed, 4 insertions(+) (limited to 'include/linux/sched/task.h') diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 4f816048794f..00c54bfac0b5 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -29,6 +29,7 @@ struct kernel_clone_args { u32 io_thread:1; u32 user_worker:1; u32 no_files:1; + u32 ignore_signals:1; unsigned long stack; unsigned long stack_size; unsigned long tls; diff --git a/kernel/fork.c b/kernel/fork.c index f8d530f4406e..2918c685ede5 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2290,6 +2290,9 @@ static __latent_entropy struct task_struct *copy_process( if (retval) goto bad_fork_cleanup_io; + if (args->ignore_signals) + ignore_signals(p); + stackleak_task_init(p); if (pid != &init_struct_pid) { -- cgit v1.2.3-70-g09d2 From 89c8e98d8cfb0656dbeb648572df5b13e372247d Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 10 Mar 2023 16:03:29 -0600 Subject: fork: allow kernel code to call copy_process The next patch adds helpers like create_io_thread, but for use by the vhost layer. There are several functions, so they are in their own file instead of cluttering up fork.c. This patch allows that new file to call copy_process. Signed-off-by: Mike Christie Acked-by: Michael S. Tsirkin Signed-off-by: Christian Brauner (Microsoft) --- include/linux/sched/task.h | 2 ++ kernel/fork.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux/sched/task.h') diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 00c54bfac0b5..537cbf9a2ade 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -93,6 +93,8 @@ extern void exit_files(struct task_struct *); extern void exit_itimers(struct task_struct *); extern pid_t kernel_clone(struct kernel_clone_args *kargs); +struct task_struct *copy_process(struct pid *pid, int trace, int node, + struct kernel_clone_args *args); struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node); struct task_struct *fork_idle(int); extern pid_t kernel_thread(int (*fn)(void *), void *arg, const char *name, diff --git a/kernel/fork.c b/kernel/fork.c index 2918c685ede5..3363f284c6cd 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2016,7 +2016,7 @@ static void rv_task_fork(struct task_struct *p) * parts of the process environment (as per the clone * flags). The actual kick-off is left to the caller. */ -static __latent_entropy struct task_struct *copy_process( +__latent_entropy struct task_struct *copy_process( struct pid *pid, int trace, int node, -- cgit v1.2.3-70-g09d2 From f9010dbdce911ee1f1af1398a24b1f9f992e0080 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 1 Jun 2023 13:32:32 -0500 Subject: fork, vhost: Use CLONE_THREAD to fix freezer/ps regression When switching from kthreads to vhost_tasks two bugs were added: 1. The vhost worker tasks's now show up as processes so scripts doing ps or ps a would not incorrectly detect the vhost task as another process. 2. kthreads disabled freeze by setting PF_NOFREEZE, but vhost tasks's didn't disable or add support for them. To fix both bugs, this switches the vhost task to be thread in the process that does the VHOST_SET_OWNER ioctl, and has vhost_worker call get_signal to support SIGKILL/SIGSTOP and freeze signals. Note that SIGKILL/STOP support is required because CLONE_THREAD requires CLONE_SIGHAND which requires those 2 signals to be supported. This is a modified version of the patch written by Mike Christie which was a modified version of patch originally written by Linus. Much of what depended upon PF_IO_WORKER now depends on PF_USER_WORKER. Including ignoring signals, setting up the register state, and having get_signal return instead of calling do_group_exit. Tidied up the vhost_task abstraction so that the definition of vhost_task only needs to be visible inside of vhost_task.c. Making it easier to review the code and tell what needs to be done where. As part of this the main loop has been moved from vhost_worker into vhost_task_fn. vhost_worker now returns true if work was done. The main loop has been updated to call get_signal which handles SIGSTOP, freezing, and collects the message that tells the thread to exit as part of process exit. This collection clears __fatal_signal_pending. This collection is not guaranteed to clear signal_pending() so clear that explicitly so the schedule() sleeps. For now the vhost thread continues to exist and run work until the last file descriptor is closed and the release function is called as part of freeing struct file. To avoid hangs in the coredump rendezvous and when killing threads in a multi-threaded exec. The coredump code and de_thread have been modified to ignore vhost threads. Remvoing the special case for exec appears to require teaching vhost_dev_flush how to directly complete transactions in case the vhost thread is no longer running. Removing the special case for coredump rendezvous requires either the above fix needed for exec or moving the coredump rendezvous into get_signal. Fixes: 6e890c5d5021 ("vhost: use vhost_tasks for worker threads") Signed-off-by: Eric W. Biederman Co-developed-by: Mike Christie Signed-off-by: Mike Christie Acked-by: Michael S. Tsirkin Signed-off-by: Linus Torvalds --- arch/x86/include/asm/fpu/sched.h | 2 +- arch/x86/kernel/fpu/context.h | 2 +- arch/x86/kernel/fpu/core.c | 2 +- drivers/vhost/vhost.c | 22 +++------- fs/coredump.c | 4 +- include/linux/sched/task.h | 1 - include/linux/sched/vhost_task.h | 15 ++----- kernel/exit.c | 5 ++- kernel/fork.c | 13 +++--- kernel/signal.c | 8 ++-- kernel/vhost_task.c | 92 ++++++++++++++++++++++++++-------------- 11 files changed, 89 insertions(+), 77 deletions(-) (limited to 'include/linux/sched/task.h') diff --git a/arch/x86/include/asm/fpu/sched.h b/arch/x86/include/asm/fpu/sched.h index c2d6cd78ed0c..78fcde7b1f07 100644 --- a/arch/x86/include/asm/fpu/sched.h +++ b/arch/x86/include/asm/fpu/sched.h @@ -39,7 +39,7 @@ extern void fpu_flush_thread(void); static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) { if (cpu_feature_enabled(X86_FEATURE_FPU) && - !(current->flags & (PF_KTHREAD | PF_IO_WORKER))) { + !(current->flags & (PF_KTHREAD | PF_USER_WORKER))) { save_fpregs_to_fpstate(old_fpu); /* * The save operation preserved register state, so the diff --git a/arch/x86/kernel/fpu/context.h b/arch/x86/kernel/fpu/context.h index 9fcfa5c4dad7..af5cbdd9bd29 100644 --- a/arch/x86/kernel/fpu/context.h +++ b/arch/x86/kernel/fpu/context.h @@ -57,7 +57,7 @@ static inline void fpregs_restore_userregs(void) struct fpu *fpu = ¤t->thread.fpu; int cpu = smp_processor_id(); - if (WARN_ON_ONCE(current->flags & (PF_KTHREAD | PF_IO_WORKER))) + if (WARN_ON_ONCE(current->flags & (PF_KTHREAD | PF_USER_WORKER))) return; if (!fpregs_state_valid(fpu, cpu)) { diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index caf33486dc5e..1015af1ae562 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -426,7 +426,7 @@ void kernel_fpu_begin_mask(unsigned int kfpu_mask) this_cpu_write(in_kernel_fpu, true); - if (!(current->flags & (PF_KTHREAD | PF_IO_WORKER)) && + if (!(current->flags & (PF_KTHREAD | PF_USER_WORKER)) && !test_thread_flag(TIF_NEED_FPU_LOAD)) { set_thread_flag(TIF_NEED_FPU_LOAD); save_fpregs_to_fpstate(¤t->thread.fpu); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index a92af08e7864..074273020849 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -256,7 +256,7 @@ void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work) * test_and_set_bit() implies a memory barrier. */ llist_add(&work->node, &dev->worker->work_list); - wake_up_process(dev->worker->vtsk->task); + vhost_task_wake(dev->worker->vtsk); } } EXPORT_SYMBOL_GPL(vhost_work_queue); @@ -333,31 +333,19 @@ static void vhost_vq_reset(struct vhost_dev *dev, __vhost_vq_meta_reset(vq); } -static int vhost_worker(void *data) +static bool vhost_worker(void *data) { struct vhost_worker *worker = data; struct vhost_work *work, *work_next; struct llist_node *node; - for (;;) { - /* mb paired w/ kthread_stop */ - set_current_state(TASK_INTERRUPTIBLE); - - if (vhost_task_should_stop(worker->vtsk)) { - __set_current_state(TASK_RUNNING); - break; - } - - node = llist_del_all(&worker->work_list); - if (!node) - schedule(); - + node = llist_del_all(&worker->work_list); + if (node) { node = llist_reverse_order(node); /* make sure flag is seen after deletion */ smp_wmb(); llist_for_each_entry_safe(work, work_next, node, node) { clear_bit(VHOST_WORK_QUEUED, &work->flags); - __set_current_state(TASK_RUNNING); kcov_remote_start_common(worker->kcov_handle); work->fn(work); kcov_remote_stop(); @@ -365,7 +353,7 @@ static int vhost_worker(void *data) } } - return 0; + return !!node; } static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq) diff --git a/fs/coredump.c b/fs/coredump.c index ece7badf701b..88740c51b942 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -371,7 +371,9 @@ static int zap_process(struct task_struct *start, int exit_code) if (t != current && !(t->flags & PF_POSTCOREDUMP)) { sigaddset(&t->pending.signal, SIGKILL); signal_wake_up(t, 1); - nr++; + /* The vhost_worker does not particpate in coredumps */ + if ((t->flags & (PF_USER_WORKER | PF_IO_WORKER)) != PF_USER_WORKER) + nr++; } } diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 537cbf9a2ade..e0f5ac90a228 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -29,7 +29,6 @@ struct kernel_clone_args { u32 io_thread:1; u32 user_worker:1; u32 no_files:1; - u32 ignore_signals:1; unsigned long stack; unsigned long stack_size; unsigned long tls; diff --git a/include/linux/sched/vhost_task.h b/include/linux/sched/vhost_task.h index 6123c10b99cf..837a23624a66 100644 --- a/include/linux/sched/vhost_task.h +++ b/include/linux/sched/vhost_task.h @@ -2,22 +2,13 @@ #ifndef _LINUX_VHOST_TASK_H #define _LINUX_VHOST_TASK_H -#include -struct task_struct; +struct vhost_task; -struct vhost_task { - int (*fn)(void *data); - void *data; - struct completion exited; - unsigned long flags; - struct task_struct *task; -}; - -struct vhost_task *vhost_task_create(int (*fn)(void *), void *arg, +struct vhost_task *vhost_task_create(bool (*fn)(void *), void *arg, const char *name); void vhost_task_start(struct vhost_task *vtsk); void vhost_task_stop(struct vhost_task *vtsk); -bool vhost_task_should_stop(struct vhost_task *vtsk); +void vhost_task_wake(struct vhost_task *vtsk); #endif diff --git a/kernel/exit.c b/kernel/exit.c index 34b90e2e7cf7..edb50b4c9972 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -411,7 +411,10 @@ static void coredump_task_exit(struct task_struct *tsk) tsk->flags |= PF_POSTCOREDUMP; core_state = tsk->signal->core_state; spin_unlock_irq(&tsk->sighand->siglock); - if (core_state) { + + /* The vhost_worker does not particpate in coredumps */ + if (core_state && + ((tsk->flags & (PF_IO_WORKER | PF_USER_WORKER)) != PF_USER_WORKER)) { struct core_thread self; self.task = current; diff --git a/kernel/fork.c b/kernel/fork.c index ed4e01daccaa..81cba91f30bb 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2336,16 +2336,16 @@ __latent_entropy struct task_struct *copy_process( p->flags &= ~PF_KTHREAD; if (args->kthread) p->flags |= PF_KTHREAD; - if (args->user_worker) - p->flags |= PF_USER_WORKER; - if (args->io_thread) { + if (args->user_worker) { /* - * Mark us an IO worker, and block any signal that isn't + * Mark us a user worker, and block any signal that isn't * fatal or STOP */ - p->flags |= PF_IO_WORKER; + p->flags |= PF_USER_WORKER; siginitsetinv(&p->blocked, sigmask(SIGKILL)|sigmask(SIGSTOP)); } + if (args->io_thread) + p->flags |= PF_IO_WORKER; if (args->name) strscpy_pad(p->comm, args->name, sizeof(p->comm)); @@ -2517,9 +2517,6 @@ __latent_entropy struct task_struct *copy_process( if (retval) goto bad_fork_cleanup_io; - if (args->ignore_signals) - ignore_signals(p); - stackleak_task_init(p); if (pid != &init_struct_pid) { diff --git a/kernel/signal.c b/kernel/signal.c index 8f6330f0e9ca..2547fa73bde5 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1368,7 +1368,9 @@ int zap_other_threads(struct task_struct *p) while_each_thread(p, t) { task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); - count++; + /* Don't require de_thread to wait for the vhost_worker */ + if ((t->flags & (PF_IO_WORKER | PF_USER_WORKER)) != PF_USER_WORKER) + count++; /* Don't bother with already dead threads */ if (t->exit_state) @@ -2861,11 +2863,11 @@ relock: } /* - * PF_IO_WORKER threads will catch and exit on fatal signals + * PF_USER_WORKER threads will catch and exit on fatal signals * themselves. They have cleanup that must be performed, so * we cannot call do_exit() on their behalf. */ - if (current->flags & PF_IO_WORKER) + if (current->flags & PF_USER_WORKER) goto out; /* diff --git a/kernel/vhost_task.c b/kernel/vhost_task.c index b7cbd66f889e..f80d5c51ae67 100644 --- a/kernel/vhost_task.c +++ b/kernel/vhost_task.c @@ -12,58 +12,88 @@ enum vhost_task_flags { VHOST_TASK_FLAGS_STOP, }; +struct vhost_task { + bool (*fn)(void *data); + void *data; + struct completion exited; + unsigned long flags; + struct task_struct *task; +}; + static int vhost_task_fn(void *data) { struct vhost_task *vtsk = data; - int ret; + bool dead = false; + + for (;;) { + bool did_work; + + /* mb paired w/ vhost_task_stop */ + if (test_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags)) + break; + + if (!dead && signal_pending(current)) { + struct ksignal ksig; + /* + * Calling get_signal will block in SIGSTOP, + * or clear fatal_signal_pending, but remember + * what was set. + * + * This thread won't actually exit until all + * of the file descriptors are closed, and + * the release function is called. + */ + dead = get_signal(&ksig); + if (dead) + clear_thread_flag(TIF_SIGPENDING); + } + + did_work = vtsk->fn(vtsk->data); + if (!did_work) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + } + } - ret = vtsk->fn(vtsk->data); complete(&vtsk->exited); - do_exit(ret); + do_exit(0); +} + +/** + * vhost_task_wake - wakeup the vhost_task + * @vtsk: vhost_task to wake + * + * wake up the vhost_task worker thread + */ +void vhost_task_wake(struct vhost_task *vtsk) +{ + wake_up_process(vtsk->task); } +EXPORT_SYMBOL_GPL(vhost_task_wake); /** * vhost_task_stop - stop a vhost_task * @vtsk: vhost_task to stop * - * Callers must call vhost_task_should_stop and return from their worker - * function when it returns true; + * vhost_task_fn ensures the worker thread exits after + * VHOST_TASK_FLAGS_SOP becomes true. */ void vhost_task_stop(struct vhost_task *vtsk) { - pid_t pid = vtsk->task->pid; - set_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags); - wake_up_process(vtsk->task); + vhost_task_wake(vtsk); /* * Make sure vhost_task_fn is no longer accessing the vhost_task before - * freeing it below. If userspace crashed or exited without closing, - * then the vhost_task->task could already be marked dead so - * kernel_wait will return early. + * freeing it below. */ wait_for_completion(&vtsk->exited); - /* - * If we are just closing/removing a device and the parent process is - * not exiting then reap the task. - */ - kernel_wait4(pid, NULL, __WCLONE, NULL); kfree(vtsk); } EXPORT_SYMBOL_GPL(vhost_task_stop); /** - * vhost_task_should_stop - should the vhost task return from the work function - * @vtsk: vhost_task to stop - */ -bool vhost_task_should_stop(struct vhost_task *vtsk) -{ - return test_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags); -} -EXPORT_SYMBOL_GPL(vhost_task_should_stop); - -/** - * vhost_task_create - create a copy of a process to be used by the kernel - * @fn: thread stack + * vhost_task_create - create a copy of a task to be used by the kernel + * @fn: vhost worker function * @arg: data to be passed to fn * @name: the thread's name * @@ -71,17 +101,17 @@ EXPORT_SYMBOL_GPL(vhost_task_should_stop); * failure. The returned task is inactive, and the caller must fire it up * through vhost_task_start(). */ -struct vhost_task *vhost_task_create(int (*fn)(void *), void *arg, +struct vhost_task *vhost_task_create(bool (*fn)(void *), void *arg, const char *name) { struct kernel_clone_args args = { - .flags = CLONE_FS | CLONE_UNTRACED | CLONE_VM, + .flags = CLONE_FS | CLONE_UNTRACED | CLONE_VM | + CLONE_THREAD | CLONE_SIGHAND, .exit_signal = 0, .fn = vhost_task_fn, .name = name, .user_worker = 1, .no_files = 1, - .ignore_signals = 1, }; struct vhost_task *vtsk; struct task_struct *tsk; -- cgit v1.2.3-70-g09d2