From 133e2d3e81de5d9706cab2dd1d52d231c27382e5 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sun, 12 Jun 2022 23:07:22 -0700 Subject: fs/exec: allow to unshare a time namespace on vfork+exec Right now, a new process can't be forked in another time namespace if it shares mm with its parent. It is prohibited, because each time namespace has its own vvar page that is mapped into a process address space. When a process calls exec, it gets a new mm and so it could be "legal" to switch time namespace in that case. This was not implemented and now if we want to do this, we need to add another clone flag to not break backward compatibility. We don't have any user requests to switch times on exec except the vfork+exec combination, so there is no reason to add a new clone flag. As for vfork+exec, this should be safe to allow switching timens with the current clone flag. Right now, vfork (CLONE_VFORK | CLONE_VM) fails if a child is forked into another time namespace. With this change, vfork creates a new process in parent's timens, and the following exec does the actual switch to the target time namespace. 
Suggested-by: Florian Weimer Signed-off-by: Andrei Vagin Acked-by: Christian Brauner (Microsoft) Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220613060723.197407-1-avagin@gmail.com --- fs/exec.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 0989fb8472a1..347e8f55bc2b 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -65,6 +65,7 @@ #include <linux/io_uring.h> #include <linux/syscall_user_dispatch.h> #include <linux/coredump.h> +#include <linux/time_namespace.h> #include <linux/uaccess.h> #include <asm/mmu_context.h> @@ -982,10 +983,12 @@ static int exec_mmap(struct mm_struct *mm) { struct task_struct *tsk; struct mm_struct *old_mm, *active_mm; + bool vfork; int ret; /* Notify parent that we're no longer interested in the old VM */ tsk = current; + vfork = !!tsk->vfork_done; old_mm = current->mm; exec_mm_release(tsk, old_mm); if (old_mm) @@ -1030,6 +1033,10 @@ static int exec_mmap(struct mm_struct *mm) tsk->mm->vmacache_seqnum = 0; vmacache_flush(tsk); task_unlock(tsk); + + if (vfork) + timens_on_fork(tsk->nsproxy, tsk); + if (old_mm) { mmap_read_unlock(old_mm); BUG_ON(active_mm != old_mm); -- cgit v1.2.3-70-g09d2 From 5036793d7dbd0b14aec51526441a50b01c7bf66d Mon Sep 17 00:00:00 2001 From: Zhang Jiaming Date: Wed, 29 Jun 2022 15:29:32 +0800 Subject: exec: Fix a spelling mistake Change 'wont't' to 'won't'. Signed-off-by: Zhang Jiaming Reviewed-by: Souptick Joarder (HPE) Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220629072932.27506-1-jiaming@nfschina.com --- fs/exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 347e8f55bc2b..0fc56c70c870 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1156,7 +1156,7 @@ static int de_thread(struct task_struct *tsk) /* * We are going to release_task()->ptrace_unlink() silently, * the tracer can sleep in do_wait(). EXIT_DEAD guarantees - * the tracer wont't block again waiting for this thread. + * the tracer won't block again waiting for this thread. 
*/ if (unlikely(leader->ptrace)) __wake_up_parent(leader, leader->parent); -- cgit v1.2.3-70-g09d2 From c6e8e36c6ae4b11bed5643317afb66b6c3cadba8 Mon Sep 17 00:00:00 2001 From: "Fabio M. De Francesco" Date: Sun, 24 Jul 2022 23:25:23 +0200 Subject: exec: Call kmap_local_page() in copy_string_kernel() The use of kmap_atomic() is being deprecated in favor of kmap_local_page(). With kmap_local_page(), the mappings are per thread, CPU local and not globally visible. Furthermore, the mappings can be acquired from any context (including interrupts). Therefore, replace kmap_atomic() with kmap_local_page() in copy_string_kernel(). Instead of open-coding local mapping + memcpy(), use memcpy_to_page(). Delete a redundant call to flush_dcache_page(). Tested with xfstests on a QEMU/ KVM x86_32 VM, 6GB RAM, booting a kernel with HIGHMEM64GB enabled. Suggested-by: Ira Weiny Signed-off-by: Fabio M. De Francesco Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220724212523.13317-1-fmdefrancesco@gmail.com --- fs/exec.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 0fc56c70c870..7e842d1bcb67 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -631,7 +631,6 @@ int copy_string_kernel(const char *arg, struct linux_binprm *bprm) unsigned int bytes_to_copy = min_t(unsigned int, len, min_not_zero(offset_in_page(pos), PAGE_SIZE)); struct page *page; - char *kaddr; pos -= bytes_to_copy; arg -= bytes_to_copy; @@ -640,11 +639,8 @@ int copy_string_kernel(const char *arg, struct linux_binprm *bprm) page = get_arg_page(bprm, pos, 1); if (!page) return -E2BIG; - kaddr = kmap_atomic(page); flush_arg_page(bprm, pos & PAGE_MASK, page); - memcpy(kaddr + offset_in_page(pos), arg, bytes_to_copy); - flush_dcache_page(page); - kunmap_atomic(kaddr); + memcpy_to_page(page, offset_in_page(pos), arg, bytes_to_copy); put_arg_page(page); } -- cgit v1.2.3-70-g09d2