summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorChristian Brauner <brauner@kernel.org>2024-11-27 12:45:02 +0100
committerChristian Brauner <brauner@kernel.org>2024-11-27 12:51:30 +0100
commit3b832035387ff508fdcf0fba66701afc78f79e3d (patch)
tree4369eadb61eb63f75302ddc5ad9bc36794f1a0e0 /kernel
parent7eef7e306d3c40a0c5b9ff6adc9b273cc894dbd5 (diff)
Revert "fs: don't block i_writecount during exec"
This reverts commit 2a010c41285345da60cece35575b4e0af7e7bf44. Rui Ueyama <rui314@gmail.com> writes: > I'm the creator and the maintainer of the mold linker > (https://github.com/rui314/mold). Recently, we discovered that mold > started causing process crashes in certain situations due to a change > in the Linux kernel. Here are the details: > > - In general, overwriting an existing file is much faster than > creating an empty file and writing to it on Linux, so mold attempts to > reuse an existing executable file if it exists. > > - If a program is running, opening the executable file for writing > previously failed with ETXTBSY. If that happens, mold falls back to > creating a new file. > > - However, the Linux kernel recently changed the behavior so that > writing to an executable file is now always permitted > (https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=2a010c412853). > > That caused mold to write to an executable file even if there's a > process running that file. Since changes to mmap'ed files are > immediately visible to other processes, any processes running that > file would almost certainly crash in a very mysterious way. > Identifying the cause of these random crashes took us a few days. > > Rejecting writes to an executable file that is currently running is a > well-known behavior, and Linux had operated that way for a very long > time. So, I don’t believe relying on this behavior was our mistake; > rather, I see this as a regression in the Linux kernel. Quoting myself from commit 2a010c412853 ("fs: don't block i_writecount during exec") > Yes, someone in userspace could potentially be relying on this. It's not > completely out of the realm of possibility but let's find out if that's > actually the case and not guess. It seems we found out that someone is relying on this obscure behavior. So revert the change. Link: https://github.com/rui314/mold/issues/1361 Link: https://lore.kernel.org/r/4a2bc207-76be-4715-8e12-7fc45a76a125@leemhuis.info Cc: <stable@vger.kernel.org> Signed-off-by: Christian Brauner <brauner@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/fork.c26
1 files changed, 23 insertions, 3 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index f253e81d0c28..1450b461d196 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -621,6 +621,12 @@ static void dup_mm_exe_file(struct mm_struct *mm, struct mm_struct *oldmm)
exe_file = get_mm_exe_file(oldmm);
RCU_INIT_POINTER(mm->exe_file, exe_file);
+ /*
+ * We depend on the oldmm having properly denied write access to the
+ * exe_file already.
+ */
+ if (exe_file && deny_write_access(exe_file))
+ pr_warn_once("deny_write_access() failed in %s\n", __func__);
}
#ifdef CONFIG_MMU
@@ -1413,11 +1419,20 @@ int set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
*/
old_exe_file = rcu_dereference_raw(mm->exe_file);
- if (new_exe_file)
+ if (new_exe_file) {
+ /*
+ * We expect the caller (i.e., sys_execve) to already denied
+ * write access, so this is unlikely to fail.
+ */
+ if (unlikely(deny_write_access(new_exe_file)))
+ return -EACCES;
get_file(new_exe_file);
+ }
rcu_assign_pointer(mm->exe_file, new_exe_file);
- if (old_exe_file)
+ if (old_exe_file) {
+ allow_write_access(old_exe_file);
fput(old_exe_file);
+ }
return 0;
}
@@ -1456,6 +1471,9 @@ int replace_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
return ret;
}
+ ret = deny_write_access(new_exe_file);
+ if (ret)
+ return -EACCES;
get_file(new_exe_file);
/* set the new file */
@@ -1464,8 +1482,10 @@ int replace_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
rcu_assign_pointer(mm->exe_file, new_exe_file);
mmap_write_unlock(mm);
- if (old_exe_file)
+ if (old_exe_file) {
+ allow_write_access(old_exe_file);
fput(old_exe_file);
+ }
return 0;
}