diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-09-04 11:35:47 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-09-04 11:35:47 -0700 |
commit | 49624efa65ac9889f4e7c7b2452b2e6ce42ba37d (patch) | |
tree | 2ff7f446bf5d8459efc05d8a90d391a7e0452e92 /include | |
parent | f7464060f7ab9a2424428008f0ee9f1e267e410f (diff) | |
parent | 592ca09be8333bd226f50100328a905bfc377133 (diff) |
Merge tag 'denywrite-for-5.15' of git://github.com/davidhildenbrand/linux
Pull MAP_DENYWRITE removal from David Hildenbrand:
"Remove all in-tree usage of MAP_DENYWRITE from the kernel and remove
VM_DENYWRITE.
There are some (minor) user-visible changes:
- We no longer deny write access to shared libaries loaded via legacy
uselib(); this behavior matches modern user space e.g. dlopen().
- We no longer deny write access to the elf interpreter after exec
completed, treating it just like shared libraries (which it often
is).
- We always deny write access to the file linked via /proc/pid/exe:
sys_prctl(PR_SET_MM_MAP/EXE_FILE) will fail if write access to the
file cannot be denied, and write access to the file will remain
denied until the link is effectivel gone (exec, termination,
sys_prctl(PR_SET_MM_MAP/EXE_FILE)) -- just as if exec'ing the file.
Cross-compiled for a bunch of architectures (alpha, microblaze, i386,
s390x, ...) and verified via ltp that especially the relevant tests
(i.e., creat07 and execve04) continue working as expected"
* tag 'denywrite-for-5.15' of git://github.com/davidhildenbrand/linux:
fs: update documentation of get_write_access() and friends
mm: ignore MAP_DENYWRITE in ksys_mmap_pgoff()
mm: remove VM_DENYWRITE
binfmt: remove in-tree usage of MAP_DENYWRITE
kernel/fork: always deny write access to current MM exe_file
kernel/fork: factor out replacing the current MM exe_file
binfmt: don't use MAP_DENYWRITE when loading shared libraries via uselib()
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/fs.h | 19 | ||||
-rw-r--r-- | include/linux/mm.h | 4 | ||||
-rw-r--r-- | include/linux/mman.h | 4 | ||||
-rw-r--r-- | include/trace/events/mmflags.h | 1 |
4 files changed, 16 insertions, 12 deletions
diff --git a/include/linux/fs.h b/include/linux/fs.h index a6074cdb7798..37ad9a730a89 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3023,15 +3023,20 @@ static inline void file_end_write(struct file *file) } /* + * This is used for regular files where some users -- especially the + * currently executed binary in a process, previously handled via + * VM_DENYWRITE -- cannot handle concurrent write (and maybe mmap + * read-write shared) accesses. + * * get_write_access() gets write permission for a file. * put_write_access() releases this write permission. - * This is used for regular files. - * We cannot support write (and maybe mmap read-write shared) accesses and - * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode - * can have the following values: - * 0: no writers, no VM_DENYWRITE mappings - * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist - * > 0: (i_writecount) users are writing to the file. + * deny_write_access() denies write access to a file. + * allow_write_access() re-enables write access to a file. + * + * The i_writecount field of an inode can have the following values: + * 0: no write access, no denied write access + * < 0: (-i_writecount) users that denied write access to the file. + * > 0: (i_writecount) users that have write access to the file. * * Normally we operate on that counter with atomic_{inc,dec} and it's safe * except for the cases where we don't hold i_writecount yet. Then we need to diff --git a/include/linux/mm.h b/include/linux/mm.h index ed2552c9aeca..50e2c2914ac2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -281,7 +281,6 @@ extern unsigned int kobjsize(const void *objp); #define VM_GROWSDOWN 0x00000100 /* general info on the segment */ #define VM_UFFD_MISSING 0x00000200 /* missing pages tracking */ #define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */ -#define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */ #define VM_UFFD_WP 0x00001000 /* wrprotect pages tracking */ #define VM_LOCKED 0x00002000 @@ -2573,7 +2572,8 @@ static inline int check_data_rlimit(unsigned long rlim, extern int mm_take_all_locks(struct mm_struct *mm); extern void mm_drop_all_locks(struct mm_struct *mm); -extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file); +extern int set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file); +extern int replace_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file); extern struct file *get_mm_exe_file(struct mm_struct *mm); extern struct file *get_task_exe_file(struct task_struct *task); diff --git a/include/linux/mman.h b/include/linux/mman.h index ebb09a964272..b66e91b8176c 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -32,7 +32,8 @@ * The historical set of flags that all mmap implementations implicitly * support when a ->mmap_validate() op is not provided in file_operations. * - * MAP_EXECUTABLE is completely ignored throughout the kernel. + * MAP_EXECUTABLE and MAP_DENYWRITE are completely ignored throughout the + * kernel. */ #define LEGACY_MAP_MASK (MAP_SHARED \ | MAP_PRIVATE \ @@ -153,7 +154,6 @@ static inline unsigned long calc_vm_flag_bits(unsigned long flags) { return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) | - _calc_vm_trans(flags, MAP_DENYWRITE, VM_DENYWRITE ) | _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) | _calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) | arch_calc_vm_flag_bits(flags); diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index f160484afc5c..0b53e855c4ac 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -165,7 +165,6 @@ IF_HAVE_PG_SKIP_KASAN_POISON(PG_skip_kasan_poison, "skip_kasan_poison") {VM_UFFD_MISSING, "uffd_missing" }, \ IF_HAVE_UFFD_MINOR(VM_UFFD_MINOR, "uffd_minor" ) \ {VM_PFNMAP, "pfnmap" }, \ - {VM_DENYWRITE, "denywrite" }, \ {VM_UFFD_WP, "uffd_wp" }, \ {VM_LOCKED, "locked" }, \ {VM_IO, "io" }, \ |