From a41e2ab08ed62fffc81f71a9bc9c642495a52308 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 6 Mar 2018 22:18:04 +0100 Subject: x86/entry: Remove stale syscall prototype sys32_vm86_warning() is long gone. Signed-off-by: Dominik Brodowski Acked-by: Linus Torvalds Acked-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: luto@amacapital.net Cc: viro@zeniv.linux.org.uk Signed-off-by: Ingo Molnar --- arch/x86/include/asm/sys_ia32.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index 82c34ee25a65..43d59cae9eb1 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -37,7 +37,6 @@ asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32); asmlinkage long sys32_pwrite(unsigned int, const char __user *, u32, u32, u32); long sys32_fadvise64_64(int, __u32, __u32, __u32, __u32, int); -long sys32_vm86_warning(void); asmlinkage ssize_t sys32_readahead(int, unsigned, unsigned, size_t); asmlinkage long sys32_sync_file_range(int, unsigned, unsigned, -- cgit v1.2.3-70-g09d2 From 4ddb45db30851b2269101ee8969f079b028dd257 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 6 Mar 2018 22:18:07 +0100 Subject: x86/syscalls: Use COMPAT_SYSCALL_DEFINEx() macros for x86-only compat syscalls While at it, convert declarations of type "unsigned" to "unsigned int". Signed-off-by: Dominik Brodowski Acked-by: Linus Torvalds Acked-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: luto@amacapital.net Cc: viro@zeniv.linux.org.uk Signed-off-by: Ingo Molnar --- arch/x86/entry/syscalls/syscall_32.tbl | 30 ++++++++-------- arch/x86/ia32/sys_ia32.c | 63 ++++++++++++++++++---------------- arch/x86/include/asm/sys_ia32.h | 45 +++++++++++++++--------- 3 files changed, 76 insertions(+), 62 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index ad5e95a369e4..e7fd0a76bf99 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -13,7 +13,7 @@ 4 i386 write sys_write 5 i386 open sys_open compat_sys_open 6 i386 close sys_close -7 i386 waitpid sys_waitpid sys32_waitpid +7 i386 waitpid sys_waitpid compat_sys_x86_waitpid 8 i386 creat sys_creat 9 i386 link sys_link 10 i386 unlink sys_unlink @@ -96,7 +96,7 @@ 87 i386 swapon sys_swapon 88 i386 reboot sys_reboot 89 i386 readdir sys_old_readdir compat_sys_old_readdir -90 i386 mmap sys_old_mmap sys32_mmap +90 i386 mmap sys_old_mmap compat_sys_x86_mmap 91 i386 munmap sys_munmap 92 i386 truncate sys_truncate compat_sys_truncate 93 i386 ftruncate sys_ftruncate compat_sys_ftruncate @@ -186,8 +186,8 @@ 177 i386 rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait 178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo 179 i386 rt_sigsuspend sys_rt_sigsuspend -180 i386 pread64 sys_pread64 sys32_pread -181 i386 pwrite64 sys_pwrite64 sys32_pwrite +180 i386 pread64 sys_pread64 compat_sys_x86_pread +181 i386 pwrite64 sys_pwrite64 compat_sys_x86_pwrite 182 i386 chown sys_chown16 183 i386 getcwd sys_getcwd 184 i386 capget sys_capget @@ -199,11 +199,11 @@ 190 i386 vfork sys_vfork 191 i386 ugetrlimit sys_getrlimit compat_sys_getrlimit 192 i386 mmap2 sys_mmap_pgoff -193 i386 truncate64 sys_truncate64 sys32_truncate64 -194 i386 ftruncate64 sys_ftruncate64 sys32_ftruncate64 -195 i386 stat64 sys_stat64 sys32_stat64 -196 i386 lstat64 sys_lstat64 sys32_lstat64 -197 i386 fstat64 sys_fstat64 sys32_fstat64 +193 i386 truncate64 sys_truncate64 compat_sys_x86_truncate64 +194 i386 ftruncate64 sys_ftruncate64 compat_sys_x86_ftruncate64 +195 i386 stat64 sys_stat64 compat_sys_x86_stat64 +196 i386 lstat64 sys_lstat64 compat_sys_x86_lstat64 +197 i386 fstat64 sys_fstat64 compat_sys_x86_fstat64 198 i386 lchown32 sys_lchown 199 i386 getuid32 sys_getuid 200 i386 getgid32 sys_getgid @@ -231,7 +231,7 @@ # 222 is unused # 223 is unused 224 i386 gettid sys_gettid -225 i386 readahead sys_readahead sys32_readahead +225 i386 readahead sys_readahead compat_sys_x86_readahead 226 i386 setxattr sys_setxattr 227 i386 lsetxattr sys_lsetxattr 228 i386 fsetxattr sys_fsetxattr @@ -256,7 +256,7 @@ 247 i386 io_getevents sys_io_getevents compat_sys_io_getevents 248 i386 io_submit sys_io_submit compat_sys_io_submit 249 i386 io_cancel sys_io_cancel -250 i386 fadvise64 sys_fadvise64 sys32_fadvise64 +250 i386 fadvise64 sys_fadvise64 compat_sys_x86_fadvise64 # 251 is available for reuse (was briefly sys_set_zone_reclaim) 252 i386 exit_group sys_exit_group 253 i386 lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie @@ -278,7 +278,7 @@ 269 i386 fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 270 i386 tgkill sys_tgkill 271 i386 utimes sys_utimes compat_sys_utimes -272 i386 fadvise64_64 sys_fadvise64_64 sys32_fadvise64_64 +272 i386 fadvise64_64 sys_fadvise64_64 compat_sys_x86_fadvise64_64 273 i386 vserver 274 i386 mbind sys_mbind 275 i386 get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy @@ -306,7 +306,7 @@ 297 i386 mknodat sys_mknodat 298 i386 fchownat sys_fchownat 299 i386 futimesat sys_futimesat compat_sys_futimesat -300 i386 fstatat64 sys_fstatat64 sys32_fstatat +300 i386 fstatat64 sys_fstatat64 compat_sys_x86_fstatat 301 i386 unlinkat sys_unlinkat 302 i386 renameat sys_renameat 303 i386 linkat sys_linkat @@ -320,7 +320,7 @@ 311 i386 set_robust_list sys_set_robust_list compat_sys_set_robust_list 312 i386 get_robust_list sys_get_robust_list compat_sys_get_robust_list 313 i386 splice sys_splice -314 i386 sync_file_range sys_sync_file_range sys32_sync_file_range +314 i386 sync_file_range sys_sync_file_range compat_sys_x86_sync_file_range 315 i386 tee sys_tee 316 i386 vmsplice sys_vmsplice compat_sys_vmsplice 317 i386 move_pages sys_move_pages compat_sys_move_pages @@ -330,7 +330,7 @@ 321 i386 signalfd sys_signalfd compat_sys_signalfd 322 i386 timerfd_create sys_timerfd_create 323 i386 eventfd sys_eventfd -324 i386 fallocate sys_fallocate sys32_fallocate +324 i386 fallocate sys_fallocate compat_sys_x86_fallocate 325 i386 timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime 326 i386 timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime 327 i386 signalfd4 sys_signalfd4 compat_sys_signalfd4 diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 96cd33bbfc85..3bc03446ec44 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -51,15 +51,14 @@ #define AA(__x) ((unsigned long)(__x)) -asmlinkage long sys32_truncate64(const char __user *filename, - unsigned long offset_low, - unsigned long offset_high) +COMPAT_SYSCALL_DEFINE3(x86_truncate64, const char __user *, filename, + unsigned long, offset_low, unsigned long, offset_high) { return sys_truncate(filename, ((loff_t) offset_high << 32) | offset_low); } -asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long offset_low, - unsigned long offset_high) +COMPAT_SYSCALL_DEFINE3(x86_ftruncate64, unsigned int, fd, + unsigned long, offset_low, unsigned long, offset_high) { return sys_ftruncate(fd, ((loff_t) offset_high << 32) | offset_low); } @@ -96,8 +95,8 @@ static int cp_stat64(struct stat64 __user *ubuf, struct kstat *stat) return 0; } -asmlinkage long sys32_stat64(const char __user *filename, - struct stat64 __user *statbuf) +COMPAT_SYSCALL_DEFINE2(x86_stat64, const char __user *, filename, + struct stat64 __user *, statbuf) { struct kstat stat; int ret = vfs_stat(filename, &stat); @@ -107,8 +106,8 @@ asmlinkage long sys32_stat64(const char __user *filename, return ret; } -asmlinkage long sys32_lstat64(const char __user *filename, - struct stat64 __user *statbuf) +COMPAT_SYSCALL_DEFINE2(x86_lstat64, const char __user *, filename, + struct stat64 __user *, statbuf) { struct kstat stat; int ret = vfs_lstat(filename, &stat); @@ -117,7 +116,8 @@ asmlinkage long sys32_lstat64(const char __user *filename, return ret; } -asmlinkage long sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf) +COMPAT_SYSCALL_DEFINE2(x86_fstat64, unsigned int, fd, + struct stat64 __user *, statbuf) { struct kstat stat; int ret = vfs_fstat(fd, &stat); @@ -126,8 +126,9 @@ asmlinkage long sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf) return ret; } -asmlinkage long sys32_fstatat(unsigned int dfd, const char __user *filename, - struct stat64 __user *statbuf, int flag) +COMPAT_SYSCALL_DEFINE4(x86_fstatat, unsigned int, dfd, + const char __user *, filename, + struct stat64 __user *, statbuf, int, flag) { struct kstat stat; int error; @@ -153,7 +154,7 @@ struct mmap_arg_struct32 { unsigned int offset; }; -asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *arg) +COMPAT_SYSCALL_DEFINE1(x86_mmap, struct mmap_arg_struct32 __user *, arg) { struct mmap_arg_struct32 a; @@ -167,22 +168,22 @@ asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *arg) a.offset>>PAGE_SHIFT); } -asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int __user *stat_addr, - int options) +COMPAT_SYSCALL_DEFINE3(x86_waitpid, compat_pid_t, pid, unsigned int __user *, + stat_addr, int, options) { return compat_sys_wait4(pid, stat_addr, options, NULL); } /* warning: next two assume little endian */ -asmlinkage long sys32_pread(unsigned int fd, char __user *ubuf, u32 count, - u32 poslo, u32 poshi) +COMPAT_SYSCALL_DEFINE5(x86_pread, unsigned int, fd, char __user *, ubuf, + u32, count, u32, poslo, u32, poshi) { return sys_pread64(fd, ubuf, count, ((loff_t)AA(poshi) << 32) | AA(poslo)); } -asmlinkage long sys32_pwrite(unsigned int fd, const char __user *ubuf, - u32 count, u32 poslo, u32 poshi) +COMPAT_SYSCALL_DEFINE5(x86_pwrite, unsigned int, fd, const char __user *, ubuf, + u32, count, u32, poslo, u32, poshi) { return sys_pwrite64(fd, ubuf, count, ((loff_t)AA(poshi) << 32) | AA(poslo)); @@ -193,8 +194,9 @@ asmlinkage long sys32_pwrite(unsigned int fd, const char __user *ubuf, * Some system calls that need sign extended arguments. This could be * done by a generic wrapper. */ -long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high, - __u32 len_low, __u32 len_high, int advice) +COMPAT_SYSCALL_DEFINE6(x86_fadvise64_64, int, fd, __u32, offset_low, + __u32, offset_high, __u32, len_low, __u32, len_high, + int, advice) { return sys_fadvise64_64(fd, (((u64)offset_high)<<32) | offset_low, @@ -202,30 +204,31 @@ long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high, advice); } -asmlinkage ssize_t sys32_readahead(int fd, unsigned off_lo, unsigned off_hi, - size_t count) +COMPAT_SYSCALL_DEFINE4(x86_readahead, int, fd, unsigned int, off_lo, + unsigned int, off_hi, size_t, count) { return sys_readahead(fd, ((u64)off_hi << 32) | off_lo, count); } -asmlinkage long sys32_sync_file_range(int fd, unsigned off_low, unsigned off_hi, - unsigned n_low, unsigned n_hi, int flags) +COMPAT_SYSCALL_DEFINE6(x86_sync_file_range, int, fd, unsigned int, off_low, + unsigned int, off_hi, unsigned int, n_low, + unsigned int, n_hi, int, flags) { return sys_sync_file_range(fd, ((u64)off_hi << 32) | off_low, ((u64)n_hi << 32) | n_low, flags); } -asmlinkage long sys32_fadvise64(int fd, unsigned offset_lo, unsigned offset_hi, - size_t len, int advice) +COMPAT_SYSCALL_DEFINE5(x86_fadvise64, int, fd, unsigned int, offset_lo, + unsigned int, offset_hi, size_t, len, int, advice) { return sys_fadvise64_64(fd, ((u64)offset_hi << 32) | offset_lo, len, advice); } -asmlinkage long sys32_fallocate(int fd, int mode, unsigned offset_lo, - unsigned offset_hi, unsigned len_lo, - unsigned len_hi) +COMPAT_SYSCALL_DEFINE6(x86_fallocate, int, fd, int, mode, + unsigned int, offset_lo, unsigned int, offset_hi, + unsigned int, len_lo, unsigned int, len_hi) { return sys_fallocate(fd, mode, ((u64)offset_hi << 32) | offset_lo, ((u64)len_hi << 32) | len_lo); diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index 43d59cae9eb1..32831905d97a 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -20,30 +20,41 @@ #include /* ia32/sys_ia32.c */ -asmlinkage long sys32_truncate64(const char __user *, unsigned long, unsigned long); -asmlinkage long sys32_ftruncate64(unsigned int, unsigned long, unsigned long); +asmlinkage long compat_sys_x86_truncate64(const char __user *, unsigned long, + unsigned long); +asmlinkage long compat_sys_x86_ftruncate64(unsigned int, unsigned long, + unsigned long); -asmlinkage long sys32_stat64(const char __user *, struct stat64 __user *); -asmlinkage long sys32_lstat64(const char __user *, struct stat64 __user *); -asmlinkage long sys32_fstat64(unsigned int, struct stat64 __user *); -asmlinkage long sys32_fstatat(unsigned int, const char __user *, +asmlinkage long compat_sys_x86_stat64(const char __user *, + struct stat64 __user *); +asmlinkage long compat_sys_x86_lstat64(const char __user *, + struct stat64 __user *); +asmlinkage long compat_sys_x86_fstat64(unsigned int, struct stat64 __user *); +asmlinkage long compat_sys_x86_fstatat(unsigned int, const char __user *, struct stat64 __user *, int); struct mmap_arg_struct32; -asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *); +asmlinkage long compat_sys_x86_mmap(struct mmap_arg_struct32 __user *); -asmlinkage long sys32_waitpid(compat_pid_t, unsigned int __user *, int); +asmlinkage long compat_sys_x86_waitpid(compat_pid_t, unsigned int __user *, + int); -asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32); -asmlinkage long sys32_pwrite(unsigned int, const char __user *, u32, u32, u32); +asmlinkage long compat_sys_x86_pread(unsigned int, char __user *, u32, u32, + u32); +asmlinkage long compat_sys_x86_pwrite(unsigned int, const char __user *, u32, + u32, u32); -long sys32_fadvise64_64(int, __u32, __u32, __u32, __u32, int); +asmlinkage long compat_sys_x86_fadvise64_64(int, __u32, __u32, __u32, __u32, + int); -asmlinkage ssize_t sys32_readahead(int, unsigned, unsigned, size_t); -asmlinkage long sys32_sync_file_range(int, unsigned, unsigned, - unsigned, unsigned, int); -asmlinkage long sys32_fadvise64(int, unsigned, unsigned, size_t, int); -asmlinkage long sys32_fallocate(int, int, unsigned, - unsigned, unsigned, unsigned); +asmlinkage ssize_t compat_sys_x86_readahead(int, unsigned int, unsigned int, + size_t); +asmlinkage long compat_sys_x86_sync_file_range(int, unsigned int, unsigned int, + unsigned int, unsigned int, + int); +asmlinkage long compat_sys_x86_fadvise64(int, unsigned int, unsigned int, + size_t, int); +asmlinkage long compat_sys_x86_fallocate(int, int, unsigned int, unsigned int, + unsigned int, unsigned int); /* ia32/ia32_signal.c */ asmlinkage long sys32_sigreturn(void); -- cgit v1.2.3-70-g09d2 From af52201d991624d2d5adce2c123805b3d42a8d4d Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 6 Mar 2018 22:18:08 +0100 Subject: x86/entry: Do not special-case clone(2) in compat entry With the CPU renaming registers on its own, and all the overhead of the syscall entry/exit, it is doubtful whether the compiled output of mov %r8, %rax mov %rcx, %r8 mov %rax, %rcx jmpq sys_clone is measurably slower than the hand-crafted version of xchg %r8, %rcx So get rid of this special case. Signed-off-by: Dominik Brodowski Acked-by: Linus Torvalds Acked-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: luto@amacapital.net Cc: viro@zeniv.linux.org.uk Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64_compat.S | 12 ------------ arch/x86/entry/syscalls/syscall_32.tbl | 2 +- arch/x86/ia32/sys_ia32.c | 11 +++++++++++ arch/x86/include/asm/sys_ia32.h | 2 ++ 4 files changed, 14 insertions(+), 13 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index e811dd9c5e99..ff61b7bb750b 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -406,15 +406,3 @@ ENTRY(entry_INT80_compat) TRACE_IRQS_ON jmp swapgs_restore_regs_and_return_to_usermode END(entry_INT80_compat) - -ENTRY(stub32_clone) - /* - * The 32-bit clone ABI is: clone(..., int tls_val, int *child_tidptr). - * The 64-bit clone ABI is: clone(..., int *child_tidptr, int tls_val). - * - * The native 64-bit kernel's sys_clone() implements the latter, - * so we need to swap arguments here before calling it: - */ - xchg %r8, %rcx - jmp sys_clone -ENDPROC(stub32_clone) diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index e7fd0a76bf99..2a5e99cff859 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -126,7 +126,7 @@ 117 i386 ipc sys_ipc compat_sys_ipc 118 i386 fsync sys_fsync 119 i386 sigreturn sys_sigreturn sys32_sigreturn -120 i386 clone sys_clone stub32_clone +120 i386 clone sys_clone compat_sys_x86_clone 121 i386 setdomainname sys_setdomainname 122 i386 uname sys_newuname 123 i386 modify_ldt sys_modify_ldt diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 3bc03446ec44..6512498bbef6 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -233,3 +233,14 @@ COMPAT_SYSCALL_DEFINE6(x86_fallocate, int, fd, int, mode, return sys_fallocate(fd, mode, ((u64)offset_hi << 32) | offset_lo, ((u64)len_hi << 32) | len_lo); } + +/* + * The 32-bit clone ABI is CONFIG_CLONE_BACKWARDS + */ +COMPAT_SYSCALL_DEFINE5(x86_clone, unsigned long, clone_flags, + unsigned long, newsp, int __user *, parent_tidptr, + unsigned long, tls_val, int __user *, child_tidptr) +{ + return sys_clone(clone_flags, newsp, parent_tidptr, child_tidptr, + tls_val); +} diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index 32831905d97a..906794aa034e 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -55,6 +55,8 @@ asmlinkage long compat_sys_x86_fadvise64(int, unsigned int, unsigned int, size_t, int); asmlinkage long compat_sys_x86_fallocate(int, int, unsigned int, unsigned int, unsigned int, unsigned int); +asmlinkage long compat_sys_x86_clone(unsigned long, unsigned long, int __user *, + unsigned long, int __user *); /* ia32/ia32_signal.c */ asmlinkage long sys32_sigreturn(void); -- cgit v1.2.3-70-g09d2 From 076ca272a14cea558b1092ec85cea08510283f2a Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 7 Mar 2018 11:12:27 -0800 Subject: x86/vsyscall/64: Drop "native" vsyscalls Since Linux v3.2, vsyscalls have been deprecated and slow. From v3.2 on, Linux had three vsyscall modes: "native", "emulate", and "none". "emulate" is the default. All known user programs work correctly in emulate mode, but vsyscalls turn into page faults and are emulated. This is very slow. In "native" mode, the vsyscall page is easily usable as an exploit gadget, but vsyscalls are a bit faster -- they turn into normal syscalls. (This is in contrast to vDSO functions, which can be much faster than syscalls.) In "none" mode, there are no vsyscalls. For all practical purposes, "native" was really just a chicken bit in case something went wrong with the emulation. It's been over six years, and nothing has gone wrong. Delete it. Signed-off-by: Andy Lutomirski Acked-by: Kees Cook Acked-by: Linus Torvalds Cc: Borislav Petkov Cc: Dominik Brodowski Cc: Kernel Hardening Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/519fee5268faea09ae550776ce969fa6e88668b0.1520449896.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 11 +---------- arch/x86/entry/vsyscall/vsyscall_64.c | 16 +++------------- arch/x86/include/asm/pgtable_types.h | 2 -- tools/testing/selftests/x86/test_vsyscall.c | 11 ++++++----- 4 files changed, 10 insertions(+), 30 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c1aed6c0e413..09c599e0900d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2266,7 +2266,7 @@ choice it can be used to assist security vulnerability exploitation. This setting can be changed at boot time via the kernel command - line parameter vsyscall=[native|emulate|none]. + line parameter vsyscall=[emulate|none]. On a system with recent enough glibc (2.14 or newer) and no static binaries, you can say None without a performance penalty @@ -2274,15 +2274,6 @@ choice If unsure, select "Emulate". - config LEGACY_VSYSCALL_NATIVE - bool "Native" - help - Actual executable code is located in the fixed vsyscall - address mapping, implementing time() efficiently. Since - this makes the mapping executable, it can be used during - security vulnerability exploitation (traditionally as - ROP gadgets). This configuration is not recommended. - config LEGACY_VSYSCALL_EMULATE bool "Emulate" help diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 577fa8adb785..8560ef68a9d6 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -42,10 +42,8 @@ #define CREATE_TRACE_POINTS #include "vsyscall_trace.h" -static enum { EMULATE, NATIVE, NONE } vsyscall_mode = -#if defined(CONFIG_LEGACY_VSYSCALL_NATIVE) - NATIVE; -#elif defined(CONFIG_LEGACY_VSYSCALL_NONE) +static enum { EMULATE, NONE } vsyscall_mode = +#ifdef CONFIG_LEGACY_VSYSCALL_NONE NONE; #else EMULATE; @@ -56,8 +54,6 @@ static int __init vsyscall_setup(char *str) if (str) { if (!strcmp("emulate", str)) vsyscall_mode = EMULATE; - else if (!strcmp("native", str)) - vsyscall_mode = NATIVE; else if (!strcmp("none", str)) vsyscall_mode = NONE; else @@ -139,10 +135,6 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) WARN_ON_ONCE(address != regs->ip); - /* This should be unreachable in NATIVE mode. */ - if (WARN_ON(vsyscall_mode == NATIVE)) - return false; - if (vsyscall_mode == NONE) { warn_bad_vsyscall(KERN_INFO, regs, "vsyscall attempted with vsyscall=none"); @@ -370,9 +362,7 @@ void __init map_vsyscall(void) if (vsyscall_mode != NONE) { __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall, - vsyscall_mode == NATIVE - ? PAGE_KERNEL_VSYSCALL - : PAGE_KERNEL_VVAR); + PAGE_KERNEL_VVAR); set_vsyscall_pgtable_user_bits(swapper_pg_dir); } diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 246f15b4e64c..acfe755562a6 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -174,7 +174,6 @@ enum page_cache_mode { #define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW) #define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW) #define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_NOCACHE) -#define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER) #define __PAGE_KERNEL_VVAR (__PAGE_KERNEL_RO | _PAGE_USER) #define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) #define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) @@ -206,7 +205,6 @@ enum page_cache_mode { #define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC) #define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC) #define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC) -#define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL | _PAGE_ENC) #define PAGE_KERNEL_VVAR __pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC) #define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO) diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c index be81621446f0..0b4f1cc2291c 100644 --- a/tools/testing/selftests/x86/test_vsyscall.c +++ b/tools/testing/selftests/x86/test_vsyscall.c @@ -450,7 +450,7 @@ static void sigtrap(int sig, siginfo_t *info, void *ctx_void) num_vsyscall_traps++; } -static int test_native_vsyscall(void) +static int test_emulation(void) { time_t tmp; bool is_native; @@ -458,7 +458,7 @@ static int test_native_vsyscall(void) if (!vtime) return 0; - printf("[RUN]\tchecking for native vsyscall\n"); + printf("[RUN]\tchecking that vsyscalls are emulated\n"); sethandler(SIGTRAP, sigtrap, 0); set_eflags(get_eflags() | X86_EFLAGS_TF); vtime(&tmp); @@ -474,11 +474,12 @@ static int test_native_vsyscall(void) */ is_native = (num_vsyscall_traps > 1); - printf("\tvsyscalls are %s (%d instructions in vsyscall page)\n", + printf("[%s]\tvsyscalls are %s (%d instructions in vsyscall page)\n", + (is_native ? "FAIL" : "OK"), (is_native ? "native" : "emulated"), (int)num_vsyscall_traps); - return 0; + return is_native; } #endif @@ -498,7 +499,7 @@ int main(int argc, char **argv) nerrs += test_vsys_r(); #ifdef __x86_64__ - nerrs += test_native_vsyscall(); + nerrs += test_emulation(); #endif return nerrs ? 1 : 0; -- cgit v1.2.3-70-g09d2 From c07a8f8b08ba683ea24f3ac9159f37ae94daf47f Mon Sep 17 00:00:00 2001 From: Francis Deslauriers Date: Thu, 8 Mar 2018 22:18:12 -0500 Subject: x86/kprobes: Fix kernel crash when probing .entry_trampoline code Disable the kprobe probing of the entry trampoline: .entry_trampoline is a code area that is used to ensure page table isolation between userspace and kernelspace. At the beginning of the execution of the trampoline, we load the kernel's CR3 register. This has the effect of enabling the translation of the kernel virtual addresses to physical addresses. Before this happens most kernel addresses can not be translated because the running process' CR3 is still used. If a kprobe is placed on the trampoline code before that change of the CR3 register happens the kernel crashes because int3 handling pages are not accessible. To fix this, add the .entry_trampoline section to the kprobe blacklist to prohibit the probing of code before all the kernel pages are accessible. Signed-off-by: Francis Deslauriers Reviewed-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: mathieu.desnoyers@efficios.com Cc: mhiramat@kernel.org Link: http://lkml.kernel.org/r/1520565492-4637-2-git-send-email-francis.deslauriers@efficios.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/sections.h | 1 + arch/x86/kernel/kprobes/core.c | 10 +++++++++- arch/x86/kernel/vmlinux.lds.S | 2 ++ 3 files changed, 12 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h index d6baf23782bc..5c019d23d06b 100644 --- a/arch/x86/include/asm/sections.h +++ b/arch/x86/include/asm/sections.h @@ -10,6 +10,7 @@ extern struct exception_table_entry __stop___ex_table[]; #if defined(CONFIG_X86_64) extern char __end_rodata_hpage_align[]; +extern char __entry_trampoline_start[], __entry_trampoline_end[]; #endif #endif /* _ASM_X86_SECTIONS_H */ diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index bd36f3c33cd0..0715f827607c 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -1168,10 +1168,18 @@ NOKPROBE_SYMBOL(longjmp_break_handler); bool arch_within_kprobe_blacklist(unsigned long addr) { + bool is_in_entry_trampoline_section = false; + +#ifdef CONFIG_X86_64 + is_in_entry_trampoline_section = + (addr >= (unsigned long)__entry_trampoline_start && + addr < (unsigned long)__entry_trampoline_end); +#endif return (addr >= (unsigned long)__kprobes_text_start && addr < (unsigned long)__kprobes_text_end) || (addr >= (unsigned long)__entry_text_start && - addr < (unsigned long)__entry_text_end); + addr < (unsigned long)__entry_text_end) || + is_in_entry_trampoline_section; } int __init arch_init_kprobes(void) diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 9b138a06c1a4..b854ebf5851b 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -118,9 +118,11 @@ SECTIONS #ifdef CONFIG_X86_64 . = ALIGN(PAGE_SIZE); + VMLINUX_SYMBOL(__entry_trampoline_start) = .; _entry_trampoline = .; *(.entry_trampoline) . = ALIGN(PAGE_SIZE); + VMLINUX_SYMBOL(__entry_trampoline_end) = .; ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big"); #endif -- cgit v1.2.3-70-g09d2