summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2024-11-30 10:34:54 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2024-11-30 10:34:54 -0800
commit: 831c1926ee728c3e747255f7c0f434762e8e863d (patch)
tree: 92d1c73842c3f95b362810aba2080b7582c12c11
parent: 04b43ea325d21c4c98e831383a1b7d540721898a (diff)
parent: bed2cc482600296fe04edbc38005ba2851449c10 (diff)
Merge tag 'uml-for-linus-6.13-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/uml/linux
Pull UML updates from Richard Weinberger:

 - Lots of cleanups, mostly from Benjamin Berg and Tiwei Bie
 - Removal of unused code
 - Fix for sparse warnings
 - Cleanup around stub_exe()

* tag 'uml-for-linus-6.13-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/uml/linux: (68 commits)
  hostfs: Fix the NULL vs IS_ERR() bug for __filemap_get_folio()
  um: move thread info into task
  um: Always dump trace for specified task in show_stack
  um: vector: Do not use drvdata in release
  um: net: Do not use drvdata in release
  um: ubd: Do not use drvdata in release
  um: ubd: Initialize ubd's disk pointer in ubd_add
  um: virtio_uml: query the number of vqs if supported
  um: virtio_uml: fix call_fd IRQ allocation
  um: virtio_uml: send SET_MEM_TABLE message with the exact size
  um: remove broken double fault detection
  um: remove duplicate UM_NSEC_PER_SEC definition
  um: remove file sync for stub data
  um: always include kconfig.h and compiler-version.h
  um: set DONTDUMP and DONTFORK flags on KASAN shadow memory
  um: fix sparse warnings in signal code
  um: fix sparse warnings from regset refactor
  um: Remove double zero check
  um: fix stub exe build with CONFIG_GCOV
  um: Use os_set_pdeathsig helper in winch thread/process
  ...
-rw-r--r--arch/um/Kconfig24
-rw-r--r--arch/um/Makefile7
-rw-r--r--arch/um/Makefile-skas14
-rw-r--r--arch/um/configs/i386_defconfig1
-rw-r--r--arch/um/drivers/chan_user.c2
-rw-r--r--arch/um/drivers/hostaudio_kern.c2
-rw-r--r--arch/um/drivers/net_kern.c2
-rw-r--r--arch/um/drivers/ubd_kern.c5
-rw-r--r--arch/um/drivers/vector_kern.c3
-rw-r--r--arch/um/drivers/vhost_user.h4
-rw-r--r--arch/um/drivers/virtio_uml.c51
-rw-r--r--arch/um/include/asm/Kbuild1
-rw-r--r--arch/um/include/asm/current.h23
-rw-r--r--arch/um/include/asm/page.h34
-rw-r--r--arch/um/include/asm/pgalloc.h11
-rw-r--r--arch/um/include/asm/pgtable-2level.h2
-rw-r--r--arch/um/include/asm/pgtable-4level.h (renamed from arch/um/include/asm/pgtable-3level.h)59
-rw-r--r--arch/um/include/asm/pgtable.h83
-rw-r--r--arch/um/include/asm/processor-generic.h7
-rw-r--r--arch/um/include/asm/thread_info.h18
-rw-r--r--arch/um/include/asm/tlbflush.h4
-rw-r--r--arch/um/include/shared/as-layout.h10
-rw-r--r--arch/um/include/shared/common-offsets.h15
-rw-r--r--arch/um/include/shared/kern_util.h1
-rw-r--r--arch/um/include/shared/mem_user.h5
-rw-r--r--arch/um/include/shared/os.h15
-rw-r--r--arch/um/include/shared/registers.h6
-rw-r--r--arch/um/include/shared/skas/stub-data.h12
-rw-r--r--arch/um/include/shared/timetravel.h5
-rw-r--r--arch/um/include/shared/user.h2
-rw-r--r--arch/um/kernel/dtb.c1
-rw-r--r--arch/um/kernel/dyn.lds.S5
-rw-r--r--arch/um/kernel/initrd.c1
-rw-r--r--arch/um/kernel/irq.c112
-rw-r--r--arch/um/kernel/mem.c20
-rw-r--r--arch/um/kernel/physmem.c39
-rw-r--r--arch/um/kernel/process.c24
-rw-r--r--arch/um/kernel/skas/.gitignore2
-rw-r--r--arch/um/kernel/skas/Makefile38
-rw-r--r--arch/um/kernel/skas/mmu.c28
-rw-r--r--arch/um/kernel/skas/process.c4
-rw-r--r--arch/um/kernel/skas/stub.c10
-rw-r--r--arch/um/kernel/skas/stub_exe.c95
-rw-r--r--arch/um/kernel/skas/stub_exe_embed.S11
-rw-r--r--arch/um/kernel/sysrq.c8
-rw-r--r--arch/um/kernel/time.c20
-rw-r--r--arch/um/kernel/tlb.c74
-rw-r--r--arch/um/kernel/trap.c16
-rw-r--r--arch/um/kernel/um_arch.c75
-rw-r--r--arch/um/kernel/uml.lds.S2
-rw-r--r--arch/um/os-Linux/Makefile2
-rw-r--r--arch/um/os-Linux/file.c6
-rw-r--r--arch/um/os-Linux/main.c23
-rw-r--r--arch/um/os-Linux/mem.c14
-rw-r--r--arch/um/os-Linux/process.c88
-rw-r--r--arch/um/os-Linux/registers.c11
-rw-r--r--arch/um/os-Linux/sigio.c1
-rw-r--r--arch/um/os-Linux/signal.c55
-rw-r--r--arch/um/os-Linux/skas/mem.c21
-rw-r--r--arch/um/os-Linux/skas/process.c231
-rw-r--r--arch/um/os-Linux/umid.c2
-rw-r--r--arch/um/os-Linux/util.c4
-rw-r--r--arch/x86/um/Kconfig12
-rw-r--r--arch/x86/um/Makefile2
-rw-r--r--arch/x86/um/asm/elf.h2
-rw-r--r--arch/x86/um/asm/ptrace.h10
-rw-r--r--arch/x86/um/os-Linux/Makefile2
-rw-r--r--arch/x86/um/os-Linux/registers.c145
-rw-r--r--arch/x86/um/os-Linux/task_size.c151
-rw-r--r--arch/x86/um/ptrace.c267
-rw-r--r--arch/x86/um/ptrace_32.c84
-rw-r--r--arch/x86/um/ptrace_64.c43
-rw-r--r--arch/x86/um/shared/sysdep/ptrace.h8
-rw-r--r--arch/x86/um/shared/sysdep/ptrace_32.h4
-rw-r--r--arch/x86/um/shared/sysdep/ptrace_64.h4
-rw-r--r--arch/x86/um/shared/sysdep/ptrace_user.h6
-rw-r--r--arch/x86/um/shared/sysdep/stub_32.h18
-rw-r--r--arch/x86/um/shared/sysdep/stub_64.h27
-rw-r--r--arch/x86/um/signal.c336
-rw-r--r--arch/x86/um/user-offsets.c8
-rw-r--r--arch/x86/um/vdso/Makefile5
-rw-r--r--arch/x86/um/vdso/checkundef.sh11
-rw-r--r--fs/hostfs/hostfs_kern.c5
83 files changed, 1227 insertions, 1394 deletions
diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index c89575d05021..18051b1cfce0 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -5,6 +5,7 @@ menu "UML-specific options"
config UML
bool
default y
+ select ARCH_WANTS_DYNAMIC_TASK_STRUCT
select ARCH_HAS_CPU_FINALIZE_INIT
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
@@ -32,6 +33,8 @@ config UML
select HAVE_ARCH_VMAP_STACK
select HAVE_RUST
select ARCH_HAS_UBSAN
+ select HAVE_ARCH_TRACEHOOK
+ select THREAD_INFO_IN_TASK
config MMU
bool
@@ -94,7 +97,7 @@ config MAY_HAVE_RUNTIME_DEPS
config STATIC_LINK
bool "Force a static link"
- depends on CC_CAN_LINK_STATIC_NO_RUNTIME_DEPS || !MAY_HAVE_RUNTIME_DEPS
+ depends on !MAY_HAVE_RUNTIME_DEPS
help
This option gives you the ability to force a static link of UML.
Normally, UML is linked as a shared binary. This is inconvenient for
@@ -209,8 +212,8 @@ config MMAPPER
config PGTABLE_LEVELS
int
- default 3 if 3_LEVEL_PGTABLES
- default 2
+ default 4 if 64BIT
+ default 2 if !64BIT
config UML_TIME_TRAVEL_SUPPORT
bool
@@ -227,6 +230,21 @@ config UML_TIME_TRAVEL_SUPPORT
It is safe to say Y, but you probably don't need this.
+config UML_MAX_USERSPACE_ITERATIONS
+ int
+ prompt "Maximum number of unscheduled userspace iterations"
+ default 10000
+ depends on UML_TIME_TRAVEL_SUPPORT
+ help
+ In UML inf-cpu and ext time-travel mode userspace can run without being
+ interrupted. This will eventually overwhelm the kernel and create OOM
+ situations (mainly RCU not running). This setting specifies the number
+ of kernel/userspace switches (minor/major page fault, signal or syscall)
+ for the same userspace thread before the sched_clock is advanced by a
+ jiffie to trigger scheduling.
+
+ Setting it to zero disables the feature.
+
config KASAN_SHADOW_OFFSET
hex
depends on KASAN
diff --git a/arch/um/Makefile b/arch/um/Makefile
index 00b63bac5eff..1d36a613aad8 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -61,7 +61,8 @@ KBUILD_CFLAGS += $(CFLAGS) $(CFLAGS-y) -D__arch_um__ \
$(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap \
-Dlongjmp=kernel_longjmp -Dsetjmp=kernel_setjmp \
-Din6addr_loopback=kernel_in6addr_loopback \
- -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr
+ -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr \
+ -D__close_range=kernel__close_range
KBUILD_RUSTFLAGS += -Crelocation-model=pie
@@ -70,7 +71,9 @@ KBUILD_AFLAGS += $(ARCH_INCLUDE)
USER_CFLAGS = $(patsubst $(KERNEL_DEFINES),,$(patsubst -I%,,$(KBUILD_CFLAGS))) \
$(ARCH_INCLUDE) $(MODE_INCLUDE) $(filter -I%,$(CFLAGS)) \
-D_FILE_OFFSET_BITS=64 -idirafter $(srctree)/include \
- -idirafter $(objtree)/include -D__KERNEL__ -D__UM_HOST__
+ -idirafter $(objtree)/include -D__KERNEL__ -D__UM_HOST__ \
+ -include $(srctree)/include/linux/compiler-version.h \
+ -include $(srctree)/include/linux/kconfig.h
#This will adjust *FLAGS accordingly to the platform.
include $(srctree)/$(ARCH_DIR)/Makefile-os-Linux
diff --git a/arch/um/Makefile-skas b/arch/um/Makefile-skas
index 67323b028999..1a27e65bcb9c 100644
--- a/arch/um/Makefile-skas
+++ b/arch/um/Makefile-skas
@@ -3,15 +3,15 @@
# Licensed under the GPL
#
-GPROF_OPT += -pg
+export UM_GPROF_OPT += -pg
ifdef CONFIG_CC_IS_CLANG
-GCOV_OPT += -fprofile-instr-generate -fcoverage-mapping
+export UM_GCOV_OPT += -fprofile-instr-generate -fcoverage-mapping
else
-GCOV_OPT += -fprofile-arcs -ftest-coverage
+export UM_GCOV_OPT += -fprofile-arcs -ftest-coverage
endif
-CFLAGS-$(CONFIG_GCOV) += $(GCOV_OPT)
-CFLAGS-$(CONFIG_GPROF) += $(GPROF_OPT)
-LINK-$(CONFIG_GCOV) += $(GCOV_OPT)
-LINK-$(CONFIG_GPROF) += $(GPROF_OPT)
+CFLAGS-$(CONFIG_GCOV) += $(UM_GCOV_OPT)
+CFLAGS-$(CONFIG_GPROF) += $(UM_GPROF_OPT)
+LINK-$(CONFIG_GCOV) += $(UM_GCOV_OPT)
+LINK-$(CONFIG_GPROF) += $(UM_GPROF_OPT)
diff --git a/arch/um/configs/i386_defconfig b/arch/um/configs/i386_defconfig
index 9c9c77f1255a..1ffa088739f4 100644
--- a/arch/um/configs/i386_defconfig
+++ b/arch/um/configs/i386_defconfig
@@ -1,4 +1,3 @@
-CONFIG_3_LEVEL_PGTABLES=y
# CONFIG_COMPACTION is not set
CONFIG_BINFMT_MISC=m
CONFIG_HOSTFS=y
diff --git a/arch/um/drivers/chan_user.c b/arch/um/drivers/chan_user.c
index a66e556012c4..35f9beeb19b3 100644
--- a/arch/um/drivers/chan_user.c
+++ b/arch/um/drivers/chan_user.c
@@ -161,6 +161,8 @@ static __noreturn int winch_thread(void *arg)
int count;
char c = 1;
+ os_set_pdeathsig();
+
pty_fd = data->pty_fd;
pipe_fd = data->pipe_fd;
count = write(pipe_fd, &c, sizeof(c));
diff --git a/arch/um/drivers/hostaudio_kern.c b/arch/um/drivers/hostaudio_kern.c
index 9d228878cea2..0ac149de1ac0 100644
--- a/arch/um/drivers/hostaudio_kern.c
+++ b/arch/um/drivers/hostaudio_kern.c
@@ -48,6 +48,7 @@ MODULE_PARM_DESC(mixer, MIXER_HELP);
#ifndef MODULE
static int set_dsp(char *name, int *add)
{
+ *add = 0;
dsp = name;
return 0;
}
@@ -56,6 +57,7 @@ __uml_setup("dsp=", set_dsp, "dsp=<dsp device>\n" DSP_HELP);
static int set_mixer(char *name, int *add)
{
+ *add = 0;
mixer = name;
return 0;
}
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index 77c4afb8ab90..75d04fb4994a 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -336,7 +336,7 @@ static struct platform_driver uml_net_driver = {
static void net_device_release(struct device *dev)
{
- struct uml_net *device = dev_get_drvdata(dev);
+ struct uml_net *device = container_of(dev, struct uml_net, pdev.dev);
struct net_device *netdev = device->dev;
struct uml_net_private *lp = netdev_priv(netdev);
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 7f28ec1929dc..66c1a8835e36 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -779,7 +779,7 @@ static int ubd_open_dev(struct ubd *ubd_dev)
static void ubd_device_release(struct device *dev)
{
- struct ubd *ubd_dev = dev_get_drvdata(dev);
+ struct ubd *ubd_dev = container_of(dev, struct ubd, pdev.dev);
blk_mq_free_tag_set(&ubd_dev->tag_set);
*ubd_dev = ((struct ubd) DEFAULT_UBD);
@@ -898,6 +898,8 @@ static int ubd_add(int n, char **error_out)
if (err)
goto out_cleanup_disk;
+ ubd_dev->disk = disk;
+
return 0;
out_cleanup_disk:
@@ -1499,6 +1501,7 @@ int io_thread(void *arg)
{
int n, count, written, res;
+ os_set_pdeathsig();
os_fix_helper_signals();
while(1){
diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c
index c992da83268d..64c09db392c1 100644
--- a/arch/um/drivers/vector_kern.c
+++ b/arch/um/drivers/vector_kern.c
@@ -815,7 +815,8 @@ static struct platform_driver uml_net_driver = {
static void vector_device_release(struct device *dev)
{
- struct vector_device *device = dev_get_drvdata(dev);
+ struct vector_device *device =
+ container_of(dev, struct vector_device, pdev.dev);
struct net_device *netdev = device->dev;
list_del(&device->list);
diff --git a/arch/um/drivers/vhost_user.h b/arch/um/drivers/vhost_user.h
index 6f147cd3c9f7..fcfa3b7e021b 100644
--- a/arch/um/drivers/vhost_user.h
+++ b/arch/um/drivers/vhost_user.h
@@ -10,6 +10,7 @@
/* Feature bits */
#define VHOST_USER_F_PROTOCOL_FEATURES 30
/* Protocol feature bits */
+#define VHOST_USER_PROTOCOL_F_MQ 0
#define VHOST_USER_PROTOCOL_F_REPLY_ACK 3
#define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5
#define VHOST_USER_PROTOCOL_F_CONFIG 9
@@ -23,7 +24,8 @@
/* Supported transport features */
#define VHOST_USER_SUPPORTED_F BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)
/* Supported protocol features */
-#define VHOST_USER_SUPPORTED_PROTOCOL_F (BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK) | \
+#define VHOST_USER_SUPPORTED_PROTOCOL_F (BIT_ULL(VHOST_USER_PROTOCOL_F_MQ) | \
+ BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK) | \
BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG) | \
BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS))
diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c
index 2b6e701776b6..cc3be48a9d6e 100644
--- a/arch/um/drivers/virtio_uml.c
+++ b/arch/um/drivers/virtio_uml.c
@@ -56,6 +56,7 @@ struct virtio_uml_device {
int sock, req_fd, irq;
u64 features;
u64 protocol_features;
+ u64 max_vqs;
u8 status;
u8 registered:1;
u8 suspended:1;
@@ -72,8 +73,6 @@ struct virtio_uml_vq_info {
bool suspended;
};
-extern unsigned long long physmem_size, highmem;
-
#define vu_err(vu_dev, ...) dev_err(&(vu_dev)->pdev->dev, ##__VA_ARGS__)
/* Vhost-user protocol */
@@ -343,6 +342,17 @@ static int vhost_user_set_protocol_features(struct virtio_uml_device *vu_dev,
protocol_features);
}
+static int vhost_user_get_queue_num(struct virtio_uml_device *vu_dev,
+ u64 *queue_num)
+{
+ int rc = vhost_user_send_no_payload(vu_dev, true,
+ VHOST_USER_GET_QUEUE_NUM);
+
+ if (rc)
+ return rc;
+ return vhost_user_recv_u64(vu_dev, queue_num);
+}
+
static void vhost_user_reply(struct virtio_uml_device *vu_dev,
struct vhost_user_msg *msg, int response)
{
@@ -516,6 +526,15 @@ static int vhost_user_init(struct virtio_uml_device *vu_dev)
return rc;
}
+ if (vu_dev->protocol_features &
+ BIT_ULL(VHOST_USER_PROTOCOL_F_MQ)) {
+ rc = vhost_user_get_queue_num(vu_dev, &vu_dev->max_vqs);
+ if (rc)
+ return rc;
+ } else {
+ vu_dev->max_vqs = U64_MAX;
+ }
+
return 0;
}
@@ -625,7 +644,7 @@ static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev)
{
struct vhost_user_msg msg = {
.header.request = VHOST_USER_SET_MEM_TABLE,
- .header.size = sizeof(msg.payload.mem_regions),
+ .header.size = offsetof(typeof(msg.payload.mem_regions), regions[1]),
.payload.mem_regions.num = 1,
};
unsigned long reserved = uml_reserved - uml_physmem;
@@ -673,13 +692,6 @@ static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev)
if (rc < 0)
return rc;
- if (highmem) {
- msg.payload.mem_regions.num++;
- rc = vhost_user_init_mem_region(__pa(end_iomem), highmem,
- &fds[1], &msg.payload.mem_regions.regions[1]);
- if (rc < 0)
- return rc;
- }
return vhost_user_send(vu_dev, false, &msg, fds,
msg.payload.mem_regions.num);
@@ -897,7 +909,7 @@ static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev,
{
struct virtio_uml_vq_info *info = vq->priv;
int call_fds[2];
- int rc;
+ int rc, irq;
/* no call FD needed/desired in this case */
if (vu_dev->protocol_features &
@@ -914,19 +926,23 @@ static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev,
return rc;
info->call_fd = call_fds[0];
- rc = um_request_irq(vu_dev->irq, info->call_fd, IRQ_READ,
- vu_interrupt, IRQF_SHARED, info->name, vq);
- if (rc < 0)
+ irq = um_request_irq(vu_dev->irq, info->call_fd, IRQ_READ,
+ vu_interrupt, IRQF_SHARED, info->name, vq);
+ if (irq < 0) {
+ rc = irq;
goto close_both;
+ }
rc = vhost_user_set_vring_call(vu_dev, vq->index, call_fds[1]);
if (rc)
goto release_irq;
+ vu_dev->irq = irq;
+
goto out;
release_irq:
- um_free_irq(vu_dev->irq, vq);
+ um_free_irq(irq, vq);
close_both:
os_close_file(call_fds[0]);
out:
@@ -1023,7 +1039,9 @@ static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vq;
/* not supported for now */
- if (WARN_ON(nvqs > 64))
+ if (WARN(nvqs > 64 || nvqs > vu_dev->max_vqs,
+ "%d VQs requested, only up to 64 or %lld supported\n",
+ nvqs, vu_dev->max_vqs))
return -EINVAL;
rc = vhost_user_set_mem_table(vu_dev);
@@ -1210,6 +1228,7 @@ static int virtio_uml_probe(struct platform_device *pdev)
vu_dev->vdev.id.vendor = VIRTIO_DEV_ANY_ID;
vu_dev->pdev = pdev;
vu_dev->req_fd = -1;
+ vu_dev->irq = UM_IRQ_ALLOC;
time_travel_propagate_time();
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index 18f902da8e99..428f2c5158c2 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -1,7 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
generic-y += bug.h
generic-y += compat.h
-generic-y += current.h
generic-y += device.h
generic-y += dma-mapping.h
generic-y += emergency-restart.h
diff --git a/arch/um/include/asm/current.h b/arch/um/include/asm/current.h
new file mode 100644
index 000000000000..de64e032d66c
--- /dev/null
+++ b/arch/um/include/asm/current.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_CURRENT_H
+#define __ASM_CURRENT_H
+
+#include <linux/compiler.h>
+#include <linux/threads.h>
+
+#ifndef __ASSEMBLY__
+
+struct task_struct;
+extern struct task_struct *cpu_tasks[NR_CPUS];
+
+static __always_inline struct task_struct *get_current(void)
+{
+ return cpu_tasks[0];
+}
+
+
+#define current get_current()
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_CURRENT_H */
diff --git a/arch/um/include/asm/page.h b/arch/um/include/asm/page.h
index 834313ecd3d6..3d516f3ca9c7 100644
--- a/arch/um/include/asm/page.h
+++ b/arch/um/include/asm/page.h
@@ -29,51 +29,35 @@ struct page;
#define clear_user_page(page, vaddr, pg) clear_page(page)
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
-#if defined(CONFIG_3_LEVEL_PGTABLES) && !defined(CONFIG_64BIT)
-
typedef struct { unsigned long pte; } pte_t;
-typedef struct { unsigned long pmd; } pmd_t;
typedef struct { unsigned long pgd; } pgd_t;
-#define pte_val(p) ((p).pte)
-#define pte_get_bits(p, bits) ((p).pte & (bits))
-#define pte_set_bits(p, bits) ((p).pte |= (bits))
-#define pte_clear_bits(p, bits) ((p).pte &= ~(bits))
-#define pte_copy(to, from) ({ (to).pte = (from).pte; })
-#define pte_is_zero(p) (!((p).pte & ~_PAGE_NEWPAGE))
-#define pte_set_val(p, phys, prot) \
- ({ (p).pte = (phys) | pgprot_val(prot); })
+#if CONFIG_PGTABLE_LEVELS > 2
+typedef struct { unsigned long pmd; } pmd_t;
#define pmd_val(x) ((x).pmd)
#define __pmd(x) ((pmd_t) { (x) } )
-typedef unsigned long long phys_t;
+#if CONFIG_PGTABLE_LEVELS > 3
-#else
-
-typedef struct { unsigned long pte; } pte_t;
-typedef struct { unsigned long pgd; } pgd_t;
+typedef struct { unsigned long pud; } pud_t;
+#define pud_val(x) ((x).pud)
+#define __pud(x) ((pud_t) { (x) } )
-#ifdef CONFIG_3_LEVEL_PGTABLES
-typedef struct { unsigned long pmd; } pmd_t;
-#define pmd_val(x) ((x).pmd)
-#define __pmd(x) ((pmd_t) { (x) } )
-#endif
+#endif /* CONFIG_PGTABLE_LEVELS > 3 */
+#endif /* CONFIG_PGTABLE_LEVELS > 2 */
#define pte_val(x) ((x).pte)
-
#define pte_get_bits(p, bits) ((p).pte & (bits))
#define pte_set_bits(p, bits) ((p).pte |= (bits))
#define pte_clear_bits(p, bits) ((p).pte &= ~(bits))
#define pte_copy(to, from) ((to).pte = (from).pte)
-#define pte_is_zero(p) (!((p).pte & ~_PAGE_NEWPAGE))
+#define pte_is_zero(p) (!((p).pte & ~_PAGE_NEEDSYNC))
#define pte_set_val(p, phys, prot) (p).pte = (phys | pgprot_val(prot))
typedef unsigned long phys_t;
-#endif
-
typedef struct { unsigned long pgprot; } pgprot_t;
typedef struct page *pgtable_t;
diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
index de5e31c64793..04fb4e6969a4 100644
--- a/arch/um/include/asm/pgalloc.h
+++ b/arch/um/include/asm/pgalloc.h
@@ -31,7 +31,7 @@ do { \
tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \
} while (0)
-#ifdef CONFIG_3_LEVEL_PGTABLES
+#if CONFIG_PGTABLE_LEVELS > 2
#define __pmd_free_tlb(tlb, pmd, address) \
do { \
@@ -39,6 +39,15 @@ do { \
tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pmd)); \
} while (0)
+#if CONFIG_PGTABLE_LEVELS > 3
+
+#define __pud_free_tlb(tlb, pud, address) \
+do { \
+ pagetable_pud_dtor(virt_to_ptdesc(pud)); \
+ tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pud)); \
+} while (0)
+
+#endif
#endif
#endif
diff --git a/arch/um/include/asm/pgtable-2level.h b/arch/um/include/asm/pgtable-2level.h
index 8256ecc5b919..ab0c8dd86564 100644
--- a/arch/um/include/asm/pgtable-2level.h
+++ b/arch/um/include/asm/pgtable-2level.h
@@ -31,7 +31,7 @@
printk("%s:%d: bad pgd %p(%08lx).\n", __FILE__, __LINE__, &(e), \
pgd_val(e))
-static inline int pgd_newpage(pgd_t pgd) { return 0; }
+static inline int pgd_needsync(pgd_t pgd) { return 0; }
static inline void pgd_mkuptodate(pgd_t pgd) { }
#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
diff --git a/arch/um/include/asm/pgtable-3level.h b/arch/um/include/asm/pgtable-4level.h
index 8a5032ec231f..0d279caee93c 100644
--- a/arch/um/include/asm/pgtable-3level.h
+++ b/arch/um/include/asm/pgtable-4level.h
@@ -4,21 +4,25 @@
* Derived from include/asm-i386/pgtable.h
*/
-#ifndef __UM_PGTABLE_3LEVEL_H
-#define __UM_PGTABLE_3LEVEL_H
+#ifndef __UM_PGTABLE_4LEVEL_H
+#define __UM_PGTABLE_4LEVEL_H
-#include <asm-generic/pgtable-nopud.h>
+#include <asm-generic/pgtable-nop4d.h>
-/* PGDIR_SHIFT determines what a third-level page table entry can map */
+/* PGDIR_SHIFT determines what a fourth-level page table entry can map */
-#ifdef CONFIG_64BIT
-#define PGDIR_SHIFT 30
-#else
-#define PGDIR_SHIFT 31
-#endif
+#define PGDIR_SHIFT 39
#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE-1))
+/* PUD_SHIFT determines the size of the area a third-level page table can
+ * map
+ */
+
+#define PUD_SHIFT 30
+#define PUD_SIZE (1UL << PUD_SHIFT)
+#define PUD_MASK (~(PUD_SIZE-1))
+
/* PMD_SHIFT determines the size of the area a second-level page table can
* map
*/
@@ -32,13 +36,9 @@
*/
#define PTRS_PER_PTE 512
-#ifdef CONFIG_64BIT
#define PTRS_PER_PMD 512
+#define PTRS_PER_PUD 512
#define PTRS_PER_PGD 512
-#else
-#define PTRS_PER_PMD 1024
-#define PTRS_PER_PGD 1024
-#endif
#define USER_PTRS_PER_PGD ((TASK_SIZE + (PGDIR_SIZE - 1)) / PGDIR_SIZE)
@@ -48,11 +48,14 @@
#define pmd_ERROR(e) \
printk("%s:%d: bad pmd %p(%016lx).\n", __FILE__, __LINE__, &(e), \
pmd_val(e))
+#define pud_ERROR(e) \
+ printk("%s:%d: bad pud %p(%016lx).\n", __FILE__, __LINE__, &(e), \
+ pud_val(e))
#define pgd_ERROR(e) \
printk("%s:%d: bad pgd %p(%016lx).\n", __FILE__, __LINE__, &(e), \
pgd_val(e))
-#define pud_none(x) (!(pud_val(x) & ~_PAGE_NEWPAGE))
+#define pud_none(x) (!(pud_val(x) & ~_PAGE_NEEDSYNC))
#define pud_bad(x) ((pud_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
#define pud_present(x) (pud_val(x) & _PAGE_PRESENT)
#define pud_populate(mm, pud, pmd) \
@@ -60,23 +63,40 @@
#define set_pud(pudptr, pudval) (*(pudptr) = (pudval))
-static inline int pgd_newpage(pgd_t pgd)
+#define p4d_none(x) (!(p4d_val(x) & ~_PAGE_NEEDSYNC))
+#define p4d_bad(x) ((p4d_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
+#define p4d_present(x) (p4d_val(x) & _PAGE_PRESENT)
+#define p4d_populate(mm, p4d, pud) \
+ set_p4d(p4d, __p4d(_PAGE_TABLE + __pa(pud)))
+
+#define set_p4d(p4dptr, p4dval) (*(p4dptr) = (p4dval))
+
+
+static inline int pgd_needsync(pgd_t pgd)
{
- return(pgd_val(pgd) & _PAGE_NEWPAGE);
+ return pgd_val(pgd) & _PAGE_NEEDSYNC;
}
-static inline void pgd_mkuptodate(pgd_t pgd) { pgd_val(pgd) &= ~_PAGE_NEWPAGE; }
+static inline void pgd_mkuptodate(pgd_t pgd) { pgd_val(pgd) &= ~_PAGE_NEEDSYNC; }
#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
static inline void pud_clear (pud_t *pud)
{
- set_pud(pud, __pud(_PAGE_NEWPAGE));
+ set_pud(pud, __pud(_PAGE_NEEDSYNC));
+}
+
+static inline void p4d_clear (p4d_t *p4d)
+{
+ set_p4d(p4d, __p4d(_PAGE_NEEDSYNC));
}
#define pud_page(pud) phys_to_page(pud_val(pud) & PAGE_MASK)
#define pud_pgtable(pud) ((pmd_t *) __va(pud_val(pud) & PAGE_MASK))
+#define p4d_page(p4d) phys_to_page(p4d_val(p4d) & PAGE_MASK)
+#define p4d_pgtable(p4d) ((pud_t *) __va(p4d_val(p4d) & PAGE_MASK))
+
static inline unsigned long pte_pfn(pte_t pte)
{
return phys_to_pfn(pte_val(pte));
@@ -97,4 +117,3 @@ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
}
#endif
-
diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index faab5a2a4b06..0bd60afcc37d 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -11,8 +11,7 @@
#include <asm/fixmap.h>
#define _PAGE_PRESENT 0x001
-#define _PAGE_NEWPAGE 0x002
-#define _PAGE_NEWPROT 0x004
+#define _PAGE_NEEDSYNC 0x002
#define _PAGE_RW 0x020
#define _PAGE_USER 0x040
#define _PAGE_ACCESSED 0x080
@@ -24,10 +23,12 @@
/* We borrow bit 10 to store the exclusive marker in swap PTEs. */
#define _PAGE_SWP_EXCLUSIVE 0x400
-#ifdef CONFIG_3_LEVEL_PGTABLES
-#include <asm/pgtable-3level.h>
-#else
+#if CONFIG_PGTABLE_LEVELS == 4
+#include <asm/pgtable-4level.h>
+#elif CONFIG_PGTABLE_LEVELS == 2
#include <asm/pgtable-2level.h>
+#else
+#error "Unsupported number of page table levels"
#endif
extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
@@ -78,22 +79,22 @@ extern unsigned long end_iomem;
*/
#define ZERO_PAGE(vaddr) virt_to_page(empty_zero_page)
-#define pte_clear(mm,addr,xp) pte_set_val(*(xp), (phys_t) 0, __pgprot(_PAGE_NEWPAGE))
+#define pte_clear(mm, addr, xp) pte_set_val(*(xp), (phys_t) 0, __pgprot(_PAGE_NEEDSYNC))
-#define pmd_none(x) (!((unsigned long)pmd_val(x) & ~_PAGE_NEWPAGE))
+#define pmd_none(x) (!((unsigned long)pmd_val(x) & ~_PAGE_NEEDSYNC))
#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT)
-#define pmd_clear(xp) do { pmd_val(*(xp)) = _PAGE_NEWPAGE; } while (0)
+#define pmd_clear(xp) do { pmd_val(*(xp)) = _PAGE_NEEDSYNC; } while (0)
-#define pmd_newpage(x) (pmd_val(x) & _PAGE_NEWPAGE)
-#define pmd_mkuptodate(x) (pmd_val(x) &= ~_PAGE_NEWPAGE)
+#define pmd_needsync(x) (pmd_val(x) & _PAGE_NEEDSYNC)
+#define pmd_mkuptodate(x) (pmd_val(x) &= ~_PAGE_NEEDSYNC)
-#define pud_newpage(x) (pud_val(x) & _PAGE_NEWPAGE)
-#define pud_mkuptodate(x) (pud_val(x) &= ~_PAGE_NEWPAGE)
+#define pud_needsync(x) (pud_val(x) & _PAGE_NEEDSYNC)
+#define pud_mkuptodate(x) (pud_val(x) &= ~_PAGE_NEEDSYNC)
-#define p4d_newpage(x) (p4d_val(x) & _PAGE_NEWPAGE)
-#define p4d_mkuptodate(x) (p4d_val(x) &= ~_PAGE_NEWPAGE)
+#define p4d_needsync(x) (p4d_val(x) & _PAGE_NEEDSYNC)
+#define p4d_mkuptodate(x) (p4d_val(x) &= ~_PAGE_NEEDSYNC)
#define pmd_pfn(pmd) (pmd_val(pmd) >> PAGE_SHIFT)
#define pmd_page(pmd) phys_to_page(pmd_val(pmd) & PAGE_MASK)
@@ -144,14 +145,9 @@ static inline int pte_young(pte_t pte)
return pte_get_bits(pte, _PAGE_ACCESSED);
}
-static inline int pte_newpage(pte_t pte)
+static inline int pte_needsync(pte_t pte)
{
- return pte_get_bits(pte, _PAGE_NEWPAGE);
-}
-
-static inline int pte_newprot(pte_t pte)
-{
- return(pte_present(pte) && (pte_get_bits(pte, _PAGE_NEWPROT)));
+ return pte_get_bits(pte, _PAGE_NEEDSYNC);
}
/*
@@ -160,12 +156,6 @@ static inline int pte_newprot(pte_t pte)
* =================================
*/
-static inline pte_t pte_mknewprot(pte_t pte)
-{
- pte_set_bits(pte, _PAGE_NEWPROT);
- return(pte);
-}
-
static inline pte_t pte_mkclean(pte_t pte)
{
pte_clear_bits(pte, _PAGE_DIRTY);
@@ -180,19 +170,14 @@ static inline pte_t pte_mkold(pte_t pte)
static inline pte_t pte_wrprotect(pte_t pte)
{
- if (likely(pte_get_bits(pte, _PAGE_RW)))
- pte_clear_bits(pte, _PAGE_RW);
- else
- return pte;
- return(pte_mknewprot(pte));
+ pte_clear_bits(pte, _PAGE_RW);
+ return pte;
}
static inline pte_t pte_mkread(pte_t pte)
{
- if (unlikely(pte_get_bits(pte, _PAGE_USER)))
- return pte;
pte_set_bits(pte, _PAGE_USER);
- return(pte_mknewprot(pte));
+ return pte;
}
static inline pte_t pte_mkdirty(pte_t pte)
@@ -209,23 +194,19 @@ static inline pte_t pte_mkyoung(pte_t pte)
static inline pte_t pte_mkwrite_novma(pte_t pte)
{
- if (unlikely(pte_get_bits(pte, _PAGE_RW)))
- return pte;
pte_set_bits(pte, _PAGE_RW);
- return(pte_mknewprot(pte));
+ return pte;
}
static inline pte_t pte_mkuptodate(pte_t pte)
{
- pte_clear_bits(pte, _PAGE_NEWPAGE);
- if(pte_present(pte))
- pte_clear_bits(pte, _PAGE_NEWPROT);
- return(pte);
+ pte_clear_bits(pte, _PAGE_NEEDSYNC);
+ return pte;
}
-static inline pte_t pte_mknewpage(pte_t pte)
+static inline pte_t pte_mkneedsync(pte_t pte)
{
- pte_set_bits(pte, _PAGE_NEWPAGE);
+ pte_set_bits(pte, _PAGE_NEEDSYNC);
return(pte);
}
@@ -233,13 +214,11 @@ static inline void set_pte(pte_t *pteptr, pte_t pteval)
{
pte_copy(*pteptr, pteval);
- /* If it's a swap entry, it needs to be marked _PAGE_NEWPAGE so
- * fix_range knows to unmap it. _PAGE_NEWPROT is specific to
- * mapped pages.
+ /* If it's a swap entry, it needs to be marked _PAGE_NEEDSYNC so
+ * update_pte_range knows to unmap it.
*/
- *pteptr = pte_mknewpage(*pteptr);
- if(pte_present(*pteptr)) *pteptr = pte_mknewprot(*pteptr);
+ *pteptr = pte_mkneedsync(*pteptr);
}
#define PFN_PTE_SHIFT PAGE_SHIFT
@@ -279,7 +258,7 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
#define __HAVE_ARCH_PTE_SAME
static inline int pte_same(pte_t pte_a, pte_t pte_b)
{
- return !((pte_val(pte_a) ^ pte_val(pte_b)) & ~_PAGE_NEWPAGE);
+ return !((pte_val(pte_a) ^ pte_val(pte_b)) & ~_PAGE_NEEDSYNC);
}
/*
@@ -294,8 +273,6 @@ static inline int pte_same(pte_t pte_a, pte_t pte_b)
({ pte_t pte; \
\
pte_set_val(pte, page_to_phys(page), (pgprot)); \
- if (pte_present(pte)) \
- pte_mknewprot(pte_mknewpage(pte)); \
pte;})
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
@@ -329,7 +306,7 @@ extern pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr);
* <--------------- offset ----------------> E < type -> 0 0 0 1 0
*
* E is the exclusive marker that is not stored in swap entries.
- * _PAGE_NEWPAGE (bit 1) is always set to 1 in set_pte().
+ * _PAGE_NEEDSYNC (bit 1) is always set to 1 in set_pte().
*/
#define __swp_type(x) (((x).val >> 5) & 0x1f)
#define __swp_offset(x) ((x).val >> 11)
diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
index bce4595798da..5d6356eafffe 100644
--- a/arch/um/include/asm/processor-generic.h
+++ b/arch/um/include/asm/processor-generic.h
@@ -20,10 +20,7 @@ struct task_struct;
struct mm_struct;
struct thread_struct {
- struct pt_regs regs;
struct pt_regs *segv_regs;
- void *fault_addr;
- jmp_buf *fault_catcher;
struct task_struct *prev_sched;
struct arch_thread arch;
jmp_buf switch_buf;
@@ -33,12 +30,14 @@ struct thread_struct {
void *arg;
} thread;
} request;
+
+ /* Contains variable sized FP registers */
+ struct pt_regs regs;
};
#define INIT_THREAD \
{ \
.regs = EMPTY_REGS, \
- .fault_addr = NULL, \
.prev_sched = NULL, \
.arch = INIT_ARCH_THREAD, \
.request = { } \
diff --git a/arch/um/include/asm/thread_info.h b/arch/um/include/asm/thread_info.h
index c7b4b49826a2..f9ad06fcc991 100644
--- a/arch/um/include/asm/thread_info.h
+++ b/arch/um/include/asm/thread_info.h
@@ -17,35 +17,17 @@
#include <sysdep/ptrace_user.h>
struct thread_info {
- struct task_struct *task; /* main task structure */
unsigned long flags; /* low level flags */
__u32 cpu; /* current CPU */
int preempt_count; /* 0 => preemptable,
<0 => BUG */
- struct thread_info *real_thread; /* Points to non-IRQ stack */
- unsigned long aux_fp_regs[FP_SIZE]; /* auxiliary fp_regs to save/restore
- them out-of-band */
};
#define INIT_THREAD_INFO(tsk) \
{ \
- .task = &tsk, \
.flags = 0, \
.cpu = 0, \
.preempt_count = INIT_PREEMPT_COUNT, \
- .real_thread = NULL, \
-}
-
-/* how to get the thread information struct from C */
-static inline struct thread_info *current_thread_info(void)
-{
- struct thread_info *ti;
- unsigned long mask = THREAD_SIZE - 1;
- void *p;
-
- asm volatile ("" : "=r" (p) : "0" (&ti));
- ti = (struct thread_info *) (((unsigned long)p) & ~mask);
- return ti;
}
#endif
diff --git a/arch/um/include/asm/tlbflush.h b/arch/um/include/asm/tlbflush.h
index db997976b6ea..13a3009942be 100644
--- a/arch/um/include/asm/tlbflush.h
+++ b/arch/um/include/asm/tlbflush.h
@@ -9,8 +9,8 @@
#include <linux/mm.h>
/*
- * In UML, we need to sync the TLB over by using mmap/munmap/mprotect syscalls
- * from the process handling the MM (which can be the kernel itself).
+ * In UML, we need to sync the TLB over by using mmap/munmap syscalls from
+ * the process handling the MM (which can be the kernel itself).
*
* To track updates, we can hook into set_ptes and flush_tlb_*. With set_ptes
* we catch all PTE transitions where memory that was unusable becomes usable.
diff --git a/arch/um/include/shared/as-layout.h b/arch/um/include/shared/as-layout.h
index 06292fca5a4d..ea65f151bf48 100644
--- a/arch/um/include/shared/as-layout.h
+++ b/arch/um/include/shared/as-layout.h
@@ -30,25 +30,23 @@
#include <sysdep/ptrace.h>
-struct cpu_task {
- void *task;
-};
+struct task_struct;
+extern struct task_struct *cpu_tasks[];
-extern struct cpu_task cpu_tasks[];
+extern unsigned long long physmem_size;
extern unsigned long high_physmem;
extern unsigned long uml_physmem;
extern unsigned long uml_reserved;
extern unsigned long end_vm;
extern unsigned long start_vm;
-extern unsigned long long highmem;
extern unsigned long brk_start;
extern unsigned long host_task_size;
extern unsigned long stub_start;
-extern int linux_main(int argc, char **argv);
+extern int linux_main(int argc, char **argv, char **envp);
extern void uml_finishsetup(void);
struct siginfo;
diff --git a/arch/um/include/shared/common-offsets.h b/arch/um/include/shared/common-offsets.h
index 579ed946a3a9..73f3a4792ed8 100644
--- a/arch/um/include/shared/common-offsets.h
+++ b/arch/um/include/shared/common-offsets.h
@@ -6,7 +6,6 @@ DEFINE(KERNEL_MADV_REMOVE, MADV_REMOVE);
DEFINE(UM_KERN_PAGE_SIZE, PAGE_SIZE);
DEFINE(UM_KERN_PAGE_MASK, PAGE_MASK);
DEFINE(UM_KERN_PAGE_SHIFT, PAGE_SHIFT);
-DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC);
DEFINE(UM_GFP_KERNEL, GFP_KERNEL);
DEFINE(UM_GFP_ATOMIC, GFP_ATOMIC);
@@ -15,17 +14,3 @@ DEFINE(UM_THREAD_SIZE, THREAD_SIZE);
DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC);
DEFINE(UM_NSEC_PER_USEC, NSEC_PER_USEC);
-
-#ifdef CONFIG_PRINTK
-DEFINE(UML_CONFIG_PRINTK, CONFIG_PRINTK);
-#endif
-#ifdef CONFIG_UML_X86
-DEFINE(UML_CONFIG_UML_X86, CONFIG_UML_X86);
-#endif
-#ifdef CONFIG_64BIT
-DEFINE(UML_CONFIG_64BIT, CONFIG_64BIT);
-#endif
-#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
-DEFINE(UML_CONFIG_UML_TIME_TRAVEL_SUPPORT, CONFIG_UML_TIME_TRAVEL_SUPPORT);
-#endif
-
diff --git a/arch/um/include/shared/kern_util.h b/arch/um/include/shared/kern_util.h
index d8ffd2db168e..f21dc8517538 100644
--- a/arch/um/include/shared/kern_util.h
+++ b/arch/um/include/shared/kern_util.h
@@ -60,7 +60,6 @@ extern unsigned long from_irq_stack(int nested);
extern int singlestepping(void);
extern void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs);
-extern void bus_handler(int sig, struct siginfo *si, struct uml_pt_regs *regs);
extern void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs);
extern void fatal_sigsegv(void) __attribute__ ((noreturn));
diff --git a/arch/um/include/shared/mem_user.h b/arch/um/include/shared/mem_user.h
index 11a723a58545..adfa08062f88 100644
--- a/arch/um/include/shared/mem_user.h
+++ b/arch/um/include/shared/mem_user.h
@@ -47,10 +47,9 @@ extern int iomem_size;
#define ROUND_4M(n) ((((unsigned long) (n)) + (1 << 22)) & ~((1 << 22) - 1))
extern unsigned long find_iomem(char *driver, unsigned long *len_out);
-extern void mem_total_pages(unsigned long physmem, unsigned long iomem,
- unsigned long highmem);
+extern void mem_total_pages(unsigned long physmem, unsigned long iomem);
extern void setup_physmem(unsigned long start, unsigned long usable,
- unsigned long len, unsigned long long highmem);
+ unsigned long len);
extern void map_memory(unsigned long virt, unsigned long phys,
unsigned long len, int r, int w, int x);
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index 9a039d6f1f74..5babad8c5f75 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -145,7 +145,6 @@ extern int os_ioctl_generic(int fd, unsigned int cmd, unsigned long arg);
extern int os_get_ifname(int fd, char *namebuf);
extern int os_set_slip(int fd);
extern int os_mode_fd(int fd, int mode);
-extern int os_fsync_file(int fd);
extern int os_seek_file(int fd, unsigned long long offset);
extern int os_open_file(const char *file, struct openflags flags, int mode);
@@ -199,15 +198,11 @@ extern int create_mem_file(unsigned long long len);
extern void report_enomem(void);
/* process.c */
-extern unsigned long os_process_pc(int pid);
-extern int os_process_parent(int pid);
extern void os_alarm_process(int pid);
-extern void os_stop_process(int pid);
extern void os_kill_process(int pid, int reap_child);
extern void os_kill_ptraced_process(int pid, int reap_child);
extern int os_getpid(void);
-extern int os_getpgrp(void);
extern void init_new_thread_signals(void);
@@ -220,6 +215,8 @@ extern int os_drop_memory(void *addr, int length);
extern int can_drop_memory(void);
extern int os_mincore(void *addr, unsigned long len);
+void os_set_pdeathsig(void);
+
/* execvp.c */
extern int execvp_noalloc(char *buf, const char *file, char *const argv[]);
/* helper.c */
@@ -244,7 +241,6 @@ extern void block_signals(void);
extern void unblock_signals(void);
extern int um_set_signals(int enable);
extern int um_set_signals_trace(int enable);
-extern int os_is_signal_stack(void);
extern void deliver_alarm(void);
extern void register_pm_wake_signal(void);
extern void block_signals_hard(void);
@@ -283,13 +279,11 @@ int map(struct mm_id *mm_idp, unsigned long virt,
unsigned long len, int prot, int phys_fd,
unsigned long long offset);
int unmap(struct mm_id *mm_idp, unsigned long addr, unsigned long len);
-int protect(struct mm_id *mm_idp, unsigned long addr,
- unsigned long len, unsigned int prot);
/* skas/process.c */
extern int is_skas_winch(int pid, int fd, void *data);
extern int start_userspace(unsigned long stub_stack);
-extern void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs);
+extern void userspace(struct uml_pt_regs *regs);
extern void new_thread(void *stack, jmp_buf *buf, void (*handler)(void));
extern void switch_threads(jmp_buf *me, jmp_buf *you);
extern int start_idle_thread(void *stack, jmp_buf *switch_buf);
@@ -329,9 +323,6 @@ extern int __ignore_sigio_fd(int fd);
/* tty.c */
extern int get_pty(void);
-/* sys-$ARCH/task_size.c */
-extern unsigned long os_get_top_address(void);
-
long syscall(long number, ...);
/* irqflags tracing */
diff --git a/arch/um/include/shared/registers.h b/arch/um/include/shared/registers.h
index a0450326521c..7d81b2339a48 100644
--- a/arch/um/include/shared/registers.h
+++ b/arch/um/include/shared/registers.h
@@ -8,12 +8,6 @@
#include <sysdep/ptrace.h>
-extern int save_i387_registers(int pid, unsigned long *fp_regs);
-extern int restore_i387_registers(int pid, unsigned long *fp_regs);
-extern int save_fp_registers(int pid, unsigned long *fp_regs);
-extern int restore_fp_registers(int pid, unsigned long *fp_regs);
-extern int save_fpx_registers(int pid, unsigned long *fp_regs);
-extern int restore_fpx_registers(int pid, unsigned long *fp_regs);
extern int init_pid_registers(int pid);
extern void get_safe_registers(unsigned long *regs, unsigned long *fp_regs);
extern int get_fp_registers(int pid, unsigned long *regs);
diff --git a/arch/um/include/shared/skas/stub-data.h b/arch/um/include/shared/skas/stub-data.h
index 2b6b44759dfa..81a4cace032c 100644
--- a/arch/um/include/shared/skas/stub-data.h
+++ b/arch/um/include/shared/skas/stub-data.h
@@ -12,6 +12,17 @@
#include <as-layout.h>
#include <sysdep/tls.h>
+struct stub_init_data {
+ unsigned long stub_start;
+
+ int stub_code_fd;
+ unsigned long stub_code_offset;
+ int stub_data_fd;
+ unsigned long stub_data_offset;
+
+ unsigned long segv_handler;
+};
+
#define STUB_NEXT_SYSCALL(s) \
((struct stub_syscall *) (((unsigned long) s) + (s)->cmd_len))
@@ -19,7 +30,6 @@ enum stub_syscall_type {
STUB_SYSCALL_UNSET = 0,
STUB_SYSCALL_MMAP,
STUB_SYSCALL_MUNMAP,
- STUB_SYSCALL_MPROTECT,
};
struct stub_syscall {
diff --git a/arch/um/include/shared/timetravel.h b/arch/um/include/shared/timetravel.h
index c8db2f213dba..7c2b277b7eb0 100644
--- a/arch/um/include/shared/timetravel.h
+++ b/arch/um/include/shared/timetravel.h
@@ -12,14 +12,13 @@ enum time_travel_mode {
TT_MODE_EXTERNAL,
};
-#if defined(UML_CONFIG_UML_TIME_TRAVEL_SUPPORT) || \
- defined(CONFIG_UML_TIME_TRAVEL_SUPPORT)
+#if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT)
extern enum time_travel_mode time_travel_mode;
extern int time_travel_should_print_bc_msg;
#else
#define time_travel_mode TT_MODE_OFF
#define time_travel_should_print_bc_msg 0
-#endif /* (UML_)CONFIG_UML_TIME_TRAVEL_SUPPORT */
+#endif /* CONFIG_UML_TIME_TRAVEL_SUPPORT */
void _time_travel_print_bc_msg(void);
static inline void time_travel_print_bc_msg(void)
diff --git a/arch/um/include/shared/user.h b/arch/um/include/shared/user.h
index bbab79c0c074..139eb78a4767 100644
--- a/arch/um/include/shared/user.h
+++ b/arch/um/include/shared/user.h
@@ -38,7 +38,7 @@ extern void panic(const char *fmt, ...)
#define UM_KERN_DEBUG KERN_DEBUG
#define UM_KERN_CONT KERN_CONT
-#ifdef UML_CONFIG_PRINTK
+#if IS_ENABLED(CONFIG_PRINTK)
#define printk(...) _printk(__VA_ARGS__)
extern int _printk(const char *fmt, ...)
__attribute__ ((format (printf, 1, 2)));
diff --git a/arch/um/kernel/dtb.c b/arch/um/kernel/dtb.c
index 8d78ced9e08f..15c342426489 100644
--- a/arch/um/kernel/dtb.c
+++ b/arch/um/kernel/dtb.c
@@ -31,6 +31,7 @@ void uml_dtb_init(void)
static int __init uml_dtb_setup(char *line, int *add)
{
+ *add = 0;
dtb = line;
return 0;
}
diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S
index 3385d653ebd0..a36b7918a011 100644
--- a/arch/um/kernel/dyn.lds.S
+++ b/arch/um/kernel/dyn.lds.S
@@ -116,8 +116,6 @@ SECTIONS
.fini_array : { *(.fini_array) }
.data : {
INIT_TASK_DATA(KERNEL_STACK_SIZE)
- . = ALIGN(KERNEL_STACK_SIZE);
- *(.data..init_irqstack)
DATA_DATA
*(.data.* .gnu.linkonce.d.*)
SORT(CONSTRUCTORS)
@@ -178,3 +176,6 @@ SECTIONS
DISCARDS
}
+
+ASSERT(__syscall_stub_end - __syscall_stub_start <= PAGE_SIZE,
+ "STUB code must not be larger than one page");
diff --git a/arch/um/kernel/initrd.c b/arch/um/kernel/initrd.c
index 47b8cb1a1156..99dba827461c 100644
--- a/arch/um/kernel/initrd.c
+++ b/arch/um/kernel/initrd.c
@@ -34,6 +34,7 @@ int __init read_initrd(void)
static int __init uml_initrd_setup(char *line, int *add)
{
+ *add = 0;
initrd = line;
return 0;
}
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 534e91797f89..338450741aac 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -674,115 +674,3 @@ void __init init_IRQ(void)
/* Initialize EPOLL Loop */
os_setup_epoll();
}
-
-/*
- * IRQ stack entry and exit:
- *
- * Unlike i386, UML doesn't receive IRQs on the normal kernel stack
- * and switch over to the IRQ stack after some preparation. We use
- * sigaltstack to receive signals on a separate stack from the start.
- * These two functions make sure the rest of the kernel won't be too
- * upset by being on a different stack. The IRQ stack has a
- * thread_info structure at the bottom so that current et al continue
- * to work.
- *
- * to_irq_stack copies the current task's thread_info to the IRQ stack
- * thread_info and sets the tasks's stack to point to the IRQ stack.
- *
- * from_irq_stack copies the thread_info struct back (flags may have
- * been modified) and resets the task's stack pointer.
- *
- * Tricky bits -
- *
- * What happens when two signals race each other? UML doesn't block
- * signals with sigprocmask, SA_DEFER, or sa_mask, so a second signal
- * could arrive while a previous one is still setting up the
- * thread_info.
- *
- * There are three cases -
- * The first interrupt on the stack - sets up the thread_info and
- * handles the interrupt
- * A nested interrupt interrupting the copying of the thread_info -
- * can't handle the interrupt, as the stack is in an unknown state
- * A nested interrupt not interrupting the copying of the
- * thread_info - doesn't do any setup, just handles the interrupt
- *
- * The first job is to figure out whether we interrupted stack setup.
- * This is done by xchging the signal mask with thread_info->pending.
- * If the value that comes back is zero, then there is no setup in
- * progress, and the interrupt can be handled. If the value is
- * non-zero, then there is stack setup in progress. In order to have
- * the interrupt handled, we leave our signal in the mask, and it will
- * be handled by the upper handler after it has set up the stack.
- *
- * Next is to figure out whether we are the outer handler or a nested
- * one. As part of setting up the stack, thread_info->real_thread is
- * set to non-NULL (and is reset to NULL on exit). This is the
- * nesting indicator. If it is non-NULL, then the stack is already
- * set up and the handler can run.
- */
-
-static unsigned long pending_mask;
-
-unsigned long to_irq_stack(unsigned long *mask_out)
-{
- struct thread_info *ti;
- unsigned long mask, old;
- int nested;
-
- mask = xchg(&pending_mask, *mask_out);
- if (mask != 0) {
- /*
- * If any interrupts come in at this point, we want to
- * make sure that their bits aren't lost by our
- * putting our bit in. So, this loop accumulates bits
- * until xchg returns the same value that we put in.
- * When that happens, there were no new interrupts,
- * and pending_mask contains a bit for each interrupt
- * that came in.
- */
- old = *mask_out;
- do {
- old |= mask;
- mask = xchg(&pending_mask, old);
- } while (mask != old);
- return 1;
- }
-
- ti = current_thread_info();
- nested = (ti->real_thread != NULL);
- if (!nested) {
- struct task_struct *task;
- struct thread_info *tti;
-
- task = cpu_tasks[ti->cpu].task;
- tti = task_thread_info(task);
-
- *ti = *tti;
- ti->real_thread = tti;
- task->stack = ti;
- }
-
- mask = xchg(&pending_mask, 0);
- *mask_out |= mask | nested;
- return 0;
-}
-
-unsigned long from_irq_stack(int nested)
-{
- struct thread_info *ti, *to;
- unsigned long mask;
-
- ti = current_thread_info();
-
- pending_mask = 1;
-
- to = ti->real_thread;
- current->stack = to;
- ti->real_thread = NULL;
- *to = *ti;
-
- mask = xchg(&pending_mask, 0);
- return mask & ~1;
-}
-
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index a5b4fe2ad931..53248ed04771 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -6,7 +6,6 @@
#include <linux/stddef.h>
#include <linux/module.h>
#include <linux/memblock.h>
-#include <linux/highmem.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/slab.h>
@@ -51,8 +50,6 @@ EXPORT_SYMBOL(empty_zero_page);
pgd_t swapper_pg_dir[PTRS_PER_PGD];
/* Initialized at boot time, and readonly after that */
-unsigned long long highmem;
-EXPORT_SYMBOL(highmem);
int kmalloc_ok = 0;
/* Used during early boot */
@@ -98,7 +95,7 @@ static void __init one_page_table_init(pmd_t *pmd)
static void __init one_md_table_init(pud_t *pud)
{
-#ifdef CONFIG_3_LEVEL_PGTABLES
+#if CONFIG_PGTABLE_LEVELS > 2
pmd_t *pmd_table = (pmd_t *) memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
if (!pmd_table)
panic("%s: Failed to allocate %lu bytes align=%lx\n",
@@ -109,6 +106,19 @@ static void __init one_md_table_init(pud_t *pud)
#endif
}
+static void __init one_ud_table_init(p4d_t *p4d)
+{
+#if CONFIG_PGTABLE_LEVELS > 3
+ pud_t *pud_table = (pud_t *) memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
+ if (!pud_table)
+ panic("%s: Failed to allocate %lu bytes align=%lx\n",
+ __func__, PAGE_SIZE, PAGE_SIZE);
+
+ set_p4d(p4d, __p4d(_KERNPG_TABLE + (unsigned long) __pa(pud_table)));
+ BUG_ON(pud_table != pud_offset(p4d, 0));
+#endif
+}
+
static void __init fixrange_init(unsigned long start, unsigned long end,
pgd_t *pgd_base)
{
@@ -126,6 +136,8 @@ static void __init fixrange_init(unsigned long start, unsigned long end,
for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) {
p4d = p4d_offset(pgd, vaddr);
+ if (p4d_none(*p4d))
+ one_ud_table_init(p4d);
pud = pud_offset(p4d, vaddr);
if (pud_none(*pud))
one_md_table_init(pud);
diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c
index fb2adfb49945..a74f17b033c4 100644
--- a/arch/um/kernel/physmem.c
+++ b/arch/um/kernel/physmem.c
@@ -22,19 +22,14 @@ static int physmem_fd = -1;
unsigned long high_physmem;
EXPORT_SYMBOL(high_physmem);
-extern unsigned long long physmem_size;
-
-void __init mem_total_pages(unsigned long physmem, unsigned long iomem,
- unsigned long highmem)
+void __init mem_total_pages(unsigned long physmem, unsigned long iomem)
{
- unsigned long phys_pages, highmem_pages;
- unsigned long iomem_pages, total_pages;
+ unsigned long phys_pages, iomem_pages, total_pages;
- phys_pages = physmem >> PAGE_SHIFT;
- iomem_pages = iomem >> PAGE_SHIFT;
- highmem_pages = highmem >> PAGE_SHIFT;
+ phys_pages = physmem >> PAGE_SHIFT;
+ iomem_pages = iomem >> PAGE_SHIFT;
- total_pages = phys_pages + iomem_pages + highmem_pages;
+ total_pages = phys_pages + iomem_pages;
max_mapnr = total_pages;
}
@@ -64,13 +59,12 @@ void map_memory(unsigned long virt, unsigned long phys, unsigned long len,
* @reserve_end: end address of the physical kernel memory.
* @len: Length of total physical memory that should be mapped/made
* available, in bytes.
- * @highmem: Number of highmem bytes that should be mapped/made available.
*
- * Creates an unlinked temporary file of size (len + highmem) and memory maps
+ * Creates an unlinked temporary file of size (len) and memory maps
* it on the last executable image address (uml_reserved).
*
* The offset is needed as the length of the total physical memory
- * (len + highmem) includes the size of the memory used be the executable image,
+ * (len) includes the size of the memory used by the executable image,
* but the mapped-to address is the last address of the executable image
* (uml_reserved == end address of executable image).
*
@@ -78,24 +72,24 @@ void map_memory(unsigned long virt, unsigned long phys, unsigned long len,
* of all user space processes/kernel tasks.
*/
void __init setup_physmem(unsigned long start, unsigned long reserve_end,
- unsigned long len, unsigned long long highmem)
+ unsigned long len)
{
unsigned long reserve = reserve_end - start;
- long map_size = len - reserve;
+ unsigned long map_size = len - reserve;
int err;
- if(map_size <= 0) {
+ if (len <= reserve) {
os_warn("Too few physical memory! Needed=%lu, given=%lu\n",
reserve, len);
exit(1);
}
- physmem_fd = create_mem_file(len + highmem);
+ physmem_fd = create_mem_file(len);
err = os_map_memory((void *) reserve_end, physmem_fd, reserve,
map_size, 1, 1, 1);
if (err < 0) {
- os_warn("setup_physmem - mapping %ld bytes of memory at 0x%p "
+ os_warn("setup_physmem - mapping %lu bytes of memory at 0x%p "
"failed - errno = %d\n", map_size,
(void *) reserve_end, err);
exit(1);
@@ -107,9 +101,8 @@ void __init setup_physmem(unsigned long start, unsigned long reserve_end,
*/
os_seek_file(physmem_fd, __pa(__syscall_stub_start));
os_write_file(physmem_fd, __syscall_stub_start, PAGE_SIZE);
- os_fsync_file(physmem_fd);
- memblock_add(__pa(start), len + highmem);
+ memblock_add(__pa(start), len);
memblock_reserve(__pa(start), reserve);
min_low_pfn = PFN_UP(__pa(reserve_end));
@@ -137,10 +130,6 @@ int phys_mapping(unsigned long phys, unsigned long long *offset_out)
region = region->next;
}
}
- else if (phys < __pa(end_iomem) + highmem) {
- fd = physmem_fd;
- *offset_out = phys - iomem_size;
- }
return fd;
}
@@ -149,6 +138,8 @@ EXPORT_SYMBOL(phys_mapping);
static int __init uml_mem_setup(char *line, int *add)
{
char *retptr;
+
+ *add = 0;
physmem_size = memparse(line,&retptr);
return 0;
}
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index be2856af6d4c..30bdc0a87dc8 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -43,7 +43,8 @@
* cares about its entry, so it's OK if another processor is modifying its
* entry.
*/
-struct cpu_task cpu_tasks[NR_CPUS] = { [0 ... NR_CPUS - 1] = { NULL } };
+struct task_struct *cpu_tasks[NR_CPUS];
+EXPORT_SYMBOL(cpu_tasks);
void free_stack(unsigned long stack, int order)
{
@@ -64,7 +65,7 @@ unsigned long alloc_stack(int order, int atomic)
static inline void set_current(struct task_struct *task)
{
- cpu_tasks[task_thread_info(task)->cpu] = ((struct cpu_task) { task });
+ cpu_tasks[task_thread_info(task)->cpu] = task;
}
struct task_struct *__switch_to(struct task_struct *from, struct task_struct *to)
@@ -116,7 +117,7 @@ void new_thread_handler(void)
* callback returns only if the kernel thread execs a process
*/
fn(arg);
- userspace(&current->thread.regs.regs, current_thread_info()->aux_fp_regs);
+ userspace(&current->thread.regs.regs);
}
/* Called magically, see new_thread_handler above */
@@ -133,7 +134,7 @@ static void fork_handler(void)
current->thread.prev_sched = NULL;
- userspace(&current->thread.regs.regs, current_thread_info()->aux_fp_regs);
+ userspace(&current->thread.regs.regs);
}
int copy_thread(struct task_struct * p, const struct kernel_clone_args *args)
@@ -187,6 +188,13 @@ void initial_thread_cb(void (*proc)(void *), void *arg)
kmalloc_ok = save_kmalloc_ok;
}
+int arch_dup_task_struct(struct task_struct *dst,
+ struct task_struct *src)
+{
+ memcpy(dst, src, arch_task_struct_size);
+ return 0;
+}
+
void um_idle_sleep(void)
{
if (time_travel_mode != TT_MODE_OFF)
@@ -287,11 +295,3 @@ unsigned long __get_wchan(struct task_struct *p)
return 0;
}
-
-int elf_core_copy_task_fpregs(struct task_struct *t, elf_fpregset_t *fpu)
-{
- int cpu = current_thread_info()->cpu;
-
- return save_i387_registers(userspace_pid[cpu], (unsigned long *) fpu);
-}
-
diff --git a/arch/um/kernel/skas/.gitignore b/arch/um/kernel/skas/.gitignore
new file mode 100644
index 000000000000..c3409ced0f38
--- /dev/null
+++ b/arch/um/kernel/skas/.gitignore
@@ -0,0 +1,2 @@
+stub_exe
+stub_exe.dbg
diff --git a/arch/um/kernel/skas/Makefile b/arch/um/kernel/skas/Makefile
index 6f86d53e3d69..3384be42691f 100644
--- a/arch/um/kernel/skas/Makefile
+++ b/arch/um/kernel/skas/Makefile
@@ -3,14 +3,48 @@
# Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
#
-obj-y := stub.o mmu.o process.o syscall.o uaccess.o
+obj-y := stub.o mmu.o process.o syscall.o uaccess.o \
+ stub_exe_embed.o
+
+# Stub executable
+
+stub_exe_objs-y := stub_exe.o
+
+stub_exe_objs := $(foreach F,$(stub_exe_objs-y),$(obj)/$F)
+
+# Object file containing the ELF executable
+$(obj)/stub_exe_embed.o: $(src)/stub_exe_embed.S $(obj)/stub_exe
+
+$(obj)/stub_exe.dbg: $(stub_exe_objs) FORCE
+ $(call if_changed,stub_exe)
+
+$(obj)/stub_exe: OBJCOPYFLAGS := -S
+$(obj)/stub_exe: $(obj)/stub_exe.dbg FORCE
+ $(call if_changed,objcopy)
+
+quiet_cmd_stub_exe = STUB_EXE $@
+ cmd_stub_exe = $(CC) -nostdlib -o $@ \
+ $(filter-out $(UM_GPROF_OPT) $(UM_GCOV_OPT),$(KBUILD_CFLAGS)) $(STUB_EXE_LDFLAGS) \
+ $(filter %.o,$^)
+
+STUB_EXE_LDFLAGS = -Wl,-n -static
+
+targets += stub_exe.dbg stub_exe $(stub_exe_objs-y)
+
+# end
# stub.o is in the stub, so it can't be built with profiling
# GCC hardened also auto-enables -fpic, but we need %ebx so it can't work ->
# disable it
CFLAGS_stub.o := $(CFLAGS_NO_HARDENING)
-UNPROFILE_OBJS := stub.o
+CFLAGS_stub_exe.o := $(CFLAGS_NO_HARDENING)
+
+# Clang will call memset() from __builtin_alloca() when stack variable
+# initialization is enabled, which is used in stub_exe.c.
+CFLAGS_stub_exe.o += $(call cc-option, -ftrivial-auto-var-init=uninitialized)
+
+UNPROFILE_OBJS := stub.o stub_exe.o
KCOV_INSTRUMENT := n
include $(srctree)/arch/um/scripts/Makefile.rules
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index 886ed5e65674..0eb5a1d3ba70 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -40,35 +40,13 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm)
goto out_free;
}
- /*
- * Ensure the new MM is clean and nothing unwanted is mapped.
- *
- * TODO: We should clear the memory up to STUB_START to ensure there is
- * nothing mapped there, i.e. we (currently) have:
- *
- * |- user memory -|- unused -|- stub -|- unused -|
- * ^ TASK_SIZE ^ STUB_START
- *
- * Meaning we have two unused areas where we may still have valid
- * mappings from our internal clone(). That isn't really a problem as
- * userspace is not going to access them, but it is definitely not
- * correct.
- *
- * However, we are "lucky" and if rseq is configured, then on 32 bit
- * it will fall into the first empty range while on 64 bit it is going
- * to use an anonymous mapping in the second range. As such, things
- * continue to work for now as long as we don't start unmapping these
- * areas.
- *
- * Change this to STUB_START once we have a clean userspace.
- */
- unmap(new_id, 0, TASK_SIZE);
+ /* Ensure the new MM is clean and nothing unwanted is mapped */
+ unmap(new_id, 0, STUB_START);
return 0;
out_free:
- if (new_id->stack != 0)
- free_pages(new_id->stack, ilog2(STUB_DATA_PAGES));
+ free_pages(new_id->stack, ilog2(STUB_DATA_PAGES));
out:
return ret;
}
diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c
index 68657988c8d1..05dcdc057af9 100644
--- a/arch/um/kernel/skas/process.c
+++ b/arch/um/kernel/skas/process.c
@@ -22,15 +22,13 @@ static int __init start_kernel_proc(void *unused)
{
block_signals_trace();
- cpu_tasks[0].task = current;
-
start_kernel();
return 0;
}
extern int userspace_pid[];
-extern char cpu0_irqstack[];
+static char cpu0_irqstack[THREAD_SIZE] __aligned(THREAD_SIZE);
int __init start_uml(void)
{
diff --git a/arch/um/kernel/skas/stub.c b/arch/um/kernel/skas/stub.c
index 5d52ffa682dc..796fc266d3bb 100644
--- a/arch/um/kernel/skas/stub.c
+++ b/arch/um/kernel/skas/stub.c
@@ -35,16 +35,6 @@ static __always_inline int syscall_handler(struct stub_data *d)
return -1;
}
break;
- case STUB_SYSCALL_MPROTECT:
- res = stub_syscall3(__NR_mprotect,
- sc->mem.addr, sc->mem.length,
- sc->mem.prot);
- if (res) {
- d->err = res;
- d->syscall_data_len = i;
- return -1;
- }
- break;
default:
d->err = -95; /* EOPNOTSUPP */
d->syscall_data_len = i;
diff --git a/arch/um/kernel/skas/stub_exe.c b/arch/um/kernel/skas/stub_exe.c
new file mode 100644
index 000000000000..23c99b285e82
--- /dev/null
+++ b/arch/um/kernel/skas/stub_exe.c
@@ -0,0 +1,95 @@
+#include <sys/ptrace.h>
+#include <sys/prctl.h>
+#include <asm/unistd.h>
+#include <sysdep/stub.h>
+#include <stub-data.h>
+
+void _start(void);
+
+noinline static void real_init(void)
+{
+ struct stub_init_data init_data;
+ unsigned long res;
+ struct {
+ void *ss_sp;
+ int ss_flags;
+ size_t ss_size;
+ } stack = {
+ .ss_size = STUB_DATA_PAGES * UM_KERN_PAGE_SIZE,
+ };
+ struct {
+ void *sa_handler_;
+ unsigned long sa_flags;
+ void *sa_restorer;
+ unsigned long long sa_mask;
+ } sa = {
+ /* Need to set SA_RESTORER (but the handler never returns) */
+ .sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO | 0x04000000,
+ /* no need to mask any signals */
+ .sa_mask = 0,
+ };
+
+ /* set a nice name */
+ stub_syscall2(__NR_prctl, PR_SET_NAME, (unsigned long)"uml-userspace");
+
+ /* Make sure this process dies if the kernel dies */
+ stub_syscall2(__NR_prctl, PR_SET_PDEATHSIG, SIGKILL);
+
+ /* read information from STDIN and close it */
+ res = stub_syscall3(__NR_read, 0,
+ (unsigned long)&init_data, sizeof(init_data));
+ if (res != sizeof(init_data))
+ stub_syscall1(__NR_exit, 10);
+
+ stub_syscall1(__NR_close, 0);
+
+ /* map stub code + data */
+ res = stub_syscall6(STUB_MMAP_NR,
+ init_data.stub_start, UM_KERN_PAGE_SIZE,
+ PROT_READ | PROT_EXEC, MAP_FIXED | MAP_SHARED,
+ init_data.stub_code_fd, init_data.stub_code_offset);
+ if (res != init_data.stub_start)
+ stub_syscall1(__NR_exit, 11);
+
+ res = stub_syscall6(STUB_MMAP_NR,
+ init_data.stub_start + UM_KERN_PAGE_SIZE,
+ STUB_DATA_PAGES * UM_KERN_PAGE_SIZE,
+ PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED,
+ init_data.stub_data_fd, init_data.stub_data_offset);
+ if (res != init_data.stub_start + UM_KERN_PAGE_SIZE)
+ stub_syscall1(__NR_exit, 12);
+
+ /* setup signal stack inside stub data */
+ stack.ss_sp = (void *)init_data.stub_start + UM_KERN_PAGE_SIZE;
+ stub_syscall2(__NR_sigaltstack, (unsigned long)&stack, 0);
+
+ /* register SIGSEGV handler */
+ sa.sa_handler_ = (void *) init_data.segv_handler;
+ res = stub_syscall4(__NR_rt_sigaction, SIGSEGV, (unsigned long)&sa, 0,
+ sizeof(sa.sa_mask));
+ if (res != 0)
+ stub_syscall1(__NR_exit, 13);
+
+ stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0);
+
+ stub_syscall2(__NR_kill, stub_syscall0(__NR_getpid), SIGSTOP);
+
+ stub_syscall1(__NR_exit, 14);
+
+ __builtin_unreachable();
+}
+
+__attribute__((naked)) void _start(void)
+{
+ /*
+ * Since the stack after exec() starts at the top-most address,
+ * but that's exactly where we also want to map the stub data
+ * and code, this must:
+ * - push the stack by 1 code and STUB_DATA_PAGES data pages
+ * - call real_init()
+ * This way, real_init() can use the stack normally, while the
+ * original stack further down (higher address) will become
+ * inaccessible after the mmap() calls above.
+ */
+ stub_start(real_init);
+}
diff --git a/arch/um/kernel/skas/stub_exe_embed.S b/arch/um/kernel/skas/stub_exe_embed.S
new file mode 100644
index 000000000000..6d8914fbe8f1
--- /dev/null
+++ b/arch/um/kernel/skas/stub_exe_embed.S
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/init.h>
+#include <linux/linkage.h>
+
+__INITDATA
+
+SYM_DATA_START(stub_exe_start)
+ .incbin "arch/um/kernel/skas/stub_exe"
+SYM_DATA_END_LABEL(stub_exe_start, SYM_L_GLOBAL, stub_exe_end)
+
+__FINIT
diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c
index 4bb8622dc512..13ee5666668d 100644
--- a/arch/um/kernel/sysrq.c
+++ b/arch/um/kernel/sysrq.c
@@ -32,12 +32,6 @@ void show_stack(struct task_struct *task, unsigned long *stack,
struct pt_regs *segv_regs = current->thread.segv_regs;
int i;
- if (!segv_regs && os_is_signal_stack()) {
- pr_err("Received SIGSEGV in SIGSEGV handler,"
- " aborting stack trace!\n");
- return;
- }
-
if (!stack)
stack = get_stack_pointer(task, segv_regs);
@@ -52,5 +46,5 @@ void show_stack(struct task_struct *task, unsigned long *stack,
}
printk("%sCall Trace:\n", loglvl);
- dump_trace(current, &stackops, (void *)loglvl);
+ dump_trace(task ?: current, &stackops, (void *)loglvl);
}
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index 29b27b90581f..1394568c0210 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -25,6 +25,8 @@
#include <shared/init.h>
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
+#include <linux/sched/clock.h>
+
enum time_travel_mode time_travel_mode;
EXPORT_SYMBOL_GPL(time_travel_mode);
@@ -47,6 +49,15 @@ static u16 time_travel_shm_id;
static struct um_timetravel_schedshm *time_travel_shm;
static union um_timetravel_schedshm_client *time_travel_shm_client;
+unsigned long tt_extra_sched_jiffies;
+
+notrace unsigned long long sched_clock(void)
+{
+ return (unsigned long long)(jiffies - INITIAL_JIFFIES +
+ tt_extra_sched_jiffies)
+ * (NSEC_PER_SEC / HZ);
+}
+
static void time_travel_set_time(unsigned long long ns)
{
if (unlikely(ns < time_travel_time))
@@ -443,6 +454,11 @@ static void time_travel_periodic_timer(struct time_travel_event *e)
{
time_travel_add_event(&time_travel_timer_event,
time_travel_time + time_travel_timer_interval);
+
+ /* clock tick; decrease extra jiffies to keep sched_clock constant */
+ if (tt_extra_sched_jiffies > 0)
+ tt_extra_sched_jiffies -= 1;
+
deliver_alarm();
}
@@ -594,6 +610,10 @@ EXPORT_SYMBOL_GPL(time_travel_add_irq_event);
static void time_travel_oneshot_timer(struct time_travel_event *e)
{
+ /* clock tick; decrease extra jiffies to keep sched_clock constant */
+ if (tt_extra_sched_jiffies > 0)
+ tt_extra_sched_jiffies -= 1;
+
deliver_alarm();
}
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index 548af31d4111..cf7e0d4407f2 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -23,9 +23,6 @@ struct vm_ops {
int phys_fd, unsigned long long offset);
int (*unmap)(struct mm_id *mm_idp,
unsigned long virt, unsigned long len);
- int (*mprotect)(struct mm_id *mm_idp,
- unsigned long virt, unsigned long len,
- unsigned int prot);
};
static int kern_map(struct mm_id *mm_idp,
@@ -44,15 +41,6 @@ static int kern_unmap(struct mm_id *mm_idp,
return os_unmap_memory((void *)virt, len);
}
-static int kern_mprotect(struct mm_id *mm_idp,
- unsigned long virt, unsigned long len,
- unsigned int prot)
-{
- return os_protect_memory((void *)virt, len,
- prot & UM_PROT_READ, prot & UM_PROT_WRITE,
- 1);
-}
-
void report_enomem(void)
{
printk(KERN_ERR "UML ran out of memory on the host side! "
@@ -65,33 +53,37 @@ static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
struct vm_ops *ops)
{
pte_t *pte;
- int r, w, x, prot, ret = 0;
+ int ret = 0;
pte = pte_offset_kernel(pmd, addr);
do {
- r = pte_read(*pte);
- w = pte_write(*pte);
- x = pte_exec(*pte);
- if (!pte_young(*pte)) {
- r = 0;
- w = 0;
- } else if (!pte_dirty(*pte))
- w = 0;
-
- prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
- (x ? UM_PROT_EXEC : 0));
- if (pte_newpage(*pte)) {
- if (pte_present(*pte)) {
- __u64 offset;
- unsigned long phys = pte_val(*pte) & PAGE_MASK;
- int fd = phys_mapping(phys, &offset);
-
- ret = ops->mmap(ops->mm_idp, addr, PAGE_SIZE,
- prot, fd, offset);
- } else
- ret = ops->unmap(ops->mm_idp, addr, PAGE_SIZE);
- } else if (pte_newprot(*pte))
- ret = ops->mprotect(ops->mm_idp, addr, PAGE_SIZE, prot);
+ if (!pte_needsync(*pte))
+ continue;
+
+ if (pte_present(*pte)) {
+ __u64 offset;
+ unsigned long phys = pte_val(*pte) & PAGE_MASK;
+ int fd = phys_mapping(phys, &offset);
+ int r, w, x, prot;
+
+ r = pte_read(*pte);
+ w = pte_write(*pte);
+ x = pte_exec(*pte);
+ if (!pte_young(*pte)) {
+ r = 0;
+ w = 0;
+ } else if (!pte_dirty(*pte))
+ w = 0;
+
+ prot = (r ? UM_PROT_READ : 0) |
+ (w ? UM_PROT_WRITE : 0) |
+ (x ? UM_PROT_EXEC : 0);
+
+ ret = ops->mmap(ops->mm_idp, addr, PAGE_SIZE,
+ prot, fd, offset);
+ } else
+ ret = ops->unmap(ops->mm_idp, addr, PAGE_SIZE);
+
*pte = pte_mkuptodate(*pte);
} while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret));
return ret;
@@ -109,7 +101,7 @@ static inline int update_pmd_range(pud_t *pud, unsigned long addr,
do {
next = pmd_addr_end(addr, end);
if (!pmd_present(*pmd)) {
- if (pmd_newpage(*pmd)) {
+ if (pmd_needsync(*pmd)) {
ret = ops->unmap(ops->mm_idp, addr,
next - addr);
pmd_mkuptodate(*pmd);
@@ -132,7 +124,7 @@ static inline int update_pud_range(p4d_t *p4d, unsigned long addr,
do {
next = pud_addr_end(addr, end);
if (!pud_present(*pud)) {
- if (pud_newpage(*pud)) {
+ if (pud_needsync(*pud)) {
ret = ops->unmap(ops->mm_idp, addr,
next - addr);
pud_mkuptodate(*pud);
@@ -155,7 +147,7 @@ static inline int update_p4d_range(pgd_t *pgd, unsigned long addr,
do {
next = p4d_addr_end(addr, end);
if (!p4d_present(*p4d)) {
- if (p4d_newpage(*p4d)) {
+ if (p4d_needsync(*p4d)) {
ret = ops->unmap(ops->mm_idp, addr,
next - addr);
p4d_mkuptodate(*p4d);
@@ -180,18 +172,16 @@ int um_tlb_sync(struct mm_struct *mm)
if (mm == &init_mm) {
ops.mmap = kern_map;
ops.unmap = kern_unmap;
- ops.mprotect = kern_mprotect;
} else {
ops.mmap = map;
ops.unmap = unmap;
- ops.mprotect = protect;
}
pgd = pgd_offset(mm, addr);
do {
next = pgd_addr_end(addr, mm->context.sync_tlb_range_to);
if (!pgd_present(*pgd)) {
- if (pgd_newpage(*pgd)) {
+ if (pgd_needsync(*pgd)) {
ret = ops.unmap(ops.mm_idp, addr,
next - addr);
pgd_mkuptodate(*pgd);
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 97c8df9c4401..cdaee3e94273 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -201,7 +201,6 @@ void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
struct uml_pt_regs *regs)
{
- jmp_buf *catcher;
int si_code;
int err;
int is_write = FAULT_WRITE(fi);
@@ -246,15 +245,8 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
address = 0;
}
- catcher = current->thread.fault_catcher;
if (!err)
goto out;
- else if (catcher != NULL) {
- current->thread.fault_addr = (void *) address;
- UML_LONGJMP(catcher, 1);
- }
- else if (current->thread.fault_addr != NULL)
- panic("fault_addr set but no fault catcher");
else if (!is_user && arch_fixup(ip, regs))
goto out;
@@ -310,14 +302,6 @@ void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs)
}
}
-void bus_handler(int sig, struct siginfo *si, struct uml_pt_regs *regs)
-{
- if (current->thread.fault_catcher != NULL)
- UML_LONGJMP(current->thread.fault_catcher, 1);
- else
- relay_signal(sig, si, regs);
-}
-
void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
{
do_IRQ(WINCH_IRQ, regs);
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index e8e8b54b3037..8037a967225d 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -65,9 +65,6 @@ struct cpuinfo_um boot_cpu_data = {
EXPORT_SYMBOL(boot_cpu_data);
-union thread_union cpu0_irqstack
- __section(".data..init_irqstack") =
- { .thread_info = INIT_THREAD_INFO(init_task) };
/* Changed in setup_arch, which is called in early boot */
static char host_info[(__NEW_UTS_LEN + 1) * 5];
@@ -131,7 +128,7 @@ static int have_root __initdata;
static int have_console __initdata;
/* Set in uml_mem_setup and modified in linux_main */
-long long physmem_size = 64 * 1024 * 1024;
+unsigned long long physmem_size = 64 * 1024 * 1024;
EXPORT_SYMBOL(physmem_size);
static const char *usage_string =
@@ -167,19 +164,6 @@ __uml_setup("root=", uml_root_setup,
" root=/dev/ubd5\n\n"
);
-static int __init no_skas_debug_setup(char *line, int *add)
-{
- os_warn("'debug' is not necessary to gdb UML in skas mode - run\n");
- os_warn("'gdb linux'\n");
-
- return 0;
-}
-
-__uml_setup("debug", no_skas_debug_setup,
-"debug\n"
-" this flag is not needed to run gdb on UML in skas mode\n\n"
-);
-
static int __init uml_console_setup(char *line, int *add)
{
have_console = 1;
@@ -257,6 +241,8 @@ static struct notifier_block panic_exit_notifier = {
void uml_finishsetup(void)
{
+ cpu_tasks[0] = &init_task;
+
atomic_notifier_chain_register(&panic_notifier_list,
&panic_exit_notifier);
@@ -302,7 +288,24 @@ static void parse_cache_line(char *line)
}
}
-int __init linux_main(int argc, char **argv)
+static unsigned long get_top_address(char **envp)
+{
+ unsigned long top_addr = (unsigned long) &top_addr;
+ int i;
+
+ /* The earliest variable should be after the program name in ELF */
+ for (i = 0; envp[i]; i++) {
+ if ((unsigned long) envp[i] > top_addr)
+ top_addr = (unsigned long) envp[i];
+ }
+
+ top_addr &= ~(UM_KERN_PAGE_SIZE - 1);
+ top_addr += UM_KERN_PAGE_SIZE;
+
+ return top_addr;
+}
+
+int __init linux_main(int argc, char **argv, char **envp)
{
unsigned long avail, diff;
unsigned long virtmem_size, max_physmem;
@@ -324,20 +327,23 @@ int __init linux_main(int argc, char **argv)
if (have_console == 0)
add_arg(DEFAULT_COMMAND_LINE_CONSOLE);
- host_task_size = os_get_top_address();
- /* reserve a few pages for the stubs (taking care of data alignment) */
- /* align the data portion */
- BUILD_BUG_ON(!is_power_of_2(STUB_DATA_PAGES));
- stub_start = (host_task_size - 1) & ~(STUB_DATA_PAGES * PAGE_SIZE - 1);
+ host_task_size = get_top_address(envp);
+ /* reserve a few pages for the stubs */
+ stub_start = host_task_size - STUB_DATA_PAGES * PAGE_SIZE;
/* another page for the code portion */
stub_start -= PAGE_SIZE;
host_task_size = stub_start;
+ /* Limit TASK_SIZE to what is addressable by the page table */
+ task_size = host_task_size;
+ if (task_size > (unsigned long long) PTRS_PER_PGD * PGDIR_SIZE)
+ task_size = PTRS_PER_PGD * PGDIR_SIZE;
+
/*
* TASK_SIZE needs to be PGDIR_SIZE aligned or else exit_mmap craps
* out
*/
- task_size = host_task_size & PGDIR_MASK;
+ task_size = task_size & PGDIR_MASK;
/* OS sanity checks that need to happen before the kernel runs */
os_early_checks();
@@ -366,18 +372,15 @@ int __init linux_main(int argc, char **argv)
setup_machinename(init_utsname()->machine);
- highmem = 0;
+ physmem_size = (physmem_size + PAGE_SIZE - 1) & PAGE_MASK;
iomem_size = (iomem_size + PAGE_SIZE - 1) & PAGE_MASK;
+
max_physmem = TASK_SIZE - uml_physmem - iomem_size - MIN_VMALLOC;
- /*
- * Zones have to begin on a 1 << MAX_PAGE_ORDER page boundary,
- * so this makes sure that's true for highmem
- */
- max_physmem &= ~((1 << (PAGE_SHIFT + MAX_PAGE_ORDER)) - 1);
if (physmem_size + iomem_size > max_physmem) {
- highmem = physmem_size + iomem_size - max_physmem;
- physmem_size -= highmem;
+ physmem_size = max_physmem - iomem_size;
+ os_info("Physical memory size shrunk to %llu bytes\n",
+ physmem_size);
}
high_physmem = uml_physmem + physmem_size;
@@ -398,6 +401,8 @@ int __init linux_main(int argc, char **argv)
os_info("Kernel virtual memory size shrunk to %lu bytes\n",
virtmem_size);
+ arch_task_struct_size = sizeof(struct task_struct) + host_fp_size;
+
os_flush_stdout();
return start_uml();
@@ -412,9 +417,9 @@ void __init setup_arch(char **cmdline_p)
{
u8 rng_seed[32];
- stack_protections((unsigned long) &init_thread_info);
- setup_physmem(uml_physmem, uml_reserved, physmem_size, highmem);
- mem_total_pages(physmem_size, iomem_size, highmem);
+ stack_protections((unsigned long) init_task.stack);
+ setup_physmem(uml_physmem, uml_reserved, physmem_size);
+ mem_total_pages(physmem_size, iomem_size);
uml_dtb_init();
read_initrd();
diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S
index 5c92d58a78e8..a409d4b66114 100644
--- a/arch/um/kernel/uml.lds.S
+++ b/arch/um/kernel/uml.lds.S
@@ -77,8 +77,6 @@ SECTIONS
.data :
{
INIT_TASK_DATA(KERNEL_STACK_SIZE)
- . = ALIGN(KERNEL_STACK_SIZE);
- *(.data..init_irqstack)
DATA_DATA
*(.gnu.linkonce.d*)
CONSTRUCTORS
diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile
index 544e0b344c75..049dfa5bc9c6 100644
--- a/arch/um/os-Linux/Makefile
+++ b/arch/um/os-Linux/Makefile
@@ -12,6 +12,8 @@ obj-y = execvp.o file.o helper.o irq.o main.o mem.o process.o \
CFLAGS_signal.o += -Wframe-larger-than=4096
+CFLAGS_main.o += -Wno-frame-larger-than
+
obj-$(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA) += elf_aux.o
USER_OBJS := $(user-objs-y) elf_aux.o execvp.o file.o helper.o irq.o \
diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index f1d03cf3957f..a0d01c68ce3e 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -255,12 +255,6 @@ void os_close_file(int fd)
{
close(fd);
}
-int os_fsync_file(int fd)
-{
- if (fsync(fd) < 0)
- return -errno;
- return 0;
-}
int os_seek_file(int fd, unsigned long long offset)
{
diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c
index f98ff79cdbf7..0afcdeb8995b 100644
--- a/arch/um/os-Linux/main.c
+++ b/arch/um/os-Linux/main.c
@@ -11,6 +11,7 @@
#include <signal.h>
#include <string.h>
#include <sys/resource.h>
+#include <sys/personality.h>
#include <as-layout.h>
#include <init.h>
#include <kern_util.h>
@@ -108,6 +109,21 @@ int __init main(int argc, char **argv, char **envp)
char **new_argv;
int ret, i, err;
+ /* Disable randomization and re-exec if it was changed successfully */
+ ret = personality(PER_LINUX | ADDR_NO_RANDOMIZE);
+ if (ret >= 0 && (ret & (PER_LINUX | ADDR_NO_RANDOMIZE)) !=
+ (PER_LINUX | ADDR_NO_RANDOMIZE)) {
+ char buf[4096] = {};
+ ssize_t ret;
+
+ ret = readlink("/proc/self/exe", buf, sizeof(buf));
+ if (ret < 0 || ret >= sizeof(buf)) {
+ perror("readlink failure");
+ exit(1);
+ }
+ execve(buf, argv, envp);
+ }
+
set_stklim();
setup_env_path();
@@ -140,7 +156,7 @@ int __init main(int argc, char **argv, char **envp)
#endif
change_sig(SIGPIPE, 0);
- ret = linux_main(argc, argv);
+ ret = linux_main(argc, argv, envp);
/*
* Disable SIGPROF - I have no idea why libc doesn't do this or turn
@@ -182,6 +198,7 @@ int __init main(int argc, char **argv, char **envp)
}
extern void *__real_malloc(int);
+extern void __real_free(void *);
/* workaround for -Wmissing-prototypes warnings */
void *__wrap_malloc(int size);
@@ -219,10 +236,6 @@ void *__wrap_calloc(int n, int size)
return ptr;
}
-extern void __real_free(void *);
-
-extern unsigned long high_physmem;
-
void __wrap_free(void *ptr)
{
unsigned long addr = (unsigned long) ptr;
diff --git a/arch/um/os-Linux/mem.c b/arch/um/os-Linux/mem.c
index cf44d386f23c..72f302f4d197 100644
--- a/arch/um/os-Linux/mem.c
+++ b/arch/um/os-Linux/mem.c
@@ -39,10 +39,22 @@ void kasan_map_memory(void *start, size_t len)
strerror(errno));
exit(1);
}
+
+ if (madvise(start, len, MADV_DONTDUMP)) {
+ os_info("Couldn't set MAD_DONTDUMP on shadow memory: %s\n.",
+ strerror(errno));
+ exit(1);
+ }
+
+ if (madvise(start, len, MADV_DONTFORK)) {
+ os_info("Couldn't set MADV_DONTFORK on shadow memory: %s\n.",
+ strerror(errno));
+ exit(1);
+ }
}
/* Set by make_tempfile() during early boot. */
-static char *tempdir = NULL;
+char *tempdir = NULL;
/* Check if dir is on tmpfs. Return 0 if yes, -1 if no or error. */
static int __init check_tmpfs(const char *dir)
diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c
index e52dd37ddadc..9f086f939420 100644
--- a/arch/um/os-Linux/process.c
+++ b/arch/um/os-Linux/process.c
@@ -12,94 +12,18 @@
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/ptrace.h>
+#include <sys/prctl.h>
#include <sys/wait.h>
#include <asm/unistd.h>
#include <init.h>
#include <longjmp.h>
#include <os.h>
-#define ARBITRARY_ADDR -1
-#define FAILURE_PID -1
-
-#define STAT_PATH_LEN sizeof("/proc/#######/stat\0")
-#define COMM_SCANF "%*[^)])"
-
-unsigned long os_process_pc(int pid)
-{
- char proc_stat[STAT_PATH_LEN], buf[256];
- unsigned long pc = ARBITRARY_ADDR;
- int fd, err;
-
- sprintf(proc_stat, "/proc/%d/stat", pid);
- fd = open(proc_stat, O_RDONLY, 0);
- if (fd < 0) {
- printk(UM_KERN_ERR "os_process_pc - couldn't open '%s', "
- "errno = %d\n", proc_stat, errno);
- goto out;
- }
- CATCH_EINTR(err = read(fd, buf, sizeof(buf)));
- if (err < 0) {
- printk(UM_KERN_ERR "os_process_pc - couldn't read '%s', "
- "err = %d\n", proc_stat, errno);
- goto out_close;
- }
- os_close_file(fd);
- pc = ARBITRARY_ADDR;
- if (sscanf(buf, "%*d " COMM_SCANF " %*c %*d %*d %*d %*d %*d %*d %*d "
- "%*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d "
- "%*d %*d %*d %*d %*d %lu", &pc) != 1)
- printk(UM_KERN_ERR "os_process_pc - couldn't find pc in '%s'\n",
- buf);
- out_close:
- close(fd);
- out:
- return pc;
-}
-
-int os_process_parent(int pid)
-{
- char stat[STAT_PATH_LEN];
- char data[256];
- int parent = FAILURE_PID, n, fd;
-
- if (pid == -1)
- return parent;
-
- snprintf(stat, sizeof(stat), "/proc/%d/stat", pid);
- fd = open(stat, O_RDONLY, 0);
- if (fd < 0) {
- printk(UM_KERN_ERR "Couldn't open '%s', errno = %d\n", stat,
- errno);
- return parent;
- }
-
- CATCH_EINTR(n = read(fd, data, sizeof(data)));
- close(fd);
-
- if (n < 0) {
- printk(UM_KERN_ERR "Couldn't read '%s', errno = %d\n", stat,
- errno);
- return parent;
- }
-
- parent = FAILURE_PID;
- n = sscanf(data, "%*d " COMM_SCANF " %*c %d", &parent);
- if (n != 1)
- printk(UM_KERN_ERR "Failed to scan '%s'\n", data);
-
- return parent;
-}
-
void os_alarm_process(int pid)
{
kill(pid, SIGALRM);
}
-void os_stop_process(int pid)
-{
- kill(pid, SIGSTOP);
-}
-
void os_kill_process(int pid, int reap_child)
{
kill(pid, SIGKILL);
@@ -130,11 +54,6 @@ int os_getpid(void)
return syscall(__NR_getpid);
}
-int os_getpgrp(void)
-{
- return getpgrp();
-}
-
int os_map_memory(void *virt, int fd, unsigned long long off, unsigned long len,
int r, int w, int x)
{
@@ -285,3 +204,8 @@ void init_new_thread_signals(void)
set_handler(SIGIO);
signal(SIGWINCH, SIG_IGN);
}
+
+void os_set_pdeathsig(void)
+{
+ prctl(PR_SET_PDEATHSIG, SIGKILL);
+}
diff --git a/arch/um/os-Linux/registers.c b/arch/um/os-Linux/registers.c
index bd80b921add0..d7ca148807b2 100644
--- a/arch/um/os-Linux/registers.c
+++ b/arch/um/os-Linux/registers.c
@@ -10,11 +10,12 @@
#include <sysdep/ptrace.h>
#include <sysdep/ptrace_user.h>
#include <registers.h>
+#include <stdlib.h>
/* This is set once at boot time and not changed thereafter */
static unsigned long exec_regs[MAX_REG_NR];
-static unsigned long exec_fp_regs[FP_SIZE];
+static unsigned long *exec_fp_regs;
int init_pid_registers(int pid)
{
@@ -24,7 +25,11 @@ int init_pid_registers(int pid)
if (err < 0)
return -errno;
- arch_init_registers(pid);
+ err = arch_init_registers(pid);
+ if (err < 0)
+ return err;
+
+ exec_fp_regs = malloc(host_fp_size);
get_fp_registers(pid, exec_fp_regs);
return 0;
}
@@ -34,5 +39,5 @@ void get_safe_registers(unsigned long *regs, unsigned long *fp_regs)
memcpy(regs, exec_regs, sizeof(exec_regs));
if (fp_regs)
- memcpy(fp_regs, exec_fp_regs, sizeof(exec_fp_regs));
+ memcpy(fp_regs, exec_fp_regs, host_fp_size);
}
diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c
index 9e71794839e8..9aac8def4d63 100644
--- a/arch/um/os-Linux/sigio.c
+++ b/arch/um/os-Linux/sigio.c
@@ -55,6 +55,7 @@ static int write_sigio_thread(void *unused)
int i, n, respond_fd;
char c;
+ os_set_pdeathsig();
os_fix_helper_signals();
fds = &current_poll;
while (1) {
diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c
index b11ed66c8bb0..9ea7269ffb77 100644
--- a/arch/um/os-Linux/signal.c
+++ b/arch/um/os-Linux/signal.c
@@ -26,7 +26,7 @@ void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = {
[SIGFPE] = relay_signal,
[SIGILL] = relay_signal,
[SIGWINCH] = winch,
- [SIGBUS] = bus_handler,
+ [SIGBUS] = relay_signal,
[SIGSEGV] = segv_handler,
[SIGIO] = sigio_handler,
};
@@ -65,7 +65,7 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
#define SIGALRM_MASK (1 << SIGALRM_BIT)
int signals_enabled;
-#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT
+#if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT)
static int signals_blocked, signals_blocked_pending;
#endif
static unsigned int signals_pending;
@@ -75,7 +75,7 @@ static void sig_handler(int sig, struct siginfo *si, mcontext_t *mc)
{
int enabled = signals_enabled;
-#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT
+#if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT)
if ((signals_blocked ||
__atomic_load_n(&signals_blocked_pending, __ATOMIC_SEQ_CST)) &&
(sig == SIGIO)) {
@@ -190,43 +190,8 @@ static void hard_handler(int sig, siginfo_t *si, void *p)
{
ucontext_t *uc = p;
mcontext_t *mc = &uc->uc_mcontext;
- unsigned long pending = 1UL << sig;
- do {
- int nested, bail;
-
- /*
- * pending comes back with one bit set for each
- * interrupt that arrived while setting up the stack,
- * plus a bit for this interrupt, plus the zero bit is
- * set if this is a nested interrupt.
- * If bail is true, then we interrupted another
- * handler setting up the stack. In this case, we
- * have to return, and the upper handler will deal
- * with this interrupt.
- */
- bail = to_irq_stack(&pending);
- if (bail)
- return;
-
- nested = pending & 1;
- pending &= ~1;
-
- while ((sig = ffs(pending)) != 0){
- sig--;
- pending &= ~(1 << sig);
- (*handlers[sig])(sig, (struct siginfo *)si, mc);
- }
-
- /*
- * Again, pending comes back with a mask of signals
- * that arrived while tearing down the stack. If this
- * is non-zero, we just go back, set up the stack
- * again, and handle the new interrupts.
- */
- if (!nested)
- pending = from_irq_stack(nested);
- } while (pending);
+ (*handlers[sig])(sig, (struct siginfo *)si, mc);
}
void set_handler(int sig)
@@ -297,7 +262,7 @@ void unblock_signals(void)
return;
signals_enabled = 1;
-#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT
+#if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT)
deliver_time_travel_irqs();
#endif
@@ -389,7 +354,7 @@ int um_set_signals_trace(int enable)
return ret;
}
-#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT
+#if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT)
void mark_sigio_pending(void)
{
/*
@@ -487,11 +452,3 @@ void unblock_signals_hard(void)
unblocking = false;
}
#endif
-
-int os_is_signal_stack(void)
-{
- stack_t ss;
- sigaltstack(NULL, &ss);
-
- return ss.ss_flags & SS_ONSTACK;
-}
diff --git a/arch/um/os-Linux/skas/mem.c b/arch/um/os-Linux/skas/mem.c
index 9a13ac23c606..d7f1814b0e5a 100644
--- a/arch/um/os-Linux/skas/mem.c
+++ b/arch/um/os-Linux/skas/mem.c
@@ -217,24 +217,3 @@ int unmap(struct mm_id *mm_idp, unsigned long addr, unsigned long len)
return 0;
}
-
-int protect(struct mm_id *mm_idp, unsigned long addr, unsigned long len,
- unsigned int prot)
-{
- struct stub_syscall *sc;
-
- /* Compress with previous syscall if that is possible */
- sc = syscall_stub_get_previous(mm_idp, STUB_SYSCALL_MPROTECT, addr);
- if (sc && sc->mem.prot == prot) {
- sc->mem.length += len;
- return 0;
- }
-
- sc = syscall_stub_alloc(mm_idp);
- sc->syscall = STUB_SYSCALL_MPROTECT;
- sc->mem.addr = addr;
- sc->mem.length = len;
- sc->mem.prot = prot;
-
- return 0;
-}
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index b6f656bcffb1..f683cfc9e51a 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -10,8 +10,11 @@
#include <sched.h>
#include <errno.h>
#include <string.h>
+#include <fcntl.h>
+#include <mem_user.h>
#include <sys/mman.h>
#include <sys/wait.h>
+#include <sys/stat.h>
#include <asm/unistd.h>
#include <as-layout.h>
#include <init.h>
@@ -141,16 +144,10 @@ bad_wait:
extern unsigned long current_stub_stack(void);
-static void get_skas_faultinfo(int pid, struct faultinfo *fi, unsigned long *aux_fp_regs)
+static void get_skas_faultinfo(int pid, struct faultinfo *fi)
{
int err;
- err = get_fp_registers(pid, aux_fp_regs);
- if (err < 0) {
- printk(UM_KERN_ERR "save_fp_registers returned %d\n",
- err);
- fatal_sigsegv();
- }
err = ptrace(PTRACE_CONT, pid, 0, SIGSEGV);
if (err) {
printk(UM_KERN_ERR "Failed to continue stub, pid = %d, "
@@ -164,18 +161,11 @@ static void get_skas_faultinfo(int pid, struct faultinfo *fi, unsigned long *aux
* the stub stack page. We just have to copy it.
*/
memcpy(fi, (void *)current_stub_stack(), sizeof(*fi));
-
- err = put_fp_registers(pid, aux_fp_regs);
- if (err < 0) {
- printk(UM_KERN_ERR "put_fp_registers returned %d\n",
- err);
- fatal_sigsegv();
- }
}
-static void handle_segv(int pid, struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
+static void handle_segv(int pid, struct uml_pt_regs *regs)
{
- get_skas_faultinfo(pid, &regs->faultinfo, aux_fp_regs);
+ get_skas_faultinfo(pid, &regs->faultinfo);
segv(regs->faultinfo, 0, 1, NULL);
}
@@ -189,69 +179,131 @@ static void handle_trap(int pid, struct uml_pt_regs *regs)
extern char __syscall_stub_start[];
-/**
- * userspace_tramp() - userspace trampoline
- * @stack: pointer to the new userspace stack page
- *
- * The userspace trampoline is used to setup a new userspace process in start_userspace() after it was clone()'ed.
- * This function will run on a temporary stack page.
- * It ptrace()'es itself, then
- * Two pages are mapped into the userspace address space:
- * - STUB_CODE (with EXEC), which contains the skas stub code
- * - STUB_DATA (with R/W), which contains a data page that is used to transfer certain data between the UML userspace process and the UML kernel.
- * Also for the userspace process a SIGSEGV handler is installed to catch pagefaults in the userspace process.
- * And last the process stops itself to give control to the UML kernel for this userspace process.
- *
- * Return: Always zero, otherwise the current userspace process is ended with non null exit() call
- */
+static int stub_exe_fd;
+
static int userspace_tramp(void *stack)
{
- struct sigaction sa;
- void *addr;
- int fd;
+ char *const argv[] = { "uml-userspace", NULL };
+ int pipe_fds[2];
unsigned long long offset;
- unsigned long segv_handler = STUB_CODE +
- (unsigned long) stub_segv_handler -
- (unsigned long) __syscall_stub_start;
-
- ptrace(PTRACE_TRACEME, 0, 0, 0);
-
- signal(SIGTERM, SIG_DFL);
- signal(SIGWINCH, SIG_IGN);
-
- fd = phys_mapping(uml_to_phys(__syscall_stub_start), &offset);
- addr = mmap64((void *) STUB_CODE, UM_KERN_PAGE_SIZE,
- PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset);
- if (addr == MAP_FAILED) {
- os_info("mapping mmap stub at 0x%lx failed, errno = %d\n",
- STUB_CODE, errno);
- exit(1);
+ struct stub_init_data init_data = {
+ .stub_start = STUB_START,
+ .segv_handler = STUB_CODE +
+ (unsigned long) stub_segv_handler -
+ (unsigned long) __syscall_stub_start,
+ };
+ struct iomem_region *iomem;
+ int ret;
+
+ init_data.stub_code_fd = phys_mapping(uml_to_phys(__syscall_stub_start),
+ &offset);
+ init_data.stub_code_offset = MMAP_OFFSET(offset);
+
+ init_data.stub_data_fd = phys_mapping(uml_to_phys(stack), &offset);
+ init_data.stub_data_offset = MMAP_OFFSET(offset);
+
+ /* Set CLOEXEC on all FDs and then unset on all memory related FDs */
+ close_range(0, ~0U, CLOSE_RANGE_CLOEXEC);
+
+ fcntl(init_data.stub_data_fd, F_SETFD, 0);
+ for (iomem = iomem_regions; iomem; iomem = iomem->next)
+ fcntl(iomem->fd, F_SETFD, 0);
+
+ /* Create a pipe for init_data (no CLOEXEC) and dup2 to STDIN */
+ if (pipe(pipe_fds))
+ exit(2);
+
+ if (dup2(pipe_fds[0], 0) < 0)
+ exit(3);
+ close(pipe_fds[0]);
+
+ /* Write init_data and close write side */
+ ret = write(pipe_fds[1], &init_data, sizeof(init_data));
+ close(pipe_fds[1]);
+
+ if (ret != sizeof(init_data))
+ exit(4);
+
+ execveat(stub_exe_fd, "", argv, NULL, AT_EMPTY_PATH);
+
+ exit(5);
+}
+
+extern char stub_exe_start[];
+extern char stub_exe_end[];
+
+extern char *tempdir;
+
+#define STUB_EXE_NAME_TEMPLATE "/uml-userspace-XXXXXX"
+
+#ifndef MFD_EXEC
+#define MFD_EXEC 0x0010U
+#endif
+
+static int __init init_stub_exe_fd(void)
+{
+ size_t written = 0;
+ char *tmpfile = NULL;
+
+ stub_exe_fd = memfd_create("uml-userspace",
+ MFD_EXEC | MFD_CLOEXEC | MFD_ALLOW_SEALING);
+
+ if (stub_exe_fd < 0) {
+ printk(UM_KERN_INFO "Could not create executable memfd, using temporary file!");
+
+ tmpfile = malloc(strlen(tempdir) +
+ strlen(STUB_EXE_NAME_TEMPLATE) + 1);
+ if (tmpfile == NULL)
+ panic("Failed to allocate memory for stub binary name");
+
+ strcpy(tmpfile, tempdir);
+ strcat(tmpfile, STUB_EXE_NAME_TEMPLATE);
+
+ stub_exe_fd = mkstemp(tmpfile);
+ if (stub_exe_fd < 0)
+ panic("Could not create temporary file for stub binary: %d",
+ -errno);
}
- fd = phys_mapping(uml_to_phys(stack), &offset);
- addr = mmap((void *) STUB_DATA,
- STUB_DATA_PAGES * UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE,
- MAP_FIXED | MAP_SHARED, fd, offset);
- if (addr == MAP_FAILED) {
- os_info("mapping segfault stack at 0x%lx failed, errno = %d\n",
- STUB_DATA, errno);
- exit(1);
+ while (written < stub_exe_end - stub_exe_start) {
+ ssize_t res = write(stub_exe_fd, stub_exe_start + written,
+ stub_exe_end - stub_exe_start - written);
+ if (res < 0) {
+ if (errno == EINTR)
+ continue;
+
+ if (tmpfile)
+ unlink(tmpfile);
+ panic("Failed write stub binary: %d", -errno);
+ }
+
+ written += res;
}
- set_sigstack((void *) STUB_DATA, STUB_DATA_PAGES * UM_KERN_PAGE_SIZE);
- sigemptyset(&sa.sa_mask);
- sa.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO;
- sa.sa_sigaction = (void *) segv_handler;
- sa.sa_restorer = NULL;
- if (sigaction(SIGSEGV, &sa, NULL) < 0) {
- os_info("%s - setting SIGSEGV handler failed - errno = %d\n",
- __func__, errno);
- exit(1);
+ if (!tmpfile) {
+ fcntl(stub_exe_fd, F_ADD_SEALS,
+ F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_SEAL);
+ } else {
+ if (fchmod(stub_exe_fd, 00500) < 0) {
+ unlink(tmpfile);
+ panic("Could not make stub binary executable: %d",
+ -errno);
+ }
+
+ close(stub_exe_fd);
+ stub_exe_fd = open(tmpfile, O_RDONLY | O_CLOEXEC | O_NOFOLLOW);
+ if (stub_exe_fd < 0) {
+ unlink(tmpfile);
+ panic("Could not reopen stub binary: %d", -errno);
+ }
+
+ unlink(tmpfile);
+ free(tmpfile);
}
- kill(os_getpid(), SIGSTOP);
return 0;
}
+__initcall(init_stub_exe_fd);
int userspace_pid[NR_CPUS];
@@ -270,7 +322,7 @@ int start_userspace(unsigned long stub_stack)
{
void *stack;
unsigned long sp;
- int pid, status, n, flags, err;
+ int pid, status, n, err;
/* setup a temporary stack page */
stack = mmap(NULL, UM_KERN_PAGE_SIZE,
@@ -286,10 +338,10 @@ int start_userspace(unsigned long stub_stack)
/* set stack pointer to the end of the stack page, so it can grow downwards */
sp = (unsigned long)stack + UM_KERN_PAGE_SIZE;
- flags = CLONE_FILES | SIGCHLD;
-
/* clone into new userspace process */
- pid = clone(userspace_tramp, (void *) sp, flags, (void *) stub_stack);
+ pid = clone(userspace_tramp, (void *) sp,
+ CLONE_VFORK | CLONE_VM | SIGCHLD,
+ (void *)stub_stack);
if (pid < 0) {
err = -errno;
printk(UM_KERN_ERR "%s : clone failed, errno = %d\n",
@@ -336,7 +388,10 @@ int start_userspace(unsigned long stub_stack)
return err;
}
-void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
+int unscheduled_userspace_iterations;
+extern unsigned long tt_extra_sched_jiffies;
+
+void userspace(struct uml_pt_regs *regs)
{
int err, status, op, pid = userspace_pid[0];
siginfo_t si;
@@ -345,6 +400,29 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
interrupt_end();
while (1) {
+ /*
+ * When we are in time-travel mode, userspace can theoretically
+ * do a *lot* of work without being scheduled. The problem with
+ * this is that it will prevent kernel bookkeeping (primarily
+ * the RCU) from running and this can for example cause OOM
+ * situations.
+ *
+ * This code accounts one extra jiffy against the scheduling clock
+ * once the configured number of userspace iterations has been exceeded
+ * in the same thread without a switch, which prevents that situation.
+ */
+ if (time_travel_mode == TT_MODE_INFCPU ||
+ time_travel_mode == TT_MODE_EXTERNAL) {
+#ifdef CONFIG_UML_MAX_USERSPACE_ITERATIONS
+ if (CONFIG_UML_MAX_USERSPACE_ITERATIONS &&
+ unscheduled_userspace_iterations++ >
+ CONFIG_UML_MAX_USERSPACE_ITERATIONS) {
+ tt_extra_sched_jiffies += 1;
+ unscheduled_userspace_iterations = 0;
+ }
+#endif
+ }
+
time_travel_print_bc_msg();
current_mm_sync();
@@ -435,11 +513,11 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
case SIGSEGV:
if (PTRACE_FULL_FAULTINFO) {
get_skas_faultinfo(pid,
- &regs->faultinfo, aux_fp_regs);
+ &regs->faultinfo);
(*sig_info[SIGSEGV])(SIGSEGV, (struct siginfo *)&si,
regs);
}
- else handle_segv(pid, regs, aux_fp_regs);
+ else handle_segv(pid, regs);
break;
case SIGTRAP + 0x80:
handle_trap(pid, regs);
@@ -487,6 +565,8 @@ void new_thread(void *stack, jmp_buf *buf, void (*handler)(void))
void switch_threads(jmp_buf *me, jmp_buf *you)
{
+ unscheduled_userspace_iterations = 0;
+
if (UML_SETJMP(me) == 0)
UML_LONGJMP(you, 1);
}
@@ -570,6 +650,7 @@ static bool noreboot;
static int __init noreboot_cmd_param(char *str, int *add)
{
+ *add = 0;
noreboot = true;
return 0;
}
diff --git a/arch/um/os-Linux/umid.c b/arch/um/os-Linux/umid.c
index e09d65b05d1c..eb523ab1e218 100644
--- a/arch/um/os-Linux/umid.c
+++ b/arch/um/os-Linux/umid.c
@@ -358,6 +358,8 @@ char *get_umid(void)
static int __init set_uml_dir(char *name, int *add)
{
+ *add = 0;
+
if (*name == '\0') {
os_warn("uml_dir can't be an empty string\n");
return 0;
diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c
index 1dca4ffbd572..4193e04d7e4a 100644
--- a/arch/um/os-Linux/util.c
+++ b/arch/um/os-Linux/util.c
@@ -52,8 +52,8 @@ void setup_machinename(char *machine_out)
struct utsname host;
uname(&host);
-#ifdef UML_CONFIG_UML_X86
-# ifndef UML_CONFIG_64BIT
+#if IS_ENABLED(CONFIG_UML_X86)
+# if !IS_ENABLED(CONFIG_64BIT)
if (!strcmp(host.machine, "x86_64")) {
strcpy(machine_out, "i686");
return;
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index 186f13268401..986045d5e638 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -10,6 +10,7 @@ config UML_X86
def_bool y
select ARCH_BINFMT_ELF_EXTRA_PHDRS if X86_32
select DCACHE_WORD_ACCESS
+ select HAVE_EFFICIENT_UNALIGNED_ACCESS
config 64BIT
bool "64-bit kernel" if "$(SUBARCH)" = "x86"
@@ -28,17 +29,6 @@ config X86_64
def_bool 64BIT
select MODULES_USE_ELF_RELA
-config 3_LEVEL_PGTABLES
- bool "Three-level pagetables" if !64BIT
- default 64BIT
- help
- Three-level pagetables will let UML have more than 4G of physical
- memory. All the memory that can't be mapped directly will be treated
- as high memory.
-
- However, this it experimental on 32-bit architectures, so if unsure say
- N (on x86-64 it's automatically enabled, instead, as it's safe there).
-
config ARCH_HAS_SC_SIGNALS
def_bool !64BIT
diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile
index 36e67fc97c22..b42c31cd2390 100644
--- a/arch/x86/um/Makefile
+++ b/arch/x86/um/Makefile
@@ -10,7 +10,7 @@ else
endif
obj-y = bugs_$(BITS).o delay.o fault.o \
- ptrace_$(BITS).o ptrace_user.o setjmp_$(BITS).o signal.o \
+ ptrace.o ptrace_$(BITS).o ptrace_user.o setjmp_$(BITS).o signal.o \
stub_segv.o \
sys_call_table_$(BITS).o sysrq_$(BITS).o tls_$(BITS).o \
mem_$(BITS).o subarch.o os-Linux/
diff --git a/arch/x86/um/asm/elf.h b/arch/x86/um/asm/elf.h
index 6052200fe925..62ed5d68a978 100644
--- a/arch/x86/um/asm/elf.h
+++ b/arch/x86/um/asm/elf.h
@@ -8,6 +8,8 @@
#include <asm/user.h>
#include <skas.h>
+#define CORE_DUMP_USE_REGSET
+
#ifdef CONFIG_X86_32
#define R_386_NONE 0
diff --git a/arch/x86/um/asm/ptrace.h b/arch/x86/um/asm/ptrace.h
index 2fef3da55533..2641d28d115c 100644
--- a/arch/x86/um/asm/ptrace.h
+++ b/arch/x86/um/asm/ptrace.h
@@ -2,6 +2,16 @@
#ifndef __UM_X86_PTRACE_H
#define __UM_X86_PTRACE_H
+/* This is here because signal.c needs the REGSET_FP_LEGACY definition */
+enum {
+ REGSET_GENERAL,
+#ifdef CONFIG_X86_32
+ REGSET_FP_LEGACY,
+#endif
+ REGSET_FP,
+ REGSET_XSTATE,
+};
+
#include <linux/compiler.h>
#ifndef CONFIG_X86_32
#define __FRAME_OFFSETS /* Needed to get the R* macros */
diff --git a/arch/x86/um/os-Linux/Makefile b/arch/x86/um/os-Linux/Makefile
index 5249bbc30dcd..77a308aaa5ec 100644
--- a/arch/x86/um/os-Linux/Makefile
+++ b/arch/x86/um/os-Linux/Makefile
@@ -3,7 +3,7 @@
# Licensed under the GPL
#
-obj-y = registers.o task_size.o mcontext.o
+obj-y = registers.o mcontext.o
obj-$(CONFIG_X86_32) += tls.o
diff --git a/arch/x86/um/os-Linux/registers.c b/arch/x86/um/os-Linux/registers.c
index f3638dd09cec..76eaeb93928c 100644
--- a/arch/x86/um/os-Linux/registers.c
+++ b/arch/x86/um/os-Linux/registers.c
@@ -16,133 +16,58 @@
#include <asm/sigcontext.h>
#include <linux/elf.h>
#include <registers.h>
+#include <sys/mman.h>
-static int have_xstate_support;
+unsigned long host_fp_size;
-int save_i387_registers(int pid, unsigned long *fp_regs)
-{
- if (ptrace(PTRACE_GETFPREGS, pid, 0, fp_regs) < 0)
- return -errno;
- return 0;
-}
-
-int save_fp_registers(int pid, unsigned long *fp_regs)
+int get_fp_registers(int pid, unsigned long *regs)
{
-#ifdef PTRACE_GETREGSET
- struct iovec iov;
+ struct iovec iov = {
+ .iov_base = regs,
+ .iov_len = host_fp_size,
+ };
- if (have_xstate_support) {
- iov.iov_base = fp_regs;
- iov.iov_len = FP_SIZE * sizeof(unsigned long);
- if (ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov) < 0)
- return -errno;
- return 0;
- } else
-#endif
- return save_i387_registers(pid, fp_regs);
-}
-
-int restore_i387_registers(int pid, unsigned long *fp_regs)
-{
- if (ptrace(PTRACE_SETFPREGS, pid, 0, fp_regs) < 0)
+ if (ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov) < 0)
return -errno;
return 0;
}
-int restore_fp_registers(int pid, unsigned long *fp_regs)
-{
-#ifdef PTRACE_SETREGSET
- struct iovec iov;
- if (have_xstate_support) {
- iov.iov_base = fp_regs;
- iov.iov_len = FP_SIZE * sizeof(unsigned long);
- if (ptrace(PTRACE_SETREGSET, pid, NT_X86_XSTATE, &iov) < 0)
- return -errno;
- return 0;
- } else
-#endif
- return restore_i387_registers(pid, fp_regs);
-}
-
-#ifdef __i386__
-int have_fpx_regs = 1;
-int save_fpx_registers(int pid, unsigned long *fp_regs)
+int put_fp_registers(int pid, unsigned long *regs)
{
- if (ptrace(PTRACE_GETFPXREGS, pid, 0, fp_regs) < 0)
- return -errno;
- return 0;
-}
+ struct iovec iov = {
+ .iov_base = regs,
+ .iov_len = host_fp_size,
+ };
-int restore_fpx_registers(int pid, unsigned long *fp_regs)
-{
- if (ptrace(PTRACE_SETFPXREGS, pid, 0, fp_regs) < 0)
+ if (ptrace(PTRACE_SETREGSET, pid, NT_X86_XSTATE, &iov) < 0)
return -errno;
return 0;
}
-int get_fp_registers(int pid, unsigned long *regs)
-{
- if (have_fpx_regs)
- return save_fpx_registers(pid, regs);
- else
- return save_fp_registers(pid, regs);
-}
-
-int put_fp_registers(int pid, unsigned long *regs)
-{
- if (have_fpx_regs)
- return restore_fpx_registers(pid, regs);
- else
- return restore_fp_registers(pid, regs);
-}
-
-void arch_init_registers(int pid)
-{
- struct user_fpxregs_struct fpx_regs;
- int err;
-
- err = ptrace(PTRACE_GETFPXREGS, pid, 0, &fpx_regs);
- if (!err)
- return;
-
- if (errno != EIO)
- panic("check_ptrace : PTRACE_GETFPXREGS failed, errno = %d",
- errno);
-
- have_fpx_regs = 0;
-}
-#else
-
-int get_fp_registers(int pid, unsigned long *regs)
+int arch_init_registers(int pid)
{
- return save_fp_registers(pid, regs);
+ struct iovec iov = {
+ /* Just use plenty of space, it does not cost us anything */
+ .iov_len = 2 * 1024 * 1024,
+ };
+ int ret;
+
+ iov.iov_base = mmap(NULL, iov.iov_len, PROT_WRITE | PROT_READ,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (iov.iov_base == MAP_FAILED)
+ return -ENOMEM;
+
+ /* GDB has x86_xsave_length, which uses x86_cpuid_count */
+ ret = ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov);
+ if (ret)
+ ret = -errno;
+ munmap(iov.iov_base, 2 * 1024 * 1024);
+
+ host_fp_size = iov.iov_len;
+
+ return ret;
}
-int put_fp_registers(int pid, unsigned long *regs)
-{
- return restore_fp_registers(pid, regs);
-}
-
-void arch_init_registers(int pid)
-{
-#ifdef PTRACE_GETREGSET
- void * fp_regs;
- struct iovec iov;
-
- fp_regs = malloc(FP_SIZE * sizeof(unsigned long));
- if(fp_regs == NULL)
- return;
-
- iov.iov_base = fp_regs;
- iov.iov_len = FP_SIZE * sizeof(unsigned long);
- if (ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov) == 0)
- have_xstate_support = 1;
-
- free(fp_regs);
-#endif
-}
-#endif
-
unsigned long get_thread_reg(int reg, jmp_buf *buf)
{
switch (reg) {
diff --git a/arch/x86/um/os-Linux/task_size.c b/arch/x86/um/os-Linux/task_size.c
deleted file mode 100644
index 1dc9adc20b1c..000000000000
--- a/arch/x86/um/os-Linux/task_size.c
+++ /dev/null
@@ -1,151 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <stdio.h>
-#include <stdlib.h>
-#include <signal.h>
-#include <sys/mman.h>
-#include <longjmp.h>
-
-#ifdef __i386__
-
-static jmp_buf buf;
-
-static void segfault(int sig)
-{
- longjmp(buf, 1);
-}
-
-static int page_ok(unsigned long page)
-{
- unsigned long *address = (unsigned long *) (page << UM_KERN_PAGE_SHIFT);
- unsigned long n = ~0UL;
- void *mapped = NULL;
- int ok = 0;
-
- /*
- * First see if the page is readable. If it is, it may still
- * be a VDSO, so we go on to see if it's writable. If not
- * then try mapping memory there. If that fails, then we're
- * still in the kernel area. As a sanity check, we'll fail if
- * the mmap succeeds, but gives us an address different from
- * what we wanted.
- */
- if (setjmp(buf) == 0)
- n = *address;
- else {
- mapped = mmap(address, UM_KERN_PAGE_SIZE,
- PROT_READ | PROT_WRITE,
- MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (mapped == MAP_FAILED)
- return 0;
- if (mapped != address)
- goto out;
- }
-
- /*
- * Now, is it writeable? If so, then we're in user address
- * space. If not, then try mprotecting it and try the write
- * again.
- */
- if (setjmp(buf) == 0) {
- *address = n;
- ok = 1;
- goto out;
- } else if (mprotect(address, UM_KERN_PAGE_SIZE,
- PROT_READ | PROT_WRITE) != 0)
- goto out;
-
- if (setjmp(buf) == 0) {
- *address = n;
- ok = 1;
- }
-
- out:
- if (mapped != NULL)
- munmap(mapped, UM_KERN_PAGE_SIZE);
- return ok;
-}
-
-unsigned long os_get_top_address(void)
-{
- struct sigaction sa, old;
- unsigned long bottom = 0;
- /*
- * A 32-bit UML on a 64-bit host gets confused about the VDSO at
- * 0xffffe000. It is mapped, is readable, can be reprotected writeable
- * and written. However, exec discovers later that it can't be
- * unmapped. So, just set the highest address to be checked to just
- * below it. This might waste some address space on 4G/4G 32-bit
- * hosts, but shouldn't hurt otherwise.
- */
- unsigned long top = 0xffffd000 >> UM_KERN_PAGE_SHIFT;
- unsigned long test, original;
-
- printf("Locating the bottom of the address space ... ");
- fflush(stdout);
-
- /*
- * We're going to be longjmping out of the signal handler, so
- * SA_DEFER needs to be set.
- */
- sa.sa_handler = segfault;
- sigemptyset(&sa.sa_mask);
- sa.sa_flags = SA_NODEFER;
- if (sigaction(SIGSEGV, &sa, &old)) {
- perror("os_get_top_address");
- exit(1);
- }
-
- /* Manually scan the address space, bottom-up, until we find
- * the first valid page (or run out of them).
- */
- for (bottom = 0; bottom < top; bottom++) {
- if (page_ok(bottom))
- break;
- }
-
- /* If we've got this far, we ran out of pages. */
- if (bottom == top) {
- fprintf(stderr, "Unable to determine bottom of address "
- "space.\n");
- exit(1);
- }
-
- printf("0x%lx\n", bottom << UM_KERN_PAGE_SHIFT);
- printf("Locating the top of the address space ... ");
- fflush(stdout);
-
- original = bottom;
-
- /* This could happen with a 4G/4G split */
- if (page_ok(top))
- goto out;
-
- do {
- test = bottom + (top - bottom) / 2;
- if (page_ok(test))
- bottom = test;
- else
- top = test;
- } while (top - bottom > 1);
-
-out:
- /* Restore the old SIGSEGV handling */
- if (sigaction(SIGSEGV, &old, NULL)) {
- perror("os_get_top_address");
- exit(1);
- }
- top <<= UM_KERN_PAGE_SHIFT;
- printf("0x%lx\n", top);
-
- return top;
-}
-
-#else
-
-unsigned long os_get_top_address(void)
-{
- /* The old value of CONFIG_TOP_ADDR */
- return 0x7fc0002000;
-}
-
-#endif
diff --git a/arch/x86/um/ptrace.c b/arch/x86/um/ptrace.c
new file mode 100644
index 000000000000..57c504fd5626
--- /dev/null
+++ b/arch/x86/um/ptrace.c
@@ -0,0 +1,267 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/sched.h>
+#include <linux/elf.h>
+#include <linux/regset.h>
+#include <asm/user32.h>
+#include <asm/sigcontext.h>
+
+#ifdef CONFIG_X86_32
+/*
+ * FPU tag word conversions.
+ */
+
+static inline unsigned short twd_i387_to_fxsr(unsigned short twd)
+{
+ unsigned int tmp; /* to avoid 16 bit prefixes in the code */
+
+ /* Transform each pair of bits into 01 (valid) or 00 (empty) */
+ tmp = ~twd;
+ tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */
+ /* and move the valid bits to the lower byte. */
+ tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */
+ tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
+ tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */
+ return tmp;
+}
+
+static inline unsigned long twd_fxsr_to_i387(struct user_fxsr_struct *fxsave)
+{
+ struct _fpxreg *st = NULL;
+ unsigned long twd = (unsigned long) fxsave->twd;
+ unsigned long tag;
+ unsigned long ret = 0xffff0000;
+ int i;
+
+#define FPREG_ADDR(f, n) ((char *)&(f)->st_space + (n) * 16)
+
+ for (i = 0; i < 8; i++) {
+ if (twd & 0x1) {
+ st = (struct _fpxreg *) FPREG_ADDR(fxsave, i);
+
+ switch (st->exponent & 0x7fff) {
+ case 0x7fff:
+ tag = 2; /* Special */
+ break;
+ case 0x0000:
+ if (!st->significand[0] &&
+ !st->significand[1] &&
+ !st->significand[2] &&
+ !st->significand[3]) {
+ tag = 1; /* Zero */
+ } else {
+ tag = 2; /* Special */
+ }
+ break;
+ default:
+ if (st->significand[3] & 0x8000)
+ tag = 0; /* Valid */
+ else
+ tag = 2; /* Special */
+ break;
+ }
+ } else {
+ tag = 3; /* Empty */
+ }
+ ret |= (tag << (2 * i));
+ twd = twd >> 1;
+ }
+ return ret;
+}
+
+/* Get/set the old 32bit i387 registers (pre-FPX) */
+static int fpregs_legacy_get(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to)
+{
+ struct user_fxsr_struct *fxsave = (void *)target->thread.regs.regs.fp;
+ int i;
+
+ membuf_store(&to, (unsigned long)fxsave->cwd | 0xffff0000ul);
+ membuf_store(&to, (unsigned long)fxsave->swd | 0xffff0000ul);
+ membuf_store(&to, twd_fxsr_to_i387(fxsave));
+ membuf_store(&to, fxsave->fip);
+ membuf_store(&to, fxsave->fcs | ((unsigned long)fxsave->fop << 16));
+ membuf_store(&to, fxsave->foo);
+ membuf_store(&to, fxsave->fos);
+
+ for (i = 0; i < 8; i++)
+ membuf_write(&to, (void *)fxsave->st_space + i * 16, 10);
+
+ return 0;
+}
+
+static int fpregs_legacy_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ struct user_fxsr_struct *fxsave = (void *)target->thread.regs.regs.fp;
+ const struct user_i387_struct *from;
+ struct user_i387_struct buf;
+ int i;
+
+ if (ubuf) {
+ if (copy_from_user(&buf, ubuf, sizeof(buf)))
+ return -EFAULT;
+ from = &buf;
+ } else {
+ from = kbuf;
+ }
+
+ fxsave->cwd = (unsigned short)(from->cwd & 0xffff);
+ fxsave->swd = (unsigned short)(from->swd & 0xffff);
+ fxsave->twd = twd_i387_to_fxsr((unsigned short)(from->twd & 0xffff));
+ fxsave->fip = from->fip;
+ fxsave->fop = (unsigned short)((from->fcs & 0xffff0000ul) >> 16);
+ fxsave->fcs = (from->fcs & 0xffff);
+ fxsave->foo = from->foo;
+ fxsave->fos = from->fos;
+
+ for (i = 0; i < 8; i++) {
+ memcpy((void *)fxsave->st_space + i * 16,
+ (void *)from->st_space + i * 10, 10);
+ }
+
+ return 0;
+}
+#endif
+
+static int genregs_get(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to)
+{
+ int reg;
+
+ for (reg = 0; to.left; reg++)
+ membuf_store(&to, getreg(target, reg * sizeof(unsigned long)));
+ return 0;
+}
+
+static int genregs_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret = 0;
+
+ if (kbuf) {
+ const unsigned long *k = kbuf;
+
+ while (count >= sizeof(*k) && !ret) {
+ ret = putreg(target, pos, *k++);
+ count -= sizeof(*k);
+ pos += sizeof(*k);
+ }
+ } else {
+ const unsigned long __user *u = ubuf;
+
+ while (count >= sizeof(*u) && !ret) {
+ unsigned long word;
+
+ ret = __get_user(word, u++);
+ if (ret)
+ break;
+ ret = putreg(target, pos, word);
+ count -= sizeof(*u);
+ pos += sizeof(*u);
+ }
+ }
+ return ret;
+}
+
+static int generic_fpregs_active(struct task_struct *target, const struct user_regset *regset)
+{
+ return regset->n;
+}
+
+static int generic_fpregs_get(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to)
+{
+ void *fpregs = task_pt_regs(target)->regs.fp;
+
+ membuf_write(&to, fpregs, regset->size * regset->n);
+ return 0;
+}
+
+static int generic_fpregs_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ void *fpregs = task_pt_regs(target)->regs.fp;
+
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ fpregs, 0, regset->size * regset->n);
+}
+
+static struct user_regset uml_regsets[] __ro_after_init = {
+ [REGSET_GENERAL] = {
+ .core_note_type = NT_PRSTATUS,
+ .n = sizeof(struct user_regs_struct) / sizeof(long),
+ .size = sizeof(long),
+ .align = sizeof(long),
+ .regset_get = genregs_get,
+ .set = genregs_set
+ },
+#ifdef CONFIG_X86_32
+ /* Old FP registers, they are needed in signal frames */
+ [REGSET_FP_LEGACY] = {
+ .core_note_type = NT_PRFPREG,
+ .n = sizeof(struct user_i387_ia32_struct) / sizeof(long),
+ .size = sizeof(long),
+ .align = sizeof(long),
+ .active = generic_fpregs_active,
+ .regset_get = fpregs_legacy_get,
+ .set = fpregs_legacy_set,
+ },
+#endif
+ [REGSET_FP] = {
+#ifdef CONFIG_X86_32
+ .core_note_type = NT_PRXFPREG,
+ .n = sizeof(struct user32_fxsr_struct) / sizeof(long),
+#else
+ .core_note_type = NT_PRFPREG,
+ .n = sizeof(struct user_i387_struct) / sizeof(long),
+#endif
+ .size = sizeof(long),
+ .align = sizeof(long),
+ .active = generic_fpregs_active,
+ .regset_get = generic_fpregs_get,
+ .set = generic_fpregs_set,
+ },
+ [REGSET_XSTATE] = {
+ .core_note_type = NT_X86_XSTATE,
+ .size = sizeof(long),
+ .align = sizeof(long),
+ .active = generic_fpregs_active,
+ .regset_get = generic_fpregs_get,
+ .set = generic_fpregs_set,
+ },
+ /* TODO: Add TLS regset for 32bit */
+};
+
+static const struct user_regset_view user_uml_view = {
+#ifdef CONFIG_X86_32
+ .name = "i386", .e_machine = EM_386,
+#else
+ .name = "x86_64", .e_machine = EM_X86_64,
+#endif
+ .regsets = uml_regsets, .n = ARRAY_SIZE(uml_regsets)
+};
+
+const struct user_regset_view *
+task_user_regset_view(struct task_struct *tsk)
+{
+ return &user_uml_view;
+}
+
+static int __init init_regset_xstate_info(void)
+{
+ uml_regsets[REGSET_XSTATE].n =
+ host_fp_size / uml_regsets[REGSET_XSTATE].size;
+
+ return 0;
+}
+arch_initcall(init_regset_xstate_info);
diff --git a/arch/x86/um/ptrace_32.c b/arch/x86/um/ptrace_32.c
index b0a71c6cdc6e..3af3cb821524 100644
--- a/arch/x86/um/ptrace_32.c
+++ b/arch/x86/um/ptrace_32.c
@@ -6,6 +6,7 @@
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/uaccess.h>
+#include <linux/regset.h>
#include <asm/ptrace-abi.h>
#include <registers.h>
#include <skas.h>
@@ -168,65 +169,6 @@ int peek_user(struct task_struct *child, long addr, long data)
return put_user(tmp, (unsigned long __user *) data);
}
-static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
-{
- int err, n, cpu = task_cpu(child);
- struct user_i387_struct fpregs;
-
- err = save_i387_registers(userspace_pid[cpu],
- (unsigned long *) &fpregs);
- if (err)
- return err;
-
- n = copy_to_user(buf, &fpregs, sizeof(fpregs));
- if(n > 0)
- return -EFAULT;
-
- return n;
-}
-
-static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
-{
- int n, cpu = task_cpu(child);
- struct user_i387_struct fpregs;
-
- n = copy_from_user(&fpregs, buf, sizeof(fpregs));
- if (n > 0)
- return -EFAULT;
-
- return restore_i387_registers(userspace_pid[cpu],
- (unsigned long *) &fpregs);
-}
-
-static int get_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child)
-{
- int err, n, cpu = task_cpu(child);
- struct user_fxsr_struct fpregs;
-
- err = save_fpx_registers(userspace_pid[cpu], (unsigned long *) &fpregs);
- if (err)
- return err;
-
- n = copy_to_user(buf, &fpregs, sizeof(fpregs));
- if(n > 0)
- return -EFAULT;
-
- return n;
-}
-
-static int set_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child)
-{
- int n, cpu = task_cpu(child);
- struct user_fxsr_struct fpregs;
-
- n = copy_from_user(&fpregs, buf, sizeof(fpregs));
- if (n > 0)
- return -EFAULT;
-
- return restore_fpx_registers(userspace_pid[cpu],
- (unsigned long *) &fpregs);
-}
-
long subarch_ptrace(struct task_struct *child, long request,
unsigned long addr, unsigned long data)
{
@@ -234,17 +176,25 @@ long subarch_ptrace(struct task_struct *child, long request,
void __user *datap = (void __user *) data;
switch (request) {
case PTRACE_GETFPREGS: /* Get the child FPU state. */
- ret = get_fpregs(datap, child);
- break;
+ return copy_regset_to_user(child, task_user_regset_view(child),
+ REGSET_FP_LEGACY,
+ 0, sizeof(struct user_i387_struct),
+ datap);
case PTRACE_SETFPREGS: /* Set the child FPU state. */
- ret = set_fpregs(datap, child);
- break;
+ return copy_regset_from_user(child, task_user_regset_view(child),
+ REGSET_FP_LEGACY,
+ 0, sizeof(struct user_i387_struct),
+ datap);
case PTRACE_GETFPXREGS: /* Get the child FPU state. */
- ret = get_fpxregs(datap, child);
- break;
+ return copy_regset_to_user(child, task_user_regset_view(child),
+ REGSET_FP,
+ 0, sizeof(struct user_fxsr_struct),
+ datap);
case PTRACE_SETFPXREGS: /* Set the child FPU state. */
- ret = set_fpxregs(datap, child);
- break;
+ return copy_regset_from_user(child, task_user_regset_view(child),
+ REGSET_FP,
+ 0, sizeof(struct user_fxsr_struct),
+ datap);
default:
ret = -EIO;
}
diff --git a/arch/x86/um/ptrace_64.c b/arch/x86/um/ptrace_64.c
index aa68d83d3f44..e0d4120a45c8 100644
--- a/arch/x86/um/ptrace_64.c
+++ b/arch/x86/um/ptrace_64.c
@@ -8,6 +8,7 @@
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/errno.h>
+#include <linux/regset.h>
#define __FRAME_OFFSETS
#include <asm/ptrace.h>
#include <linux/uaccess.h>
@@ -188,36 +189,6 @@ int peek_user(struct task_struct *child, long addr, long data)
return put_user(tmp, (unsigned long *) data);
}
-static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
-{
- int err, n, cpu = ((struct thread_info *) child->stack)->cpu;
- struct user_i387_struct fpregs;
-
- err = save_i387_registers(userspace_pid[cpu],
- (unsigned long *) &fpregs);
- if (err)
- return err;
-
- n = copy_to_user(buf, &fpregs, sizeof(fpregs));
- if (n > 0)
- return -EFAULT;
-
- return n;
-}
-
-static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
-{
- int n, cpu = ((struct thread_info *) child->stack)->cpu;
- struct user_i387_struct fpregs;
-
- n = copy_from_user(&fpregs, buf, sizeof(fpregs));
- if (n > 0)
- return -EFAULT;
-
- return restore_i387_registers(userspace_pid[cpu],
- (unsigned long *) &fpregs);
-}
-
long subarch_ptrace(struct task_struct *child, long request,
unsigned long addr, unsigned long data)
{
@@ -226,11 +197,15 @@ long subarch_ptrace(struct task_struct *child, long request,
switch (request) {
case PTRACE_GETFPREGS: /* Get the child FPU state. */
- ret = get_fpregs(datap, child);
- break;
+ return copy_regset_to_user(child, task_user_regset_view(child),
+ REGSET_FP,
+ 0, sizeof(struct user_i387_struct),
+ datap);
case PTRACE_SETFPREGS: /* Set the child FPU state. */
- ret = set_fpregs(datap, child);
- break;
+ return copy_regset_from_user(child, task_user_regset_view(child),
+ REGSET_FP,
+ 0, sizeof(struct user_i387_struct),
+ datap);
case PTRACE_ARCH_PRCTL:
/* XXX Calls ptrace on the host - needs some SMP thinking */
ret = arch_prctl(child, data, (void __user *) addr);
diff --git a/arch/x86/um/shared/sysdep/ptrace.h b/arch/x86/um/shared/sysdep/ptrace.h
index 6ca4ecabc55b..2dd4ca6713f8 100644
--- a/arch/x86/um/shared/sysdep/ptrace.h
+++ b/arch/x86/um/shared/sysdep/ptrace.h
@@ -56,12 +56,16 @@ struct syscall_args {
UPT_SYSCALL_ARG5(r), \
UPT_SYSCALL_ARG6(r) } } )
+extern unsigned long host_fp_size;
+
struct uml_pt_regs {
unsigned long gp[MAX_REG_NR];
- unsigned long fp[MAX_FP_NR];
struct faultinfo faultinfo;
long syscall;
int is_user;
+
+ /* Dynamically sized FP registers (holds an XSTATE) */
+ unsigned long fp[];
};
#define EMPTY_UML_PT_REGS { }
@@ -72,4 +76,6 @@ struct uml_pt_regs {
extern int user_context(unsigned long sp);
+extern int arch_init_registers(int pid);
+
#endif /* __SYSDEP_X86_PTRACE_H */
diff --git a/arch/x86/um/shared/sysdep/ptrace_32.h b/arch/x86/um/shared/sysdep/ptrace_32.h
index 0c4989842fbe..2392470cac4d 100644
--- a/arch/x86/um/shared/sysdep/ptrace_32.h
+++ b/arch/x86/um/shared/sysdep/ptrace_32.h
@@ -6,8 +6,6 @@
#ifndef __SYSDEP_I386_PTRACE_H
#define __SYSDEP_I386_PTRACE_H
-#define MAX_FP_NR HOST_FPX_SIZE
-
#define UPT_SYSCALL_ARG1(r) UPT_BX(r)
#define UPT_SYSCALL_ARG2(r) UPT_CX(r)
#define UPT_SYSCALL_ARG3(r) UPT_DX(r)
@@ -15,6 +13,4 @@
#define UPT_SYSCALL_ARG5(r) UPT_DI(r)
#define UPT_SYSCALL_ARG6(r) UPT_BP(r)
-extern void arch_init_registers(int pid);
-
#endif
diff --git a/arch/x86/um/shared/sysdep/ptrace_64.h b/arch/x86/um/shared/sysdep/ptrace_64.h
index 0dc223aa1c2d..e73573ac871f 100644
--- a/arch/x86/um/shared/sysdep/ptrace_64.h
+++ b/arch/x86/um/shared/sysdep/ptrace_64.h
@@ -8,8 +8,6 @@
#ifndef __SYSDEP_X86_64_PTRACE_H
#define __SYSDEP_X86_64_PTRACE_H
-#define MAX_FP_NR HOST_FP_SIZE
-
#define REGS_R8(r) ((r)[HOST_R8])
#define REGS_R9(r) ((r)[HOST_R9])
#define REGS_R10(r) ((r)[HOST_R10])
@@ -57,6 +55,4 @@
#define UPT_SYSCALL_ARG5(r) UPT_R8(r)
#define UPT_SYSCALL_ARG6(r) UPT_R9(r)
-extern void arch_init_registers(int pid);
-
#endif
diff --git a/arch/x86/um/shared/sysdep/ptrace_user.h b/arch/x86/um/shared/sysdep/ptrace_user.h
index 1d1a824fa652..98da23120538 100644
--- a/arch/x86/um/shared/sysdep/ptrace_user.h
+++ b/arch/x86/um/shared/sysdep/ptrace_user.h
@@ -11,12 +11,6 @@
#define REGS_IP_INDEX HOST_IP
#define REGS_SP_INDEX HOST_SP
-#ifdef __i386__
-#define FP_SIZE ((HOST_FPX_SIZE > HOST_FP_SIZE) ? HOST_FPX_SIZE : HOST_FP_SIZE)
-#else
-#define FP_SIZE HOST_FP_SIZE
-#endif
-
/*
* glibc before 2.27 does not include PTRACE_SYSEMU_SINGLESTEP in its enum,
* ensure we have a definition by (re-)defining it here.
diff --git a/arch/x86/um/shared/sysdep/stub_32.h b/arch/x86/um/shared/sysdep/stub_32.h
index 0b44a86dd346..390988132c0a 100644
--- a/arch/x86/um/shared/sysdep/stub_32.h
+++ b/arch/x86/um/shared/sysdep/stub_32.h
@@ -112,11 +112,23 @@ static __always_inline void *get_stub_data(void)
unsigned long ret;
asm volatile (
- "movl %%esp,%0 ;"
- "andl %1,%0"
+ "call _here_%=;"
+ "_here_%=:"
+ "popl %0;"
+ "andl %1, %0 ;"
+ "addl %2, %0 ;"
: "=a" (ret)
- : "g" (~(STUB_DATA_PAGES * UM_KERN_PAGE_SIZE - 1)));
+ : "g" (~(UM_KERN_PAGE_SIZE - 1)),
+ "g" (UM_KERN_PAGE_SIZE));
return (void *)ret;
}
+
+#define stub_start(fn) \
+ asm volatile ( \
+ "subl %0,%%esp ;" \
+ "movl %1, %%eax ; " \
+ "call *%%eax ;" \
+ :: "i" ((1 + STUB_DATA_PAGES) * UM_KERN_PAGE_SIZE), \
+ "i" (&fn))
#endif
diff --git a/arch/x86/um/shared/sysdep/stub_64.h b/arch/x86/um/shared/sysdep/stub_64.h
index 67f44284f1aa..294affbec742 100644
--- a/arch/x86/um/shared/sysdep/stub_64.h
+++ b/arch/x86/um/shared/sysdep/stub_64.h
@@ -28,6 +28,17 @@ static __always_inline long stub_syscall0(long syscall)
return ret;
}
+static __always_inline long stub_syscall1(long syscall, long arg1)
+{
+ long ret;
+
+ __asm__ volatile (__syscall
+ : "=a" (ret)
+ : "0" (syscall), "D" (arg1) : __syscall_clobber );
+
+ return ret;
+}
+
static __always_inline long stub_syscall2(long syscall, long arg1, long arg2)
{
long ret;
@@ -106,11 +117,21 @@ static __always_inline void *get_stub_data(void)
unsigned long ret;
asm volatile (
- "movq %%rsp,%0 ;"
- "andq %1,%0"
+ "lea 0(%%rip), %0;"
+ "andq %1, %0 ;"
+ "addq %2, %0 ;"
: "=a" (ret)
- : "g" (~(STUB_DATA_PAGES * UM_KERN_PAGE_SIZE - 1)));
+ : "g" (~(UM_KERN_PAGE_SIZE - 1)),
+ "g" (UM_KERN_PAGE_SIZE));
return (void *)ret;
}
+
+#define stub_start(fn) \
+ asm volatile ( \
+ "subq %0,%%rsp ;" \
+ "movq %1,%%rax ;" \
+ "call *%%rax ;" \
+ :: "i" ((1 + STUB_DATA_PAGES) * UM_KERN_PAGE_SIZE), \
+ "i" (&fn))
#endif
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c
index 2cc8c2309022..75087e85b6fd 100644
--- a/arch/x86/um/signal.c
+++ b/arch/x86/um/signal.c
@@ -16,145 +16,24 @@
#include <registers.h>
#include <skas.h>
-#ifdef CONFIG_X86_32
-
-/*
- * FPU tag word conversions.
- */
-
-static inline unsigned short twd_i387_to_fxsr(unsigned short twd)
-{
- unsigned int tmp; /* to avoid 16 bit prefixes in the code */
-
- /* Transform each pair of bits into 01 (valid) or 00 (empty) */
- tmp = ~twd;
- tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */
- /* and move the valid bits to the lower byte. */
- tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */
- tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
- tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */
- return tmp;
-}
-
-static inline unsigned long twd_fxsr_to_i387(struct user_fxsr_struct *fxsave)
-{
- struct _fpxreg *st = NULL;
- unsigned long twd = (unsigned long) fxsave->twd;
- unsigned long tag;
- unsigned long ret = 0xffff0000;
- int i;
-
-#define FPREG_ADDR(f, n) ((char *)&(f)->st_space + (n) * 16)
-
- for (i = 0; i < 8; i++) {
- if (twd & 0x1) {
- st = (struct _fpxreg *) FPREG_ADDR(fxsave, i);
-
- switch (st->exponent & 0x7fff) {
- case 0x7fff:
- tag = 2; /* Special */
- break;
- case 0x0000:
- if ( !st->significand[0] &&
- !st->significand[1] &&
- !st->significand[2] &&
- !st->significand[3] ) {
- tag = 1; /* Zero */
- } else {
- tag = 2; /* Special */
- }
- break;
- default:
- if (st->significand[3] & 0x8000) {
- tag = 0; /* Valid */
- } else {
- tag = 2; /* Special */
- }
- break;
- }
- } else {
- tag = 3; /* Empty */
- }
- ret |= (tag << (2 * i));
- twd = twd >> 1;
- }
- return ret;
-}
-
-static int convert_fxsr_to_user(struct _fpstate __user *buf,
- struct user_fxsr_struct *fxsave)
-{
- unsigned long env[7];
- struct _fpreg __user *to;
- struct _fpxreg *from;
- int i;
-
- env[0] = (unsigned long)fxsave->cwd | 0xffff0000ul;
- env[1] = (unsigned long)fxsave->swd | 0xffff0000ul;
- env[2] = twd_fxsr_to_i387(fxsave);
- env[3] = fxsave->fip;
- env[4] = fxsave->fcs | ((unsigned long)fxsave->fop << 16);
- env[5] = fxsave->foo;
- env[6] = fxsave->fos;
-
- if (__copy_to_user(buf, env, 7 * sizeof(unsigned long)))
- return 1;
-
- to = &buf->_st[0];
- from = (struct _fpxreg *) &fxsave->st_space[0];
- for (i = 0; i < 8; i++, to++, from++) {
- unsigned long __user *t = (unsigned long __user *)to;
- unsigned long *f = (unsigned long *)from;
-
- if (__put_user(*f, t) ||
- __put_user(*(f + 1), t + 1) ||
- __put_user(from->exponent, &to->exponent))
- return 1;
- }
- return 0;
-}
-
-static int convert_fxsr_from_user(struct user_fxsr_struct *fxsave,
- struct _fpstate __user *buf)
-{
- unsigned long env[7];
- struct _fpxreg *to;
- struct _fpreg __user *from;
- int i;
-
- if (copy_from_user( env, buf, 7 * sizeof(long)))
- return 1;
-
- fxsave->cwd = (unsigned short)(env[0] & 0xffff);
- fxsave->swd = (unsigned short)(env[1] & 0xffff);
- fxsave->twd = twd_i387_to_fxsr((unsigned short)(env[2] & 0xffff));
- fxsave->fip = env[3];
- fxsave->fop = (unsigned short)((env[4] & 0xffff0000ul) >> 16);
- fxsave->fcs = (env[4] & 0xffff);
- fxsave->foo = env[5];
- fxsave->fos = env[6];
-
- to = (struct _fpxreg *) &fxsave->st_space[0];
- from = &buf->_st[0];
- for (i = 0; i < 8; i++, to++, from++) {
- unsigned long *t = (unsigned long *)to;
- unsigned long __user *f = (unsigned long __user *)from;
-
- if (__get_user(*t, f) ||
- __get_user(*(t + 1), f + 1) ||
- __get_user(to->exponent, &from->exponent))
- return 1;
- }
- return 0;
-}
-
-extern int have_fpx_regs;
+#include <linux/regset.h>
+#include <asm/sigframe.h>
+#ifdef CONFIG_X86_32
+struct _xstate_64 {
+ struct _fpstate_64 fpstate;
+ struct _header xstate_hdr;
+ struct _ymmh_state ymmh;
+ /* New processor state extensions go here: */
+};
+#else
+#define _xstate_64 _xstate
#endif
static int copy_sc_from_user(struct pt_regs *regs,
struct sigcontext __user *from)
{
+ struct _xstate_64 __user *from_fp64;
struct sigcontext sc;
int err;
@@ -203,35 +82,27 @@ static int copy_sc_from_user(struct pt_regs *regs,
#undef GETREG
#ifdef CONFIG_X86_32
- if (have_fpx_regs) {
- struct user_fxsr_struct fpx;
- int pid = userspace_pid[current_thread_info()->cpu];
+ from_fp64 = ((void __user *)sc.fpstate) +
+ offsetof(struct _fpstate_32, _fxsr_env);
+#else
+ from_fp64 = (void __user *)sc.fpstate;
+#endif
- err = copy_from_user(&fpx,
- &((struct _fpstate __user *)sc.fpstate)->_fxsr_env[0],
- sizeof(struct user_fxsr_struct));
- if (err)
- return 1;
+ err = copy_from_user(regs->regs.fp, from_fp64, host_fp_size);
+ if (err)
+ return 1;
- err = convert_fxsr_from_user(&fpx, (void *)sc.fpstate);
- if (err)
- return 1;
-
- err = restore_fpx_registers(pid, (unsigned long *) &fpx);
- if (err < 0) {
- printk(KERN_ERR "copy_sc_from_user - "
- "restore_fpx_registers failed, errno = %d\n",
- -err);
- return 1;
- }
- } else
+#ifdef CONFIG_X86_32
+ /* Data is duplicated and this copy is the important one */
+ err = copy_regset_from_user(current,
+ task_user_regset_view(current),
+ REGSET_FP_LEGACY, 0,
+ sizeof(struct user_i387_struct),
+ (void __user *)sc.fpstate);
+ if (err < 0)
+ return err;
#endif
- {
- err = copy_from_user(regs->regs.fp, (void *)sc.fpstate,
- sizeof(struct _xstate));
- if (err)
- return 1;
- }
+
return 0;
}
@@ -239,6 +110,7 @@ static int copy_sc_to_user(struct sigcontext __user *to,
struct _xstate __user *to_fp, struct pt_regs *regs,
unsigned long mask)
{
+ struct _xstate_64 __user *to_fp64;
struct sigcontext sc;
struct faultinfo * fi = &current->thread.arch.faultinfo;
int err;
@@ -290,35 +162,46 @@ static int copy_sc_to_user(struct sigcontext __user *to,
return 1;
#ifdef CONFIG_X86_32
- if (have_fpx_regs) {
- int pid = userspace_pid[current_thread_info()->cpu];
- struct user_fxsr_struct fpx;
-
- err = save_fpx_registers(pid, (unsigned long *) &fpx);
- if (err < 0){
- printk(KERN_ERR "copy_sc_to_user - save_fpx_registers "
- "failed, errno = %d\n", err);
- return 1;
- }
-
- err = convert_fxsr_to_user(&to_fp->fpstate, &fpx);
- if (err)
- return 1;
+ err = copy_regset_to_user(current,
+ task_user_regset_view(current),
+ REGSET_FP_LEGACY, 0,
+ sizeof(struct _fpstate_32), to_fp);
+ if (err < 0)
+ return err;
- err |= __put_user(fpx.swd, &to_fp->fpstate.status);
- err |= __put_user(X86_FXSR_MAGIC, &to_fp->fpstate.magic);
- if (err)
- return 1;
+ __put_user(X86_FXSR_MAGIC, &to_fp->fpstate.magic);
+
+ BUILD_BUG_ON(offsetof(struct _xstate, xstate_hdr) !=
+ offsetof(struct _xstate_64, xstate_hdr) +
+ offsetof(struct _fpstate_32, _fxsr_env));
+ to_fp64 = (void __user *)to_fp +
+ offsetof(struct _fpstate_32, _fxsr_env);
+#else
+ to_fp64 = to_fp;
+#endif /* CONFIG_X86_32 */
+
+ if (copy_to_user(to_fp64, regs->regs.fp, host_fp_size))
+ return 1;
- if (copy_to_user(&to_fp->fpstate._fxsr_env[0], &fpx,
- sizeof(struct user_fxsr_struct)))
- return 1;
- } else
+ /*
+ * Put magic/size values for userspace. We do not bother to verify them
+ * later on, however, userspace needs them should it try to read the
+ * XSTATE data. And ptrace does not fill in these parts.
+ */
+ BUILD_BUG_ON(sizeof(int) != FP_XSTATE_MAGIC2_SIZE);
+#ifdef CONFIG_X86_32
+ __put_user(offsetof(struct _fpstate_32, _fxsr_env) +
+ host_fp_size + FP_XSTATE_MAGIC2_SIZE,
+ &to_fp64->fpstate.sw_reserved.extended_size);
+#else
+ __put_user(host_fp_size + FP_XSTATE_MAGIC2_SIZE,
+ &to_fp64->fpstate.sw_reserved.extended_size);
#endif
- {
- if (copy_to_user(to_fp, regs->regs.fp, sizeof(struct _xstate)))
- return 1;
- }
+ __put_user(host_fp_size, &to_fp64->fpstate.sw_reserved.xstate_size);
+
+ __put_user(FP_XSTATE_MAGIC1, &to_fp64->fpstate.sw_reserved.magic1);
+ __put_user(FP_XSTATE_MAGIC2,
+ (int __user *)((void __user *)to_fp64 + host_fp_size));
return 0;
}
@@ -336,34 +219,15 @@ static int copy_ucontext_to_user(struct ucontext __user *uc,
return err;
}
-struct sigframe
-{
- char __user *pretcode;
- int sig;
- struct sigcontext sc;
- struct _xstate fpstate;
- unsigned long extramask[_NSIG_WORDS-1];
- char retcode[8];
-};
-
-struct rt_sigframe
-{
- char __user *pretcode;
- int sig;
- struct siginfo __user *pinfo;
- void __user *puc;
- struct siginfo info;
- struct ucontext uc;
- struct _xstate fpstate;
- char retcode[8];
-};
-
int setup_signal_stack_sc(unsigned long stack_top, struct ksignal *ksig,
struct pt_regs *regs, sigset_t *mask)
{
+ size_t math_size = offsetof(struct _fpstate_32, _fxsr_env) +
+ host_fp_size + FP_XSTATE_MAGIC2_SIZE;
struct sigframe __user *frame;
void __user *restorer;
int err = 0, sig = ksig->sig;
+ unsigned long fp_to;
/* This is the same calculation as i386 - ((sp + 4) & 15) == 0 */
stack_top = ((stack_top + 4) & -16UL) - 4;
@@ -371,13 +235,21 @@ int setup_signal_stack_sc(unsigned long stack_top, struct ksignal *ksig,
if (!access_ok(frame, sizeof(*frame)))
return 1;
+ /* Add required space for math frame */
+ frame = (struct sigframe __user *)((unsigned long)frame - math_size);
+
restorer = frame->retcode;
if (ksig->ka.sa.sa_flags & SA_RESTORER)
restorer = ksig->ka.sa.sa_restorer;
- err |= __put_user(restorer, &frame->pretcode);
+ err |= __put_user(restorer, (void __user * __user *)&frame->pretcode);
err |= __put_user(sig, &frame->sig);
- err |= copy_sc_to_user(&frame->sc, &frame->fpstate, regs, mask->sig[0]);
+
+ fp_to = (unsigned long)frame + sizeof(*frame);
+
+ err |= copy_sc_to_user(&frame->sc,
+ (struct _xstate __user *)fp_to,
+ regs, mask->sig[0]);
if (_NSIG_WORDS > 1)
err |= __copy_to_user(&frame->extramask, &mask->sig[1],
sizeof(frame->extramask));
@@ -407,26 +279,35 @@ int setup_signal_stack_sc(unsigned long stack_top, struct ksignal *ksig,
int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig,
struct pt_regs *regs, sigset_t *mask)
{
+ size_t math_size = offsetof(struct _fpstate_32, _fxsr_env) +
+ host_fp_size + FP_XSTATE_MAGIC2_SIZE;
struct rt_sigframe __user *frame;
void __user *restorer;
int err = 0, sig = ksig->sig;
+ unsigned long fp_to;
stack_top &= -8UL;
frame = (struct rt_sigframe __user *) stack_top - 1;
if (!access_ok(frame, sizeof(*frame)))
return 1;
+ /* Add required space for math frame */
+ frame = (struct rt_sigframe __user *)((unsigned long)frame - math_size);
+
restorer = frame->retcode;
if (ksig->ka.sa.sa_flags & SA_RESTORER)
restorer = ksig->ka.sa.sa_restorer;
- err |= __put_user(restorer, &frame->pretcode);
+ err |= __put_user(restorer, (void __user * __user *)&frame->pretcode);
err |= __put_user(sig, &frame->sig);
- err |= __put_user(&frame->info, &frame->pinfo);
- err |= __put_user(&frame->uc, &frame->puc);
+ err |= __put_user(&frame->info, (void __user * __user *)&frame->pinfo);
+ err |= __put_user(&frame->uc, (void __user * __user *)&frame->puc);
err |= copy_siginfo_to_user(&frame->info, &ksig->info);
- err |= copy_ucontext_to_user(&frame->uc, &frame->fpstate, mask,
- PT_REGS_SP(regs));
+
+ fp_to = (unsigned long)frame + sizeof(*frame);
+
+ err |= copy_ucontext_to_user(&frame->uc, (struct _xstate __user *)fp_to,
+ mask, PT_REGS_SP(regs));
/*
* This is movl $,%eax ; int $0x80
@@ -478,27 +359,24 @@ SYSCALL_DEFINE0(sigreturn)
#else
-struct rt_sigframe
-{
- char __user *pretcode;
- struct ucontext uc;
- struct siginfo info;
- struct _xstate fpstate;
-};
-
int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig,
struct pt_regs *regs, sigset_t *set)
{
+ unsigned long math_size = host_fp_size + FP_XSTATE_MAGIC2_SIZE;
struct rt_sigframe __user *frame;
int err = 0, sig = ksig->sig;
unsigned long fp_to;
frame = (struct rt_sigframe __user *)
round_down(stack_top - sizeof(struct rt_sigframe), 16);
+
+ /* Add required space for math frame */
+ frame = (struct rt_sigframe __user *)((unsigned long)frame - math_size);
+
/* Subtract 128 for a red zone and 8 for proper alignment */
frame = (struct rt_sigframe __user *) ((unsigned long) frame - 128 - 8);
- if (!access_ok(frame, sizeof(*frame)))
+ if (!access_ok(frame, sizeof(*frame) + math_size))
goto out;
if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
@@ -509,12 +387,14 @@ int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig,
/* Create the ucontext. */
err |= __put_user(0, &frame->uc.uc_flags);
- err |= __put_user(0, &frame->uc.uc_link);
+ err |= __put_user(NULL, &frame->uc.uc_link);
err |= __save_altstack(&frame->uc.uc_stack, PT_REGS_SP(regs));
- err |= copy_sc_to_user(&frame->uc.uc_mcontext, &frame->fpstate, regs,
- set->sig[0]);
- fp_to = (unsigned long)&frame->fpstate;
+ fp_to = (unsigned long)frame + sizeof(*frame);
+
+ err |= copy_sc_to_user(&frame->uc.uc_mcontext,
+ (struct _xstate __user *)fp_to,
+ regs, set->sig[0]);
err |= __put_user(fp_to, &frame->uc.uc_mcontext.fpstate);
if (sizeof(*set) == 16) {
@@ -531,7 +411,7 @@ int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig,
*/
/* x86-64 should always use SA_RESTORER. */
if (ksig->ka.sa.sa_flags & SA_RESTORER)
- err |= __put_user((void *)ksig->ka.sa.sa_restorer,
+ err |= __put_user((void __user *)ksig->ka.sa.sa_restorer,
&frame->pretcode);
else
/* could use a vstub here */
diff --git a/arch/x86/um/user-offsets.c b/arch/x86/um/user-offsets.c
index 1c77d9946199..d6e1cd9956bf 100644
--- a/arch/x86/um/user-offsets.c
+++ b/arch/x86/um/user-offsets.c
@@ -20,9 +20,6 @@ void foo(void);
void foo(void)
{
#ifdef __i386__
- DEFINE_LONGS(HOST_FP_SIZE, sizeof(struct user_fpregs_struct));
- DEFINE_LONGS(HOST_FPX_SIZE, sizeof(struct user_fpxregs_struct));
-
DEFINE(HOST_IP, EIP);
DEFINE(HOST_SP, UESP);
DEFINE(HOST_EFLAGS, EFL);
@@ -41,11 +38,6 @@ void foo(void)
DEFINE(HOST_GS, GS);
DEFINE(HOST_ORIG_AX, ORIG_EAX);
#else
-#ifdef FP_XSTATE_MAGIC1
- DEFINE_LONGS(HOST_FP_SIZE, 2696);
-#else
- DEFINE(HOST_FP_SIZE, sizeof(struct _fpstate) / sizeof(unsigned long));
-#endif
DEFINE_LONGS(HOST_BX, RBX);
DEFINE_LONGS(HOST_CX, RCX);
DEFINE_LONGS(HOST_DI, RDI);
diff --git a/arch/x86/um/vdso/Makefile b/arch/x86/um/vdso/Makefile
index 6a77ea6434ff..7478d11dacb7 100644
--- a/arch/x86/um/vdso/Makefile
+++ b/arch/x86/um/vdso/Makefile
@@ -56,7 +56,6 @@ CFLAGS_REMOVE_um_vdso.o = -pg -fprofile-arcs -ftest-coverage
quiet_cmd_vdso = VDSO $@
cmd_vdso = $(CC) -nostdlib -o $@ \
$(CC_FLAGS_LTO) $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \
- -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) && \
- sh $(src)/checkundef.sh '$(NM)' '$@'
+ -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^)
-VDSO_LDFLAGS = -fPIC -shared -Wl,--hash-style=sysv -z noexecstack
+VDSO_LDFLAGS = -fPIC -shared -Wl,--hash-style=sysv -z noexecstack -Wl,--no-undefined
diff --git a/arch/x86/um/vdso/checkundef.sh b/arch/x86/um/vdso/checkundef.sh
deleted file mode 100644
index 8e3ea6bb956f..000000000000
--- a/arch/x86/um/vdso/checkundef.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-nm="$1"
-file="$2"
-$nm "$file" | grep '^ *U' > /dev/null 2>&1
-if [ $? -eq 1 ]; then
- exit 0
-else
- echo "$file: undefined symbols found" >&2
- exit 1
-fi
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 6d1cf2436ead..7e51d2cec64b 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -57,6 +57,7 @@ static int __init hostfs_args(char *options, int *add)
{
char *ptr;
+ *add = 0;
ptr = strchr(options, ',');
if (ptr != NULL)
*ptr++ = '\0';
@@ -471,8 +472,8 @@ static int hostfs_write_begin(struct file *file, struct address_space *mapping,
*foliop = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
mapping_gfp_mask(mapping));
- if (!*foliop)
- return -ENOMEM;
+ if (IS_ERR(*foliop))
+ return PTR_ERR(*foliop);
return 0;
}