summaryrefslogtreecommitdiff
path: root/arch/arm64/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/kernel')
-rw-r--r--arch/arm64/kernel/acpi.c19
-rw-r--r--arch/arm64/kernel/asm-offsets.c2
-rw-r--r--arch/arm64/kernel/cpufeature.c66
-rw-r--r--arch/arm64/kernel/efi.c1
-rw-r--r--arch/arm64/kernel/entry-common.c219
-rw-r--r--arch/arm64/kernel/entry-fpsimd.S13
-rw-r--r--arch/arm64/kernel/entry.S114
-rw-r--r--arch/arm64/kernel/fpsimd.c14
-rw-r--r--arch/arm64/kernel/head.S11
-rw-r--r--arch/arm64/kernel/idreg-override.c2
-rw-r--r--arch/arm64/kernel/mte.c180
-rw-r--r--arch/arm64/kernel/pci.c29
-rw-r--r--arch/arm64/kernel/perf_event.c2
-rw-r--r--arch/arm64/kernel/pointer_auth.c10
-rw-r--r--arch/arm64/kernel/process.c81
-rw-r--r--arch/arm64/kernel/ptrace.c5
-rw-r--r--arch/arm64/kernel/signal.c73
-rw-r--r--arch/arm64/kernel/signal32.c45
-rw-r--r--arch/arm64/kernel/suspend.c1
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S4
20 files changed, 568 insertions, 323 deletions
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c
index f3851724fe35..1c9c2f7a1c04 100644
--- a/arch/arm64/kernel/acpi.c
+++ b/arch/arm64/kernel/acpi.c
@@ -273,7 +273,8 @@ pgprot_t __acpi_get_mem_attribute(phys_addr_t addr)
return __pgprot(PROT_DEVICE_nGnRnE);
}
-void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
+static void __iomem *__acpi_os_ioremap(acpi_physical_address phys,
+ acpi_size size, bool memory)
{
efi_memory_desc_t *md, *region = NULL;
pgprot_t prot;
@@ -299,9 +300,11 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
* It is fine for AML to remap regions that are not represented in the
* EFI memory map at all, as it only describes normal memory, and MMIO
* regions that require a virtual mapping to make them accessible to
- * the EFI runtime services.
+ * the EFI runtime services. Determine the region default
+ * attributes by checking the requested memory semantics.
*/
- prot = __pgprot(PROT_DEVICE_nGnRnE);
+ prot = memory ? __pgprot(PROT_NORMAL_NC) :
+ __pgprot(PROT_DEVICE_nGnRnE);
if (region) {
switch (region->type) {
case EFI_LOADER_CODE:
@@ -361,6 +364,16 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
return __ioremap(phys, size, prot);
}
+void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
+{
+ return __acpi_os_ioremap(phys, size, false);
+}
+
+void __iomem *acpi_os_memmap(acpi_physical_address phys, acpi_size size)
+{
+ return __acpi_os_ioremap(phys, size, true);
+}
+
/*
* Claim Synchronous External Aborts as a firmware first notification.
*
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index c85670692afa..551427ae8cc5 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -52,7 +52,7 @@ int main(void)
DEFINE(THREAD_KEYS_KERNEL, offsetof(struct task_struct, thread.keys_kernel));
#endif
#ifdef CONFIG_ARM64_MTE
- DEFINE(THREAD_GCR_EL1_USER, offsetof(struct task_struct, thread.gcr_user_excl));
+ DEFINE(THREAD_MTE_CTRL, offsetof(struct task_struct, thread.mte_ctrl));
#endif
BLANK();
DEFINE(S_X0, offsetof(struct pt_regs, regs[0]));
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 0ead8bfedf20..f8a3067d10c6 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -67,6 +67,7 @@
#include <linux/crash_dump.h>
#include <linux/sort.h>
#include <linux/stop_machine.h>
+#include <linux/sysfs.h>
#include <linux/types.h>
#include <linux/minmax.h>
#include <linux/mm.h>
@@ -239,8 +240,8 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL3_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL2_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_EL1_64BIT_ONLY),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_EL0_64BIT_ONLY),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_ELx_64BIT_ONLY),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_ELx_64BIT_ONLY),
ARM64_FTR_END,
};
@@ -1321,6 +1322,31 @@ const struct cpumask *system_32bit_el0_cpumask(void)
return cpu_possible_mask;
}
+static int __init parse_32bit_el0_param(char *str)
+{
+ allow_mismatched_32bit_el0 = true;
+ return 0;
+}
+early_param("allow_mismatched_32bit_el0", parse_32bit_el0_param);
+
+static ssize_t aarch32_el0_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ const struct cpumask *mask = system_32bit_el0_cpumask();
+
+ return sysfs_emit(buf, "%*pbl\n", cpumask_pr_args(mask));
+}
+static const DEVICE_ATTR_RO(aarch32_el0);
+
+static int __init aarch32_el0_sysfs_init(void)
+{
+ if (!allow_mismatched_32bit_el0)
+ return 0;
+
+ return device_create_file(cpu_subsys.dev_root, &dev_attr_aarch32_el0);
+}
+device_initcall(aarch32_el0_sysfs_init);
+
static bool has_32bit_el0(const struct arm64_cpu_capabilities *entry, int scope)
{
if (!has_cpuid_feature(entry, scope))
@@ -1561,8 +1587,6 @@ kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused)
if (!cpu)
arm64_use_ng_mappings = true;
-
- return;
}
#else
static void
@@ -1734,7 +1758,7 @@ static void cpu_has_fwb(const struct arm64_cpu_capabilities *__unused)
u64 val = read_sysreg_s(SYS_CLIDR_EL1);
/* Check that CLIDR_EL1.LOU{U,IS} are both 0 */
- WARN_ON(val & (7 << 27 | 7 << 21));
+ WARN_ON(CLIDR_LOUU(val) || CLIDR_LOUIS(val));
}
#ifdef CONFIG_ARM64_PAN
@@ -1843,6 +1867,9 @@ static void bti_enable(const struct arm64_cpu_capabilities *__unused)
#ifdef CONFIG_ARM64_MTE
static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
{
+ sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_ATA | SCTLR_EL1_ATA0);
+ isb();
+
/*
* Clear the tags in the zero page. This needs to be done via the
* linear map which has the Tagged attribute.
@@ -1956,7 +1983,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.sys_reg = SYS_ID_AA64PFR0_EL1,
.sign = FTR_UNSIGNED,
.field_pos = ID_AA64PFR0_EL0_SHIFT,
- .min_field_value = ID_AA64PFR0_EL0_32BIT_64BIT,
+ .min_field_value = ID_AA64PFR0_ELx_32BIT_64BIT,
},
#ifdef CONFIG_KVM
{
@@ -1967,7 +1994,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.sys_reg = SYS_ID_AA64PFR0_EL1,
.sign = FTR_UNSIGNED,
.field_pos = ID_AA64PFR0_EL1_SHIFT,
- .min_field_value = ID_AA64PFR0_EL1_32BIT_64BIT,
+ .min_field_value = ID_AA64PFR0_ELx_32BIT_64BIT,
},
{
.desc = "Protected KVM",
@@ -2901,15 +2928,38 @@ void __init setup_cpu_features(void)
static int enable_mismatched_32bit_el0(unsigned int cpu)
{
+ /*
+ * The first 32-bit-capable CPU we detected and so can no longer
+ * be offlined by userspace. -1 indicates we haven't yet onlined
+ * a 32-bit-capable CPU.
+ */
+ static int lucky_winner = -1;
+
struct cpuinfo_arm64 *info = &per_cpu(cpu_data, cpu);
bool cpu_32bit = id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0);
if (cpu_32bit) {
cpumask_set_cpu(cpu, cpu_32bit_el0_mask);
static_branch_enable_cpuslocked(&arm64_mismatched_32bit_el0);
- setup_elf_hwcaps(compat_elf_hwcaps);
}
+ if (cpumask_test_cpu(0, cpu_32bit_el0_mask) == cpu_32bit)
+ return 0;
+
+ if (lucky_winner >= 0)
+ return 0;
+
+ /*
+ * We've detected a mismatch. We need to keep one of our CPUs with
+ * 32-bit EL0 online so that is_cpu_allowed() doesn't end up rejecting
+ * every CPU in the system for a 32-bit task.
+ */
+ lucky_winner = cpu_32bit ? cpu : cpumask_any_and(cpu_32bit_el0_mask,
+ cpu_active_mask);
+ get_cpu_device(lucky_winner)->offline_disabled = true;
+ setup_elf_hwcaps(compat_elf_hwcaps);
+ pr_info("Asymmetric 32-bit EL0 support detected on CPU %u; CPU hot-unplug disabled on CPU %u\n",
+ cpu, lucky_winner);
return 0;
}
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index fa02efb28e88..e1be6c429810 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -55,6 +55,7 @@ static __init pteval_t create_mapping_protection(efi_memory_desc_t *md)
/* we will fill this structure from the stub, so don't put it in .bss */
struct screen_info screen_info __section(".data");
+EXPORT_SYMBOL(screen_info);
int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
{
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index db8b2e2d02c2..32f9796c4ffe 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -26,10 +26,14 @@
#include <asm/system_misc.h>
/*
+ * Handle IRQ/context state management when entering from kernel mode.
+ * Before this function is called it is not safe to call regular kernel code,
+ * intrumentable code, or any code which may trigger an exception.
+ *
* This is intended to match the logic in irqentry_enter(), handling the kernel
* mode transitions only.
*/
-static void noinstr enter_from_kernel_mode(struct pt_regs *regs)
+static __always_inline void __enter_from_kernel_mode(struct pt_regs *regs)
{
regs->exit_rcu = false;
@@ -45,20 +49,26 @@ static void noinstr enter_from_kernel_mode(struct pt_regs *regs)
lockdep_hardirqs_off(CALLER_ADDR0);
rcu_irq_enter_check_tick();
trace_hardirqs_off_finish();
+}
+static void noinstr enter_from_kernel_mode(struct pt_regs *regs)
+{
+ __enter_from_kernel_mode(regs);
mte_check_tfsr_entry();
}
/*
+ * Handle IRQ/context state management when exiting to kernel mode.
+ * After this function returns it is not safe to call regular kernel code,
+ * intrumentable code, or any code which may trigger an exception.
+ *
* This is intended to match the logic in irqentry_exit(), handling the kernel
* mode transitions only, and with preemption handled elsewhere.
*/
-static void noinstr exit_to_kernel_mode(struct pt_regs *regs)
+static __always_inline void __exit_to_kernel_mode(struct pt_regs *regs)
{
lockdep_assert_irqs_disabled();
- mte_check_tfsr_exit();
-
if (interrupts_enabled(regs)) {
if (regs->exit_rcu) {
trace_hardirqs_on_prepare();
@@ -75,6 +85,71 @@ static void noinstr exit_to_kernel_mode(struct pt_regs *regs)
}
}
+static void noinstr exit_to_kernel_mode(struct pt_regs *regs)
+{
+ mte_check_tfsr_exit();
+ __exit_to_kernel_mode(regs);
+}
+
+/*
+ * Handle IRQ/context state management when entering from user mode.
+ * Before this function is called it is not safe to call regular kernel code,
+ * intrumentable code, or any code which may trigger an exception.
+ */
+static __always_inline void __enter_from_user_mode(void)
+{
+ lockdep_hardirqs_off(CALLER_ADDR0);
+ CT_WARN_ON(ct_state() != CONTEXT_USER);
+ user_exit_irqoff();
+ trace_hardirqs_off_finish();
+}
+
+static __always_inline void enter_from_user_mode(struct pt_regs *regs)
+{
+ __enter_from_user_mode();
+}
+
+/*
+ * Handle IRQ/context state management when exiting to user mode.
+ * After this function returns it is not safe to call regular kernel code,
+ * intrumentable code, or any code which may trigger an exception.
+ */
+static __always_inline void __exit_to_user_mode(void)
+{
+ trace_hardirqs_on_prepare();
+ lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+ user_enter_irqoff();
+ lockdep_hardirqs_on(CALLER_ADDR0);
+}
+
+static __always_inline void prepare_exit_to_user_mode(struct pt_regs *regs)
+{
+ unsigned long flags;
+
+ local_daif_mask();
+
+ flags = READ_ONCE(current_thread_info()->flags);
+ if (unlikely(flags & _TIF_WORK_MASK))
+ do_notify_resume(regs, flags);
+}
+
+static __always_inline void exit_to_user_mode(struct pt_regs *regs)
+{
+ prepare_exit_to_user_mode(regs);
+ mte_check_tfsr_exit();
+ __exit_to_user_mode();
+}
+
+asmlinkage void noinstr asm_exit_to_user_mode(struct pt_regs *regs)
+{
+ exit_to_user_mode(regs);
+}
+
+/*
+ * Handle IRQ/context state management when entering an NMI from user/kernel
+ * mode. Before this function is called it is not safe to call regular kernel
+ * code, intrumentable code, or any code which may trigger an exception.
+ */
static void noinstr arm64_enter_nmi(struct pt_regs *regs)
{
regs->lockdep_hardirqs = lockdep_hardirqs_enabled();
@@ -88,6 +163,11 @@ static void noinstr arm64_enter_nmi(struct pt_regs *regs)
ftrace_nmi_enter();
}
+/*
+ * Handle IRQ/context state management when exiting an NMI from user/kernel
+ * mode. After this function returns it is not safe to call regular kernel
+ * code, intrumentable code, or any code which may trigger an exception.
+ */
static void noinstr arm64_exit_nmi(struct pt_regs *regs)
{
bool restore = regs->lockdep_hardirqs;
@@ -105,6 +185,40 @@ static void noinstr arm64_exit_nmi(struct pt_regs *regs)
__nmi_exit();
}
+/*
+ * Handle IRQ/context state management when entering a debug exception from
+ * kernel mode. Before this function is called it is not safe to call regular
+ * kernel code, intrumentable code, or any code which may trigger an exception.
+ */
+static void noinstr arm64_enter_el1_dbg(struct pt_regs *regs)
+{
+ regs->lockdep_hardirqs = lockdep_hardirqs_enabled();
+
+ lockdep_hardirqs_off(CALLER_ADDR0);
+ rcu_nmi_enter();
+
+ trace_hardirqs_off_finish();
+}
+
+/*
+ * Handle IRQ/context state management when exiting a debug exception from
+ * kernel mode. After this function returns it is not safe to call regular
+ * kernel code, intrumentable code, or any code which may trigger an exception.
+ */
+static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs)
+{
+ bool restore = regs->lockdep_hardirqs;
+
+ if (restore) {
+ trace_hardirqs_on_prepare();
+ lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+ }
+
+ rcu_nmi_exit();
+ if (restore)
+ lockdep_hardirqs_on(CALLER_ADDR0);
+}
+
static void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs)
{
if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs))
@@ -265,30 +379,6 @@ static void noinstr el1_undef(struct pt_regs *regs)
exit_to_kernel_mode(regs);
}
-static void noinstr arm64_enter_el1_dbg(struct pt_regs *regs)
-{
- regs->lockdep_hardirqs = lockdep_hardirqs_enabled();
-
- lockdep_hardirqs_off(CALLER_ADDR0);
- rcu_nmi_enter();
-
- trace_hardirqs_off_finish();
-}
-
-static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs)
-{
- bool restore = regs->lockdep_hardirqs;
-
- if (restore) {
- trace_hardirqs_on_prepare();
- lockdep_hardirqs_on_prepare(CALLER_ADDR0);
- }
-
- rcu_nmi_exit();
- if (restore)
- lockdep_hardirqs_on(CALLER_ADDR0);
-}
-
static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr)
{
unsigned long far = read_sysreg(far_el1);
@@ -382,31 +472,14 @@ asmlinkage void noinstr el1h_64_error_handler(struct pt_regs *regs)
arm64_exit_nmi(regs);
}
-asmlinkage void noinstr enter_from_user_mode(void)
-{
- lockdep_hardirqs_off(CALLER_ADDR0);
- CT_WARN_ON(ct_state() != CONTEXT_USER);
- user_exit_irqoff();
- trace_hardirqs_off_finish();
-}
-
-asmlinkage void noinstr exit_to_user_mode(void)
-{
- mte_check_tfsr_exit();
-
- trace_hardirqs_on_prepare();
- lockdep_hardirqs_on_prepare(CALLER_ADDR0);
- user_enter_irqoff();
- lockdep_hardirqs_on(CALLER_ADDR0);
-}
-
static void noinstr el0_da(struct pt_regs *regs, unsigned long esr)
{
unsigned long far = read_sysreg(far_el1);
- enter_from_user_mode();
+ enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_mem_abort(far, esr, regs);
+ exit_to_user_mode(regs);
}
static void noinstr el0_ia(struct pt_regs *regs, unsigned long esr)
@@ -421,37 +494,42 @@ static void noinstr el0_ia(struct pt_regs *regs, unsigned long esr)
if (!is_ttbr0_addr(far))
arm64_apply_bp_hardening();
- enter_from_user_mode();
+ enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_mem_abort(far, esr, regs);
+ exit_to_user_mode(regs);
}
static void noinstr el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode();
+ enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_fpsimd_acc(esr, regs);
+ exit_to_user_mode(regs);
}
static void noinstr el0_sve_acc(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode();
+ enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_sve_acc(esr, regs);
+ exit_to_user_mode(regs);
}
static void noinstr el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode();
+ enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_fpsimd_exc(esr, regs);
+ exit_to_user_mode(regs);
}
static void noinstr el0_sys(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode();
+ enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_sysinstr(esr, regs);
+ exit_to_user_mode(regs);
}
static void noinstr el0_pc(struct pt_regs *regs, unsigned long esr)
@@ -461,37 +539,42 @@ static void noinstr el0_pc(struct pt_regs *regs, unsigned long esr)
if (!is_ttbr0_addr(instruction_pointer(regs)))
arm64_apply_bp_hardening();
- enter_from_user_mode();
+ enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_sp_pc_abort(far, esr, regs);
+ exit_to_user_mode(regs);
}
static void noinstr el0_sp(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode();
+ enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_sp_pc_abort(regs->sp, esr, regs);
+ exit_to_user_mode(regs);
}
static void noinstr el0_undef(struct pt_regs *regs)
{
- enter_from_user_mode();
+ enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_undefinstr(regs);
+ exit_to_user_mode(regs);
}
static void noinstr el0_bti(struct pt_regs *regs)
{
- enter_from_user_mode();
+ enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_bti(regs);
+ exit_to_user_mode(regs);
}
static void noinstr el0_inv(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode();
+ enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
bad_el0_sync(regs, 0, esr);
+ exit_to_user_mode(regs);
}
static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr)
@@ -499,23 +582,26 @@ static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr)
/* Only watchpoints write FAR_EL1, otherwise its UNKNOWN */
unsigned long far = read_sysreg(far_el1);
- enter_from_user_mode();
+ enter_from_user_mode(regs);
do_debug_exception(far, esr, regs);
local_daif_restore(DAIF_PROCCTX);
+ exit_to_user_mode(regs);
}
static void noinstr el0_svc(struct pt_regs *regs)
{
- enter_from_user_mode();
+ enter_from_user_mode(regs);
cortex_a76_erratum_1463225_svc_handler();
do_el0_svc(regs);
+ exit_to_user_mode(regs);
}
static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode();
+ enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_ptrauth_fault(regs, esr);
+ exit_to_user_mode(regs);
}
asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs)
@@ -574,7 +660,7 @@ asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs)
static void noinstr el0_interrupt(struct pt_regs *regs,
void (*handler)(struct pt_regs *))
{
- enter_from_user_mode();
+ enter_from_user_mode(regs);
write_sysreg(DAIF_PROCCTX_NOIRQ, daif);
@@ -582,6 +668,8 @@ static void noinstr el0_interrupt(struct pt_regs *regs,
arm64_apply_bp_hardening();
do_interrupt_handler(regs, handler);
+
+ exit_to_user_mode(regs);
}
static void noinstr __el0_irq_handler_common(struct pt_regs *regs)
@@ -608,12 +696,13 @@ static void noinstr __el0_error_handler_common(struct pt_regs *regs)
{
unsigned long esr = read_sysreg(esr_el1);
- enter_from_user_mode();
+ enter_from_user_mode(regs);
local_daif_restore(DAIF_ERRCTX);
arm64_enter_nmi(regs);
do_serror(regs, esr);
arm64_exit_nmi(regs);
local_daif_restore(DAIF_PROCCTX);
+ exit_to_user_mode(regs);
}
asmlinkage void noinstr el0t_64_error_handler(struct pt_regs *regs)
@@ -624,16 +713,18 @@ asmlinkage void noinstr el0t_64_error_handler(struct pt_regs *regs)
#ifdef CONFIG_COMPAT
static void noinstr el0_cp15(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode();
+ enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_cp15instr(esr, regs);
+ exit_to_user_mode(regs);
}
static void noinstr el0_svc_compat(struct pt_regs *regs)
{
- enter_from_user_mode();
+ enter_from_user_mode(regs);
cortex_a76_erratum_1463225_svc_handler();
do_el0_svc_compat(regs);
+ exit_to_user_mode(regs);
}
asmlinkage void noinstr el0t_32_sync_handler(struct pt_regs *regs)
diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
index 0a7a64753878..196e921f61de 100644
--- a/arch/arm64/kernel/entry-fpsimd.S
+++ b/arch/arm64/kernel/entry-fpsimd.S
@@ -33,11 +33,24 @@ SYM_FUNC_END(fpsimd_load_state)
#ifdef CONFIG_ARM64_SVE
+/*
+ * Save the SVE state
+ *
+ * x0 - pointer to buffer for state
+ * x1 - pointer to storage for FPSR
+ */
SYM_FUNC_START(sve_save_state)
sve_save 0, x1, 2
ret
SYM_FUNC_END(sve_save_state)
+/*
+ * Load the SVE state
+ *
+ * x0 - pointer to buffer for state
+ * x1 - pointer to storage for FPSR
+ * x2 - VQ-1
+ */
SYM_FUNC_START(sve_load_state)
sve_load 0, x1, x2, 3, x4
ret
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 863d44f73028..bc6d5a970a13 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -29,16 +29,6 @@
#include <asm/asm-uaccess.h>
#include <asm/unistd.h>
-/*
- * Context tracking and irqflag tracing need to instrument transitions between
- * user and kernel mode.
- */
- .macro user_enter_irqoff
-#if defined(CONFIG_CONTEXT_TRACKING) || defined(CONFIG_TRACE_IRQFLAGS)
- bl exit_to_user_mode
-#endif
- .endm
-
.macro clear_gp_regs
.irp n,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29
mov x\n, xzr
@@ -133,42 +123,46 @@ alternative_cb_end
.endm
/* Check for MTE asynchronous tag check faults */
- .macro check_mte_async_tcf, tmp, ti_flags
+ .macro check_mte_async_tcf, tmp, ti_flags, thread_sctlr
#ifdef CONFIG_ARM64_MTE
.arch_extension lse
alternative_if_not ARM64_MTE
b 1f
alternative_else_nop_endif
+ /*
+ * Asynchronous tag check faults are only possible in ASYNC (2) or
+ * ASYM (3) modes. In each of these modes bit 1 of SCTLR_EL1.TCF0 is
+ * set, so skip the check if it is unset.
+ */
+ tbz \thread_sctlr, #(SCTLR_EL1_TCF0_SHIFT + 1), 1f
mrs_s \tmp, SYS_TFSRE0_EL1
tbz \tmp, #SYS_TFSR_EL1_TF0_SHIFT, 1f
/* Asynchronous TCF occurred for TTBR0 access, set the TI flag */
mov \tmp, #_TIF_MTE_ASYNC_FAULT
add \ti_flags, tsk, #TSK_TI_FLAGS
stset \tmp, [\ti_flags]
- msr_s SYS_TFSRE0_EL1, xzr
1:
#endif
.endm
/* Clear the MTE asynchronous tag check faults */
- .macro clear_mte_async_tcf
+ .macro clear_mte_async_tcf thread_sctlr
#ifdef CONFIG_ARM64_MTE
alternative_if ARM64_MTE
+ /* See comment in check_mte_async_tcf above. */
+ tbz \thread_sctlr, #(SCTLR_EL1_TCF0_SHIFT + 1), 1f
dsb ish
msr_s SYS_TFSRE0_EL1, xzr
+1:
alternative_else_nop_endif
#endif
.endm
- .macro mte_set_gcr, tmp, tmp2
+ .macro mte_set_gcr, mte_ctrl, tmp
#ifdef CONFIG_ARM64_MTE
- /*
- * Calculate and set the exclude mask preserving
- * the RRND (bit[16]) setting.
- */
- mrs_s \tmp2, SYS_GCR_EL1
- bfi \tmp2, \tmp, #0, #16
- msr_s SYS_GCR_EL1, \tmp2
+ ubfx \tmp, \mte_ctrl, #MTE_CTRL_GCR_USER_EXCL_SHIFT, #16
+ orr \tmp, \tmp, #SYS_GCR_EL1_RRND
+ msr_s SYS_GCR_EL1, \tmp
#endif
.endm
@@ -177,10 +171,8 @@ alternative_else_nop_endif
alternative_if_not ARM64_MTE
b 1f
alternative_else_nop_endif
- ldr_l \tmp, gcr_kernel_excl
-
- mte_set_gcr \tmp, \tmp2
- isb
+ mov \tmp, KERNEL_GCR_EL1
+ msr_s SYS_GCR_EL1, \tmp
1:
#endif
.endm
@@ -190,7 +182,7 @@ alternative_else_nop_endif
alternative_if_not ARM64_MTE
b 1f
alternative_else_nop_endif
- ldr \tmp, [\tsk, #THREAD_GCR_EL1_USER]
+ ldr \tmp, [\tsk, #THREAD_MTE_CTRL]
mte_set_gcr \tmp, \tmp2
1:
@@ -231,8 +223,8 @@ alternative_else_nop_endif
disable_step_tsk x19, x20
/* Check for asynchronous tag check faults in user space */
- check_mte_async_tcf x22, x23
- apply_ssbd 1, x22, x23
+ ldr x0, [tsk, THREAD_SCTLR_USER]
+ check_mte_async_tcf x22, x23, x0
#ifdef CONFIG_ARM64_PTR_AUTH
alternative_if ARM64_HAS_ADDRESS_AUTH
@@ -245,7 +237,6 @@ alternative_if ARM64_HAS_ADDRESS_AUTH
* was disabled on kernel exit then we would have left the kernel IA
* installed so there is no need to install it again.
*/
- ldr x0, [tsk, THREAD_SCTLR_USER]
tbz x0, SCTLR_ELx_ENIA_SHIFT, 1f
__ptrauth_keys_install_kernel_nosync tsk, x20, x22, x23
b 2f
@@ -254,12 +245,26 @@ alternative_if ARM64_HAS_ADDRESS_AUTH
orr x0, x0, SCTLR_ELx_ENIA
msr sctlr_el1, x0
2:
- isb
alternative_else_nop_endif
#endif
+ apply_ssbd 1, x22, x23
+
mte_set_kernel_gcr x22, x23
+ /*
+ * Any non-self-synchronizing system register updates required for
+ * kernel entry should be placed before this point.
+ */
+alternative_if ARM64_MTE
+ isb
+ b 1f
+alternative_else_nop_endif
+alternative_if ARM64_HAS_ADDRESS_AUTH
+ isb
+alternative_else_nop_endif
+1:
+
scs_load tsk
.else
add x21, sp, #PT_REGS_SIZE
@@ -362,6 +367,10 @@ alternative_else_nop_endif
3:
scs_save tsk
+ /* Ignore asynchronous tag check faults in the uaccess routines */
+ ldr x0, [tsk, THREAD_SCTLR_USER]
+ clear_mte_async_tcf x0
+
#ifdef CONFIG_ARM64_PTR_AUTH
alternative_if ARM64_HAS_ADDRESS_AUTH
/*
@@ -371,7 +380,6 @@ alternative_if ARM64_HAS_ADDRESS_AUTH
*
* No kernel C function calls after this.
*/
- ldr x0, [tsk, THREAD_SCTLR_USER]
tbz x0, SCTLR_ELx_ENIA_SHIFT, 1f
__ptrauth_keys_install_user tsk, x0, x1, x2
b 2f
@@ -474,18 +482,6 @@ SYM_CODE_END(__swpan_exit_el0)
/* GPRs used by entry code */
tsk .req x28 // current thread_info
-/*
- * Interrupt handling.
- */
- .macro gic_prio_kentry_setup, tmp:req
-#ifdef CONFIG_ARM64_PSEUDO_NMI
- alternative_if ARM64_HAS_IRQ_PRIO_MASKING
- mov \tmp, #(GIC_PRIO_PSR_I_SET | GIC_PRIO_IRQON)
- msr_s SYS_ICC_PMR_EL1, \tmp
- alternative_else_nop_endif
-#endif
- .endm
-
.text
/*
@@ -517,12 +513,13 @@ SYM_CODE_START(vectors)
SYM_CODE_END(vectors)
#ifdef CONFIG_VMAP_STACK
+SYM_CODE_START_LOCAL(__bad_stack)
/*
* We detected an overflow in kernel_ventry, which switched to the
* overflow stack. Stash the exception regs, and head to our overflow
* handler.
*/
-__bad_stack:
+
/* Restore the original x0 value */
mrs x0, tpidrro_el0
@@ -542,6 +539,7 @@ __bad_stack:
/* Time to die */
bl handle_bad_stack
ASM_BUG()
+SYM_CODE_END(__bad_stack)
#endif /* CONFIG_VMAP_STACK */
@@ -585,37 +583,13 @@ SYM_CODE_START_LOCAL(ret_to_kernel)
kernel_exit 1
SYM_CODE_END(ret_to_kernel)
-/*
- * "slow" syscall return path.
- */
SYM_CODE_START_LOCAL(ret_to_user)
- disable_daif
- gic_prio_kentry_setup tmp=x3
-#ifdef CONFIG_TRACE_IRQFLAGS
- bl trace_hardirqs_off
-#endif
- ldr x19, [tsk, #TSK_TI_FLAGS]
- and x2, x19, #_TIF_WORK_MASK
- cbnz x2, work_pending
-finish_ret_to_user:
- user_enter_irqoff
- /* Ignore asynchronous tag check faults in the uaccess routines */
- clear_mte_async_tcf
+ ldr x19, [tsk, #TSK_TI_FLAGS] // re-check for single-step
enable_step_tsk x19, x2
#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
bl stackleak_erase
#endif
kernel_exit 0
-
-/*
- * Ok, we need to do extra processing, enter the slow path.
- */
-work_pending:
- mov x0, sp // 'regs'
- mov x1, x19
- bl do_notify_resume
- ldr x19, [tsk, #TSK_TI_FLAGS] // re-check for single-step
- b finish_ret_to_user
SYM_CODE_END(ret_to_user)
.popsection // .entry.text
@@ -781,6 +755,8 @@ SYM_CODE_START(ret_from_fork)
mov x0, x20
blr x19
1: get_current_task tsk
+ mov x0, sp
+ bl asm_exit_to_user_mode
b ret_to_user
SYM_CODE_END(ret_from_fork)
NOKPROBE(ret_from_fork)
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index e57b23f95284..5a294f20e9de 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -162,6 +162,8 @@ extern void __percpu *efi_sve_state;
DEFINE_PER_CPU(bool, fpsimd_context_busy);
EXPORT_PER_CPU_SYMBOL(fpsimd_context_busy);
+static void fpsimd_bind_task_to_cpu(void);
+
static void __get_cpu_fpsimd_context(void)
{
bool busy = __this_cpu_xchg(fpsimd_context_busy, true);
@@ -518,12 +520,6 @@ void sve_alloc(struct task_struct *task)
/* This is a small allocation (maximum ~8KB) and Should Not Fail. */
task->thread.sve_state =
kzalloc(sve_state_size(task), GFP_KERNEL);
-
- /*
- * If future SVE revisions can have larger vectors though,
- * this may cease to be true:
- */
- BUG_ON(!task->thread.sve_state);
}
@@ -943,6 +939,10 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs)
}
sve_alloc(current);
+ if (!current->thread.sve_state) {
+ force_sig(SIGKILL);
+ return;
+ }
get_cpu_fpsimd_context();
@@ -1112,7 +1112,7 @@ void fpsimd_signal_preserve_current_state(void)
* The caller must have ownership of the cpu FPSIMD context before calling
* this function.
*/
-void fpsimd_bind_task_to_cpu(void)
+static void fpsimd_bind_task_to_cpu(void)
{
struct fpsimd_last_state_struct *last =
this_cpu_ptr(&fpsimd_last_state);
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index c5c994a73a64..17962452e31d 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -177,7 +177,7 @@ SYM_CODE_END(preserve_boot_args)
* to be composed of multiple pages. (This effectively scales the end index).
*
* vstart: virtual address of start of range
- * vend: virtual address of end of range
+ * vend: virtual address of end of range - we map [vstart, vend]
* shift: shift used to transform virtual address into index
* ptrs: number of entries in page table
* istart: index in table corresponding to vstart
@@ -214,17 +214,18 @@ SYM_CODE_END(preserve_boot_args)
*
* tbl: location of page table
* rtbl: address to be used for first level page table entry (typically tbl + PAGE_SIZE)
- * vstart: start address to map
- * vend: end address to map - we map [vstart, vend]
+ * vstart: virtual address of start of range
+ * vend: virtual address of end of range - we map [vstart, vend - 1]
* flags: flags to use to map last level entries
* phys: physical address corresponding to vstart - physical memory is contiguous
* pgds: the number of pgd entries
*
* Temporaries: istart, iend, tmp, count, sv - these need to be different registers
- * Preserves: vstart, vend, flags
- * Corrupts: tbl, rtbl, istart, iend, tmp, count, sv
+ * Preserves: vstart, flags
+ * Corrupts: tbl, rtbl, vend, istart, iend, tmp, count, sv
*/
.macro map_memory, tbl, rtbl, vstart, vend, flags, phys, pgds, istart, iend, tmp, count, sv
+ sub \vend, \vend, #1
add \rtbl, \tbl, #PAGE_SIZE
mov \sv, \rtbl
mov \count, #0
diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c
index 53a381a7f65d..d8e606fe3c21 100644
--- a/arch/arm64/kernel/idreg-override.c
+++ b/arch/arm64/kernel/idreg-override.c
@@ -54,6 +54,7 @@ static const struct ftr_set_desc pfr1 __initconst = {
.override = &id_aa64pfr1_override,
.fields = {
{ "bt", ID_AA64PFR1_BT_SHIFT },
+ { "mte", ID_AA64PFR1_MTE_SHIFT},
{}
},
};
@@ -100,6 +101,7 @@ static const struct {
{ "arm64.nopauth",
"id_aa64isar1.gpi=0 id_aa64isar1.gpa=0 "
"id_aa64isar1.api=0 id_aa64isar1.apa=0" },
+ { "arm64.nomte", "id_aa64pfr1.mte=0" },
{ "nokaslr", "kaslr.disabled=1" },
};
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 36f51b0e438a..9d314a3bad3b 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -4,6 +4,7 @@
*/
#include <linux/bitops.h>
+#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/prctl.h>
@@ -22,9 +23,7 @@
#include <asm/ptrace.h>
#include <asm/sysreg.h>
-u64 gcr_kernel_excl __ro_after_init;
-
-static bool report_fault_once = true;
+static DEFINE_PER_CPU_READ_MOSTLY(u64, mte_tcf_preferred);
#ifdef CONFIG_KASAN_HW_TAGS
/* Whether the MTE asynchronous mode is enabled. */
@@ -101,26 +100,6 @@ int memcmp_pages(struct page *page1, struct page *page2)
return ret;
}
-void mte_init_tags(u64 max_tag)
-{
- static bool gcr_kernel_excl_initialized;
-
- if (!gcr_kernel_excl_initialized) {
- /*
- * The format of the tags in KASAN is 0xFF and in MTE is 0xF.
- * This conversion extracts an MTE tag from a KASAN tag.
- */
- u64 incl = GENMASK(FIELD_GET(MTE_TAG_MASK >> MTE_TAG_SHIFT,
- max_tag), 0);
-
- gcr_kernel_excl = ~incl & SYS_GCR_EL1_EXCL_MASK;
- gcr_kernel_excl_initialized = true;
- }
-
- /* Enable the kernel exclude mask for random tags generation. */
- write_sysreg_s(SYS_GCR_EL1_RRND | gcr_kernel_excl, SYS_GCR_EL1);
-}
-
static inline void __mte_enable_kernel(const char *mode, unsigned long tcf)
{
/* Enable MTE Sync Mode for EL1. */
@@ -160,16 +139,6 @@ void mte_enable_kernel_async(void)
}
#endif
-void mte_set_report_once(bool state)
-{
- WRITE_ONCE(report_fault_once, state);
-}
-
-bool mte_report_once(void)
-{
- return READ_ONCE(report_fault_once);
-}
-
#ifdef CONFIG_KASAN_HW_TAGS
void mte_check_tfsr_el1(void)
{
@@ -193,14 +162,26 @@ void mte_check_tfsr_el1(void)
}
#endif
-static void set_gcr_el1_excl(u64 excl)
+static void mte_update_sctlr_user(struct task_struct *task)
{
- current->thread.gcr_user_excl = excl;
-
/*
- * SYS_GCR_EL1 will be set to current->thread.gcr_user_excl value
- * by mte_set_user_gcr() in kernel_exit,
+ * This must be called with preemption disabled and can only be called
+ * on the current or next task since the CPU must match where the thread
+ * is going to run. The caller is responsible for calling
+ * update_sctlr_el1() later in the same preemption disabled block.
*/
+ unsigned long sctlr = task->thread.sctlr_user;
+ unsigned long mte_ctrl = task->thread.mte_ctrl;
+ unsigned long pref, resolved_mte_tcf;
+
+ pref = __this_cpu_read(mte_tcf_preferred);
+ resolved_mte_tcf = (mte_ctrl & pref) ? pref : mte_ctrl;
+ sctlr &= ~SCTLR_EL1_TCF0_MASK;
+ if (resolved_mte_tcf & MTE_CTRL_TCF_ASYNC)
+ sctlr |= SCTLR_EL1_TCF0_ASYNC;
+ else if (resolved_mte_tcf & MTE_CTRL_TCF_SYNC)
+ sctlr |= SCTLR_EL1_TCF0_SYNC;
+ task->thread.sctlr_user = sctlr;
}
void mte_thread_init_user(void)
@@ -212,15 +193,14 @@ void mte_thread_init_user(void)
dsb(ish);
write_sysreg_s(0, SYS_TFSRE0_EL1);
clear_thread_flag(TIF_MTE_ASYNC_FAULT);
- /* disable tag checking */
- set_task_sctlr_el1((current->thread.sctlr_user & ~SCTLR_EL1_TCF0_MASK) |
- SCTLR_EL1_TCF0_NONE);
- /* reset tag generation mask */
- set_gcr_el1_excl(SYS_GCR_EL1_EXCL_MASK);
+ /* disable tag checking and reset tag generation mask */
+ set_mte_ctrl(current, 0);
}
void mte_thread_switch(struct task_struct *next)
{
+ mte_update_sctlr_user(next);
+
/*
* Check if an async tag exception occurred at EL1.
*
@@ -248,44 +228,25 @@ void mte_suspend_enter(void)
mte_check_tfsr_el1();
}
-void mte_suspend_exit(void)
-{
- if (!system_supports_mte())
- return;
-
- sysreg_clear_set_s(SYS_GCR_EL1, SYS_GCR_EL1_EXCL_MASK, gcr_kernel_excl);
- isb();
-}
-
long set_mte_ctrl(struct task_struct *task, unsigned long arg)
{
- u64 sctlr = task->thread.sctlr_user & ~SCTLR_EL1_TCF0_MASK;
- u64 gcr_excl = ~((arg & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT) &
- SYS_GCR_EL1_EXCL_MASK;
+ u64 mte_ctrl = (~((arg & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT) &
+ SYS_GCR_EL1_EXCL_MASK) << MTE_CTRL_GCR_USER_EXCL_SHIFT;
if (!system_supports_mte())
return 0;
- switch (arg & PR_MTE_TCF_MASK) {
- case PR_MTE_TCF_NONE:
- sctlr |= SCTLR_EL1_TCF0_NONE;
- break;
- case PR_MTE_TCF_SYNC:
- sctlr |= SCTLR_EL1_TCF0_SYNC;
- break;
- case PR_MTE_TCF_ASYNC:
- sctlr |= SCTLR_EL1_TCF0_ASYNC;
- break;
- default:
- return -EINVAL;
- }
-
- if (task != current) {
- task->thread.sctlr_user = sctlr;
- task->thread.gcr_user_excl = gcr_excl;
- } else {
- set_task_sctlr_el1(sctlr);
- set_gcr_el1_excl(gcr_excl);
+ if (arg & PR_MTE_TCF_ASYNC)
+ mte_ctrl |= MTE_CTRL_TCF_ASYNC;
+ if (arg & PR_MTE_TCF_SYNC)
+ mte_ctrl |= MTE_CTRL_TCF_SYNC;
+
+ task->thread.mte_ctrl = mte_ctrl;
+ if (task == current) {
+ preempt_disable();
+ mte_update_sctlr_user(task);
+ update_sctlr_el1(task->thread.sctlr_user);
+ preempt_enable();
}
return 0;
@@ -294,24 +255,18 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg)
long get_mte_ctrl(struct task_struct *task)
{
unsigned long ret;
- u64 incl = ~task->thread.gcr_user_excl & SYS_GCR_EL1_EXCL_MASK;
+ u64 mte_ctrl = task->thread.mte_ctrl;
+ u64 incl = (~mte_ctrl >> MTE_CTRL_GCR_USER_EXCL_SHIFT) &
+ SYS_GCR_EL1_EXCL_MASK;
if (!system_supports_mte())
return 0;
ret = incl << PR_MTE_TAG_SHIFT;
-
- switch (task->thread.sctlr_user & SCTLR_EL1_TCF0_MASK) {
- case SCTLR_EL1_TCF0_NONE:
- ret |= PR_MTE_TCF_NONE;
- break;
- case SCTLR_EL1_TCF0_SYNC:
- ret |= PR_MTE_TCF_SYNC;
- break;
- case SCTLR_EL1_TCF0_ASYNC:
+ if (mte_ctrl & MTE_CTRL_TCF_ASYNC)
ret |= PR_MTE_TCF_ASYNC;
- break;
- }
+ if (mte_ctrl & MTE_CTRL_TCF_SYNC)
+ ret |= PR_MTE_TCF_SYNC;
return ret;
}
@@ -450,3 +405,54 @@ int mte_ptrace_copy_tags(struct task_struct *child, long request,
return ret;
}
+
+static ssize_t mte_tcf_preferred_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ switch (per_cpu(mte_tcf_preferred, dev->id)) {
+ case MTE_CTRL_TCF_ASYNC:
+ return sysfs_emit(buf, "async\n");
+ case MTE_CTRL_TCF_SYNC:
+ return sysfs_emit(buf, "sync\n");
+ default:
+ return sysfs_emit(buf, "???\n");
+ }
+}
+
+static ssize_t mte_tcf_preferred_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ u64 tcf;
+
+ if (sysfs_streq(buf, "async"))
+ tcf = MTE_CTRL_TCF_ASYNC;
+ else if (sysfs_streq(buf, "sync"))
+ tcf = MTE_CTRL_TCF_SYNC;
+ else
+ return -EINVAL;
+
+ device_lock(dev);
+ per_cpu(mte_tcf_preferred, dev->id) = tcf;
+ device_unlock(dev);
+
+ return count;
+}
+static DEVICE_ATTR_RW(mte_tcf_preferred);
+
+static int register_mte_tcf_preferred_sysctl(void)
+{
+ unsigned int cpu;
+
+ if (!system_supports_mte())
+ return 0;
+
+ for_each_possible_cpu(cpu) {
+ per_cpu(mte_tcf_preferred, cpu) = MTE_CTRL_TCF_ASYNC;
+ device_create_file(get_cpu_device(cpu),
+ &dev_attr_mte_tcf_preferred);
+ }
+
+ return 0;
+}
+subsys_initcall(register_mte_tcf_preferred_sysctl);
diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
index 1006ed2d7c60..2276689b5411 100644
--- a/arch/arm64/kernel/pci.c
+++ b/arch/arm64/kernel/pci.c
@@ -82,14 +82,29 @@ int acpi_pci_bus_find_domain_nr(struct pci_bus *bus)
int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
{
- if (!acpi_disabled) {
- struct pci_config_window *cfg = bridge->bus->sysdata;
- struct acpi_device *adev = to_acpi_device(cfg->parent);
- struct device *bus_dev = &bridge->bus->dev;
+ struct pci_config_window *cfg;
+ struct acpi_device *adev;
+ struct device *bus_dev;
- ACPI_COMPANION_SET(&bridge->dev, adev);
- set_dev_node(bus_dev, acpi_get_node(acpi_device_handle(adev)));
- }
+ if (acpi_disabled)
+ return 0;
+
+ cfg = bridge->bus->sysdata;
+
+ /*
+ * On Hyper-V there is no corresponding ACPI device for a root bridge,
+ * therefore ->parent is set as NULL by the driver. And set 'adev' as
+ * NULL in this case because there is no proper ACPI device.
+ */
+ if (!cfg->parent)
+ adev = NULL;
+ else
+ adev = to_acpi_device(cfg->parent);
+
+ bus_dev = &bridge->bus->dev;
+
+ ACPI_COMPANION_SET(&bridge->dev, adev);
+ set_dev_node(bus_dev, acpi_get_node(acpi_device_handle(adev)));
return 0;
}
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index d07788dad388..b4044469527e 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -1055,7 +1055,7 @@ static void __armv8pmu_probe_pmu(void *info)
dfr0 = read_sysreg(id_aa64dfr0_el1);
pmuver = cpuid_feature_extract_unsigned_field(dfr0,
ID_AA64DFR0_PMUVER_SHIFT);
- if (pmuver == 0xf || pmuver == 0)
+ if (pmuver == ID_AA64DFR0_PMUVER_IMP_DEF || pmuver == 0)
return;
cpu_pmu->pmuver = pmuver;
diff --git a/arch/arm64/kernel/pointer_auth.c b/arch/arm64/kernel/pointer_auth.c
index 60901ab0a7fe..2708b620b4ae 100644
--- a/arch/arm64/kernel/pointer_auth.c
+++ b/arch/arm64/kernel/pointer_auth.c
@@ -67,7 +67,7 @@ static u64 arg_to_enxx_mask(unsigned long arg)
int ptrauth_set_enabled_keys(struct task_struct *tsk, unsigned long keys,
unsigned long enabled)
{
- u64 sctlr = tsk->thread.sctlr_user;
+ u64 sctlr;
if (!system_supports_address_auth())
return -EINVAL;
@@ -78,12 +78,14 @@ int ptrauth_set_enabled_keys(struct task_struct *tsk, unsigned long keys,
if ((keys & ~PR_PAC_ENABLED_KEYS_MASK) || (enabled & ~keys))
return -EINVAL;
+ preempt_disable();
+ sctlr = tsk->thread.sctlr_user;
sctlr &= ~arg_to_enxx_mask(keys);
sctlr |= arg_to_enxx_mask(enabled);
+ tsk->thread.sctlr_user = sctlr;
if (tsk == current)
- set_task_sctlr_el1(sctlr);
- else
- tsk->thread.sctlr_user = sctlr;
+ update_sctlr_el1(sctlr);
+ preempt_enable();
return 0;
}
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index c8989b999250..19100fe8f7e4 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -6,9 +6,6 @@
* Copyright (C) 1996-2000 Russell King - Converted to ARM.
* Copyright (C) 2012 ARM Ltd.
*/
-
-#include <stdarg.h>
-
#include <linux/compat.h>
#include <linux/efi.h>
#include <linux/elf.h>
@@ -21,6 +18,7 @@
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/nospec.h>
+#include <linux/sched.h>
#include <linux/stddef.h>
#include <linux/sysctl.h>
#include <linux/unistd.h>
@@ -163,7 +161,7 @@ static void print_pstate(struct pt_regs *regs)
u64 pstate = regs->pstate;
if (compat_user_mode(regs)) {
- printk("pstate: %08llx (%c%c%c%c %c %s %s %c%c%c)\n",
+ printk("pstate: %08llx (%c%c%c%c %c %s %s %c%c%c %cDIT %cSSBS)\n",
pstate,
pstate & PSR_AA32_N_BIT ? 'N' : 'n',
pstate & PSR_AA32_Z_BIT ? 'Z' : 'z',
@@ -174,12 +172,14 @@ static void print_pstate(struct pt_regs *regs)
pstate & PSR_AA32_E_BIT ? "BE" : "LE",
pstate & PSR_AA32_A_BIT ? 'A' : 'a',
pstate & PSR_AA32_I_BIT ? 'I' : 'i',
- pstate & PSR_AA32_F_BIT ? 'F' : 'f');
+ pstate & PSR_AA32_F_BIT ? 'F' : 'f',
+ pstate & PSR_AA32_DIT_BIT ? '+' : '-',
+ pstate & PSR_AA32_SSBS_BIT ? '+' : '-');
} else {
const char *btype_str = btypes[(pstate & PSR_BTYPE_MASK) >>
PSR_BTYPE_SHIFT];
- printk("pstate: %08llx (%c%c%c%c %c%c%c%c %cPAN %cUAO %cTCO BTYPE=%s)\n",
+ printk("pstate: %08llx (%c%c%c%c %c%c%c%c %cPAN %cUAO %cTCO %cDIT %cSSBS BTYPE=%s)\n",
pstate,
pstate & PSR_N_BIT ? 'N' : 'n',
pstate & PSR_Z_BIT ? 'Z' : 'z',
@@ -192,6 +192,8 @@ static void print_pstate(struct pt_regs *regs)
pstate & PSR_PAN_BIT ? '+' : '-',
pstate & PSR_UAO_BIT ? '+' : '-',
pstate & PSR_TCO_BIT ? '+' : '-',
+ pstate & PSR_DIT_BIT ? '+' : '-',
+ pstate & PSR_SSBS_BIT ? '+' : '-',
btype_str);
}
}
@@ -468,16 +470,13 @@ static void erratum_1418040_thread_switch(struct task_struct *prev,
write_sysreg(val, cntkctl_el1);
}
-static void compat_thread_switch(struct task_struct *next)
-{
- if (!is_compat_thread(task_thread_info(next)))
- return;
-
- if (static_branch_unlikely(&arm64_mismatched_32bit_el0))
- set_tsk_thread_flag(next, TIF_NOTIFY_RESUME);
-}
-
-static void update_sctlr_el1(u64 sctlr)
+/*
+ * __switch_to() checks current->thread.sctlr_user as an optimisation. Therefore
+ * this function must be called with preemption disabled and the update to
+ * sctlr_user must be made in the same preemption disabled block so that
+ * __switch_to() does not see the variable update before the SCTLR_EL1 one.
+ */
+void update_sctlr_el1(u64 sctlr)
{
/*
* EnIA must not be cleared while in the kernel as this is necessary for
@@ -489,19 +488,6 @@ static void update_sctlr_el1(u64 sctlr)
isb();
}
-void set_task_sctlr_el1(u64 sctlr)
-{
- /*
- * __switch_to() checks current->thread.sctlr as an
- * optimisation. Disable preemption so that it does not see
- * the variable update before the SCTLR_EL1 one.
- */
- preempt_disable();
- current->thread.sctlr_user = sctlr;
- update_sctlr_el1(sctlr);
- preempt_enable();
-}
-
/*
* Thread switching.
*/
@@ -518,7 +504,6 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
ssbs_thread_switch(next);
erratum_1418040_thread_switch(prev, next);
ptrauth_thread_switch_user(next);
- compat_thread_switch(next);
/*
* Complete any pending TLB or cache maintenance on this CPU in case
@@ -579,6 +564,28 @@ unsigned long arch_align_stack(unsigned long sp)
return sp & ~0xf;
}
+#ifdef CONFIG_COMPAT
+int compat_elf_check_arch(const struct elf32_hdr *hdr)
+{
+ if (!system_supports_32bit_el0())
+ return false;
+
+ if ((hdr)->e_machine != EM_ARM)
+ return false;
+
+ if (!((hdr)->e_flags & EF_ARM_EABI_MASK))
+ return false;
+
+ /*
+ * Prevent execve() of a 32-bit program from a deadline task
+ * if the restricted affinity mask would be inadmissible on an
+ * asymmetric system.
+ */
+ return !static_branch_unlikely(&arm64_mismatched_32bit_el0) ||
+ !dl_task_check_affinity(current, system_32bit_el0_cpumask());
+}
+#endif
+
/*
* Called from setup_new_exec() after (COMPAT_)SET_PERSONALITY.
*/
@@ -588,8 +595,20 @@ void arch_setup_new_exec(void)
if (is_compat_task()) {
mmflags = MMCF_AARCH32;
+
+ /*
+ * Restrict the CPU affinity mask for a 32-bit task so that
+ * it contains only 32-bit-capable CPUs.
+ *
+ * From the perspective of the task, this looks similar to
+ * what would happen if the 64-bit-only CPUs were hot-unplugged
+ * at the point of execve(), although we try a bit harder to
+ * honour the cpuset hierarchy.
+ */
if (static_branch_unlikely(&arm64_mismatched_32bit_el0))
- set_tsk_thread_flag(current, TIF_NOTIFY_RESUME);
+ force_compatible_cpus_allowed_ptr(current);
+ } else if (static_branch_unlikely(&arm64_mismatched_32bit_el0)) {
+ relax_compatible_cpus_allowed_ptr(current);
}
current->mm->context.flags = mmflags;
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index b381a1ee9ea7..e26196a33cf4 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -845,6 +845,11 @@ static int sve_set(struct task_struct *target,
}
sve_alloc(target);
+ if (!target->thread.sve_state) {
+ ret = -ENOMEM;
+ clear_tsk_thread_flag(target, TIF_SVE);
+ goto out;
+ }
/*
* Ensure target->thread.sve_state is up to date with target's
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 23036334f4dc..9fe70b12b34f 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -290,6 +290,11 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
/* From now, fpsimd_thread_switch() won't touch thread.sve_state */
sve_alloc(current);
+ if (!current->thread.sve_state) {
+ clear_thread_flag(TIF_SVE);
+ return -ENOMEM;
+ }
+
err = __copy_from_user(current->thread.sve_state,
(char __user const *)user->sve +
SVE_SIG_REGS_OFFSET,
@@ -912,21 +917,7 @@ static void do_signal(struct pt_regs *regs)
restore_saved_sigmask();
}
-static bool cpu_affinity_invalid(struct pt_regs *regs)
-{
- if (!compat_user_mode(regs))
- return false;
-
- /*
- * We're preemptible, but a reschedule will cause us to check the
- * affinity again.
- */
- return !cpumask_test_cpu(raw_smp_processor_id(),
- system_32bit_el0_cpumask());
-}
-
-asmlinkage void do_notify_resume(struct pt_regs *regs,
- unsigned long thread_flags)
+void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags)
{
do {
if (thread_flags & _TIF_NEED_RESCHED) {
@@ -952,19 +943,6 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
if (thread_flags & _TIF_NOTIFY_RESUME) {
tracehook_notify_resume(regs);
rseq_handle_notify_resume(NULL, regs);
-
- /*
- * If we reschedule after checking the affinity
- * then we must ensure that TIF_NOTIFY_RESUME
- * is set so that we check the affinity again.
- * Since tracehook_notify_resume() clears the
- * flag, ensure that the compiler doesn't move
- * it after the affinity check.
- */
- barrier();
-
- if (cpu_affinity_invalid(regs))
- force_sig(SIGKILL);
}
if (thread_flags & _TIF_FOREIGN_FPSTATE)
@@ -1000,3 +978,42 @@ void __init minsigstksz_setup(void)
round_up(sizeof(struct frame_record), 16) +
16; /* max alignment padding */
}
+
+/*
+ * Compile-time assertions for siginfo_t offsets. Check NSIG* as well, as
+ * changes likely come with new fields that should be added below.
+ */
+static_assert(NSIGILL == 11);
+static_assert(NSIGFPE == 15);
+static_assert(NSIGSEGV == 9);
+static_assert(NSIGBUS == 5);
+static_assert(NSIGTRAP == 6);
+static_assert(NSIGCHLD == 6);
+static_assert(NSIGSYS == 2);
+static_assert(sizeof(siginfo_t) == 128);
+static_assert(__alignof__(siginfo_t) == 8);
+static_assert(offsetof(siginfo_t, si_signo) == 0x00);
+static_assert(offsetof(siginfo_t, si_errno) == 0x04);
+static_assert(offsetof(siginfo_t, si_code) == 0x08);
+static_assert(offsetof(siginfo_t, si_pid) == 0x10);
+static_assert(offsetof(siginfo_t, si_uid) == 0x14);
+static_assert(offsetof(siginfo_t, si_tid) == 0x10);
+static_assert(offsetof(siginfo_t, si_overrun) == 0x14);
+static_assert(offsetof(siginfo_t, si_status) == 0x18);
+static_assert(offsetof(siginfo_t, si_utime) == 0x20);
+static_assert(offsetof(siginfo_t, si_stime) == 0x28);
+static_assert(offsetof(siginfo_t, si_value) == 0x18);
+static_assert(offsetof(siginfo_t, si_int) == 0x18);
+static_assert(offsetof(siginfo_t, si_ptr) == 0x18);
+static_assert(offsetof(siginfo_t, si_addr) == 0x10);
+static_assert(offsetof(siginfo_t, si_addr_lsb) == 0x18);
+static_assert(offsetof(siginfo_t, si_lower) == 0x20);
+static_assert(offsetof(siginfo_t, si_upper) == 0x28);
+static_assert(offsetof(siginfo_t, si_pkey) == 0x20);
+static_assert(offsetof(siginfo_t, si_perf_data) == 0x18);
+static_assert(offsetof(siginfo_t, si_perf_type) == 0x20);
+static_assert(offsetof(siginfo_t, si_band) == 0x10);
+static_assert(offsetof(siginfo_t, si_fd) == 0x18);
+static_assert(offsetof(siginfo_t, si_call_addr) == 0x10);
+static_assert(offsetof(siginfo_t, si_syscall) == 0x18);
+static_assert(offsetof(siginfo_t, si_arch) == 0x1c);
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index 2f507f565c48..d984282b979f 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -46,8 +46,6 @@ struct compat_aux_sigframe {
unsigned long end_magic;
} __attribute__((__aligned__(8)));
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set)
{
compat_sigset_t cset;
@@ -190,10 +188,8 @@ static int compat_restore_sigframe(struct pt_regs *regs,
unsigned long psr;
err = get_sigset_t(&set, &sf->uc.uc_sigmask);
- if (err == 0) {
- sigdelsetmask(&set, ~_BLOCKABLE);
+ if (err == 0)
set_current_blocked(&set);
- }
__get_user_error(regs->regs[0], &sf->uc.uc_mcontext.arm_r0, err);
__get_user_error(regs->regs[1], &sf->uc.uc_mcontext.arm_r1, err);
@@ -457,3 +453,42 @@ void compat_setup_restart_syscall(struct pt_regs *regs)
{
regs->regs[7] = __NR_compat_restart_syscall;
}
+
+/*
+ * Compile-time assertions for siginfo_t offsets. Check NSIG* as well, as
+ * changes likely come with new fields that should be added below.
+ */
+static_assert(NSIGILL == 11);
+static_assert(NSIGFPE == 15);
+static_assert(NSIGSEGV == 9);
+static_assert(NSIGBUS == 5);
+static_assert(NSIGTRAP == 6);
+static_assert(NSIGCHLD == 6);
+static_assert(NSIGSYS == 2);
+static_assert(sizeof(compat_siginfo_t) == 128);
+static_assert(__alignof__(compat_siginfo_t) == 4);
+static_assert(offsetof(compat_siginfo_t, si_signo) == 0x00);
+static_assert(offsetof(compat_siginfo_t, si_errno) == 0x04);
+static_assert(offsetof(compat_siginfo_t, si_code) == 0x08);
+static_assert(offsetof(compat_siginfo_t, si_pid) == 0x0c);
+static_assert(offsetof(compat_siginfo_t, si_uid) == 0x10);
+static_assert(offsetof(compat_siginfo_t, si_tid) == 0x0c);
+static_assert(offsetof(compat_siginfo_t, si_overrun) == 0x10);
+static_assert(offsetof(compat_siginfo_t, si_status) == 0x14);
+static_assert(offsetof(compat_siginfo_t, si_utime) == 0x18);
+static_assert(offsetof(compat_siginfo_t, si_stime) == 0x1c);
+static_assert(offsetof(compat_siginfo_t, si_value) == 0x14);
+static_assert(offsetof(compat_siginfo_t, si_int) == 0x14);
+static_assert(offsetof(compat_siginfo_t, si_ptr) == 0x14);
+static_assert(offsetof(compat_siginfo_t, si_addr) == 0x0c);
+static_assert(offsetof(compat_siginfo_t, si_addr_lsb) == 0x10);
+static_assert(offsetof(compat_siginfo_t, si_lower) == 0x14);
+static_assert(offsetof(compat_siginfo_t, si_upper) == 0x18);
+static_assert(offsetof(compat_siginfo_t, si_pkey) == 0x14);
+static_assert(offsetof(compat_siginfo_t, si_perf_data) == 0x10);
+static_assert(offsetof(compat_siginfo_t, si_perf_type) == 0x14);
+static_assert(offsetof(compat_siginfo_t, si_band) == 0x0c);
+static_assert(offsetof(compat_siginfo_t, si_fd) == 0x10);
+static_assert(offsetof(compat_siginfo_t, si_call_addr) == 0x0c);
+static_assert(offsetof(compat_siginfo_t, si_syscall) == 0x10);
+static_assert(offsetof(compat_siginfo_t, si_arch) == 0x14);
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index 938ce6fbee8a..19ee7c33769d 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -76,7 +76,6 @@ void notrace __cpu_suspend_exit(void)
spectre_v4_enable_mitigation(NULL);
/* Restore additional feature-specific configuration */
- mte_suspend_exit();
ptrauth_suspend_exit();
}
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 709d2c433c5e..f6b1a88245db 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -181,6 +181,8 @@ SECTIONS
/* everything from this point to __init_begin will be marked RO NX */
RO_DATA(PAGE_SIZE)
+ HYPERVISOR_DATA_SECTIONS
+
idmap_pg_dir = .;
. += IDMAP_DIR_SIZE;
idmap_pg_end = .;
@@ -260,8 +262,6 @@ SECTIONS
_sdata = .;
RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN)
- HYPERVISOR_DATA_SECTIONS
-
/*
* Data written with the MMU off but read with the MMU on requires
* cache lines to be invalidated, discarding up to a Cache Writeback