diff options
Diffstat (limited to 'arch/arm64/kernel')
-rw-r--r-- | arch/arm64/kernel/acpi.c | 19 | ||||
-rw-r--r-- | arch/arm64/kernel/asm-offsets.c | 2 | ||||
-rw-r--r-- | arch/arm64/kernel/cpufeature.c | 66 | ||||
-rw-r--r-- | arch/arm64/kernel/efi.c | 1 | ||||
-rw-r--r-- | arch/arm64/kernel/entry-common.c | 219 | ||||
-rw-r--r-- | arch/arm64/kernel/entry-fpsimd.S | 13 | ||||
-rw-r--r-- | arch/arm64/kernel/entry.S | 114 | ||||
-rw-r--r-- | arch/arm64/kernel/fpsimd.c | 14 | ||||
-rw-r--r-- | arch/arm64/kernel/head.S | 11 | ||||
-rw-r--r-- | arch/arm64/kernel/idreg-override.c | 2 | ||||
-rw-r--r-- | arch/arm64/kernel/mte.c | 180 | ||||
-rw-r--r-- | arch/arm64/kernel/pci.c | 29 | ||||
-rw-r--r-- | arch/arm64/kernel/perf_event.c | 2 | ||||
-rw-r--r-- | arch/arm64/kernel/pointer_auth.c | 10 | ||||
-rw-r--r-- | arch/arm64/kernel/process.c | 81 | ||||
-rw-r--r-- | arch/arm64/kernel/ptrace.c | 5 | ||||
-rw-r--r-- | arch/arm64/kernel/signal.c | 73 | ||||
-rw-r--r-- | arch/arm64/kernel/signal32.c | 45 | ||||
-rw-r--r-- | arch/arm64/kernel/suspend.c | 1 | ||||
-rw-r--r-- | arch/arm64/kernel/vmlinux.lds.S | 4 |
20 files changed, 568 insertions, 323 deletions
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index f3851724fe35..1c9c2f7a1c04 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -273,7 +273,8 @@ pgprot_t __acpi_get_mem_attribute(phys_addr_t addr) return __pgprot(PROT_DEVICE_nGnRnE); } -void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) +static void __iomem *__acpi_os_ioremap(acpi_physical_address phys, + acpi_size size, bool memory) { efi_memory_desc_t *md, *region = NULL; pgprot_t prot; @@ -299,9 +300,11 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) * It is fine for AML to remap regions that are not represented in the * EFI memory map at all, as it only describes normal memory, and MMIO * regions that require a virtual mapping to make them accessible to - * the EFI runtime services. + * the EFI runtime services. Determine the region default + * attributes by checking the requested memory semantics. */ - prot = __pgprot(PROT_DEVICE_nGnRnE); + prot = memory ? __pgprot(PROT_NORMAL_NC) : + __pgprot(PROT_DEVICE_nGnRnE); if (region) { switch (region->type) { case EFI_LOADER_CODE: @@ -361,6 +364,16 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) return __ioremap(phys, size, prot); } +void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) +{ + return __acpi_os_ioremap(phys, size, false); +} + +void __iomem *acpi_os_memmap(acpi_physical_address phys, acpi_size size) +{ + return __acpi_os_ioremap(phys, size, true); +} + /* * Claim Synchronous External Aborts as a firmware first notification. 
* diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index c85670692afa..551427ae8cc5 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -52,7 +52,7 @@ int main(void) DEFINE(THREAD_KEYS_KERNEL, offsetof(struct task_struct, thread.keys_kernel)); #endif #ifdef CONFIG_ARM64_MTE - DEFINE(THREAD_GCR_EL1_USER, offsetof(struct task_struct, thread.gcr_user_excl)); + DEFINE(THREAD_MTE_CTRL, offsetof(struct task_struct, thread.mte_ctrl)); #endif BLANK(); DEFINE(S_X0, offsetof(struct pt_regs, regs[0])); diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 0ead8bfedf20..f8a3067d10c6 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -67,6 +67,7 @@ #include <linux/crash_dump.h> #include <linux/sort.h> #include <linux/stop_machine.h> +#include <linux/sysfs.h> #include <linux/types.h> #include <linux/minmax.h> #include <linux/mm.h> @@ -239,8 +240,8 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL2_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_EL1_64BIT_ONLY), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_EL0_64BIT_ONLY), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_ELx_64BIT_ONLY), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_ELx_64BIT_ONLY), ARM64_FTR_END, }; @@ -1321,6 +1322,31 @@ const struct cpumask *system_32bit_el0_cpumask(void) return cpu_possible_mask; } +static int __init parse_32bit_el0_param(char *str) +{ + allow_mismatched_32bit_el0 = true; + return 0; +} 
+early_param("allow_mismatched_32bit_el0", parse_32bit_el0_param); + +static ssize_t aarch32_el0_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + const struct cpumask *mask = system_32bit_el0_cpumask(); + + return sysfs_emit(buf, "%*pbl\n", cpumask_pr_args(mask)); +} +static const DEVICE_ATTR_RO(aarch32_el0); + +static int __init aarch32_el0_sysfs_init(void) +{ + if (!allow_mismatched_32bit_el0) + return 0; + + return device_create_file(cpu_subsys.dev_root, &dev_attr_aarch32_el0); +} +device_initcall(aarch32_el0_sysfs_init); + static bool has_32bit_el0(const struct arm64_cpu_capabilities *entry, int scope) { if (!has_cpuid_feature(entry, scope)) @@ -1561,8 +1587,6 @@ kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) if (!cpu) arm64_use_ng_mappings = true; - - return; } #else static void @@ -1734,7 +1758,7 @@ static void cpu_has_fwb(const struct arm64_cpu_capabilities *__unused) u64 val = read_sysreg_s(SYS_CLIDR_EL1); /* Check that CLIDR_EL1.LOU{U,IS} are both 0 */ - WARN_ON(val & (7 << 27 | 7 << 21)); + WARN_ON(CLIDR_LOUU(val) || CLIDR_LOUIS(val)); } #ifdef CONFIG_ARM64_PAN @@ -1843,6 +1867,9 @@ static void bti_enable(const struct arm64_cpu_capabilities *__unused) #ifdef CONFIG_ARM64_MTE static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap) { + sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_ATA | SCTLR_EL1_ATA0); + isb(); + /* * Clear the tags in the zero page. This needs to be done via the * linear map which has the Tagged attribute. 
@@ -1956,7 +1983,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64PFR0_EL0_SHIFT, - .min_field_value = ID_AA64PFR0_EL0_32BIT_64BIT, + .min_field_value = ID_AA64PFR0_ELx_32BIT_64BIT, }, #ifdef CONFIG_KVM { @@ -1967,7 +1994,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64PFR0_EL1_SHIFT, - .min_field_value = ID_AA64PFR0_EL1_32BIT_64BIT, + .min_field_value = ID_AA64PFR0_ELx_32BIT_64BIT, }, { .desc = "Protected KVM", @@ -2901,15 +2928,38 @@ void __init setup_cpu_features(void) static int enable_mismatched_32bit_el0(unsigned int cpu) { + /* + * The first 32-bit-capable CPU we detected and so can no longer + * be offlined by userspace. -1 indicates we haven't yet onlined + * a 32-bit-capable CPU. + */ + static int lucky_winner = -1; + struct cpuinfo_arm64 *info = &per_cpu(cpu_data, cpu); bool cpu_32bit = id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0); if (cpu_32bit) { cpumask_set_cpu(cpu, cpu_32bit_el0_mask); static_branch_enable_cpuslocked(&arm64_mismatched_32bit_el0); - setup_elf_hwcaps(compat_elf_hwcaps); } + if (cpumask_test_cpu(0, cpu_32bit_el0_mask) == cpu_32bit) + return 0; + + if (lucky_winner >= 0) + return 0; + + /* + * We've detected a mismatch. We need to keep one of our CPUs with + * 32-bit EL0 online so that is_cpu_allowed() doesn't end up rejecting + * every CPU in the system for a 32-bit task. + */ + lucky_winner = cpu_32bit ? 
cpu : cpumask_any_and(cpu_32bit_el0_mask, + cpu_active_mask); + get_cpu_device(lucky_winner)->offline_disabled = true; + setup_elf_hwcaps(compat_elf_hwcaps); + pr_info("Asymmetric 32-bit EL0 support detected on CPU %u; CPU hot-unplug disabled on CPU %u\n", + cpu, lucky_winner); return 0; } diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index fa02efb28e88..e1be6c429810 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -55,6 +55,7 @@ static __init pteval_t create_mapping_protection(efi_memory_desc_t *md) /* we will fill this structure from the stub, so don't put it in .bss */ struct screen_info screen_info __section(".data"); +EXPORT_SYMBOL(screen_info); int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) { diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index db8b2e2d02c2..32f9796c4ffe 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -26,10 +26,14 @@ #include <asm/system_misc.h> /* + * Handle IRQ/context state management when entering from kernel mode. + * Before this function is called it is not safe to call regular kernel code, + * instrumentable code, or any code which may trigger an exception. + * * This is intended to match the logic in irqentry_enter(), handling the kernel * mode transitions only. */ -static void noinstr enter_from_kernel_mode(struct pt_regs *regs) +static __always_inline void __enter_from_kernel_mode(struct pt_regs *regs) { regs->exit_rcu = false; @@ -45,20 +49,26 @@ static void noinstr enter_from_kernel_mode(struct pt_regs *regs) lockdep_hardirqs_off(CALLER_ADDR0); rcu_irq_enter_check_tick(); trace_hardirqs_off_finish(); +} +static void noinstr enter_from_kernel_mode(struct pt_regs *regs) +{ + __enter_from_kernel_mode(regs); mte_check_tfsr_entry(); } /* + * Handle IRQ/context state management when exiting to kernel mode. 
+ * After this function returns it is not safe to call regular kernel code, + * instrumentable code, or any code which may trigger an exception. + * * This is intended to match the logic in irqentry_exit(), handling the kernel * mode transitions only, and with preemption handled elsewhere. */ -static void noinstr exit_to_kernel_mode(struct pt_regs *regs) +static __always_inline void __exit_to_kernel_mode(struct pt_regs *regs) { lockdep_assert_irqs_disabled(); - mte_check_tfsr_exit(); - if (interrupts_enabled(regs)) { if (regs->exit_rcu) { trace_hardirqs_on_prepare(); @@ -75,6 +85,71 @@ static void noinstr exit_to_kernel_mode(struct pt_regs *regs) } } +static void noinstr exit_to_kernel_mode(struct pt_regs *regs) +{ + mte_check_tfsr_exit(); + __exit_to_kernel_mode(regs); +} + +/* + * Handle IRQ/context state management when entering from user mode. + * Before this function is called it is not safe to call regular kernel code, + * instrumentable code, or any code which may trigger an exception. + */ +static __always_inline void __enter_from_user_mode(void) +{ + lockdep_hardirqs_off(CALLER_ADDR0); + CT_WARN_ON(ct_state() != CONTEXT_USER); + user_exit_irqoff(); + trace_hardirqs_off_finish(); +} + +static __always_inline void enter_from_user_mode(struct pt_regs *regs) +{ + __enter_from_user_mode(); +} + +/* + * Handle IRQ/context state management when exiting to user mode. + * After this function returns it is not safe to call regular kernel code, + * instrumentable code, or any code which may trigger an exception. 
+ */ +static __always_inline void __exit_to_user_mode(void) +{ + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(CALLER_ADDR0); + user_enter_irqoff(); + lockdep_hardirqs_on(CALLER_ADDR0); +} + +static __always_inline void prepare_exit_to_user_mode(struct pt_regs *regs) +{ + unsigned long flags; + + local_daif_mask(); + + flags = READ_ONCE(current_thread_info()->flags); + if (unlikely(flags & _TIF_WORK_MASK)) + do_notify_resume(regs, flags); +} + +static __always_inline void exit_to_user_mode(struct pt_regs *regs) +{ + prepare_exit_to_user_mode(regs); + mte_check_tfsr_exit(); + __exit_to_user_mode(); +} + +asmlinkage void noinstr asm_exit_to_user_mode(struct pt_regs *regs) +{ + exit_to_user_mode(regs); +} + +/* + * Handle IRQ/context state management when entering an NMI from user/kernel + * mode. Before this function is called it is not safe to call regular kernel + * code, instrumentable code, or any code which may trigger an exception. + */ static void noinstr arm64_enter_nmi(struct pt_regs *regs) { regs->lockdep_hardirqs = lockdep_hardirqs_enabled(); @@ -88,6 +163,11 @@ static void noinstr arm64_enter_nmi(struct pt_regs *regs) ftrace_nmi_enter(); } +/* + * Handle IRQ/context state management when exiting an NMI from user/kernel + * mode. After this function returns it is not safe to call regular kernel + * code, instrumentable code, or any code which may trigger an exception. + */ static void noinstr arm64_exit_nmi(struct pt_regs *regs) { bool restore = regs->lockdep_hardirqs; @@ -105,6 +185,40 @@ static void noinstr arm64_exit_nmi(struct pt_regs *regs) __nmi_exit(); } +/* + * Handle IRQ/context state management when entering a debug exception from + * kernel mode. Before this function is called it is not safe to call regular + * kernel code, instrumentable code, or any code which may trigger an exception. 
+ */ +static void noinstr arm64_enter_el1_dbg(struct pt_regs *regs) +{ + regs->lockdep_hardirqs = lockdep_hardirqs_enabled(); + + lockdep_hardirqs_off(CALLER_ADDR0); + rcu_nmi_enter(); + + trace_hardirqs_off_finish(); +} + +/* + * Handle IRQ/context state management when exiting a debug exception from + * kernel mode. After this function returns it is not safe to call regular + * kernel code, instrumentable code, or any code which may trigger an exception. + */ +static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs) +{ + bool restore = regs->lockdep_hardirqs; + + if (restore) { + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(CALLER_ADDR0); + } + + rcu_nmi_exit(); + if (restore) + lockdep_hardirqs_on(CALLER_ADDR0); +} + static void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs) { if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs)) @@ -265,30 +379,6 @@ static void noinstr el1_undef(struct pt_regs *regs) exit_to_kernel_mode(regs); } -static void noinstr arm64_enter_el1_dbg(struct pt_regs *regs) -{ - regs->lockdep_hardirqs = lockdep_hardirqs_enabled(); - - lockdep_hardirqs_off(CALLER_ADDR0); - rcu_nmi_enter(); - - trace_hardirqs_off_finish(); -} - -static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs) -{ - bool restore = regs->lockdep_hardirqs; - - if (restore) { - trace_hardirqs_on_prepare(); - lockdep_hardirqs_on_prepare(CALLER_ADDR0); - } - - rcu_nmi_exit(); - if (restore) - lockdep_hardirqs_on(CALLER_ADDR0); -} - static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); @@ -382,31 +472,14 @@ asmlinkage void noinstr el1h_64_error_handler(struct pt_regs *regs) arm64_exit_nmi(regs); } -asmlinkage void noinstr enter_from_user_mode(void) -{ - lockdep_hardirqs_off(CALLER_ADDR0); - CT_WARN_ON(ct_state() != CONTEXT_USER); - user_exit_irqoff(); - trace_hardirqs_off_finish(); -} - -asmlinkage void noinstr exit_to_user_mode(void) -{ - mte_check_tfsr_exit(); - - 
trace_hardirqs_on_prepare(); - lockdep_hardirqs_on_prepare(CALLER_ADDR0); - user_enter_irqoff(); - lockdep_hardirqs_on(CALLER_ADDR0); -} - static void noinstr el0_da(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); - enter_from_user_mode(); + enter_from_user_mode(regs); local_daif_restore(DAIF_PROCCTX); do_mem_abort(far, esr, regs); + exit_to_user_mode(regs); } static void noinstr el0_ia(struct pt_regs *regs, unsigned long esr) @@ -421,37 +494,42 @@ static void noinstr el0_ia(struct pt_regs *regs, unsigned long esr) if (!is_ttbr0_addr(far)) arm64_apply_bp_hardening(); - enter_from_user_mode(); + enter_from_user_mode(regs); local_daif_restore(DAIF_PROCCTX); do_mem_abort(far, esr, regs); + exit_to_user_mode(regs); } static void noinstr el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr) { - enter_from_user_mode(); + enter_from_user_mode(regs); local_daif_restore(DAIF_PROCCTX); do_fpsimd_acc(esr, regs); + exit_to_user_mode(regs); } static void noinstr el0_sve_acc(struct pt_regs *regs, unsigned long esr) { - enter_from_user_mode(); + enter_from_user_mode(regs); local_daif_restore(DAIF_PROCCTX); do_sve_acc(esr, regs); + exit_to_user_mode(regs); } static void noinstr el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr) { - enter_from_user_mode(); + enter_from_user_mode(regs); local_daif_restore(DAIF_PROCCTX); do_fpsimd_exc(esr, regs); + exit_to_user_mode(regs); } static void noinstr el0_sys(struct pt_regs *regs, unsigned long esr) { - enter_from_user_mode(); + enter_from_user_mode(regs); local_daif_restore(DAIF_PROCCTX); do_sysinstr(esr, regs); + exit_to_user_mode(regs); } static void noinstr el0_pc(struct pt_regs *regs, unsigned long esr) @@ -461,37 +539,42 @@ static void noinstr el0_pc(struct pt_regs *regs, unsigned long esr) if (!is_ttbr0_addr(instruction_pointer(regs))) arm64_apply_bp_hardening(); - enter_from_user_mode(); + enter_from_user_mode(regs); local_daif_restore(DAIF_PROCCTX); do_sp_pc_abort(far, esr, regs); + 
exit_to_user_mode(regs); } static void noinstr el0_sp(struct pt_regs *regs, unsigned long esr) { - enter_from_user_mode(); + enter_from_user_mode(regs); local_daif_restore(DAIF_PROCCTX); do_sp_pc_abort(regs->sp, esr, regs); + exit_to_user_mode(regs); } static void noinstr el0_undef(struct pt_regs *regs) { - enter_from_user_mode(); + enter_from_user_mode(regs); local_daif_restore(DAIF_PROCCTX); do_undefinstr(regs); + exit_to_user_mode(regs); } static void noinstr el0_bti(struct pt_regs *regs) { - enter_from_user_mode(); + enter_from_user_mode(regs); local_daif_restore(DAIF_PROCCTX); do_bti(regs); + exit_to_user_mode(regs); } static void noinstr el0_inv(struct pt_regs *regs, unsigned long esr) { - enter_from_user_mode(); + enter_from_user_mode(regs); local_daif_restore(DAIF_PROCCTX); bad_el0_sync(regs, 0, esr); + exit_to_user_mode(regs); } static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr) @@ -499,23 +582,26 @@ static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr) /* Only watchpoints write FAR_EL1, otherwise its UNKNOWN */ unsigned long far = read_sysreg(far_el1); - enter_from_user_mode(); + enter_from_user_mode(regs); do_debug_exception(far, esr, regs); local_daif_restore(DAIF_PROCCTX); + exit_to_user_mode(regs); } static void noinstr el0_svc(struct pt_regs *regs) { - enter_from_user_mode(); + enter_from_user_mode(regs); cortex_a76_erratum_1463225_svc_handler(); do_el0_svc(regs); + exit_to_user_mode(regs); } static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr) { - enter_from_user_mode(); + enter_from_user_mode(regs); local_daif_restore(DAIF_PROCCTX); do_ptrauth_fault(regs, esr); + exit_to_user_mode(regs); } asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs) @@ -574,7 +660,7 @@ asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs) static void noinstr el0_interrupt(struct pt_regs *regs, void (*handler)(struct pt_regs *)) { - enter_from_user_mode(); + enter_from_user_mode(regs); 
write_sysreg(DAIF_PROCCTX_NOIRQ, daif); @@ -582,6 +668,8 @@ static void noinstr el0_interrupt(struct pt_regs *regs, arm64_apply_bp_hardening(); do_interrupt_handler(regs, handler); + + exit_to_user_mode(regs); } static void noinstr __el0_irq_handler_common(struct pt_regs *regs) @@ -608,12 +696,13 @@ static void noinstr __el0_error_handler_common(struct pt_regs *regs) { unsigned long esr = read_sysreg(esr_el1); - enter_from_user_mode(); + enter_from_user_mode(regs); local_daif_restore(DAIF_ERRCTX); arm64_enter_nmi(regs); do_serror(regs, esr); arm64_exit_nmi(regs); local_daif_restore(DAIF_PROCCTX); + exit_to_user_mode(regs); } asmlinkage void noinstr el0t_64_error_handler(struct pt_regs *regs) @@ -624,16 +713,18 @@ asmlinkage void noinstr el0t_64_error_handler(struct pt_regs *regs) #ifdef CONFIG_COMPAT static void noinstr el0_cp15(struct pt_regs *regs, unsigned long esr) { - enter_from_user_mode(); + enter_from_user_mode(regs); local_daif_restore(DAIF_PROCCTX); do_cp15instr(esr, regs); + exit_to_user_mode(regs); } static void noinstr el0_svc_compat(struct pt_regs *regs) { - enter_from_user_mode(); + enter_from_user_mode(regs); cortex_a76_erratum_1463225_svc_handler(); do_el0_svc_compat(regs); + exit_to_user_mode(regs); } asmlinkage void noinstr el0t_32_sync_handler(struct pt_regs *regs) diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index 0a7a64753878..196e921f61de 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -33,11 +33,24 @@ SYM_FUNC_END(fpsimd_load_state) #ifdef CONFIG_ARM64_SVE +/* + * Save the SVE state + * + * x0 - pointer to buffer for state + * x1 - pointer to storage for FPSR + */ SYM_FUNC_START(sve_save_state) sve_save 0, x1, 2 ret SYM_FUNC_END(sve_save_state) +/* + * Load the SVE state + * + * x0 - pointer to buffer for state + * x1 - pointer to storage for FPSR + * x2 - VQ-1 + */ SYM_FUNC_START(sve_load_state) sve_load 0, x1, x2, 3, x4 ret diff --git a/arch/arm64/kernel/entry.S 
b/arch/arm64/kernel/entry.S index 863d44f73028..bc6d5a970a13 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -29,16 +29,6 @@ #include <asm/asm-uaccess.h> #include <asm/unistd.h> -/* - * Context tracking and irqflag tracing need to instrument transitions between - * user and kernel mode. - */ - .macro user_enter_irqoff -#if defined(CONFIG_CONTEXT_TRACKING) || defined(CONFIG_TRACE_IRQFLAGS) - bl exit_to_user_mode -#endif - .endm - .macro clear_gp_regs .irp n,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29 mov x\n, xzr @@ -133,42 +123,46 @@ alternative_cb_end .endm /* Check for MTE asynchronous tag check faults */ - .macro check_mte_async_tcf, tmp, ti_flags + .macro check_mte_async_tcf, tmp, ti_flags, thread_sctlr #ifdef CONFIG_ARM64_MTE .arch_extension lse alternative_if_not ARM64_MTE b 1f alternative_else_nop_endif + /* + * Asynchronous tag check faults are only possible in ASYNC (2) or + * ASYM (3) modes. In each of these modes bit 1 of SCTLR_EL1.TCF0 is + * set, so skip the check if it is unset. + */ + tbz \thread_sctlr, #(SCTLR_EL1_TCF0_SHIFT + 1), 1f mrs_s \tmp, SYS_TFSRE0_EL1 tbz \tmp, #SYS_TFSR_EL1_TF0_SHIFT, 1f /* Asynchronous TCF occurred for TTBR0 access, set the TI flag */ mov \tmp, #_TIF_MTE_ASYNC_FAULT add \ti_flags, tsk, #TSK_TI_FLAGS stset \tmp, [\ti_flags] - msr_s SYS_TFSRE0_EL1, xzr 1: #endif .endm /* Clear the MTE asynchronous tag check faults */ - .macro clear_mte_async_tcf + .macro clear_mte_async_tcf thread_sctlr #ifdef CONFIG_ARM64_MTE alternative_if ARM64_MTE + /* See comment in check_mte_async_tcf above. */ + tbz \thread_sctlr, #(SCTLR_EL1_TCF0_SHIFT + 1), 1f dsb ish msr_s SYS_TFSRE0_EL1, xzr +1: alternative_else_nop_endif #endif .endm - .macro mte_set_gcr, tmp, tmp2 + .macro mte_set_gcr, mte_ctrl, tmp #ifdef CONFIG_ARM64_MTE - /* - * Calculate and set the exclude mask preserving - * the RRND (bit[16]) setting. 
- */ - mrs_s \tmp2, SYS_GCR_EL1 - bfi \tmp2, \tmp, #0, #16 - msr_s SYS_GCR_EL1, \tmp2 + ubfx \tmp, \mte_ctrl, #MTE_CTRL_GCR_USER_EXCL_SHIFT, #16 + orr \tmp, \tmp, #SYS_GCR_EL1_RRND + msr_s SYS_GCR_EL1, \tmp #endif .endm @@ -177,10 +171,8 @@ alternative_else_nop_endif alternative_if_not ARM64_MTE b 1f alternative_else_nop_endif - ldr_l \tmp, gcr_kernel_excl - - mte_set_gcr \tmp, \tmp2 - isb + mov \tmp, KERNEL_GCR_EL1 + msr_s SYS_GCR_EL1, \tmp 1: #endif .endm @@ -190,7 +182,7 @@ alternative_else_nop_endif alternative_if_not ARM64_MTE b 1f alternative_else_nop_endif - ldr \tmp, [\tsk, #THREAD_GCR_EL1_USER] + ldr \tmp, [\tsk, #THREAD_MTE_CTRL] mte_set_gcr \tmp, \tmp2 1: @@ -231,8 +223,8 @@ alternative_else_nop_endif disable_step_tsk x19, x20 /* Check for asynchronous tag check faults in user space */ - check_mte_async_tcf x22, x23 - apply_ssbd 1, x22, x23 + ldr x0, [tsk, THREAD_SCTLR_USER] + check_mte_async_tcf x22, x23, x0 #ifdef CONFIG_ARM64_PTR_AUTH alternative_if ARM64_HAS_ADDRESS_AUTH @@ -245,7 +237,6 @@ alternative_if ARM64_HAS_ADDRESS_AUTH * was disabled on kernel exit then we would have left the kernel IA * installed so there is no need to install it again. */ - ldr x0, [tsk, THREAD_SCTLR_USER] tbz x0, SCTLR_ELx_ENIA_SHIFT, 1f __ptrauth_keys_install_kernel_nosync tsk, x20, x22, x23 b 2f @@ -254,12 +245,26 @@ alternative_if ARM64_HAS_ADDRESS_AUTH orr x0, x0, SCTLR_ELx_ENIA msr sctlr_el1, x0 2: - isb alternative_else_nop_endif #endif + apply_ssbd 1, x22, x23 + mte_set_kernel_gcr x22, x23 + /* + * Any non-self-synchronizing system register updates required for + * kernel entry should be placed before this point. 
+ */ +alternative_if ARM64_MTE + isb + b 1f +alternative_else_nop_endif +alternative_if ARM64_HAS_ADDRESS_AUTH + isb +alternative_else_nop_endif +1: + scs_load tsk .else add x21, sp, #PT_REGS_SIZE @@ -362,6 +367,10 @@ alternative_else_nop_endif 3: scs_save tsk + /* Ignore asynchronous tag check faults in the uaccess routines */ + ldr x0, [tsk, THREAD_SCTLR_USER] + clear_mte_async_tcf x0 + #ifdef CONFIG_ARM64_PTR_AUTH alternative_if ARM64_HAS_ADDRESS_AUTH /* @@ -371,7 +380,6 @@ alternative_if ARM64_HAS_ADDRESS_AUTH * * No kernel C function calls after this. */ - ldr x0, [tsk, THREAD_SCTLR_USER] tbz x0, SCTLR_ELx_ENIA_SHIFT, 1f __ptrauth_keys_install_user tsk, x0, x1, x2 b 2f @@ -474,18 +482,6 @@ SYM_CODE_END(__swpan_exit_el0) /* GPRs used by entry code */ tsk .req x28 // current thread_info -/* - * Interrupt handling. - */ - .macro gic_prio_kentry_setup, tmp:req -#ifdef CONFIG_ARM64_PSEUDO_NMI - alternative_if ARM64_HAS_IRQ_PRIO_MASKING - mov \tmp, #(GIC_PRIO_PSR_I_SET | GIC_PRIO_IRQON) - msr_s SYS_ICC_PMR_EL1, \tmp - alternative_else_nop_endif -#endif - .endm - .text /* @@ -517,12 +513,13 @@ SYM_CODE_START(vectors) SYM_CODE_END(vectors) #ifdef CONFIG_VMAP_STACK +SYM_CODE_START_LOCAL(__bad_stack) /* * We detected an overflow in kernel_ventry, which switched to the * overflow stack. Stash the exception regs, and head to our overflow * handler. */ -__bad_stack: + /* Restore the original x0 value */ mrs x0, tpidrro_el0 @@ -542,6 +539,7 @@ __bad_stack: /* Time to die */ bl handle_bad_stack ASM_BUG() +SYM_CODE_END(__bad_stack) #endif /* CONFIG_VMAP_STACK */ @@ -585,37 +583,13 @@ SYM_CODE_START_LOCAL(ret_to_kernel) kernel_exit 1 SYM_CODE_END(ret_to_kernel) -/* - * "slow" syscall return path. 
- */ SYM_CODE_START_LOCAL(ret_to_user) - disable_daif - gic_prio_kentry_setup tmp=x3 -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_off -#endif - ldr x19, [tsk, #TSK_TI_FLAGS] - and x2, x19, #_TIF_WORK_MASK - cbnz x2, work_pending -finish_ret_to_user: - user_enter_irqoff - /* Ignore asynchronous tag check faults in the uaccess routines */ - clear_mte_async_tcf + ldr x19, [tsk, #TSK_TI_FLAGS] // re-check for single-step enable_step_tsk x19, x2 #ifdef CONFIG_GCC_PLUGIN_STACKLEAK bl stackleak_erase #endif kernel_exit 0 - -/* - * Ok, we need to do extra processing, enter the slow path. - */ -work_pending: - mov x0, sp // 'regs' - mov x1, x19 - bl do_notify_resume - ldr x19, [tsk, #TSK_TI_FLAGS] // re-check for single-step - b finish_ret_to_user SYM_CODE_END(ret_to_user) .popsection // .entry.text @@ -781,6 +755,8 @@ SYM_CODE_START(ret_from_fork) mov x0, x20 blr x19 1: get_current_task tsk + mov x0, sp + bl asm_exit_to_user_mode b ret_to_user SYM_CODE_END(ret_from_fork) NOKPROBE(ret_from_fork) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index e57b23f95284..5a294f20e9de 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -162,6 +162,8 @@ extern void __percpu *efi_sve_state; DEFINE_PER_CPU(bool, fpsimd_context_busy); EXPORT_PER_CPU_SYMBOL(fpsimd_context_busy); +static void fpsimd_bind_task_to_cpu(void); + static void __get_cpu_fpsimd_context(void) { bool busy = __this_cpu_xchg(fpsimd_context_busy, true); @@ -518,12 +520,6 @@ void sve_alloc(struct task_struct *task) /* This is a small allocation (maximum ~8KB) and Should Not Fail. 
*/ task->thread.sve_state = kzalloc(sve_state_size(task), GFP_KERNEL); - - /* - * If future SVE revisions can have larger vectors though, - * this may cease to be true: - */ - BUG_ON(!task->thread.sve_state); } @@ -943,6 +939,10 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs) } sve_alloc(current); + if (!current->thread.sve_state) { + force_sig(SIGKILL); + return; + } get_cpu_fpsimd_context(); @@ -1112,7 +1112,7 @@ void fpsimd_signal_preserve_current_state(void) * The caller must have ownership of the cpu FPSIMD context before calling * this function. */ -void fpsimd_bind_task_to_cpu(void) +static void fpsimd_bind_task_to_cpu(void) { struct fpsimd_last_state_struct *last = this_cpu_ptr(&fpsimd_last_state); diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index c5c994a73a64..17962452e31d 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -177,7 +177,7 @@ SYM_CODE_END(preserve_boot_args) * to be composed of multiple pages. (This effectively scales the end index). 
* * vstart: virtual address of start of range - * vend: virtual address of end of range + * vend: virtual address of end of range - we map [vstart, vend] * shift: shift used to transform virtual address into index * ptrs: number of entries in page table * istart: index in table corresponding to vstart @@ -214,17 +214,18 @@ SYM_CODE_END(preserve_boot_args) * * tbl: location of page table * rtbl: address to be used for first level page table entry (typically tbl + PAGE_SIZE) - * vstart: start address to map - * vend: end address to map - we map [vstart, vend] + * vstart: virtual address of start of range + * vend: virtual address of end of range - we map [vstart, vend - 1] * flags: flags to use to map last level entries * phys: physical address corresponding to vstart - physical memory is contiguous * pgds: the number of pgd entries * * Temporaries: istart, iend, tmp, count, sv - these need to be different registers - * Preserves: vstart, vend, flags - * Corrupts: tbl, rtbl, istart, iend, tmp, count, sv + * Preserves: vstart, flags + * Corrupts: tbl, rtbl, vend, istart, iend, tmp, count, sv */ .macro map_memory, tbl, rtbl, vstart, vend, flags, phys, pgds, istart, iend, tmp, count, sv + sub \vend, \vend, #1 add \rtbl, \tbl, #PAGE_SIZE mov \sv, \rtbl mov \count, #0 diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index 53a381a7f65d..d8e606fe3c21 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -54,6 +54,7 @@ static const struct ftr_set_desc pfr1 __initconst = { .override = &id_aa64pfr1_override, .fields = { { "bt", ID_AA64PFR1_BT_SHIFT }, + { "mte", ID_AA64PFR1_MTE_SHIFT}, {} }, }; @@ -100,6 +101,7 @@ static const struct { { "arm64.nopauth", "id_aa64isar1.gpi=0 id_aa64isar1.gpa=0 " "id_aa64isar1.api=0 id_aa64isar1.apa=0" }, + { "arm64.nomte", "id_aa64pfr1.mte=0" }, { "nokaslr", "kaslr.disabled=1" }, }; diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index 
36f51b0e438a..9d314a3bad3b 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -4,6 +4,7 @@ */ #include <linux/bitops.h> +#include <linux/cpu.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/prctl.h> @@ -22,9 +23,7 @@ #include <asm/ptrace.h> #include <asm/sysreg.h> -u64 gcr_kernel_excl __ro_after_init; - -static bool report_fault_once = true; +static DEFINE_PER_CPU_READ_MOSTLY(u64, mte_tcf_preferred); #ifdef CONFIG_KASAN_HW_TAGS /* Whether the MTE asynchronous mode is enabled. */ @@ -101,26 +100,6 @@ int memcmp_pages(struct page *page1, struct page *page2) return ret; } -void mte_init_tags(u64 max_tag) -{ - static bool gcr_kernel_excl_initialized; - - if (!gcr_kernel_excl_initialized) { - /* - * The format of the tags in KASAN is 0xFF and in MTE is 0xF. - * This conversion extracts an MTE tag from a KASAN tag. - */ - u64 incl = GENMASK(FIELD_GET(MTE_TAG_MASK >> MTE_TAG_SHIFT, - max_tag), 0); - - gcr_kernel_excl = ~incl & SYS_GCR_EL1_EXCL_MASK; - gcr_kernel_excl_initialized = true; - } - - /* Enable the kernel exclude mask for random tags generation. */ - write_sysreg_s(SYS_GCR_EL1_RRND | gcr_kernel_excl, SYS_GCR_EL1); -} - static inline void __mte_enable_kernel(const char *mode, unsigned long tcf) { /* Enable MTE Sync Mode for EL1. 
*/ @@ -160,16 +139,6 @@ void mte_enable_kernel_async(void) } #endif -void mte_set_report_once(bool state) -{ - WRITE_ONCE(report_fault_once, state); -} - -bool mte_report_once(void) -{ - return READ_ONCE(report_fault_once); -} - #ifdef CONFIG_KASAN_HW_TAGS void mte_check_tfsr_el1(void) { @@ -193,14 +162,26 @@ void mte_check_tfsr_el1(void) } #endif -static void set_gcr_el1_excl(u64 excl) +static void mte_update_sctlr_user(struct task_struct *task) { - current->thread.gcr_user_excl = excl; - /* - * SYS_GCR_EL1 will be set to current->thread.gcr_user_excl value - * by mte_set_user_gcr() in kernel_exit, + * This must be called with preemption disabled and can only be called + * on the current or next task since the CPU must match where the thread + * is going to run. The caller is responsible for calling + * update_sctlr_el1() later in the same preemption disabled block. */ + unsigned long sctlr = task->thread.sctlr_user; + unsigned long mte_ctrl = task->thread.mte_ctrl; + unsigned long pref, resolved_mte_tcf; + + pref = __this_cpu_read(mte_tcf_preferred); + resolved_mte_tcf = (mte_ctrl & pref) ? pref : mte_ctrl; + sctlr &= ~SCTLR_EL1_TCF0_MASK; + if (resolved_mte_tcf & MTE_CTRL_TCF_ASYNC) + sctlr |= SCTLR_EL1_TCF0_ASYNC; + else if (resolved_mte_tcf & MTE_CTRL_TCF_SYNC) + sctlr |= SCTLR_EL1_TCF0_SYNC; + task->thread.sctlr_user = sctlr; } void mte_thread_init_user(void) @@ -212,15 +193,14 @@ void mte_thread_init_user(void) dsb(ish); write_sysreg_s(0, SYS_TFSRE0_EL1); clear_thread_flag(TIF_MTE_ASYNC_FAULT); - /* disable tag checking */ - set_task_sctlr_el1((current->thread.sctlr_user & ~SCTLR_EL1_TCF0_MASK) | - SCTLR_EL1_TCF0_NONE); - /* reset tag generation mask */ - set_gcr_el1_excl(SYS_GCR_EL1_EXCL_MASK); + /* disable tag checking and reset tag generation mask */ + set_mte_ctrl(current, 0); } void mte_thread_switch(struct task_struct *next) { + mte_update_sctlr_user(next); + /* * Check if an async tag exception occurred at EL1. 
* @@ -248,44 +228,25 @@ void mte_suspend_enter(void) mte_check_tfsr_el1(); } -void mte_suspend_exit(void) -{ - if (!system_supports_mte()) - return; - - sysreg_clear_set_s(SYS_GCR_EL1, SYS_GCR_EL1_EXCL_MASK, gcr_kernel_excl); - isb(); -} - long set_mte_ctrl(struct task_struct *task, unsigned long arg) { - u64 sctlr = task->thread.sctlr_user & ~SCTLR_EL1_TCF0_MASK; - u64 gcr_excl = ~((arg & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT) & - SYS_GCR_EL1_EXCL_MASK; + u64 mte_ctrl = (~((arg & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT) & + SYS_GCR_EL1_EXCL_MASK) << MTE_CTRL_GCR_USER_EXCL_SHIFT; if (!system_supports_mte()) return 0; - switch (arg & PR_MTE_TCF_MASK) { - case PR_MTE_TCF_NONE: - sctlr |= SCTLR_EL1_TCF0_NONE; - break; - case PR_MTE_TCF_SYNC: - sctlr |= SCTLR_EL1_TCF0_SYNC; - break; - case PR_MTE_TCF_ASYNC: - sctlr |= SCTLR_EL1_TCF0_ASYNC; - break; - default: - return -EINVAL; - } - - if (task != current) { - task->thread.sctlr_user = sctlr; - task->thread.gcr_user_excl = gcr_excl; - } else { - set_task_sctlr_el1(sctlr); - set_gcr_el1_excl(gcr_excl); + if (arg & PR_MTE_TCF_ASYNC) + mte_ctrl |= MTE_CTRL_TCF_ASYNC; + if (arg & PR_MTE_TCF_SYNC) + mte_ctrl |= MTE_CTRL_TCF_SYNC; + + task->thread.mte_ctrl = mte_ctrl; + if (task == current) { + preempt_disable(); + mte_update_sctlr_user(task); + update_sctlr_el1(task->thread.sctlr_user); + preempt_enable(); } return 0; @@ -294,24 +255,18 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg) long get_mte_ctrl(struct task_struct *task) { unsigned long ret; - u64 incl = ~task->thread.gcr_user_excl & SYS_GCR_EL1_EXCL_MASK; + u64 mte_ctrl = task->thread.mte_ctrl; + u64 incl = (~mte_ctrl >> MTE_CTRL_GCR_USER_EXCL_SHIFT) & + SYS_GCR_EL1_EXCL_MASK; if (!system_supports_mte()) return 0; ret = incl << PR_MTE_TAG_SHIFT; - - switch (task->thread.sctlr_user & SCTLR_EL1_TCF0_MASK) { - case SCTLR_EL1_TCF0_NONE: - ret |= PR_MTE_TCF_NONE; - break; - case SCTLR_EL1_TCF0_SYNC: - ret |= PR_MTE_TCF_SYNC; - break; - case 
SCTLR_EL1_TCF0_ASYNC: + if (mte_ctrl & MTE_CTRL_TCF_ASYNC) ret |= PR_MTE_TCF_ASYNC; - break; - } + if (mte_ctrl & MTE_CTRL_TCF_SYNC) + ret |= PR_MTE_TCF_SYNC; return ret; } @@ -450,3 +405,54 @@ int mte_ptrace_copy_tags(struct task_struct *child, long request, return ret; } + +static ssize_t mte_tcf_preferred_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + switch (per_cpu(mte_tcf_preferred, dev->id)) { + case MTE_CTRL_TCF_ASYNC: + return sysfs_emit(buf, "async\n"); + case MTE_CTRL_TCF_SYNC: + return sysfs_emit(buf, "sync\n"); + default: + return sysfs_emit(buf, "???\n"); + } +} + +static ssize_t mte_tcf_preferred_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + u64 tcf; + + if (sysfs_streq(buf, "async")) + tcf = MTE_CTRL_TCF_ASYNC; + else if (sysfs_streq(buf, "sync")) + tcf = MTE_CTRL_TCF_SYNC; + else + return -EINVAL; + + device_lock(dev); + per_cpu(mte_tcf_preferred, dev->id) = tcf; + device_unlock(dev); + + return count; +} +static DEVICE_ATTR_RW(mte_tcf_preferred); + +static int register_mte_tcf_preferred_sysctl(void) +{ + unsigned int cpu; + + if (!system_supports_mte()) + return 0; + + for_each_possible_cpu(cpu) { + per_cpu(mte_tcf_preferred, cpu) = MTE_CTRL_TCF_ASYNC; + device_create_file(get_cpu_device(cpu), + &dev_attr_mte_tcf_preferred); + } + + return 0; +} +subsys_initcall(register_mte_tcf_preferred_sysctl); diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c index 1006ed2d7c60..2276689b5411 100644 --- a/arch/arm64/kernel/pci.c +++ b/arch/arm64/kernel/pci.c @@ -82,14 +82,29 @@ int acpi_pci_bus_find_domain_nr(struct pci_bus *bus) int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) { - if (!acpi_disabled) { - struct pci_config_window *cfg = bridge->bus->sysdata; - struct acpi_device *adev = to_acpi_device(cfg->parent); - struct device *bus_dev = &bridge->bus->dev; + struct pci_config_window *cfg; + struct acpi_device *adev; + struct device *bus_dev; - 
ACPI_COMPANION_SET(&bridge->dev, adev); - set_dev_node(bus_dev, acpi_get_node(acpi_device_handle(adev))); - } + if (acpi_disabled) + return 0; + + cfg = bridge->bus->sysdata; + + /* + * On Hyper-V there is no corresponding ACPI device for a root bridge, + * therefore ->parent is set as NULL by the driver. And set 'adev' as + * NULL in this case because there is no proper ACPI device. + */ + if (!cfg->parent) + adev = NULL; + else + adev = to_acpi_device(cfg->parent); + + bus_dev = &bridge->bus->dev; + + ACPI_COMPANION_SET(&bridge->dev, adev); + set_dev_node(bus_dev, acpi_get_node(acpi_device_handle(adev))); return 0; } diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index d07788dad388..b4044469527e 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -1055,7 +1055,7 @@ static void __armv8pmu_probe_pmu(void *info) dfr0 = read_sysreg(id_aa64dfr0_el1); pmuver = cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_PMUVER_SHIFT); - if (pmuver == 0xf || pmuver == 0) + if (pmuver == ID_AA64DFR0_PMUVER_IMP_DEF || pmuver == 0) return; cpu_pmu->pmuver = pmuver; diff --git a/arch/arm64/kernel/pointer_auth.c b/arch/arm64/kernel/pointer_auth.c index 60901ab0a7fe..2708b620b4ae 100644 --- a/arch/arm64/kernel/pointer_auth.c +++ b/arch/arm64/kernel/pointer_auth.c @@ -67,7 +67,7 @@ static u64 arg_to_enxx_mask(unsigned long arg) int ptrauth_set_enabled_keys(struct task_struct *tsk, unsigned long keys, unsigned long enabled) { - u64 sctlr = tsk->thread.sctlr_user; + u64 sctlr; if (!system_supports_address_auth()) return -EINVAL; @@ -78,12 +78,14 @@ int ptrauth_set_enabled_keys(struct task_struct *tsk, unsigned long keys, if ((keys & ~PR_PAC_ENABLED_KEYS_MASK) || (enabled & ~keys)) return -EINVAL; + preempt_disable(); + sctlr = tsk->thread.sctlr_user; sctlr &= ~arg_to_enxx_mask(keys); sctlr |= arg_to_enxx_mask(enabled); + tsk->thread.sctlr_user = sctlr; if (tsk == current) - set_task_sctlr_el1(sctlr); - else - 
tsk->thread.sctlr_user = sctlr; + update_sctlr_el1(sctlr); + preempt_enable(); return 0; } diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index c8989b999250..19100fe8f7e4 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -6,9 +6,6 @@ * Copyright (C) 1996-2000 Russell King - Converted to ARM. * Copyright (C) 2012 ARM Ltd. */ - -#include <stdarg.h> - #include <linux/compat.h> #include <linux/efi.h> #include <linux/elf.h> @@ -21,6 +18,7 @@ #include <linux/mman.h> #include <linux/mm.h> #include <linux/nospec.h> +#include <linux/sched.h> #include <linux/stddef.h> #include <linux/sysctl.h> #include <linux/unistd.h> @@ -163,7 +161,7 @@ static void print_pstate(struct pt_regs *regs) u64 pstate = regs->pstate; if (compat_user_mode(regs)) { - printk("pstate: %08llx (%c%c%c%c %c %s %s %c%c%c)\n", + printk("pstate: %08llx (%c%c%c%c %c %s %s %c%c%c %cDIT %cSSBS)\n", pstate, pstate & PSR_AA32_N_BIT ? 'N' : 'n', pstate & PSR_AA32_Z_BIT ? 'Z' : 'z', @@ -174,12 +172,14 @@ static void print_pstate(struct pt_regs *regs) pstate & PSR_AA32_E_BIT ? "BE" : "LE", pstate & PSR_AA32_A_BIT ? 'A' : 'a', pstate & PSR_AA32_I_BIT ? 'I' : 'i', - pstate & PSR_AA32_F_BIT ? 'F' : 'f'); + pstate & PSR_AA32_F_BIT ? 'F' : 'f', + pstate & PSR_AA32_DIT_BIT ? '+' : '-', + pstate & PSR_AA32_SSBS_BIT ? '+' : '-'); } else { const char *btype_str = btypes[(pstate & PSR_BTYPE_MASK) >> PSR_BTYPE_SHIFT]; - printk("pstate: %08llx (%c%c%c%c %c%c%c%c %cPAN %cUAO %cTCO BTYPE=%s)\n", + printk("pstate: %08llx (%c%c%c%c %c%c%c%c %cPAN %cUAO %cTCO %cDIT %cSSBS BTYPE=%s)\n", pstate, pstate & PSR_N_BIT ? 'N' : 'n', pstate & PSR_Z_BIT ? 'Z' : 'z', @@ -192,6 +192,8 @@ static void print_pstate(struct pt_regs *regs) pstate & PSR_PAN_BIT ? '+' : '-', pstate & PSR_UAO_BIT ? '+' : '-', pstate & PSR_TCO_BIT ? '+' : '-', + pstate & PSR_DIT_BIT ? '+' : '-', + pstate & PSR_SSBS_BIT ? 
'+' : '-', btype_str); } } @@ -468,16 +470,13 @@ static void erratum_1418040_thread_switch(struct task_struct *prev, write_sysreg(val, cntkctl_el1); } -static void compat_thread_switch(struct task_struct *next) -{ - if (!is_compat_thread(task_thread_info(next))) - return; - - if (static_branch_unlikely(&arm64_mismatched_32bit_el0)) - set_tsk_thread_flag(next, TIF_NOTIFY_RESUME); -} - -static void update_sctlr_el1(u64 sctlr) +/* + * __switch_to() checks current->thread.sctlr_user as an optimisation. Therefore + * this function must be called with preemption disabled and the update to + * sctlr_user must be made in the same preemption disabled block so that + * __switch_to() does not see the variable update before the SCTLR_EL1 one. + */ +void update_sctlr_el1(u64 sctlr) { /* * EnIA must not be cleared while in the kernel as this is necessary for @@ -489,19 +488,6 @@ static void update_sctlr_el1(u64 sctlr) isb(); } -void set_task_sctlr_el1(u64 sctlr) -{ - /* - * __switch_to() checks current->thread.sctlr as an - * optimisation. Disable preemption so that it does not see - * the variable update before the SCTLR_EL1 one. - */ - preempt_disable(); - current->thread.sctlr_user = sctlr; - update_sctlr_el1(sctlr); - preempt_enable(); -} - /* * Thread switching. 
*/ @@ -518,7 +504,6 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev, ssbs_thread_switch(next); erratum_1418040_thread_switch(prev, next); ptrauth_thread_switch_user(next); - compat_thread_switch(next); /* * Complete any pending TLB or cache maintenance on this CPU in case @@ -579,6 +564,28 @@ unsigned long arch_align_stack(unsigned long sp) return sp & ~0xf; } +#ifdef CONFIG_COMPAT +int compat_elf_check_arch(const struct elf32_hdr *hdr) +{ + if (!system_supports_32bit_el0()) + return false; + + if ((hdr)->e_machine != EM_ARM) + return false; + + if (!((hdr)->e_flags & EF_ARM_EABI_MASK)) + return false; + + /* + * Prevent execve() of a 32-bit program from a deadline task + * if the restricted affinity mask would be inadmissible on an + * asymmetric system. + */ + return !static_branch_unlikely(&arm64_mismatched_32bit_el0) || + !dl_task_check_affinity(current, system_32bit_el0_cpumask()); +} +#endif + /* * Called from setup_new_exec() after (COMPAT_)SET_PERSONALITY. */ @@ -588,8 +595,20 @@ void arch_setup_new_exec(void) if (is_compat_task()) { mmflags = MMCF_AARCH32; + + /* + * Restrict the CPU affinity mask for a 32-bit task so that + * it contains only 32-bit-capable CPUs. + * + * From the perspective of the task, this looks similar to + * what would happen if the 64-bit-only CPUs were hot-unplugged + * at the point of execve(), although we try a bit harder to + * honour the cpuset hierarchy. 
+ */ if (static_branch_unlikely(&arm64_mismatched_32bit_el0)) - set_tsk_thread_flag(current, TIF_NOTIFY_RESUME); + force_compatible_cpus_allowed_ptr(current); + } else if (static_branch_unlikely(&arm64_mismatched_32bit_el0)) { + relax_compatible_cpus_allowed_ptr(current); } current->mm->context.flags = mmflags; diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index b381a1ee9ea7..e26196a33cf4 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -845,6 +845,11 @@ static int sve_set(struct task_struct *target, } sve_alloc(target); + if (!target->thread.sve_state) { + ret = -ENOMEM; + clear_tsk_thread_flag(target, TIF_SVE); + goto out; + } /* * Ensure target->thread.sve_state is up to date with target's diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 23036334f4dc..9fe70b12b34f 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -290,6 +290,11 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user) /* From now, fpsimd_thread_switch() won't touch thread.sve_state */ sve_alloc(current); + if (!current->thread.sve_state) { + clear_thread_flag(TIF_SVE); + return -ENOMEM; + } + err = __copy_from_user(current->thread.sve_state, (char __user const *)user->sve + SVE_SIG_REGS_OFFSET, @@ -912,21 +917,7 @@ static void do_signal(struct pt_regs *regs) restore_saved_sigmask(); } -static bool cpu_affinity_invalid(struct pt_regs *regs) -{ - if (!compat_user_mode(regs)) - return false; - - /* - * We're preemptible, but a reschedule will cause us to check the - * affinity again. 
- */ - return !cpumask_test_cpu(raw_smp_processor_id(), - system_32bit_el0_cpumask()); -} - -asmlinkage void do_notify_resume(struct pt_regs *regs, - unsigned long thread_flags) +void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags) { do { if (thread_flags & _TIF_NEED_RESCHED) { @@ -952,19 +943,6 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, if (thread_flags & _TIF_NOTIFY_RESUME) { tracehook_notify_resume(regs); rseq_handle_notify_resume(NULL, regs); - - /* - * If we reschedule after checking the affinity - * then we must ensure that TIF_NOTIFY_RESUME - * is set so that we check the affinity again. - * Since tracehook_notify_resume() clears the - * flag, ensure that the compiler doesn't move - * it after the affinity check. - */ - barrier(); - - if (cpu_affinity_invalid(regs)) - force_sig(SIGKILL); } if (thread_flags & _TIF_FOREIGN_FPSTATE) @@ -1000,3 +978,42 @@ void __init minsigstksz_setup(void) round_up(sizeof(struct frame_record), 16) + 16; /* max alignment padding */ } + +/* + * Compile-time assertions for siginfo_t offsets. Check NSIG* as well, as + * changes likely come with new fields that should be added below. 
+ */ +static_assert(NSIGILL == 11); +static_assert(NSIGFPE == 15); +static_assert(NSIGSEGV == 9); +static_assert(NSIGBUS == 5); +static_assert(NSIGTRAP == 6); +static_assert(NSIGCHLD == 6); +static_assert(NSIGSYS == 2); +static_assert(sizeof(siginfo_t) == 128); +static_assert(__alignof__(siginfo_t) == 8); +static_assert(offsetof(siginfo_t, si_signo) == 0x00); +static_assert(offsetof(siginfo_t, si_errno) == 0x04); +static_assert(offsetof(siginfo_t, si_code) == 0x08); +static_assert(offsetof(siginfo_t, si_pid) == 0x10); +static_assert(offsetof(siginfo_t, si_uid) == 0x14); +static_assert(offsetof(siginfo_t, si_tid) == 0x10); +static_assert(offsetof(siginfo_t, si_overrun) == 0x14); +static_assert(offsetof(siginfo_t, si_status) == 0x18); +static_assert(offsetof(siginfo_t, si_utime) == 0x20); +static_assert(offsetof(siginfo_t, si_stime) == 0x28); +static_assert(offsetof(siginfo_t, si_value) == 0x18); +static_assert(offsetof(siginfo_t, si_int) == 0x18); +static_assert(offsetof(siginfo_t, si_ptr) == 0x18); +static_assert(offsetof(siginfo_t, si_addr) == 0x10); +static_assert(offsetof(siginfo_t, si_addr_lsb) == 0x18); +static_assert(offsetof(siginfo_t, si_lower) == 0x20); +static_assert(offsetof(siginfo_t, si_upper) == 0x28); +static_assert(offsetof(siginfo_t, si_pkey) == 0x20); +static_assert(offsetof(siginfo_t, si_perf_data) == 0x18); +static_assert(offsetof(siginfo_t, si_perf_type) == 0x20); +static_assert(offsetof(siginfo_t, si_band) == 0x10); +static_assert(offsetof(siginfo_t, si_fd) == 0x18); +static_assert(offsetof(siginfo_t, si_call_addr) == 0x10); +static_assert(offsetof(siginfo_t, si_syscall) == 0x18); +static_assert(offsetof(siginfo_t, si_arch) == 0x1c); diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 2f507f565c48..d984282b979f 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -46,8 +46,6 @@ struct compat_aux_sigframe { unsigned long end_magic; } __attribute__((__aligned__(8))); -#define _BLOCKABLE 
(~(sigmask(SIGKILL) | sigmask(SIGSTOP))) - static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set) { compat_sigset_t cset; @@ -190,10 +188,8 @@ static int compat_restore_sigframe(struct pt_regs *regs, unsigned long psr; err = get_sigset_t(&set, &sf->uc.uc_sigmask); - if (err == 0) { - sigdelsetmask(&set, ~_BLOCKABLE); + if (err == 0) set_current_blocked(&set); - } __get_user_error(regs->regs[0], &sf->uc.uc_mcontext.arm_r0, err); __get_user_error(regs->regs[1], &sf->uc.uc_mcontext.arm_r1, err); @@ -457,3 +453,42 @@ void compat_setup_restart_syscall(struct pt_regs *regs) { regs->regs[7] = __NR_compat_restart_syscall; } + +/* + * Compile-time assertions for siginfo_t offsets. Check NSIG* as well, as + * changes likely come with new fields that should be added below. + */ +static_assert(NSIGILL == 11); +static_assert(NSIGFPE == 15); +static_assert(NSIGSEGV == 9); +static_assert(NSIGBUS == 5); +static_assert(NSIGTRAP == 6); +static_assert(NSIGCHLD == 6); +static_assert(NSIGSYS == 2); +static_assert(sizeof(compat_siginfo_t) == 128); +static_assert(__alignof__(compat_siginfo_t) == 4); +static_assert(offsetof(compat_siginfo_t, si_signo) == 0x00); +static_assert(offsetof(compat_siginfo_t, si_errno) == 0x04); +static_assert(offsetof(compat_siginfo_t, si_code) == 0x08); +static_assert(offsetof(compat_siginfo_t, si_pid) == 0x0c); +static_assert(offsetof(compat_siginfo_t, si_uid) == 0x10); +static_assert(offsetof(compat_siginfo_t, si_tid) == 0x0c); +static_assert(offsetof(compat_siginfo_t, si_overrun) == 0x10); +static_assert(offsetof(compat_siginfo_t, si_status) == 0x14); +static_assert(offsetof(compat_siginfo_t, si_utime) == 0x18); +static_assert(offsetof(compat_siginfo_t, si_stime) == 0x1c); +static_assert(offsetof(compat_siginfo_t, si_value) == 0x14); +static_assert(offsetof(compat_siginfo_t, si_int) == 0x14); +static_assert(offsetof(compat_siginfo_t, si_ptr) == 0x14); +static_assert(offsetof(compat_siginfo_t, si_addr) == 0x0c); 
+static_assert(offsetof(compat_siginfo_t, si_addr_lsb) == 0x10); +static_assert(offsetof(compat_siginfo_t, si_lower) == 0x14); +static_assert(offsetof(compat_siginfo_t, si_upper) == 0x18); +static_assert(offsetof(compat_siginfo_t, si_pkey) == 0x14); +static_assert(offsetof(compat_siginfo_t, si_perf_data) == 0x10); +static_assert(offsetof(compat_siginfo_t, si_perf_type) == 0x14); +static_assert(offsetof(compat_siginfo_t, si_band) == 0x0c); +static_assert(offsetof(compat_siginfo_t, si_fd) == 0x10); +static_assert(offsetof(compat_siginfo_t, si_call_addr) == 0x0c); +static_assert(offsetof(compat_siginfo_t, si_syscall) == 0x10); +static_assert(offsetof(compat_siginfo_t, si_arch) == 0x14); diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 938ce6fbee8a..19ee7c33769d 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -76,7 +76,6 @@ void notrace __cpu_suspend_exit(void) spectre_v4_enable_mitigation(NULL); /* Restore additional feature-specific configuration */ - mte_suspend_exit(); ptrauth_suspend_exit(); } diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 709d2c433c5e..f6b1a88245db 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -181,6 +181,8 @@ SECTIONS /* everything from this point to __init_begin will be marked RO NX */ RO_DATA(PAGE_SIZE) + HYPERVISOR_DATA_SECTIONS + idmap_pg_dir = .; . += IDMAP_DIR_SIZE; idmap_pg_end = .; @@ -260,8 +262,6 @@ SECTIONS _sdata = .; RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN) - HYPERVISOR_DATA_SECTIONS - /* * Data written with the MMU off but read with the MMU on requires * cache lines to be invalidated, discarding up to a Cache Writeback |