diff options
Diffstat (limited to 'arch')
78 files changed, 603 insertions, 490 deletions
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 7d59765aef22..c392e18f1e43 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -207,7 +207,7 @@ static void xen_power_off(void) static irqreturn_t xen_arm_callback(int irq, void *arg) { - xen_hvm_evtchn_do_upcall(); + xen_evtchn_do_upcall(); return IRQ_HANDLED; } diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 96e50227f940..5bba39376055 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -663,7 +663,7 @@ static inline bool supports_clearbhb(int scope) isar2 = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1); return cpuid_feature_extract_unsigned_field(isar2, - ID_AA64ISAR2_EL1_BC_SHIFT); + ID_AA64ISAR2_EL1_CLRBHB_SHIFT); } const struct cpumask *system_32bit_el0_cpumask(void); diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index b7238c72a04c..66efd67ea7e8 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -118,7 +118,7 @@ void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu); u64 __guest_enter(struct kvm_vcpu *vcpu); -bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt); +bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt, u32 func_id); #ifdef __KVM_NVHE_HYPERVISOR__ void __noreturn __hyp_do_panic(struct kvm_cpu_context *host_ctxt, u64 spsr, diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index b018ae12ff5f..444a73c2e638 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -222,7 +222,8 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { static const struct arm64_ftr_bits ftr_id_aa64isar2[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_CSSC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_RPRFM_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_EL1_BC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_CLRBHB_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_BC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_MOPS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), FTR_STRICT, FTR_EXACT, ID_AA64ISAR2_EL1_APA3_SHIFT, 4, 0), diff --git a/arch/arm64/kvm/hyp/include/nvhe/ffa.h b/arch/arm64/kvm/hyp/include/nvhe/ffa.h index 1becb10ecd80..d9fd5e6c7d3c 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/ffa.h +++ b/arch/arm64/kvm/hyp/include/nvhe/ffa.h @@ -12,6 +12,6 @@ #define FFA_MAX_FUNC_NUM 0x7F int hyp_ffa_init(void *pages); -bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt); +bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id); #endif /* __KVM_HYP_FFA_H */ diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c index ab4f5d160c58..6e4dba9eadef 100644 --- a/arch/arm64/kvm/hyp/nvhe/ffa.c +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c @@ -634,9 +634,8 @@ out_handled: return true; } -bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt) +bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id) { - DECLARE_REG(u64, func_id, host_ctxt, 0); struct arm_smccc_res res; /* diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index 90fade1b032e..1cc06e6797bd 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -57,6 +57,7 @@ __do_hyp_init: cmp x0, #HVC_STUB_HCALL_NR b.lo __kvm_handle_stub_hvc + bic x0, x0, #ARM_SMCCC_CALL_HINTS mov x3, #KVM_HOST_SMCCC_FUNC(__kvm_hyp_init) cmp x0, x3 b.eq 1f diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 857d9bc04fd4..2385fd03ed87 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -368,6 +368,7 @@ static void handle_host_hcall(struct kvm_cpu_context *host_ctxt) if (static_branch_unlikely(&kvm_protected_mode_initialized)) hcall_min = __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize; + id &= ~ARM_SMCCC_CALL_HINTS; id -= KVM_HOST_SMCCC_ID(0); if (unlikely(id < hcall_min || id >= ARRAY_SIZE(host_hcall))) @@ -392,11 +393,14 @@ static void default_host_smc_handler(struct kvm_cpu_context *host_ctxt) static void handle_host_smc(struct kvm_cpu_context *host_ctxt) { + DECLARE_REG(u64, func_id, host_ctxt, 0); bool handled; - handled = kvm_host_psci_handler(host_ctxt); + func_id &= ~ARM_SMCCC_CALL_HINTS; + + handled = kvm_host_psci_handler(host_ctxt, func_id); if (!handled) - handled = kvm_host_ffa_handler(host_ctxt); + handled = kvm_host_ffa_handler(host_ctxt, func_id); if (!handled) default_host_smc_handler(host_ctxt); diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c index 24543d2a3490..d57bcb6ab94d 100644 --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -273,9 +273,8 @@ static unsigned long psci_1_0_handler(u64 func_id, struct kvm_cpu_context *host_ } } -bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt) +bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt, u32 func_id) { - DECLARE_REG(u64, func_id, host_ctxt, 0); unsigned long ret; switch (kvm_host_psci_config.version) { diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index de5e5148ef5d..2358dd0b9589 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -652,6 +652,9 @@ int hyp_alloc_private_va_range(size_t size, unsigned long *haddr) mutex_unlock(&kvm_hyp_pgd_mutex); + if (!ret) + *haddr = base; + return ret; } diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 2517ef7c21cf..76ce150e7347 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -1347,7 +1347,11 @@ UnsignedEnum 51:48 RPRFM 0b0000 NI 0b0001 IMP EndEnum -Res0 47:28 +Res0 47:32 +UnsignedEnum 31:28 CLRBHB + 0b0000 NI + 0b0001 IMP +EndEnum UnsignedEnum 27:24 PAC_frac 0b0000 NI 0b0001 IMP diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 15f6cfddcc08..41e8fe55cd98 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -907,3 +907,7 @@ EXPORT_SYMBOL(acpi_unregister_ioapic); * TBD when IA64 starts to support suspend... */ int acpi_suspend_lowlevel(void) { return 0; } + +void acpi_proc_quirk_mwait_check(void) +{ +} diff --git a/arch/loongarch/include/asm/addrspace.h b/arch/loongarch/include/asm/addrspace.h index 5c9c03bdf915..b24437e28c6e 100644 --- a/arch/loongarch/include/asm/addrspace.h +++ b/arch/loongarch/include/asm/addrspace.h @@ -19,7 +19,7 @@ */ #ifndef __ASSEMBLY__ #ifndef PHYS_OFFSET -#define PHYS_OFFSET _AC(0, UL) +#define PHYS_OFFSET _UL(0) #endif extern unsigned long vm_map_base; #endif /* __ASSEMBLY__ */ @@ -43,7 +43,7 @@ extern unsigned long vm_map_base; * Memory above this physical address will be considered highmem. */ #ifndef HIGHMEM_START -#define HIGHMEM_START (_AC(1, UL) << _AC(DMW_PABITS, UL)) +#define HIGHMEM_START (_UL(1) << _UL(DMW_PABITS)) #endif #define TO_PHYS(x) ( ((x) & TO_PHYS_MASK)) @@ -65,16 +65,16 @@ extern unsigned long vm_map_base; #define _ATYPE_ #define _ATYPE32_ #define _ATYPE64_ -#define _CONST64_(x) x #else #define _ATYPE_ __PTRDIFF_TYPE__ #define _ATYPE32_ int #define _ATYPE64_ __s64 +#endif + #ifdef CONFIG_64BIT -#define _CONST64_(x) x ## UL +#define _CONST64_(x) _UL(x) #else -#define _CONST64_(x) x ## ULL -#endif +#define _CONST64_(x) _ULL(x) #endif /* diff --git a/arch/loongarch/include/asm/exception.h b/arch/loongarch/include/asm/exception.h new file mode 100644 index 000000000000..af74a3fdcad1 --- /dev/null +++ b/arch/loongarch/include/asm/exception.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __ASM_EXCEPTION_H +#define __ASM_EXCEPTION_H + +#include <asm/ptrace.h> +#include <linux/kprobes.h> + +void show_registers(struct pt_regs *regs); + +asmlinkage void cache_parity_error(void); +asmlinkage void noinstr do_ade(struct pt_regs *regs); +asmlinkage void noinstr do_ale(struct pt_regs *regs); +asmlinkage void noinstr do_bce(struct pt_regs *regs); +asmlinkage void noinstr do_bp(struct pt_regs *regs); +asmlinkage void noinstr do_ri(struct pt_regs *regs); +asmlinkage void noinstr do_fpu(struct pt_regs *regs); +asmlinkage void noinstr do_fpe(struct pt_regs *regs, unsigned long fcsr); +asmlinkage void noinstr do_lsx(struct pt_regs *regs); +asmlinkage void noinstr do_lasx(struct pt_regs *regs); +asmlinkage void noinstr do_lbt(struct pt_regs *regs); +asmlinkage void noinstr do_watch(struct pt_regs *regs); +asmlinkage void noinstr do_syscall(struct pt_regs *regs); +asmlinkage void noinstr do_reserved(struct pt_regs *regs); +asmlinkage void noinstr do_vint(struct pt_regs *regs, unsigned long sp); +asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, + unsigned long write, unsigned long address); + +asmlinkage void handle_ade(void); +asmlinkage void handle_ale(void); +asmlinkage void handle_bce(void); +asmlinkage void handle_sys(void); +asmlinkage void handle_bp(void); +asmlinkage void handle_ri(void); +asmlinkage void handle_fpu(void); +asmlinkage void handle_fpe(void); +asmlinkage void handle_lsx(void); +asmlinkage void handle_lasx(void); +asmlinkage void handle_lbt(void); +asmlinkage void handle_watch(void); +asmlinkage void handle_reserved(void); +asmlinkage void handle_vint(void); +asmlinkage void noinstr handle_loongarch_irq(struct pt_regs *regs); + +#endif /* __ASM_EXCEPTION_H */ diff --git a/arch/loongarch/include/asm/kasan.h b/arch/loongarch/include/asm/kasan.h index deeff8158f45..cd6084f4e153 100644 --- a/arch/loongarch/include/asm/kasan.h +++ b/arch/loongarch/include/asm/kasan.h @@ -10,8 +10,6 @@ #include <asm/io.h> #include <asm/pgtable.h> -#define __HAVE_ARCH_SHADOW_MAP - #define KASAN_SHADOW_SCALE_SHIFT 3 #define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL) @@ -62,61 +60,22 @@ extern bool kasan_early_stage; extern unsigned char kasan_early_shadow_page[PAGE_SIZE]; +#define kasan_mem_to_shadow kasan_mem_to_shadow +void *kasan_mem_to_shadow(const void *addr); + +#define kasan_shadow_to_mem kasan_shadow_to_mem +const void *kasan_shadow_to_mem(const void *shadow_addr); + #define kasan_arch_is_ready kasan_arch_is_ready static __always_inline bool kasan_arch_is_ready(void) { return !kasan_early_stage; } -static inline void *kasan_mem_to_shadow(const void *addr) -{ - if (!kasan_arch_is_ready()) { - return (void *)(kasan_early_shadow_page); - } else { - unsigned long maddr = (unsigned long)addr; - unsigned long xrange = (maddr >> XRANGE_SHIFT) & 0xffff; - unsigned long offset = 0; - - maddr &= XRANGE_SHADOW_MASK; - switch (xrange) { - case XKPRANGE_CC_SEG: - offset = XKPRANGE_CC_SHADOW_OFFSET; - break; - case XKPRANGE_UC_SEG: - offset = XKPRANGE_UC_SHADOW_OFFSET; - break; - case XKVRANGE_VC_SEG: - offset = XKVRANGE_VC_SHADOW_OFFSET; - break; - default: - WARN_ON(1); - return NULL; - } - - return (void *)((maddr >> KASAN_SHADOW_SCALE_SHIFT) + offset); - } -} - -static inline const void *kasan_shadow_to_mem(const void *shadow_addr) +#define addr_has_metadata addr_has_metadata +static __always_inline bool addr_has_metadata(const void *addr) { - unsigned long addr = (unsigned long)shadow_addr; - - if (unlikely(addr > KASAN_SHADOW_END) || - unlikely(addr < KASAN_SHADOW_START)) { - WARN_ON(1); - return NULL; - } - - if (addr >= XKVRANGE_VC_SHADOW_OFFSET) - return (void *)(((addr - XKVRANGE_VC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKVRANGE_VC_START); - else if (addr >= XKPRANGE_UC_SHADOW_OFFSET) - return (void *)(((addr - XKPRANGE_UC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_UC_START); - else if (addr >= XKPRANGE_CC_SHADOW_OFFSET) - return (void *)(((addr - XKPRANGE_CC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_CC_START); - else { - WARN_ON(1); - return NULL; - } + return (kasan_mem_to_shadow((void *)addr) != NULL); } void kasan_init(void); diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h index 66ecb480c894..f81e5f01d619 100644 --- a/arch/loongarch/include/asm/smp.h +++ b/arch/loongarch/include/asm/smp.h @@ -70,6 +70,7 @@ struct secondary_data { extern struct secondary_data cpuboot_data; extern asmlinkage void smpboot_entry(void); +extern asmlinkage void start_secondary(void); extern void calculate_cpu_foreign_map(void); diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index c56ea0b75448..4fcc168f0732 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -19,6 +19,10 @@ obj-$(CONFIG_CPU_HAS_LBT) += lbt.o obj-$(CONFIG_ARCH_STRICT_ALIGN) += unaligned.o +CFLAGS_module.o += $(call cc-option,-Wno-override-init,) +CFLAGS_syscall.o += $(call cc-option,-Wno-override-init,) +CFLAGS_perf_event.o += $(call cc-option,-Wno-override-init,) + ifdef CONFIG_FUNCTION_TRACER ifndef CONFIG_DYNAMIC_FTRACE obj-y += mcount.o ftrace.o diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index 9450e09073eb..8e00a754e548 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ -281,7 +281,6 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) pr_info("SRAT: PXM %u -> CPU 0x%02x -> Node %u\n", pxm, pa->apic_id, node); } -void __init acpi_numa_arch_fixup(void) {} #endif void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size) diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c index 4a4107a6a965..aed901c57fb4 100644 --- a/arch/loongarch/kernel/mem.c +++ b/arch/loongarch/kernel/mem.c @@ -50,7 +50,6 @@ void __init memblock_init(void) } memblock_set_current_limit(PFN_PHYS(max_low_pfn)); - memblock_set_node(0, PHYS_ADDR_MAX, &memblock.memory, 0); /* Reserve the first 2MB */ memblock_reserve(PHYS_OFFSET, 0x200000); @@ -58,4 +57,7 @@ void __init memblock_init(void) /* Reserve the kernel text/data/bss */ memblock_reserve(__pa_symbol(&_text), __pa_symbol(&_end) - __pa_symbol(&_text)); + + memblock_set_node(0, PHYS_ADDR_MAX, &memblock.memory, 0); + memblock_set_node(0, PHYS_ADDR_MAX, &memblock.reserved, 0); } diff --git a/arch/loongarch/kernel/module-sections.c b/arch/loongarch/kernel/module-sections.c index d4dbcda1c4b0..e2f30ff9afde 100644 --- a/arch/loongarch/kernel/module-sections.c +++ b/arch/loongarch/kernel/module-sections.c @@ -6,6 +6,7 @@ #include <linux/elf.h> #include <linux/kernel.h> #include <linux/module.h> +#include <linux/moduleloader.h> #include <linux/ftrace.h> Elf_Addr module_emit_got_entry(struct module *mod, Elf_Shdr *sechdrs, Elf_Addr val) diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index 3cb082e0c992..767d94cce0de 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -37,6 +37,7 @@ #include <asm/bootinfo.h> #include <asm/cpu.h> #include <asm/elf.h> +#include <asm/exec.h> #include <asm/fpu.h> #include <asm/lbt.h> #include <asm/io.h> diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S index d13252553a7c..f49f6b053763 100644 --- a/arch/loongarch/kernel/relocate_kernel.S +++ b/arch/loongarch/kernel/relocate_kernel.S @@ -72,7 +72,6 @@ copy_word: LONG_ADDI s5, s5, -1 beqz s5, process_entry b copy_word - b process_entry done: ibar 0 diff --git a/arch/loongarch/kernel/signal.c b/arch/loongarch/kernel/signal.c index 504fdfe85203..4a3686d13349 100644 --- a/arch/loongarch/kernel/signal.c +++ b/arch/loongarch/kernel/signal.c @@ -13,6 +13,7 @@ #include <linux/audit.h> #include <linux/cache.h> #include <linux/context_tracking.h> +#include <linux/entry-common.h> #include <linux/irqflags.h> #include <linux/sched.h> #include <linux/mm.h> @@ -891,8 +892,8 @@ static unsigned long setup_extcontext(struct extctx_layout *extctx, unsigned lon return new_sp; } -void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, - struct extctx_layout *extctx) +static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, + struct extctx_layout *extctx) { unsigned long sp; @@ -922,7 +923,7 @@ void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, * Atomically swap in the new signal mask, and wait for a signal. */ -asmlinkage long sys_rt_sigreturn(void) +SYSCALL_DEFINE0(rt_sigreturn) { int sig; sigset_t set; diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 6667b0a90f81..ef35c871244f 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -13,6 +13,7 @@ #include <linux/cpumask.h> #include <linux/init.h> #include <linux/interrupt.h> +#include <linux/profile.h> #include <linux/seq_file.h> #include <linux/smp.h> #include <linux/threads.h> @@ -556,10 +557,12 @@ void smp_send_stop(void) smp_call_function(stop_this_cpu, NULL, 0); } +#ifdef CONFIG_PROFILING int setup_profiling_timer(unsigned int multiplier) { return 0; } +#endif static void flush_tlb_all_ipi(void *info) { diff --git a/arch/loongarch/kernel/syscall.c b/arch/loongarch/kernel/syscall.c index 3fc4211db989..b4c5acd7aa3b 100644 --- a/arch/loongarch/kernel/syscall.c +++ b/arch/loongarch/kernel/syscall.c @@ -13,6 +13,7 @@ #include <linux/unistd.h> #include <asm/asm.h> +#include <asm/exception.h> #include <asm/signal.h> #include <asm/switch_to.h> #include <asm-generic/syscalls.h> diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c index c189e03cd5da..3064af94db9c 100644 --- a/arch/loongarch/kernel/time.c +++ b/arch/loongarch/kernel/time.c @@ -29,7 +29,7 @@ static void constant_event_handler(struct clock_event_device *dev) { } -irqreturn_t constant_timer_interrupt(int irq, void *data) +static irqreturn_t constant_timer_interrupt(int irq, void *data) { int cpu = smp_processor_id(); struct clock_event_device *cd; diff --git a/arch/loongarch/kernel/topology.c b/arch/loongarch/kernel/topology.c index caa7cd859078..3fd166006698 100644 --- a/arch/loongarch/kernel/topology.c +++ b/arch/loongarch/kernel/topology.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include <linux/acpi.h> #include <linux/cpu.h> #include <linux/cpumask.h> #include <linux/init.h> @@ -7,6 +8,8 @@ #include <linux/percpu.h> #include <asm/bootinfo.h> +#include <acpi/processor.h> + static DEFINE_PER_CPU(struct cpu, cpu_devices); #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index 65214774ef7c..aebfc3733a76 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -25,7 +25,6 @@ #include <linux/ptrace.h> #include <linux/kgdb.h> #include <linux/kdebug.h> -#include <linux/kprobes.h> #include <linux/notifier.h> #include <linux/irq.h> #include <linux/perf_event.h> @@ -35,6 +34,7 @@ #include <asm/branch.h> #include <asm/break.h> #include <asm/cpu.h> +#include <asm/exception.h> #include <asm/fpu.h> #include <asm/lbt.h> #include <asm/inst.h> @@ -53,21 +53,6 @@ #include "access-helper.h" -extern asmlinkage void handle_ade(void); -extern asmlinkage void handle_ale(void); -extern asmlinkage void handle_bce(void); -extern asmlinkage void handle_sys(void); -extern asmlinkage void handle_bp(void); -extern asmlinkage void handle_ri(void); -extern asmlinkage void handle_fpu(void); -extern asmlinkage void handle_fpe(void); -extern asmlinkage void handle_lbt(void); -extern asmlinkage void handle_lsx(void); -extern asmlinkage void handle_lasx(void); -extern asmlinkage void handle_reserved(void); -extern asmlinkage void handle_watch(void); -extern asmlinkage void handle_vint(void); - static void show_backtrace(struct task_struct *task, const struct pt_regs *regs, const char *loglvl, bool user) { @@ -439,8 +424,8 @@ static inline void setup_vint_size(unsigned int size) * happen together with Overflow or Underflow, and `ptrace' can set * any bits. */ -void force_fcsr_sig(unsigned long fcsr, void __user *fault_addr, - struct task_struct *tsk) +static void force_fcsr_sig(unsigned long fcsr, + void __user *fault_addr, struct task_struct *tsk) { int si_code = FPE_FLTUNK; @@ -458,7 +443,7 @@ void force_fcsr_sig(unsigned long fcsr, void __user *fault_addr, force_sig_fault(SIGFPE, si_code, fault_addr); } -int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcsr) +static int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcsr) { int si_code; @@ -824,7 +809,7 @@ out: asmlinkage void noinstr do_ri(struct pt_regs *regs) { int status = SIGILL; - unsigned int opcode = 0; + unsigned int __maybe_unused opcode; unsigned int __user *era = (unsigned int __user *)exception_era(regs); irqentry_state_t state = irqentry_enter(regs); diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S index b1686afcf876..bb2ec86f37a8 100644 --- a/arch/loongarch/kernel/vmlinux.lds.S +++ b/arch/loongarch/kernel/vmlinux.lds.S @@ -53,33 +53,6 @@ SECTIONS . = ALIGN(PECOFF_SEGMENT_ALIGN); _etext = .; - /* - * struct alt_inst entries. From the header (alternative.h): - * "Alternative instructions for different CPU types or capabilities" - * Think locking instructions on spinlocks. - */ - . = ALIGN(4); - .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { - __alt_instructions = .; - *(.altinstructions) - __alt_instructions_end = .; - } - -#ifdef CONFIG_RELOCATABLE - . = ALIGN(8); - .la_abs : AT(ADDR(.la_abs) - LOAD_OFFSET) { - __la_abs_begin = .; - *(.la_abs) - __la_abs_end = .; - } -#endif - - .got : ALIGN(16) { *(.got) } - .plt : ALIGN(16) { *(.plt) } - .got.plt : ALIGN(16) { *(.got.plt) } - - .data.rel : { *(.data.rel*) } - . = ALIGN(PECOFF_SEGMENT_ALIGN); __init_begin = .; __inittext_begin = .; @@ -94,6 +67,18 @@ SECTIONS __initdata_begin = .; + /* + * struct alt_inst entries. From the header (alternative.h): + * "Alternative instructions for different CPU types or capabilities" + * Think locking instructions on spinlocks. + */ + . = ALIGN(4); + .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { + __alt_instructions = .; + *(.altinstructions) + __alt_instructions_end = .; + } + INIT_DATA_SECTION(16) .exit.data : { EXIT_DATA @@ -113,6 +98,11 @@ SECTIONS _sdata = .; RO_DATA(4096) + + .got : ALIGN(16) { *(.got) } + .plt : ALIGN(16) { *(.plt) } + .got.plt : ALIGN(16) { *(.got.plt) } + RW_DATA(1 << CONFIG_L1_CACHE_SHIFT, PAGE_SIZE, THREAD_SIZE) .rela.dyn : ALIGN(8) { @@ -121,6 +111,17 @@ SECTIONS __rela_dyn_end = .; } + .data.rel : { *(.data.rel*) } + +#ifdef CONFIG_RELOCATABLE + . = ALIGN(8); + .la_abs : AT(ADDR(.la_abs) - LOAD_OFFSET) { + __la_abs_begin = .; + *(.la_abs) + __la_abs_end = .; + } +#endif + .sdata : { *(.sdata) } diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c index e6376e3dce86..1fc2f6813ea0 100644 --- a/arch/loongarch/mm/fault.c +++ b/arch/loongarch/mm/fault.c @@ -20,12 +20,12 @@ #include <linux/mm.h> #include <linux/smp.h> #include <linux/kdebug.h> -#include <linux/kprobes.h> #include <linux/perf_event.h> #include <linux/uaccess.h> #include <linux/kfence.h> #include <asm/branch.h> +#include <asm/exception.h> #include <asm/mmu_context.h> #include <asm/ptrace.h> diff --git a/arch/loongarch/mm/hugetlbpage.c b/arch/loongarch/mm/hugetlbpage.c index ba138117b124..1e76fcb83093 100644 --- a/arch/loongarch/mm/hugetlbpage.c +++ b/arch/loongarch/mm/hugetlbpage.c @@ -50,18 +50,6 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, return (pte_t *) pmd; } -/* - * This function checks for proper alignment of input addr and len parameters. - */ -int is_aligned_hugepage_range(unsigned long addr, unsigned long len) -{ - if (len & ~HPAGE_MASK) - return -EINVAL; - if (addr & ~HPAGE_MASK) - return -EINVAL; - return 0; -} - int pmd_huge(pmd_t pmd) { return (pmd_val(pmd) & _PAGE_HUGE) != 0; diff --git a/arch/loongarch/mm/ioremap.c b/arch/loongarch/mm/ioremap.c index 73b0980ab6f5..70ca73019811 100644 --- a/arch/loongarch/mm/ioremap.c +++ b/arch/loongarch/mm/ioremap.c @@ -4,6 +4,7 @@ */ #include <asm/io.h> +#include <asm-generic/early_ioremap.h> void __init __iomem *early_ioremap(u64 phys_addr, unsigned long size) { diff --git a/arch/loongarch/mm/kasan_init.c b/arch/loongarch/mm/kasan_init.c index da68bc1a4643..cc3e81fe0186 100644 --- a/arch/loongarch/mm/kasan_init.c +++ b/arch/loongarch/mm/kasan_init.c @@ -35,6 +35,57 @@ static pgd_t kasan_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); bool kasan_early_stage = true; +void *kasan_mem_to_shadow(const void *addr) +{ + if (!kasan_arch_is_ready()) { + return (void *)(kasan_early_shadow_page); + } else { + unsigned long maddr = (unsigned long)addr; + unsigned long xrange = (maddr >> XRANGE_SHIFT) & 0xffff; + unsigned long offset = 0; + + maddr &= XRANGE_SHADOW_MASK; + switch (xrange) { + case XKPRANGE_CC_SEG: + offset = XKPRANGE_CC_SHADOW_OFFSET; + break; + case XKPRANGE_UC_SEG: + offset = XKPRANGE_UC_SHADOW_OFFSET; + break; + case XKVRANGE_VC_SEG: + offset = XKVRANGE_VC_SHADOW_OFFSET; + break; + default: + WARN_ON(1); + return NULL; + } + + return (void *)((maddr >> KASAN_SHADOW_SCALE_SHIFT) + offset); + } +} + +const void *kasan_shadow_to_mem(const void *shadow_addr) +{ + unsigned long addr = (unsigned long)shadow_addr; + + if (unlikely(addr > KASAN_SHADOW_END) || + unlikely(addr < KASAN_SHADOW_START)) { + WARN_ON(1); + return NULL; + } + + if (addr >= XKVRANGE_VC_SHADOW_OFFSET) + return (void *)(((addr - XKVRANGE_VC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKVRANGE_VC_START); + else if (addr >= XKPRANGE_UC_SHADOW_OFFSET) + return (void *)(((addr - XKPRANGE_UC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_UC_START); + else if (addr >= XKPRANGE_CC_SHADOW_OFFSET) + return (void *)(((addr - XKPRANGE_CC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_CC_START); + else { + WARN_ON(1); + return NULL; + } +} + /* * Alloc memory for shadow memory page table. */ diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c index eb8572e201ea..2c0a411f23aa 100644 --- a/arch/loongarch/mm/tlb.c +++ b/arch/loongarch/mm/tlb.c @@ -261,7 +261,7 @@ unsigned long pcpu_handlers[NR_CPUS]; #endif extern long exception_handlers[VECSIZE * 128 / sizeof(long)]; -void setup_tlb_handler(int cpu) +static void setup_tlb_handler(int cpu) { setup_ptwalker(); local_flush_tlb_all(); diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 54b9387c3691..3aaadfd2c8eb 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -255,7 +255,7 @@ config PPC select HAVE_KPROBES select HAVE_KPROBES_ON_FTRACE select HAVE_KRETPROBES - select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if HAVE_OBJTOOL_MCOUNT + select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if HAVE_OBJTOOL_MCOUNT && (!ARCH_USING_PATCHABLE_FUNCTION_ENTRY || (!CC_IS_GCC || GCC_VERSION >= 110100)) select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI if PERF_EVENTS || (PPC64 && PPC_BOOK3S) diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index b8513dc3e53a..a1318ce18d0e 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -230,13 +230,15 @@ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs) struct arch_hw_breakpoint *info; int i; + preempt_disable(); + for (i = 0; i < nr_wp_slots(); i++) { struct perf_event *bp = __this_cpu_read(bp_per_reg[i]); if (unlikely(bp && counter_arch_bp(bp)->perf_single_step)) goto reset; } - return; + goto out; reset: regs_set_return_msr(regs, regs->msr & ~MSR_SE); @@ -245,6 +247,9 @@ reset: __set_breakpoint(i, info); info->perf_single_step = false; } + +out: + preempt_enable(); } static bool is_larx_stcx_instr(int type) @@ -363,6 +368,11 @@ static void handle_p10dd1_spurious_exception(struct perf_event **bp, } } +/* + * Handle a DABR or DAWR exception. + * + * Called in atomic context. + */ int hw_breakpoint_handler(struct die_args *args) { bool err = false; @@ -490,6 +500,8 @@ NOKPROBE_SYMBOL(hw_breakpoint_handler); /* * Handle single-step exceptions following a DABR hit. + * + * Called in atomic context. */ static int single_step_dabr_instruction(struct die_args *args) { @@ -541,6 +553,8 @@ NOKPROBE_SYMBOL(single_step_dabr_instruction); /* * Handle debug exception notifications. + * + * Called in atomic context. */ int hw_breakpoint_exceptions_notify( struct notifier_block *unused, unsigned long val, void *data) diff --git a/arch/powerpc/kernel/hw_breakpoint_constraints.c b/arch/powerpc/kernel/hw_breakpoint_constraints.c index a74623025f3a..9e51801c4915 100644 --- a/arch/powerpc/kernel/hw_breakpoint_constraints.c +++ b/arch/powerpc/kernel/hw_breakpoint_constraints.c @@ -131,8 +131,13 @@ void wp_get_instr_detail(struct pt_regs *regs, ppc_inst_t *instr, int *type, int *size, unsigned long *ea) { struct instruction_op op; + int err; - if (__get_user_instr(*instr, (void __user *)regs->nip)) + pagefault_disable(); + err = __get_user_instr(*instr, (void __user *)regs->nip); + pagefault_enable(); + + if (err) return; analyse_instr(&op, regs, *instr); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index eeff136b83d9..64ff37721fd0 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1512,23 +1512,11 @@ static void do_program_check(struct pt_regs *regs) return; } - if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE) && user_mode(regs)) { - ppc_inst_t insn; - - if (get_user_instr(insn, (void __user *)regs->nip)) { - _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); - return; - } - - if (ppc_inst_primary_opcode(insn) == 31 && - get_xop(ppc_inst_val(insn)) == OP_31_XOP_HASHCHK) { - _exception(SIGILL, regs, ILL_ILLOPN, regs->nip); - return; - } + /* User mode considers other cases after enabling IRQs */ + if (!user_mode(regs)) { + _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); + return; } - - _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); - return; } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (reason & REASON_TM) { @@ -1561,16 +1549,44 @@ static void do_program_check(struct pt_regs *regs) /* * If we took the program check in the kernel skip down to sending a - * SIGILL. The subsequent cases all relate to emulating instructions - * which we should only do for userspace. We also do not want to enable - * interrupts for kernel faults because that might lead to further - * faults, and loose the context of the original exception. + * SIGILL. The subsequent cases all relate to user space, such as + * emulating instructions which we should only do for user space. We + * also do not want to enable interrupts for kernel faults because that + * might lead to further faults, and loose the context of the original + * exception. */ if (!user_mode(regs)) goto sigill; interrupt_cond_local_irq_enable(regs); + /* + * (reason & REASON_TRAP) is mostly handled before enabling IRQs, + * except get_user_instr() can sleep so we cannot reliably inspect the + * current instruction in that context. Now that we know we are + * handling a user space trap and can sleep, we can check if the trap + * was a hashchk failure. + */ + if (reason & REASON_TRAP) { + if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE)) { + ppc_inst_t insn; + + if (get_user_instr(insn, (void __user *)regs->nip)) { + _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); + return; + } + + if (ppc_inst_primary_opcode(insn) == 31 && + get_xop(ppc_inst_val(insn)) == OP_31_XOP_HASHCHK) { + _exception(SIGILL, regs, ILL_ILLOPN, regs->nip); + return; + } + } + + _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); + return; + } + /* (reason & REASON_ILLEGAL) would be the obvious thing here, * but there seems to be a hardware bug on the 405GP (RevD) * that means ESR is sometimes set incorrectly - either to diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 317175791d23..3449be7c0d51 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -1418,7 +1418,7 @@ static int h_24x7_event_init(struct perf_event *event) } domain = event_get_domain(event); - if (domain >= HV_PERF_DOMAIN_MAX) { + if (domain == 0 || domain >= HV_PERF_DOMAIN_MAX) { pr_devel("invalid domain %d\n", domain); return -EINVAL; } diff --git a/arch/powerpc/platforms/82xx/Kconfig b/arch/powerpc/platforms/82xx/Kconfig index d9f1a2a83158..1824536cf6f2 100644 --- a/arch/powerpc/platforms/82xx/Kconfig +++ b/arch/powerpc/platforms/82xx/Kconfig @@ -2,6 +2,7 @@ menuconfig PPC_82xx bool "82xx-based boards (PQ II)" depends on PPC_BOOK3S_32 + select FSL_SOC if PPC_82xx @@ -9,7 +10,6 @@ config EP8248E bool "Embedded Planet EP8248E (a.k.a. CWH-PPC-8248N-VE)" select CPM2 select PPC_INDIRECT_PCI if PCI - select FSL_SOC select PHYLIB if NETDEVICES select MDIO_BITBANG if PHYLIB help @@ -22,7 +22,6 @@ config MGCOGE bool "Keymile MGCOGE" select CPM2 select PPC_INDIRECT_PCI if PCI - select FSL_SOC help This enables support for the Keymile MGCOGE board. diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index 1b7e9fa265cb..b7e0e03c69b1 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -460,8 +460,11 @@ static int riscv_vcpu_get_isa_ext_single(struct kvm_vcpu *vcpu, reg_num >= ARRAY_SIZE(kvm_isa_ext_arr)) return -ENOENT; - *reg_val = 0; host_isa_ext = kvm_isa_ext_arr[reg_num]; + if (!__riscv_isa_extension_available(NULL, host_isa_ext)) + return -ENOENT; + + *reg_val = 0; if (__riscv_isa_extension_available(vcpu->arch.isa, host_isa_ext)) *reg_val = 1; /* Mark the given extension as available */ @@ -842,7 +845,7 @@ static int copy_isa_ext_reg_indices(const struct kvm_vcpu *vcpu, u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_ISA_EXT | i; isa_ext = kvm_isa_ext_arr[i]; - if (!__riscv_isa_extension_available(vcpu->arch.isa, isa_ext)) + if (!__riscv_isa_extension_available(NULL, isa_ext)) continue; if (uindices) { diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index af2fbe48e16c..438cd92e6080 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -40,23 +40,25 @@ CONFIG_SCHED_AUTOGROUP=y CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set CONFIG_PROFILING=y +CONFIG_KEXEC_FILE=y +CONFIG_KEXEC_SIG=y +CONFIG_CRASH_DUMP=y CONFIG_LIVEPATCH=y CONFIG_MARCH_ZEC12=y CONFIG_TUNE_ZEC12=y CONFIG_NR_CPUS=512 CONFIG_NUMA=y CONFIG_HZ_100=y -CONFIG_KEXEC_FILE=y -CONFIG_KEXEC_SIG=y +CONFIG_CERT_STORE=y CONFIG_EXPOLINE=y CONFIG_EXPOLINE_AUTO=y CONFIG_CHSC_SCH=y CONFIG_VFIO_CCW=m CONFIG_VFIO_AP=m -CONFIG_CRASH_DUMP=y CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y CONFIG_CMM=m CONFIG_APPLDATA_BASE=y +CONFIG_S390_HYPFS_FS=y CONFIG_KVM=m CONFIG_S390_UNWIND_SELFTEST=m CONFIG_S390_KPROBES_SANITY_TEST=m @@ -434,6 +436,7 @@ CONFIG_SCSI_DH_EMC=m CONFIG_SCSI_DH_ALUA=m CONFIG_MD=y CONFIG_BLK_DEV_MD=y +# CONFIG_MD_BITMAP_FILE is not set CONFIG_MD_LINEAR=m CONFIG_MD_MULTIPATH=m CONFIG_MD_FAULTY=m @@ -577,6 +580,7 @@ CONFIG_SOFT_WATCHDOG=m CONFIG_DIAG288_WATCHDOG=m # CONFIG_DRM_DEBUG_MODESET_LOCK is not set CONFIG_FB=y +# CONFIG_FB_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y # CONFIG_HID_SUPPORT is not set @@ -647,6 +651,7 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_TMPFS_INODE64=y +CONFIG_TMPFS_QUOTA=y CONFIG_HUGETLBFS=y CONFIG_ECRYPT_FS=m CONFIG_CRAMFS=m @@ -703,6 +708,7 @@ CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA_APPRAISE=y CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor" CONFIG_INIT_STACK_NONE=y +CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_CRYPTO_USER=m # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set CONFIG_CRYPTO_PCRYPT=m @@ -825,9 +831,9 @@ CONFIG_LOCK_STAT=y CONFIG_DEBUG_ATOMIC_SLEEP=y CONFIG_DEBUG_LOCKING_API_SELFTESTS=y CONFIG_DEBUG_IRQFLAGS=y +CONFIG_DEBUG_LIST=y CONFIG_DEBUG_SG=y CONFIG_DEBUG_NOTIFIERS=y -CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_DEBUG_CREDENTIALS=y CONFIG_RCU_TORTURE_TEST=m CONFIG_RCU_REF_SCALE_TEST=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 3f263b767a4c..1b8150e50f6a 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -38,23 +38,25 @@ CONFIG_SCHED_AUTOGROUP=y CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set CONFIG_PROFILING=y +CONFIG_KEXEC_FILE=y +CONFIG_KEXEC_SIG=y +CONFIG_CRASH_DUMP=y CONFIG_LIVEPATCH=y CONFIG_MARCH_ZEC12=y CONFIG_TUNE_ZEC12=y CONFIG_NR_CPUS=512 CONFIG_NUMA=y CONFIG_HZ_100=y -CONFIG_KEXEC_FILE=y -CONFIG_KEXEC_SIG=y +CONFIG_CERT_STORE=y CONFIG_EXPOLINE=y CONFIG_EXPOLINE_AUTO=y CONFIG_CHSC_SCH=y CONFIG_VFIO_CCW=m CONFIG_VFIO_AP=m -CONFIG_CRASH_DUMP=y CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y CONFIG_CMM=m CONFIG_APPLDATA_BASE=y +CONFIG_S390_HYPFS_FS=y CONFIG_KVM=m CONFIG_S390_UNWIND_SELFTEST=m CONFIG_S390_KPROBES_SANITY_TEST=m @@ -424,6 +426,7 @@ CONFIG_SCSI_DH_EMC=m CONFIG_SCSI_DH_ALUA=m CONFIG_MD=y CONFIG_BLK_DEV_MD=y +# CONFIG_MD_BITMAP_FILE is not set CONFIG_MD_LINEAR=m CONFIG_MD_MULTIPATH=m CONFIG_MD_FAULTY=m @@ -566,6 +569,7 @@ CONFIG_WATCHDOG_NOWAYOUT=y CONFIG_SOFT_WATCHDOG=m CONFIG_DIAG288_WATCHDOG=m CONFIG_FB=y +# CONFIG_FB_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y # CONFIG_HID_SUPPORT is not set @@ -632,6 +636,7 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_TMPFS_INODE64=y +CONFIG_TMPFS_QUOTA=y CONFIG_HUGETLBFS=y CONFIG_CONFIGFS_FS=m CONFIG_ECRYPT_FS=m @@ -687,6 +692,7 @@ CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA_APPRAISE=y CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor" CONFIG_INIT_STACK_NONE=y +CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_CRYPTO_FIPS=y CONFIG_CRYPTO_USER=m # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set @@ -781,7 +787,6 @@ CONFIG_PTDUMP_DEBUGFS=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_PANIC_ON_OOPS=y CONFIG_TEST_LOCKUP=m -CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_RCU_TORTURE_TEST=m CONFIG_RCU_REF_SCALE_TEST=m CONFIG_RCU_CPU_STALL_TIMEOUT=60 diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index e62fb2015102..b831083b4edd 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -8,6 +8,7 @@ CONFIG_BPF_SYSCALL=y # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_CRASH_DUMP=y CONFIG_MARCH_ZEC12=y CONFIG_TUNE_ZEC12=y # CONFIG_COMPAT is not set @@ -15,9 +16,8 @@ CONFIG_NR_CPUS=2 CONFIG_HZ_100=y # CONFIG_CHSC_SCH is not set # CONFIG_SCM_BUS is not set -CONFIG_CRASH_DUMP=y # CONFIG_PFAULT is not set -# CONFIG_S390_HYPFS_FS is not set +# CONFIG_S390_HYPFS is not set # CONFIG_VIRTUALIZATION is not set # CONFIG_S390_GUEST is not set # CONFIG_SECCOMP is not set diff --git a/arch/s390/kernel/cert_store.c b/arch/s390/kernel/cert_store.c index 3986a044eb36..554447768bdd 100644 --- a/arch/s390/kernel/cert_store.c +++ b/arch/s390/kernel/cert_store.c @@ -432,15 +432,16 @@ static char *get_key_description(struct vcssb *vcssb, const struct vce *vce) char *desc; cs_token = vcssb->cs_token; - /* Description string contains "%64s:%04u:%08u\0". */ + /* Description string contains "%64s:%05u:%010u\0". */ name_len = sizeof(vce->vce_hdr.vc_name); - len = name_len + 1 + 4 + 1 + 8 + 1; + len = name_len + 1 + 5 + 1 + 10 + 1; desc = kmalloc(len, GFP_KERNEL); if (!desc) return NULL; memcpy(desc, vce->vce_hdr.vc_name, name_len); - sprintf(desc + name_len, ":%04u:%08u", vce->vce_hdr.vc_index, cs_token); + snprintf(desc + name_len, len - name_len, ":%05u:%010u", + vce->vce_hdr.vc_index, cs_token); return desc; } diff --git a/arch/sh/mm/ioremap.c b/arch/sh/mm/ioremap.c index c33b3daa4ad1..33d20f34560f 100644 --- a/arch/sh/mm/ioremap.c +++ b/arch/sh/mm/ioremap.c @@ -72,8 +72,8 @@ __ioremap_29bit(phys_addr_t offset, unsigned long size, pgprot_t prot) #define __ioremap_29bit(offset, size, prot) NULL #endif /* CONFIG_29BIT */ -void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, - unsigned long prot) +void __iomem __ref *ioremap_prot(phys_addr_t phys_addr, size_t size, + unsigned long prot) { void __iomem *mapped; pgprot_t pgprot = __pgprot(prot); diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 6c2826417b33..93c60c0c9d4a 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -294,7 +294,7 @@ static void __xen_pv_evtchn_do_upcall(struct pt_regs *regs) inc_irq_stat(irq_hv_callback_count); - xen_hvm_evtchn_do_upcall(); + xen_evtchn_do_upcall(); set_irq_regs(old_regs); } diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 1a4def36d5bb..17715cb8731d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1419,7 +1419,6 @@ struct kvm_arch { * the thread holds the MMU lock in write mode. */ spinlock_t tdp_mmu_pages_lock; - struct workqueue_struct *tdp_mmu_zap_wq; #endif /* CONFIG_X86_64 */ /* @@ -1835,7 +1834,7 @@ void kvm_mmu_vendor_module_exit(void); void kvm_mmu_destroy(struct kvm_vcpu *vcpu); int kvm_mmu_create(struct kvm_vcpu *vcpu); -int kvm_mmu_init_vm(struct kvm *kvm); +void kvm_mmu_init_vm(struct kvm *kvm); void kvm_mmu_uninit_vm(struct kvm *kvm); void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu); diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 5ff49fd67732..571fe4d2d232 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -105,6 +105,13 @@ CFI_POST_PADDING \ SYM_FUNC_END(__cfi_##name) +/* UML needs to be able to override memcpy() and friends for KASAN. */ +#ifdef CONFIG_UML +# define SYM_FUNC_ALIAS_MEMFUNC SYM_FUNC_ALIAS_WEAK +#else +# define SYM_FUNC_ALIAS_MEMFUNC SYM_FUNC_ALIAS +#endif + /* SYM_TYPED_FUNC_START -- use for indirectly called globals, w/ CFI type */ #define SYM_TYPED_FUNC_START(name) \ SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_F_ALIGN) \ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 416901d406f8..8dac45a2c7fc 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -186,8 +186,7 @@ do { \ #else #define deactivate_mm(tsk, mm) \ do { \ - if (!tsk->vfork_done) \ - shstk_free(tsk); \ + shstk_free(tsk); \ load_gs_index(0); \ loadsegment(fs, 0); \ } while (0) diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 4acbcddddc29..772d03487520 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -9,13 +9,6 @@ struct paravirt_patch_site { u8 type; /* type of this instruction */ u8 len; /* length of original instruction */ }; - -/* Lazy mode for batching updates / context switch */ -enum paravirt_lazy_mode { - PARAVIRT_LAZY_NONE, - PARAVIRT_LAZY_MMU, - PARAVIRT_LAZY_CPU, -}; #endif #ifdef CONFIG_PARAVIRT @@ -549,14 +542,6 @@ int paravirt_disable_iospace(void); __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) -enum paravirt_lazy_mode paravirt_get_lazy_mode(void); -void paravirt_start_context_switch(struct task_struct *prev); -void paravirt_end_context_switch(struct task_struct *next); - -void paravirt_enter_lazy_mmu(void); -void paravirt_leave_lazy_mmu(void); -void paravirt_flush_lazy_mmu(void); - void _paravirt_nop(void); void paravirt_BUG(void); unsigned long paravirt_ret0(void); diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 0086920cda06..a3669a7774ed 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -683,13 +683,11 @@ extern u16 get_llc_id(unsigned int cpu); #ifdef CONFIG_CPU_SUP_AMD extern u32 amd_get_nodes_per_socket(void); extern u32 amd_get_highest_perf(void); -extern bool cpu_has_ibpb_brtype_microcode(void); extern void amd_clear_divider(void); extern void amd_check_microcode(void); #else static inline u32 amd_get_nodes_per_socket(void) { return 0; } static inline u32 amd_get_highest_perf(void) { return 0; } -static inline bool cpu_has_ibpb_brtype_microcode(void) { return false; } static inline void amd_clear_divider(void) { } static inline void amd_check_microcode(void) { } #endif diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index 5fc35f889cd1..7048dfacc04b 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -36,6 +36,7 @@ extern struct shared_info *HYPERVISOR_shared_info; extern struct start_info *xen_start_info; +#include <asm/bug.h> #include <asm/processor.h> #define XEN_SIGNATURE "XenVMMXenVMM" @@ -63,4 +64,40 @@ void __init xen_pvh_init(struct boot_params *boot_params); void __init mem_map_via_hcall(struct boot_params *boot_params_p); #endif +/* Lazy mode for batching updates / context switch */ +enum xen_lazy_mode { + XEN_LAZY_NONE, + XEN_LAZY_MMU, + XEN_LAZY_CPU, +}; + +DECLARE_PER_CPU(enum xen_lazy_mode, xen_lazy_mode); +DECLARE_PER_CPU(unsigned int, xen_lazy_nesting); + +static inline void enter_lazy(enum xen_lazy_mode mode) +{ + enum xen_lazy_mode old_mode = this_cpu_read(xen_lazy_mode); + + if (mode == old_mode) { + this_cpu_inc(xen_lazy_nesting); + return; + } + + BUG_ON(old_mode != XEN_LAZY_NONE); + + this_cpu_write(xen_lazy_mode, mode); +} + +static inline void leave_lazy(enum xen_lazy_mode mode) +{ + BUG_ON(this_cpu_read(xen_lazy_mode) != mode); + + if (this_cpu_read(xen_lazy_nesting) == 0) + this_cpu_write(xen_lazy_mode, XEN_LAZY_NONE); + else + this_cpu_dec(xen_lazy_nesting); +} + +enum xen_lazy_mode xen_get_lazy_mode(void); + #endif /* _ASM_X86_XEN_HYPERVISOR_H */ diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index a5ead6a6d233..517ee01503be 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -720,13 +720,8 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) { s32 *s; - /* - * Do not patch out the default return thunks if those needed are the - * ones generated by the compiler. - */ - if (cpu_feature_enabled(X86_FEATURE_RETHUNK) && - (x86_return_thunk == __x86_return_thunk)) - return; + if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) + static_call_force_reinit(); for (s = start; s < end; s++) { void *dest = NULL, *addr = (void *)s + *s; diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c index c06bfc086565..faa9f2299848 100644 --- a/arch/x86/kernel/callthunks.c +++ b/arch/x86/kernel/callthunks.c @@ -272,7 +272,6 @@ void __init callthunks_patch_builtin_calls(void) pr_info("Setting up call depth tracking\n"); mutex_lock(&text_mutex); callthunks_setup(&cs, &builtin_coretext); - static_call_force_reinit(); thunks_initialized = true; mutex_unlock(&text_mutex); } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index dd8379d84445..03ef962a6992 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -766,6 +766,15 @@ static void early_init_amd(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_TOPOEXT)) smp_num_siblings = ((cpuid_ebx(0x8000001e) >> 8) & 0xff) + 1; + + if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_IBPB_BRTYPE)) { + if (c->x86 == 0x17 && boot_cpu_has(X86_FEATURE_AMD_IBPB)) + setup_force_cpu_cap(X86_FEATURE_IBPB_BRTYPE); + else if (c->x86 >= 0x19 && !wrmsrl_safe(MSR_IA32_PRED_CMD, PRED_CMD_SBPB)) { + setup_force_cpu_cap(X86_FEATURE_IBPB_BRTYPE); + setup_force_cpu_cap(X86_FEATURE_SBPB); + } + } } static void init_amd_k8(struct cpuinfo_x86 *c) @@ -1301,25 +1310,6 @@ void amd_check_microcode(void) on_each_cpu(zenbleed_check_cpu, NULL, 1); } -bool cpu_has_ibpb_brtype_microcode(void) -{ - switch (boot_cpu_data.x86) { - /* Zen1/2 IBPB flushes branch type predictions too. */ - case 0x17: - return boot_cpu_has(X86_FEATURE_AMD_IBPB); - case 0x19: - /* Poke the MSR bit on Zen3/4 to check its presence. */ - if (!wrmsrl_safe(MSR_IA32_PRED_CMD, PRED_CMD_SBPB)) { - setup_force_cpu_cap(X86_FEATURE_SBPB); - return true; - } else { - return false; - } - default: - return false; - } -} - /* * Issue a DIV 0/1 insn to clear any division data from previous DIV * operations. diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index f081d26616ac..10499bcd4e39 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -2404,27 +2404,16 @@ early_param("spec_rstack_overflow", srso_parse_cmdline); static void __init srso_select_mitigation(void) { - bool has_microcode; + bool has_microcode = boot_cpu_has(X86_FEATURE_IBPB_BRTYPE); if (!boot_cpu_has_bug(X86_BUG_SRSO) || cpu_mitigations_off()) goto pred_cmd; - /* - * The first check is for the kernel running as a guest in order - * for guests to verify whether IBPB is a viable mitigation. - */ - has_microcode = boot_cpu_has(X86_FEATURE_IBPB_BRTYPE) || cpu_has_ibpb_brtype_microcode(); if (!has_microcode) { pr_warn("IBPB-extending microcode not applied!\n"); pr_warn(SRSO_NOTICE); } else { /* - * Enable the synthetic (even if in a real CPUID leaf) - * flags for guests. - */ - setup_force_cpu_cap(X86_FEATURE_IBPB_BRTYPE); - - /* * Zen1/2 with SMT off aren't vulnerable after the right * IBPB microcode has been applied. */ @@ -2444,7 +2433,7 @@ static void __init srso_select_mitigation(void) switch (srso_cmd) { case SRSO_CMD_OFF: - return; + goto pred_cmd; case SRSO_CMD_MICROCODE: if (has_microcode) { @@ -2717,7 +2706,7 @@ static ssize_t srso_show_state(char *buf) return sysfs_emit(buf, "%s%s\n", srso_strings[srso_mitigation], - (cpu_has_ibpb_brtype_microcode() ? "" : ", no microcode")); + boot_cpu_has(X86_FEATURE_IBPB_BRTYPE) ? "" : ", no microcode"); } static ssize_t gds_show_state(char *buf) diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 975f98d5eee5..97f1436c1a20 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -143,66 +143,7 @@ int paravirt_disable_iospace(void) return request_resource(&ioport_resource, &reserve_ioports); } -static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LAZY_NONE; - -static inline void enter_lazy(enum paravirt_lazy_mode mode) -{ - BUG_ON(this_cpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE); - - this_cpu_write(paravirt_lazy_mode, mode); -} - -static void leave_lazy(enum paravirt_lazy_mode mode) -{ - BUG_ON(this_cpu_read(paravirt_lazy_mode) != mode); - - this_cpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE); -} - -void paravirt_enter_lazy_mmu(void) -{ - enter_lazy(PARAVIRT_LAZY_MMU); -} - -void paravirt_leave_lazy_mmu(void) -{ - leave_lazy(PARAVIRT_LAZY_MMU); -} - -void paravirt_flush_lazy_mmu(void) -{ - preempt_disable(); - - if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { - arch_leave_lazy_mmu_mode(); - arch_enter_lazy_mmu_mode(); - } - - preempt_enable(); -} - #ifdef CONFIG_PARAVIRT_XXL -void paravirt_start_context_switch(struct task_struct *prev) -{ - BUG_ON(preemptible()); - - if (this_cpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) { - arch_leave_lazy_mmu_mode(); - set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES); - } - enter_lazy(PARAVIRT_LAZY_CPU); -} - -void paravirt_end_context_switch(struct task_struct *next) -{ - BUG_ON(preemptible()); - - leave_lazy(PARAVIRT_LAZY_CPU); - - if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES)) - arch_enter_lazy_mmu_mode(); -} - static noinstr void pv_native_write_cr2(unsigned long val) { native_write_cr2(val); @@ -229,14 +170,6 @@ static noinstr void pv_native_safe_halt(void) } #endif -enum paravirt_lazy_mode paravirt_get_lazy_mode(void) -{ - if (in_interrupt()) - return PARAVIRT_LAZY_NONE; - - return this_cpu_read(paravirt_lazy_mode); -} - struct pv_info pv_info = { .name = "bare hardware", #ifdef CONFIG_PARAVIRT_XXL diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 9f0909142a0a..b6f4e8399fca 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -257,13 +257,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) if (!ret && unlikely(test_tsk_thread_flag(current, TIF_IO_BITMAP))) io_bitmap_share(p); - /* - * If copy_thread() if failing, don't leak the shadow stack possibly - * allocated in shstk_alloc_thread_stack() above. - */ - if (ret) - shstk_free(p); - return ret; } diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b9145a63da77..b098b1fa2470 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -358,15 +358,11 @@ static void __init add_early_ima_buffer(u64 phys_addr) #if defined(CONFIG_HAVE_IMA_KEXEC) && !defined(CONFIG_OF_FLATTREE) int __init ima_free_kexec_buffer(void) { - int rc; - if (!ima_kexec_buffer_size) return -ENOENT; - rc = memblock_phys_free(ima_kexec_buffer_phys, - ima_kexec_buffer_size); - if (rc) - return rc; + memblock_free_late(ima_kexec_buffer_phys, + ima_kexec_buffer_size); ima_kexec_buffer_phys = 0; ima_kexec_buffer_size = 0; diff --git a/arch/x86/kernel/shstk.c b/arch/x86/kernel/shstk.c index fd689921a1db..59e15dd8d0f8 100644 --- a/arch/x86/kernel/shstk.c +++ b/arch/x86/kernel/shstk.c @@ -205,10 +205,21 @@ unsigned long shstk_alloc_thread_stack(struct task_struct *tsk, unsigned long cl return 0; /* - * For CLONE_VM, except vfork, the child needs a separate shadow + * For CLONE_VFORK the child will share the parents shadow stack. + * Make sure to clear the internal tracking of the thread shadow + * stack so the freeing logic run for child knows to leave it alone. + */ + if (clone_flags & CLONE_VFORK) { + shstk->base = 0; + shstk->size = 0; + return 0; + } + + /* + * For !CLONE_VM the child will use a copy of the parents shadow * stack. */ - if ((clone_flags & (CLONE_VFORK | CLONE_VM)) != CLONE_VM) + if (!(clone_flags & CLONE_VM)) return 0; size = adjust_shstk_size(stack_size); @@ -408,7 +419,25 @@ void shstk_free(struct task_struct *tsk) if (!tsk->mm || tsk->mm != current->mm) return; + /* + * If shstk->base is NULL, then this task is not managing its + * own shadow stack (CLONE_VFORK). So skip freeing it. + */ + if (!shstk->base) + return; + + /* + * shstk->base is NULL for CLONE_VFORK child tasks, and so is + * normal. But size = 0 on a shstk->base is not normal and + * indicated an attempt to free the thread shadow stack twice. + * Warn about it. + */ + if (WARN_ON(!shstk->size)) + return; + unmap_shadow_stack(shstk->base, shstk->size); + + shstk->size = 0; } static int wrss_control(bool enable) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index e1d011c67cc6..f7901cb4d2fa 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -6167,20 +6167,15 @@ static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm) return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages)); } -int kvm_mmu_init_vm(struct kvm *kvm) +void kvm_mmu_init_vm(struct kvm *kvm) { - int r; - INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages); spin_lock_init(&kvm->arch.mmu_unsync_pages_lock); - if (tdp_mmu_enabled) { - r = kvm_mmu_init_tdp_mmu(kvm); - if (r < 0) - return r; - } + if (tdp_mmu_enabled) + kvm_mmu_init_tdp_mmu(kvm); kvm->arch.split_page_header_cache.kmem_cache = mmu_page_header_cache; kvm->arch.split_page_header_cache.gfp_zero = __GFP_ZERO; @@ -6189,8 +6184,6 @@ int kvm_mmu_init_vm(struct kvm *kvm) kvm->arch.split_desc_cache.kmem_cache = pte_list_desc_cache; kvm->arch.split_desc_cache.gfp_zero = __GFP_ZERO; - - return 0; } static void mmu_free_vm_memory_caches(struct kvm *kvm) @@ -6246,7 +6239,6 @@ static bool kvm_rmap_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_e void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) { bool flush; - int i; if (WARN_ON_ONCE(gfn_end <= gfn_start)) return; @@ -6257,11 +6249,8 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) flush = kvm_rmap_zap_gfn_range(kvm, gfn_start, gfn_end); - if (tdp_mmu_enabled) { - for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) - flush = kvm_tdp_mmu_zap_leafs(kvm, i, gfn_start, - gfn_end, true, flush); - } + if (tdp_mmu_enabled) + flush = kvm_tdp_mmu_zap_leafs(kvm, gfn_start, gfn_end, flush); if (flush) kvm_flush_remote_tlbs_range(kvm, gfn_start, gfn_end - gfn_start); diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index b102014e2c60..decc1f153669 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -58,7 +58,12 @@ struct kvm_mmu_page { bool tdp_mmu_page; bool unsync; - u8 mmu_valid_gen; + union { + u8 mmu_valid_gen; + + /* Only accessed under slots_lock. */ + bool tdp_mmu_scheduled_root_to_zap; + }; /* * The shadow page can't be replaced by an equivalent huge page @@ -100,13 +105,7 @@ struct kvm_mmu_page { struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */ tdp_ptep_t ptep; }; - union { - DECLARE_BITMAP(unsync_child_bitmap, 512); - struct { - struct work_struct tdp_mmu_async_work; - void *tdp_mmu_async_data; - }; - }; + DECLARE_BITMAP(unsync_child_bitmap, 512); /* * Tracks shadow pages that, if zapped, would allow KVM to create an NX diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 6c63f2d1675f..6cd4dd631a2f 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -12,18 +12,10 @@ #include <trace/events/kvm.h> /* Initializes the TDP MMU for the VM, if enabled. */ -int kvm_mmu_init_tdp_mmu(struct kvm *kvm) +void kvm_mmu_init_tdp_mmu(struct kvm *kvm) { - struct workqueue_struct *wq; - - wq = alloc_workqueue("kvm", WQ_UNBOUND|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 0); - if (!wq) - return -ENOMEM; - INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots); spin_lock_init(&kvm->arch.tdp_mmu_pages_lock); - kvm->arch.tdp_mmu_zap_wq = wq; - return 1; } /* Arbitrarily returns true so that this may be used in if statements. */ @@ -46,20 +38,15 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) * ultimately frees all roots. */ kvm_tdp_mmu_invalidate_all_roots(kvm); - - /* - * Destroying a workqueue also first flushes the workqueue, i.e. no - * need to invoke kvm_tdp_mmu_zap_invalidated_roots(). - */ - destroy_workqueue(kvm->arch.tdp_mmu_zap_wq); + kvm_tdp_mmu_zap_invalidated_roots(kvm); WARN_ON(atomic64_read(&kvm->arch.tdp_mmu_pages)); WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots)); /* * Ensure that all the outstanding RCU callbacks to free shadow pages - * can run before the VM is torn down. Work items on tdp_mmu_zap_wq - * can call kvm_tdp_mmu_put_root and create new callbacks. + * can run before the VM is torn down. Putting the last reference to + * zapped roots will create new callbacks. */ rcu_barrier(); } @@ -86,46 +73,6 @@ static void tdp_mmu_free_sp_rcu_callback(struct rcu_head *head) tdp_mmu_free_sp(sp); } -static void tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root, - bool shared); - -static void tdp_mmu_zap_root_work(struct work_struct *work) -{ - struct kvm_mmu_page *root = container_of(work, struct kvm_mmu_page, - tdp_mmu_async_work); - struct kvm *kvm = root->tdp_mmu_async_data; - - read_lock(&kvm->mmu_lock); - - /* - * A TLB flush is not necessary as KVM performs a local TLB flush when - * allocating a new root (see kvm_mmu_load()), and when migrating vCPU - * to a different pCPU. Note, the local TLB flush on reuse also - * invalidates any paging-structure-cache entries, i.e. TLB entries for - * intermediate paging structures, that may be zapped, as such entries - * are associated with the ASID on both VMX and SVM. - */ - tdp_mmu_zap_root(kvm, root, true); - - /* - * Drop the refcount using kvm_tdp_mmu_put_root() to test its logic for - * avoiding an infinite loop. By design, the root is reachable while - * it's being asynchronously zapped, thus a different task can put its - * last reference, i.e. flowing through kvm_tdp_mmu_put_root() for an - * asynchronously zapped root is unavoidable. - */ - kvm_tdp_mmu_put_root(kvm, root, true); - - read_unlock(&kvm->mmu_lock); -} - -static void tdp_mmu_schedule_zap_root(struct kvm *kvm, struct kvm_mmu_page *root) -{ - root->tdp_mmu_async_data = kvm; - INIT_WORK(&root->tdp_mmu_async_work, tdp_mmu_zap_root_work); - queue_work(kvm->arch.tdp_mmu_zap_wq, &root->tdp_mmu_async_work); -} - void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root, bool shared) { @@ -211,8 +158,12 @@ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm, #define for_each_valid_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared) \ __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared, true) -#define for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id) \ - __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, false, false) +#define for_each_tdp_mmu_root_yield_safe(_kvm, _root, _shared) \ + for (_root = tdp_mmu_next_root(_kvm, NULL, _shared, false); \ + _root; \ + _root = tdp_mmu_next_root(_kvm, _root, _shared, false)) \ + if (!kvm_lockdep_assert_mmu_lock_held(_kvm, _shared)) { \ + } else /* * Iterate over all TDP MMU roots. Requires that mmu_lock be held for write, @@ -292,7 +243,7 @@ hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu) * by a memslot update or by the destruction of the VM. Initialize the * refcount to two; one reference for the vCPU, and one reference for * the TDP MMU itself, which is held until the root is invalidated and - * is ultimately put by tdp_mmu_zap_root_work(). + * is ultimately put by kvm_tdp_mmu_zap_invalidated_roots(). */ refcount_set(&root->tdp_mmu_root_count, 2); @@ -877,13 +828,12 @@ static bool tdp_mmu_zap_leafs(struct kvm *kvm, struct kvm_mmu_page *root, * true if a TLB flush is needed before releasing the MMU lock, i.e. if one or * more SPTEs were zapped since the MMU lock was last acquired. */ -bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, int as_id, gfn_t start, gfn_t end, - bool can_yield, bool flush) +bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end, bool flush) { struct kvm_mmu_page *root; - for_each_tdp_mmu_root_yield_safe(kvm, root, as_id) - flush = tdp_mmu_zap_leafs(kvm, root, start, end, can_yield, flush); + for_each_tdp_mmu_root_yield_safe(kvm, root, false) + flush = tdp_mmu_zap_leafs(kvm, root, start, end, true, flush); return flush; } @@ -891,7 +841,6 @@ bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, int as_id, gfn_t start, gfn_t end, void kvm_tdp_mmu_zap_all(struct kvm *kvm) { struct kvm_mmu_page *root; - int i; /* * Zap all roots, including invalid roots, as all SPTEs must be dropped @@ -905,10 +854,8 @@ void kvm_tdp_mmu_zap_all(struct kvm *kvm) * is being destroyed or the userspace VMM has exited. In both cases, * KVM_RUN is unreachable, i.e. no vCPUs will ever service the request. */ - for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { - for_each_tdp_mmu_root_yield_safe(kvm, root, i) - tdp_mmu_zap_root(kvm, root, false); - } + for_each_tdp_mmu_root_yield_safe(kvm, root, false) + tdp_mmu_zap_root(kvm, root, false); } /* @@ -917,18 +864,47 @@ void kvm_tdp_mmu_zap_all(struct kvm *kvm) */ void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm) { - flush_workqueue(kvm->arch.tdp_mmu_zap_wq); + struct kvm_mmu_page *root; + + read_lock(&kvm->mmu_lock); + + for_each_tdp_mmu_root_yield_safe(kvm, root, true) { + if (!root->tdp_mmu_scheduled_root_to_zap) + continue; + + root->tdp_mmu_scheduled_root_to_zap = false; + KVM_BUG_ON(!root->role.invalid, kvm); + + /* + * A TLB flush is not necessary as KVM performs a local TLB + * flush when allocating a new root (see kvm_mmu_load()), and + * when migrating a vCPU to a different pCPU. Note, the local + * TLB flush on reuse also invalidates paging-structure-cache + * entries, i.e. TLB entries for intermediate paging structures, + * that may be zapped, as such entries are associated with the + * ASID on both VMX and SVM. + */ + tdp_mmu_zap_root(kvm, root, true); + + /* + * The referenced needs to be put *after* zapping the root, as + * the root must be reachable by mmu_notifiers while it's being + * zapped + */ + kvm_tdp_mmu_put_root(kvm, root, true); + } + + read_unlock(&kvm->mmu_lock); } /* * Mark each TDP MMU root as invalid to prevent vCPUs from reusing a root that * is about to be zapped, e.g. in response to a memslots update. The actual - * zapping is performed asynchronously. Using a separate workqueue makes it - * easy to ensure that the destruction is performed before the "fast zap" - * completes, without keeping a separate list of invalidated roots; the list is - * effectively the list of work items in the workqueue. + * zapping is done separately so that it happens with mmu_lock with read, + * whereas invalidating roots must be done with mmu_lock held for write (unless + * the VM is being destroyed). * - * Note, the asynchronous worker is gifted the TDP MMU's reference. + * Note, kvm_tdp_mmu_zap_invalidated_roots() is gifted the TDP MMU's reference. * See kvm_tdp_mmu_get_vcpu_root_hpa(). */ void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm) @@ -953,19 +929,20 @@ void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm) /* * As above, mmu_lock isn't held when destroying the VM! There can't * be other references to @kvm, i.e. nothing else can invalidate roots - * or be consuming roots, but walking the list of roots does need to be - * guarded against roots being deleted by the asynchronous zap worker. + * or get/put references to roots. */ - rcu_read_lock(); - - list_for_each_entry_rcu(root, &kvm->arch.tdp_mmu_roots, link) { + list_for_each_entry(root, &kvm->arch.tdp_mmu_roots, link) { + /* + * Note, invalid roots can outlive a memslot update! Invalid + * roots must be *zapped* before the memslot update completes, + * but a different task can acquire a reference and keep the + * root alive after its been zapped. + */ if (!root->role.invalid) { + root->tdp_mmu_scheduled_root_to_zap = true; root->role.invalid = true; - tdp_mmu_schedule_zap_root(kvm, root); } } - - rcu_read_unlock(); } /* @@ -1146,8 +1123,13 @@ retry: bool kvm_tdp_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range, bool flush) { - return kvm_tdp_mmu_zap_leafs(kvm, range->slot->as_id, range->start, - range->end, range->may_block, flush); + struct kvm_mmu_page *root; + + __for_each_tdp_mmu_root_yield_safe(kvm, root, range->slot->as_id, false, false) + flush = tdp_mmu_zap_leafs(kvm, root, range->start, range->end, + range->may_block, flush); + + return flush; } typedef bool (*tdp_handler_t)(struct kvm *kvm, struct tdp_iter *iter, diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h index 0a63b1afabd3..733a3aef3a96 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.h +++ b/arch/x86/kvm/mmu/tdp_mmu.h @@ -7,7 +7,7 @@ #include "spte.h" -int kvm_mmu_init_tdp_mmu(struct kvm *kvm); +void kvm_mmu_init_tdp_mmu(struct kvm *kvm); void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm); hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu); @@ -20,8 +20,7 @@ __must_check static inline bool kvm_tdp_mmu_get_root(struct kvm_mmu_page *root) void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root, bool shared); -bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, int as_id, gfn_t start, - gfn_t end, bool can_yield, bool flush); +bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end, bool flush); bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp); void kvm_tdp_mmu_zap_all(struct kvm *kvm); void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm); diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index b9a0a939d59f..4900c078045a 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2962,6 +2962,32 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in) count, in); } +static void sev_es_vcpu_after_set_cpuid(struct vcpu_svm *svm) +{ + struct kvm_vcpu *vcpu = &svm->vcpu; + + if (boot_cpu_has(X86_FEATURE_V_TSC_AUX)) { + bool v_tsc_aux = guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) || + guest_cpuid_has(vcpu, X86_FEATURE_RDPID); + + set_msr_interception(vcpu, svm->msrpm, MSR_TSC_AUX, v_tsc_aux, v_tsc_aux); + } +} + +void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm) +{ + struct kvm_vcpu *vcpu = &svm->vcpu; + struct kvm_cpuid_entry2 *best; + + /* For sev guests, the memory encryption bit is not reserved in CR3. */ + best = kvm_find_cpuid_entry(vcpu, 0x8000001F); + if (best) + vcpu->arch.reserved_gpa_bits &= ~(1UL << (best->ebx & 0x3f)); + + if (sev_es_guest(svm->vcpu.kvm)) + sev_es_vcpu_after_set_cpuid(svm); +} + static void sev_es_init_vmcb(struct vcpu_svm *svm) { struct vmcb *vmcb = svm->vmcb01.ptr; @@ -3024,14 +3050,6 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm) set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1); set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1); set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1); - - if (boot_cpu_has(X86_FEATURE_V_TSC_AUX) && - (guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDTSCP) || - guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDPID))) { - set_msr_interception(vcpu, svm->msrpm, MSR_TSC_AUX, 1, 1); - if (guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDTSCP)) - svm_clr_intercept(svm, INTERCEPT_RDTSCP); - } } void sev_init_vmcb(struct vcpu_svm *svm) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index f283eb47f6ac..9507df93f410 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -683,6 +683,21 @@ static int svm_hardware_enable(void) amd_pmu_enable_virt(); + /* + * If TSC_AUX virtualization is supported, TSC_AUX becomes a swap type + * "B" field (see sev_es_prepare_switch_to_guest()) for SEV-ES guests. + * Since Linux does not change the value of TSC_AUX once set, prime the + * TSC_AUX field now to avoid a RDMSR on every vCPU run. + */ + if (boot_cpu_has(X86_FEATURE_V_TSC_AUX)) { + struct sev_es_save_area *hostsa; + u32 msr_hi; + + hostsa = (struct sev_es_save_area *)(page_address(sd->save_area) + 0x400); + + rdmsr(MSR_TSC_AUX, hostsa->tsc_aux, msr_hi); + } + return 0; } @@ -1532,7 +1547,14 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu) if (tsc_scaling) __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio); - if (likely(tsc_aux_uret_slot >= 0)) + /* + * TSC_AUX is always virtualized for SEV-ES guests when the feature is + * available. The user return MSR support is not required in this case + * because TSC_AUX is restored on #VMEXIT from the host save area + * (which has been initialized in svm_hardware_enable()). + */ + if (likely(tsc_aux_uret_slot >= 0) && + (!boot_cpu_has(X86_FEATURE_V_TSC_AUX) || !sev_es_guest(vcpu->kvm))) kvm_set_user_return_msr(tsc_aux_uret_slot, svm->tsc_aux, -1ull); svm->guest_state_loaded = true; @@ -3087,6 +3109,16 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) break; case MSR_TSC_AUX: /* + * TSC_AUX is always virtualized for SEV-ES guests when the + * feature is available. The user return MSR support is not + * required in this case because TSC_AUX is restored on #VMEXIT + * from the host save area (which has been initialized in + * svm_hardware_enable()). + */ + if (boot_cpu_has(X86_FEATURE_V_TSC_AUX) && sev_es_guest(vcpu->kvm)) + break; + + /* * TSC_AUX is usually changed only during boot and never read * directly. Intercept TSC_AUX instead of exposing it to the * guest via direct_access_msrs, and switch it via user return. @@ -4284,7 +4316,6 @@ static bool svm_has_emulated_msr(struct kvm *kvm, u32 index) static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - struct kvm_cpuid_entry2 *best; /* * SVM doesn't provide a way to disable just XSAVES in the guest, KVM @@ -4328,12 +4359,8 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) set_msr_interception(vcpu, svm->msrpm, MSR_IA32_FLUSH_CMD, 0, !!guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D)); - /* For sev guests, the memory encryption bit is not reserved in CR3. */ - if (sev_guest(vcpu->kvm)) { - best = kvm_find_cpuid_entry(vcpu, 0x8000001F); - if (best) - vcpu->arch.reserved_gpa_bits &= ~(1UL << (best->ebx & 0x3f)); - } + if (sev_guest(vcpu->kvm)) + sev_vcpu_after_set_cpuid(svm); init_vmcb_after_set_cpuid(vcpu); } diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index f41253958357..be67ab7fdd10 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -684,6 +684,7 @@ void __init sev_hardware_setup(void); void sev_hardware_unsetup(void); int sev_cpu_init(struct svm_cpu_data *sd); void sev_init_vmcb(struct vcpu_svm *svm); +void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm); void sev_free_vcpu(struct kvm_vcpu *vcpu); int sev_handle_vmgexit(struct kvm_vcpu *vcpu); int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6c9c81e82e65..9f18b06bbda6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12308,9 +12308,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (ret) goto out; - ret = kvm_mmu_init_vm(kvm); - if (ret) - goto out_page_track; + kvm_mmu_init_vm(kvm); ret = static_call(kvm_x86_vm_init)(kvm); if (ret) @@ -12355,7 +12353,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) out_uninit_mmu: kvm_mmu_uninit_vm(kvm); -out_page_track: kvm_page_track_cleanup(kvm); out: return ret; diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 8f95fb267caa..76697df8dfd5 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -40,7 +40,7 @@ SYM_TYPED_FUNC_START(__memcpy) SYM_FUNC_END(__memcpy) EXPORT_SYMBOL(__memcpy) -SYM_FUNC_ALIAS(memcpy, __memcpy) +SYM_FUNC_ALIAS_MEMFUNC(memcpy, __memcpy) EXPORT_SYMBOL(memcpy) SYM_FUNC_START_LOCAL(memcpy_orig) diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index 0559b206fb11..ccdf3a597045 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -212,5 +212,5 @@ SYM_FUNC_START(__memmove) SYM_FUNC_END(__memmove) EXPORT_SYMBOL(__memmove) -SYM_FUNC_ALIAS(memmove, __memmove) +SYM_FUNC_ALIAS_MEMFUNC(memmove, __memmove) EXPORT_SYMBOL(memmove) diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index 7c59a704c458..3d818b849ec6 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S @@ -40,7 +40,7 @@ SYM_FUNC_START(__memset) SYM_FUNC_END(__memset) EXPORT_SYMBOL(__memset) -SYM_FUNC_ALIAS(memset, __memset) +SYM_FUNC_ALIAS_MEMFUNC(memset, __memset) EXPORT_SYMBOL(memset) SYM_FUNC_START_LOCAL(memset_orig) diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c index 863d0d6b3edc..7250d0e0e1a9 100644 --- a/arch/x86/xen/efi.c +++ b/arch/x86/xen/efi.c @@ -138,7 +138,7 @@ void __init xen_efi_init(struct boot_params *boot_params) if (efi_systab_xen == NULL) return; - strncpy((char *)&boot_params->efi_info.efi_loader_signature, "Xen", + strscpy((char *)&boot_params->efi_info.efi_loader_signature, "Xen", sizeof(boot_params->efi_info.efi_loader_signature)); boot_params->efi_info.efi_systab = (__u32)__pa(efi_systab_xen); boot_params->efi_info.efi_systab_hi = (__u32)(__pa(efi_systab_xen) >> 32); diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index b8db2148c07d..0337392a3121 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -32,7 +32,7 @@ EXPORT_SYMBOL_GPL(hypercall_page); * &HYPERVISOR_shared_info->vcpu_info[cpu]. See xen_hvm_init_shared_info * and xen_vcpu_setup for details. By default it points to share_info->vcpu_info * but during boot it is switched to point to xen_vcpu_info. - * The pointer is used in __xen_evtchn_do_upcall to acknowledge pending events. + * The pointer is used in xen_evtchn_do_upcall to acknowledge pending events. */ DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index 9a192f51f1b0..3f8c34707c50 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -136,7 +136,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_xen_hvm_callback) inc_irq_stat(irq_hv_callback_count); - xen_hvm_evtchn_do_upcall(); + xen_evtchn_do_upcall(); set_irq_regs(old_regs); } diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 49352fad7d1d..bbbfdd495ebd 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -101,6 +101,17 @@ struct tls_descs { struct desc_struct desc[3]; }; +DEFINE_PER_CPU(enum xen_lazy_mode, xen_lazy_mode) = XEN_LAZY_NONE; +DEFINE_PER_CPU(unsigned int, xen_lazy_nesting); + +enum xen_lazy_mode xen_get_lazy_mode(void) +{ + if (in_interrupt()) + return XEN_LAZY_NONE; + + return this_cpu_read(xen_lazy_mode); +} + /* * Updating the 3 TLS descriptors in the GDT on every task switch is * surprisingly expensive so we avoid updating them if they haven't @@ -362,10 +373,25 @@ static noinstr unsigned long xen_get_debugreg(int reg) return HYPERVISOR_get_debugreg(reg); } +static void xen_start_context_switch(struct task_struct *prev) +{ + BUG_ON(preemptible()); + + if (this_cpu_read(xen_lazy_mode) == XEN_LAZY_MMU) { + arch_leave_lazy_mmu_mode(); + set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES); + } + enter_lazy(XEN_LAZY_CPU); +} + static void xen_end_context_switch(struct task_struct *next) { + BUG_ON(preemptible()); + xen_mc_flush(); - paravirt_end_context_switch(next); + leave_lazy(XEN_LAZY_CPU); + if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES)) + arch_enter_lazy_mmu_mode(); } static unsigned long xen_store_tr(void) @@ -472,7 +498,7 @@ static void xen_set_ldt(const void *addr, unsigned entries) MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - xen_mc_issue(PARAVIRT_LAZY_CPU); + xen_mc_issue(XEN_LAZY_CPU); } static void xen_load_gdt(const struct desc_ptr *dtr) @@ -568,7 +594,7 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu) * exception between the new %fs descriptor being loaded and * %fs being effectively cleared at __switch_to(). */ - if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) + if (xen_get_lazy_mode() == XEN_LAZY_CPU) loadsegment(fs, 0); xen_mc_batch(); @@ -577,7 +603,7 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu) load_TLS_descriptor(t, cpu, 1); load_TLS_descriptor(t, cpu, 2); - xen_mc_issue(PARAVIRT_LAZY_CPU); + xen_mc_issue(XEN_LAZY_CPU); } static void xen_load_gs_index(unsigned int idx) @@ -909,7 +935,7 @@ static void xen_load_sp0(unsigned long sp0) mcs = xen_mc_entry(0); MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0); - xen_mc_issue(PARAVIRT_LAZY_CPU); + xen_mc_issue(XEN_LAZY_CPU); this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0); } @@ -973,7 +999,7 @@ static void xen_write_cr0(unsigned long cr0) MULTI_fpu_taskswitch(mcs.mc, (cr0 & X86_CR0_TS) != 0); - xen_mc_issue(PARAVIRT_LAZY_CPU); + xen_mc_issue(XEN_LAZY_CPU); } static void xen_write_cr4(unsigned long cr4) @@ -1156,7 +1182,7 @@ static const typeof(pv_ops) xen_cpu_ops __initconst = { #endif .io_delay = xen_io_delay, - .start_context_switch = paravirt_start_context_switch, + .start_context_switch = xen_start_context_switch, .end_context_switch = xen_end_context_switch, }, }; diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 1652c39e3dfb..b6830554ff69 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -236,7 +236,7 @@ static void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val) u.val = pmd_val_ma(val); xen_extend_mmu_update(&u); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); preempt_enable(); } @@ -270,7 +270,7 @@ static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval) { struct mmu_update u; - if (paravirt_get_lazy_mode() != PARAVIRT_LAZY_MMU) + if (xen_get_lazy_mode() != XEN_LAZY_MMU) return false; xen_mc_batch(); @@ -279,7 +279,7 @@ static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval) u.val = pte_val_ma(pteval); xen_extend_mmu_update(&u); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); return true; } @@ -325,7 +325,7 @@ void xen_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, u.val = pte_val_ma(pte); xen_extend_mmu_update(&u); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); } /* Assume pteval_t is equivalent to all the other *val_t types. */ @@ -419,7 +419,7 @@ static void xen_set_pud_hyper(pud_t *ptr, pud_t val) u.val = pud_val_ma(val); xen_extend_mmu_update(&u); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); preempt_enable(); } @@ -499,7 +499,7 @@ static void __init xen_set_p4d_hyper(p4d_t *ptr, p4d_t val) __xen_set_p4d_hyper(ptr, val); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); preempt_enable(); } @@ -531,7 +531,7 @@ static void xen_set_p4d(p4d_t *ptr, p4d_t val) if (user_ptr) __xen_set_p4d_hyper((p4d_t *)user_ptr, val); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); } #if CONFIG_PGTABLE_LEVELS >= 5 @@ -1245,7 +1245,7 @@ static noinline void xen_flush_tlb(void) op->cmd = MMUEXT_TLB_FLUSH_LOCAL; MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); preempt_enable(); } @@ -1265,7 +1265,7 @@ static void xen_flush_tlb_one_user(unsigned long addr) op->arg1.linear_addr = addr & PAGE_MASK; MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); preempt_enable(); } @@ -1302,7 +1302,7 @@ static void xen_flush_tlb_multi(const struct cpumask *cpus, MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); } static unsigned long xen_read_cr3(void) @@ -1361,7 +1361,7 @@ static void xen_write_cr3(unsigned long cr3) else __xen_write_cr3(false, 0); - xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ + xen_mc_issue(XEN_LAZY_CPU); /* interrupts restored */ } /* @@ -1396,7 +1396,7 @@ static void __init xen_write_cr3_init(unsigned long cr3) __xen_write_cr3(true, cr3); - xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ + xen_mc_issue(XEN_LAZY_CPU); /* interrupts restored */ } static int xen_pgd_alloc(struct mm_struct *mm) @@ -1557,7 +1557,7 @@ static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS && !pinned) __pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); } } @@ -1587,7 +1587,7 @@ static inline void xen_release_ptpage(unsigned long pfn, unsigned level) __set_pfn_prot(pfn, PAGE_KERNEL); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); ClearPagePinned(page); } @@ -1804,7 +1804,7 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) */ xen_mc_batch(); __xen_write_cr3(true, __pa(init_top_pgt)); - xen_mc_issue(PARAVIRT_LAZY_CPU); + xen_mc_issue(XEN_LAZY_CPU); /* We can't that easily rip out L3 and L2, as the Xen pagetables are * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for @@ -2083,6 +2083,23 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) #endif } +static void xen_enter_lazy_mmu(void) +{ + enter_lazy(XEN_LAZY_MMU); +} + +static void xen_flush_lazy_mmu(void) +{ + preempt_disable(); + + if (xen_get_lazy_mode() == XEN_LAZY_MMU) { + arch_leave_lazy_mmu_mode(); + arch_enter_lazy_mmu_mode(); + } + + preempt_enable(); +} + static void __init xen_post_allocator_init(void) { pv_ops.mmu.set_pte = xen_set_pte; @@ -2107,7 +2124,7 @@ static void xen_leave_lazy_mmu(void) { preempt_disable(); xen_mc_flush(); - paravirt_leave_lazy_mmu(); + leave_lazy(XEN_LAZY_MMU); preempt_enable(); } @@ -2166,9 +2183,9 @@ static const typeof(pv_ops) xen_mmu_ops __initconst = { .exit_mmap = xen_exit_mmap, .lazy_mode = { - .enter = paravirt_enter_lazy_mmu, + .enter = xen_enter_lazy_mmu, .leave = xen_leave_lazy_mmu, - .flush = paravirt_flush_lazy_mmu, + .flush = xen_flush_lazy_mmu, }, .set_fixmap = xen_set_fixmap, @@ -2385,7 +2402,7 @@ static noinline void xen_flush_tlb_all(void) op->cmd = MMUEXT_TLB_FLUSH_ALL; MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - xen_mc_issue(PARAVIRT_LAZY_MMU); + xen_mc_issue(XEN_LAZY_MMU); preempt_enable(); } diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h index 1c51b2c87f30..c3867b585e0d 100644 --- a/arch/x86/xen/multicalls.h +++ b/arch/x86/xen/multicalls.h @@ -26,7 +26,7 @@ static inline void xen_mc_batch(void) /* need to disable interrupts until this entry is complete */ local_irq_save(flags); - trace_xen_mc_batch(paravirt_get_lazy_mode()); + trace_xen_mc_batch(xen_get_lazy_mode()); __this_cpu_write(xen_mc_irq_flags, flags); } @@ -44,7 +44,7 @@ static inline void xen_mc_issue(unsigned mode) { trace_xen_mc_issue(mode); - if ((paravirt_get_lazy_mode() & mode) == 0) + if ((xen_get_lazy_mode() & mode) == 0) xen_mc_flush(); /* restore flags saved in xen_mc_batch */ |