diff options
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/apic/apic.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/apic/x2apic_cluster.c | 6 | ||||
-rw-r--r-- | arch/x86/kernel/apic/x2apic_phys.c | 9 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/amd.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/intel.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mce/core.c | 7 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/topology.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/fpu/core.c | 9 | ||||
-rw-r--r-- | arch/x86/kernel/hw_breakpoint.c | 61 | ||||
-rw-r--r-- | arch/x86/kernel/sev-es.c | 14 | ||||
-rw-r--r-- | arch/x86/kernel/smpboot.c | 19 | ||||
-rw-r--r-- | arch/x86/kernel/step.c | 10 |
12 files changed, 107 insertions, 39 deletions
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 6bd20c0de8bc..7f4c081f59f0 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -41,6 +41,7 @@ #include <asm/perf_event.h> #include <asm/x86_init.h> #include <linux/atomic.h> +#include <asm/barrier.h> #include <asm/mpspec.h> #include <asm/i8259.h> #include <asm/proto.h> @@ -477,6 +478,9 @@ static int lapic_next_deadline(unsigned long delta, { u64 tsc; + /* This MSR is special and need a special fence: */ + weak_wrmsr_fence(); + tsc = rdtsc(); wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR)); return 0; diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index df6adc5674c9..f4da9bb69a88 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -29,7 +29,8 @@ static void x2apic_send_IPI(int cpu, int vector) { u32 dest = per_cpu(x86_cpu_to_logical_apicid, cpu); - x2apic_wrmsr_fence(); + /* x2apic MSRs are special and need a special fence: */ + weak_wrmsr_fence(); __x2apic_send_IPI_dest(dest, vector, APIC_DEST_LOGICAL); } @@ -41,7 +42,8 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) unsigned long flags; u32 dest; - x2apic_wrmsr_fence(); + /* x2apic MSRs are special and need a special fence: */ + weak_wrmsr_fence(); local_irq_save(flags); tmpmsk = this_cpu_cpumask_var_ptr(ipi_mask); diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 0e4e81971567..6bde05a86b4e 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -43,7 +43,8 @@ static void x2apic_send_IPI(int cpu, int vector) { u32 dest = per_cpu(x86_cpu_to_apicid, cpu); - x2apic_wrmsr_fence(); + /* x2apic MSRs are special and need a special fence: */ + weak_wrmsr_fence(); __x2apic_send_IPI_dest(dest, vector, APIC_DEST_PHYSICAL); } @@ -54,7 +55,8 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) unsigned long this_cpu; unsigned long flags; - x2apic_wrmsr_fence(); + /* x2apic MSRs are special and need a special fence: */ + weak_wrmsr_fence(); local_irq_save(flags); @@ -125,7 +127,8 @@ void __x2apic_send_IPI_shorthand(int vector, u32 which) { unsigned long cfg = __prepare_ICR(which, vector, 0); - x2apic_wrmsr_fence(); + /* x2apic MSRs are special and need a special fence: */ + weak_wrmsr_fence(); native_x2apic_icr_write(cfg, 0); } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index f8ca66f3d861..347a956f71ca 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -542,12 +542,12 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) u32 ecx; ecx = cpuid_ecx(0x8000001e); - nodes_per_socket = ((ecx >> 8) & 7) + 1; + __max_die_per_package = nodes_per_socket = ((ecx >> 8) & 7) + 1; } else if (boot_cpu_has(X86_FEATURE_NODEID_MSR)) { u64 value; rdmsrl(MSR_FAM10H_NODE_ID, value); - nodes_per_socket = ((value >> 3) & 7) + 1; + __max_die_per_package = nodes_per_socket = ((value >> 3) & 7) + 1; } if (!boot_cpu_has(X86_FEATURE_AMD_SSBD) && diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 59a1e3ce3f14..816fdbec795a 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -1159,6 +1159,7 @@ static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, 1), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, 1), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, 1), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, 1), {} }; diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 13d3f1cbda17..e133ce1e562b 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1992,10 +1992,9 @@ static __always_inline void exc_machine_check_kernel(struct pt_regs *regs) * that out because it's an indirect call. Annotate it. */ instrumentation_begin(); - trace_hardirqs_off_finish(); + machine_check_vector(regs); - if (regs->flags & X86_EFLAGS_IF) - trace_hardirqs_on_prepare(); + instrumentation_end(); irqentry_nmi_exit(regs, irq_state); } @@ -2004,7 +2003,9 @@ static __always_inline void exc_machine_check_user(struct pt_regs *regs) { irqentry_enter_from_user_mode(regs); instrumentation_begin(); + machine_check_vector(regs); + instrumentation_end(); irqentry_exit_to_user_mode(regs); } diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 1068002c8532..8678864ce712 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -25,10 +25,10 @@ #define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f) #define LEVEL_MAX_SIBLINGS(ebx) ((ebx) & 0xffff) -#ifdef CONFIG_SMP unsigned int __max_die_per_package __read_mostly = 1; EXPORT_SYMBOL(__max_die_per_package); +#ifdef CONFIG_SMP /* * Check if given CPUID extended toplogy "leaf" is implemented */ diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index eb86a2b831b1..571220ac8bea 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -121,7 +121,7 @@ int copy_fpregs_to_fpstate(struct fpu *fpu) } EXPORT_SYMBOL(copy_fpregs_to_fpstate); -void kernel_fpu_begin(void) +void kernel_fpu_begin_mask(unsigned int kfpu_mask) { preempt_disable(); @@ -141,13 +141,14 @@ void kernel_fpu_begin(void) } __cpu_invalidate_fpregs_state(); - if (boot_cpu_has(X86_FEATURE_XMM)) + /* Put sane initial values into the control registers. */ + if (likely(kfpu_mask & KFPU_MXCSR) && boot_cpu_has(X86_FEATURE_XMM)) ldmxcsr(MXCSR_DEFAULT); - if (boot_cpu_has(X86_FEATURE_FPU)) + if (unlikely(kfpu_mask & KFPU_387) && boot_cpu_has(X86_FEATURE_FPU)) asm volatile ("fninit"); } -EXPORT_SYMBOL_GPL(kernel_fpu_begin); +EXPORT_SYMBOL_GPL(kernel_fpu_begin_mask); void kernel_fpu_end(void) { diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 03aa33b58165..668a4a6533d9 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -269,6 +269,20 @@ static inline bool within_cpu_entry(unsigned long addr, unsigned long end) CPU_ENTRY_AREA_TOTAL_SIZE)) return true; + /* + * When FSGSBASE is enabled, paranoid_entry() fetches the per-CPU + * GSBASE value via __per_cpu_offset or pcpu_unit_offsets. + */ +#ifdef CONFIG_SMP + if (within_area(addr, end, (unsigned long)__per_cpu_offset, + sizeof(unsigned long) * nr_cpu_ids)) + return true; +#else + if (within_area(addr, end, (unsigned long)&pcpu_unit_offsets, + sizeof(pcpu_unit_offsets))) + return true; +#endif + for_each_possible_cpu(cpu) { /* The original rw GDT is being used after load_direct_gdt() */ if (within_area(addr, end, (unsigned long)get_cpu_gdt_rw(cpu), @@ -293,6 +307,14 @@ static inline bool within_cpu_entry(unsigned long addr, unsigned long end) (unsigned long)&per_cpu(cpu_tlbstate, cpu), sizeof(struct tlb_state))) return true; + + /* + * When in guest (X86_FEATURE_HYPERVISOR), local_db_save() + * will read per-cpu cpu_dr7 before clear dr7 register. + */ + if (within_area(addr, end, (unsigned long)&per_cpu(cpu_dr7, cpu), + sizeof(cpu_dr7))) + return true; } return false; @@ -491,15 +513,12 @@ static int hw_breakpoint_handler(struct die_args *args) struct perf_event *bp; unsigned long *dr6_p; unsigned long dr6; + bool bpx; /* The DR6 value is pointed by args->err */ dr6_p = (unsigned long *)ERR_PTR(args->err); dr6 = *dr6_p; - /* If it's a single step, TRAP bits are random */ - if (dr6 & DR_STEP) - return NOTIFY_DONE; - /* Do an early return if no trap bits are set in DR6 */ if ((dr6 & DR_TRAP_BITS) == 0) return NOTIFY_DONE; @@ -509,28 +528,29 @@ static int hw_breakpoint_handler(struct die_args *args) if (likely(!(dr6 & (DR_TRAP0 << i)))) continue; + bp = this_cpu_read(bp_per_reg[i]); + if (!bp) + continue; + + bpx = bp->hw.info.type == X86_BREAKPOINT_EXECUTE; + /* - * The counter may be concurrently released but that can only - * occur from a call_rcu() path. We can then safely fetch - * the breakpoint, use its callback, touch its counter - * while we are in an rcu_read_lock() path. + * TF and data breakpoints are traps and can be merged, however + * instruction breakpoints are faults and will be raised + * separately. + * + * However DR6 can indicate both TF and instruction + * breakpoints. In that case take TF as that has precedence and + * delay the instruction breakpoint for the next exception. */ - rcu_read_lock(); + if (bpx && (dr6 & DR_STEP)) + continue; - bp = this_cpu_read(bp_per_reg[i]); /* * Reset the 'i'th TRAP bit in dr6 to denote completion of * exception handling */ (*dr6_p) &= ~(DR_TRAP0 << i); - /* - * bp can be NULL due to lazy debug register switching - * or due to concurrent perf counter removing. - */ - if (!bp) { - rcu_read_unlock(); - break; - } perf_bp_event(bp, args->regs); @@ -538,11 +558,10 @@ static int hw_breakpoint_handler(struct die_args *args) * Set up resume flag to avoid breakpoint recursion when * returning back to origin. */ - if (bp->hw.info.type == X86_BREAKPOINT_EXECUTE) + if (bpx) args->regs->flags |= X86_EFLAGS_RF; - - rcu_read_unlock(); } + /* * Further processing in do_debug() is needed for a) user-space * breakpoints (to generate signals) and b) when the system has diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c index 0bd1a0fc587e..84c1821819af 100644 --- a/arch/x86/kernel/sev-es.c +++ b/arch/x86/kernel/sev-es.c @@ -225,7 +225,7 @@ static inline u64 sev_es_rd_ghcb_msr(void) return __rdmsr(MSR_AMD64_SEV_ES_GHCB); } -static inline void sev_es_wr_ghcb_msr(u64 val) +static __always_inline void sev_es_wr_ghcb_msr(u64 val) { u32 low, high; @@ -286,6 +286,12 @@ static enum es_result vc_write_mem(struct es_em_ctxt *ctxt, u16 d2; u8 d1; + /* If instruction ran in kernel mode and the I/O buffer is in kernel space */ + if (!user_mode(ctxt->regs) && !access_ok(target, size)) { + memcpy(dst, buf, size); + return ES_OK; + } + switch (size) { case 1: memcpy(&d1, buf, 1); @@ -335,6 +341,12 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, u16 d2; u8 d1; + /* If instruction ran in kernel mode and the I/O buffer is in kernel space */ + if (!user_mode(ctxt->regs) && !access_ok(s, size)) { + memcpy(buf, src, size); + return ES_OK; + } + switch (size) { case 1: if (get_user(d1, s)) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 8ca66af96a54..117e24fbfd8a 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -56,6 +56,7 @@ #include <linux/numa.h> #include <linux/pgtable.h> #include <linux/overflow.h> +#include <linux/syscore_ops.h> #include <asm/acpi.h> #include <asm/desc.h> @@ -2083,6 +2084,23 @@ static void init_counter_refs(void) this_cpu_write(arch_prev_mperf, mperf); } +#ifdef CONFIG_PM_SLEEP +static struct syscore_ops freq_invariance_syscore_ops = { + .resume = init_counter_refs, +}; + +static void register_freq_invariance_syscore_ops(void) +{ + /* Bail out if registered already. */ + if (freq_invariance_syscore_ops.node.prev) + return; + + register_syscore_ops(&freq_invariance_syscore_ops); +} +#else +static inline void register_freq_invariance_syscore_ops(void) {} +#endif + static void init_freq_invariance(bool secondary, bool cppc_ready) { bool ret = false; @@ -2109,6 +2127,7 @@ static void init_freq_invariance(bool secondary, bool cppc_ready) if (ret) { init_counter_refs(); static_branch_enable(&arch_scale_freq_key); + register_freq_invariance_syscore_ops(); pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio); } else { pr_debug("Couldn't determine max cpu frequency, necessary for scale-invariant accounting.\n"); diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 60d2c3798ba2..0f3c307b37b3 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -127,12 +127,17 @@ static int enable_single_step(struct task_struct *child) regs->flags |= X86_EFLAGS_TF; /* - * Always set TIF_SINGLESTEP - this guarantees that - * we single-step system calls etc.. This will also + * Always set TIF_SINGLESTEP. This will also * cause us to set TF when returning to user mode. */ set_tsk_thread_flag(child, TIF_SINGLESTEP); + /* + * Ensure that a trap is triggered once stepping out of a system + * call prior to executing any user instruction. + */ + set_task_syscall_work(child, SYSCALL_EXIT_TRAP); + oflags = regs->flags; /* Set TF on the kernel stack.. */ @@ -230,6 +235,7 @@ void user_disable_single_step(struct task_struct *child) /* Always clear TIF_SINGLESTEP... */ clear_tsk_thread_flag(child, TIF_SINGLESTEP); + clear_task_syscall_work(child, SYSCALL_EXIT_TRAP); /* But touch TF only if it was set by us.. */ if (test_and_clear_tsk_thread_flag(child, TIF_FORCED_TF)) |