summaryrefslogtreecommitdiff
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/asm-offsets.c6
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c2
-rw-r--r--arch/x86/kernel/crash.c17
-rw-r--r--arch/x86/kernel/nmi.c8
-rw-r--r--arch/x86/kernel/reboot.c88
-rw-r--r--arch/x86/kernel/smp.c6
6 files changed, 79 insertions, 48 deletions
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index ef9e951415c5..283dcd2f62c8 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -76,12 +76,18 @@ static void __used common(void)
OFFSET(TDX_MODULE_r11, tdx_module_output, r11);
BLANK();
+ OFFSET(TDX_HYPERCALL_r8, tdx_hypercall_args, r8);
+ OFFSET(TDX_HYPERCALL_r9, tdx_hypercall_args, r9);
OFFSET(TDX_HYPERCALL_r10, tdx_hypercall_args, r10);
OFFSET(TDX_HYPERCALL_r11, tdx_hypercall_args, r11);
OFFSET(TDX_HYPERCALL_r12, tdx_hypercall_args, r12);
OFFSET(TDX_HYPERCALL_r13, tdx_hypercall_args, r13);
OFFSET(TDX_HYPERCALL_r14, tdx_hypercall_args, r14);
OFFSET(TDX_HYPERCALL_r15, tdx_hypercall_args, r15);
+ OFFSET(TDX_HYPERCALL_rdi, tdx_hypercall_args, rdi);
+ OFFSET(TDX_HYPERCALL_rsi, tdx_hypercall_args, rsi);
+ OFFSET(TDX_HYPERCALL_rbx, tdx_hypercall_args, rbx);
+ OFFSET(TDX_HYPERCALL_rdx, tdx_hypercall_args, rdx);
BLANK();
OFFSET(BP_scratch, boot_params, scratch);
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index f924a76c6923..f36dc2f796c5 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -460,7 +460,7 @@ static void __init ms_hyperv_init_platform(void)
* setting of this MSR bit should happen before init_intel()
* is called.
*/
- wrmsrl(HV_X64_MSR_TSC_INVARIANT_CONTROL, 0x1);
+ wrmsrl(HV_X64_MSR_TSC_INVARIANT_CONTROL, HV_EXPOSE_INVARIANT_TSC);
setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
}
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 305514431f26..cdd92ab43cda 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -37,7 +37,6 @@
#include <linux/kdebug.h>
#include <asm/cpu.h>
#include <asm/reboot.h>
-#include <asm/virtext.h>
#include <asm/intel_pt.h>
#include <asm/crash.h>
#include <asm/cmdline.h>
@@ -81,15 +80,6 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
*/
cpu_crash_vmclear_loaded_vmcss();
- /* Disable VMX or SVM if needed.
- *
- * We need to disable virtualization on all CPUs.
- * Having VMX or SVM enabled on any CPU may break rebooting
- * after the kdump kernel has finished its task.
- */
- cpu_emergency_vmxoff();
- cpu_emergency_svm_disable();
-
/*
* Disable Intel PT to stop its logging
*/
@@ -148,12 +138,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
*/
cpu_crash_vmclear_loaded_vmcss();
- /* Booting kdump kernel with VMX or SVM enabled won't work,
- * because (among other limitations) we can't disable paging
- * with the virt flags.
- */
- cpu_emergency_vmxoff();
- cpu_emergency_svm_disable();
+ cpu_emergency_disable_virtualization();
/*
* Disable Intel PT to stop its logging
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index c315b18ec7c8..776f4b1e395b 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -560,14 +560,14 @@ nmi_restart:
}
}
-#if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL)
-DEFINE_IDTENTRY_RAW(exc_nmi_noist)
+#if IS_ENABLED(CONFIG_KVM_INTEL)
+DEFINE_IDTENTRY_RAW(exc_nmi_kvm_vmx)
{
exc_nmi(regs);
}
-#endif
#if IS_MODULE(CONFIG_KVM_INTEL)
-EXPORT_SYMBOL_GPL(asm_exc_nmi_noist);
+EXPORT_SYMBOL_GPL(asm_exc_nmi_kvm_vmx);
+#endif
#endif
#ifdef CONFIG_NMI_CHECK_CPU
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index c3636ea4aa71..d03c551defcc 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -528,33 +528,29 @@ static inline void kb_wait(void)
}
}
-static void vmxoff_nmi(int cpu, struct pt_regs *regs)
-{
- cpu_emergency_vmxoff();
-}
+static inline void nmi_shootdown_cpus_on_restart(void);
-/* Use NMIs as IPIs to tell all CPUs to disable virtualization */
-static void emergency_vmx_disable_all(void)
+static void emergency_reboot_disable_virtualization(void)
{
/* Just make sure we won't change CPUs while doing this */
local_irq_disable();
/*
- * Disable VMX on all CPUs before rebooting, otherwise we risk hanging
- * the machine, because the CPU blocks INIT when it's in VMX root.
+ * Disable virtualization on all CPUs before rebooting to avoid hanging
+ * the system, as VMX and SVM block INIT when running in the host.
*
* We can't take any locks and we may be on an inconsistent state, so
- * use NMIs as IPIs to tell the other CPUs to exit VMX root and halt.
+ * use NMIs as IPIs to tell the other CPUs to disable VMX/SVM and halt.
*
- * Do the NMI shootdown even if VMX if off on _this_ CPU, as that
- * doesn't prevent a different CPU from being in VMX root operation.
+ * Do the NMI shootdown even if virtualization is off on _this_ CPU, as
+ * other CPUs may have virtualization enabled.
*/
- if (cpu_has_vmx()) {
- /* Safely force _this_ CPU out of VMX root operation. */
- __cpu_emergency_vmxoff();
+ if (cpu_has_vmx() || cpu_has_svm(NULL)) {
+ /* Safely force _this_ CPU out of VMX/SVM operation. */
+ cpu_emergency_disable_virtualization();
- /* Halt and exit VMX root operation on the other CPUs. */
- nmi_shootdown_cpus(vmxoff_nmi);
+ /* Disable VMX/SVM and halt on other CPUs. */
+ nmi_shootdown_cpus_on_restart();
}
}
@@ -590,7 +586,7 @@ static void native_machine_emergency_restart(void)
unsigned short mode;
if (reboot_emergency)
- emergency_vmx_disable_all();
+ emergency_reboot_disable_virtualization();
tboot_shutdown(TB_SHUTDOWN_REBOOT);
@@ -795,6 +791,17 @@ void machine_crash_shutdown(struct pt_regs *regs)
/* This is the CPU performing the emergency shutdown work. */
int crashing_cpu = -1;
+/*
+ * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during
+ * reboot. VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if
+ * GIF=0, i.e. if the crash occurred between CLGI and STGI.
+ */
+void cpu_emergency_disable_virtualization(void)
+{
+ cpu_emergency_vmxoff();
+ cpu_emergency_svm_disable();
+}
+
#if defined(CONFIG_SMP)
static nmi_shootdown_cb shootdown_callback;
@@ -817,7 +824,14 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
return NMI_HANDLED;
local_irq_disable();
- shootdown_callback(cpu, regs);
+ if (shootdown_callback)
+ shootdown_callback(cpu, regs);
+
+ /*
+ * Prepare the CPU for reboot _after_ invoking the callback so that the
+ * callback can safely use virtualization instructions, e.g. VMCLEAR.
+ */
+ cpu_emergency_disable_virtualization();
atomic_dec(&waiting_for_crash_ipi);
/* Assume hlt works */
@@ -828,18 +842,32 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
return NMI_HANDLED;
}
-/*
- * Halt all other CPUs, calling the specified function on each of them
+/**
+ * nmi_shootdown_cpus - Stop other CPUs via NMI
+ * @callback: Optional callback to be invoked from the NMI handler
+ *
+ * The NMI handler on the remote CPUs invokes @callback, if not
+ * NULL, first and then disables virtualization to ensure that
+ * INIT is recognized during reboot.
*
- * This function can be used to halt all other CPUs on crash
- * or emergency reboot time. The function passed as parameter
- * will be called inside a NMI handler on all CPUs.
+ * nmi_shootdown_cpus() can only be invoked once. After the first
+ * invocation all other CPUs are stuck in crash_nmi_callback() and
+ * cannot respond to a second NMI.
*/
void nmi_shootdown_cpus(nmi_shootdown_cb callback)
{
unsigned long msecs;
+
local_irq_disable();
+ /*
+ * Avoid certain doom if a shootdown already occurred; re-registering
+ * the NMI handler will cause list corruption, modifying the callback
+ * will do who knows what, etc...
+ */
+ if (WARN_ON_ONCE(crash_ipi_issued))
+ return;
+
/* Make a note of crashing cpu. Will be used in NMI callback. */
crashing_cpu = safe_smp_processor_id();
@@ -867,7 +895,17 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
msecs--;
}
- /* Leave the nmi callback set */
+ /*
+ * Leave the nmi callback set, shootdown is a one-time thing. Clearing
+ * the callback could result in a NULL pointer dereference if a CPU
+ * (finally) responds after the timeout expires.
+ */
+}
+
+static inline void nmi_shootdown_cpus_on_restart(void)
+{
+ if (!crash_ipi_issued)
+ nmi_shootdown_cpus(NULL);
}
/*
@@ -897,6 +935,8 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
/* No other CPUs to shoot down */
}
+static inline void nmi_shootdown_cpus_on_restart(void) { }
+
void run_crash_ipi_callback(struct pt_regs *regs)
{
}
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 06db901fabe8..375b33ecafa2 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -32,7 +32,7 @@
#include <asm/mce.h>
#include <asm/trace/irq_vectors.h>
#include <asm/kexec.h>
-#include <asm/virtext.h>
+#include <asm/reboot.h>
/*
* Some notes on x86 processor bugs affecting SMP operation:
@@ -122,7 +122,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
if (raw_smp_processor_id() == atomic_read(&stopping_cpu))
return NMI_HANDLED;
- cpu_emergency_vmxoff();
+ cpu_emergency_disable_virtualization();
stop_this_cpu(NULL);
return NMI_HANDLED;
@@ -134,7 +134,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
DEFINE_IDTENTRY_SYSVEC(sysvec_reboot)
{
ack_APIC_irq();
- cpu_emergency_vmxoff();
+ cpu_emergency_disable_virtualization();
stop_this_cpu(NULL);
}