summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-09-17 15:27:01 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2024-09-17 15:27:01 +0200
commitfc1dc0d50780a9b215322bcc315f07ad8e4c6c13 (patch)
tree79abf1631a4e1fd597a10805972147d8db18c761
parentb50753547453613eb5d0fada99d55583852c42df (diff)
parente7ff4ebffe3bedf55560ef861d80f6500ff0d76f (diff)
Merge tag 'x86-timers-2024-09-17' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 timer updates from Thomas Gleixner: - Use the topology information of number of packages for making the decision about TSC trust instead of using the number of online nodes which is not reflecting the real topology. - Stop the PIT timer 0 when its not in use as to stop pointless emulation in the VMM. - Fix the PIT timer stop sequence for timer 0 so it truly stops both real hardware and buggy VMM emulations. * tag 'x86-timers-2024-09-17' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/tsc: Check for sockets instead of CPUs to make code match comment clockevents/drivers/i8253: Fix stop sequence for timer 0 x86/i8253: Disable PIT timer 0 when not in use x86/tsc: Use topology_max_packages() to get package number
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c11
-rw-r--r--arch/x86/kernel/i8253.c11
-rw-r--r--arch/x86/kernel/tsc.c10
-rw-r--r--drivers/clocksource/i8253.c49
-rw-r--r--include/linux/i8253.h2
5 files changed, 48 insertions, 35 deletions
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index ead967479fa6..d18078834ded 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -16,7 +16,6 @@
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kexec.h>
-#include <linux/i8253.h>
#include <linux/random.h>
#include <asm/processor.h>
#include <asm/hypervisor.h>
@@ -537,16 +536,6 @@ static void __init ms_hyperv_init_platform(void)
if (efi_enabled(EFI_BOOT))
x86_platform.get_nmi_reason = hv_get_nmi_reason;
- /*
- * Hyper-V VMs have a PIT emulation quirk such that zeroing the
- * counter register during PIT shutdown restarts the PIT. So it
- * continues to interrupt @18.2 HZ. Setting i8253_clear_counter
- * to false tells pit_shutdown() not to zero the counter so that
- * the PIT really is shutdown. Generation 2 VMs don't have a PIT,
- * and setting this value has no effect.
- */
- i8253_clear_counter_on_shutdown = false;
-
#if IS_ENABLED(CONFIG_HYPERV)
if ((hv_get_isolation_type() == HV_ISOLATION_TYPE_VBS) ||
ms_hyperv.paravisor_present)
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index 2b7999a1a50a..80e262bb627f 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -8,6 +8,7 @@
#include <linux/timex.h>
#include <linux/i8253.h>
+#include <asm/hypervisor.h>
#include <asm/apic.h>
#include <asm/hpet.h>
#include <asm/time.h>
@@ -39,9 +40,15 @@ static bool __init use_pit(void)
bool __init pit_timer_init(void)
{
- if (!use_pit())
+ if (!use_pit()) {
+ /*
+ * Don't just ignore the PIT. Ensure it's stopped, because
+ * VMMs otherwise steal CPU time just to pointlessly waggle
+ * the (masked) IRQ.
+ */
+ clockevent_i8253_disable();
return false;
-
+ }
clockevent_i8253_init(true);
global_clock_event = &i8253_clockevent;
return true;
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index d4462fb26299..dfe6847fd99e 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -28,6 +28,7 @@
#include <asm/apic.h>
#include <asm/cpu_device_id.h>
#include <asm/i8259.h>
+#include <asm/topology.h>
#include <asm/uv/uv.h>
unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */
@@ -1253,15 +1254,12 @@ static void __init check_system_tsc_reliable(void)
* - TSC which does not stop in C-States
* - the TSC_ADJUST register which allows to detect even minimal
* modifications
- * - not more than two sockets. As the number of sockets cannot be
- * evaluated at the early boot stage where this has to be
- * invoked, check the number of online memory nodes as a
- * fallback solution which is an reasonable estimate.
+ * - not more than four packages
*/
if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) &&
boot_cpu_has(X86_FEATURE_NONSTOP_TSC) &&
boot_cpu_has(X86_FEATURE_TSC_ADJUST) &&
- nr_online_nodes <= 4)
+ topology_max_packages() <= 4)
tsc_disable_clocksource_watchdog();
}
@@ -1290,7 +1288,7 @@ int unsynchronized_tsc(void)
*/
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
/* assume multi socket systems are not synchronized: */
- if (num_possible_cpus() > 1)
+ if (topology_max_packages() > 1)
return 1;
}
diff --git a/drivers/clocksource/i8253.c b/drivers/clocksource/i8253.c
index d4350bb10b83..39f7c2d736d1 100644
--- a/drivers/clocksource/i8253.c
+++ b/drivers/clocksource/i8253.c
@@ -20,13 +20,6 @@
DEFINE_RAW_SPINLOCK(i8253_lock);
EXPORT_SYMBOL(i8253_lock);
-/*
- * Handle PIT quirk in pit_shutdown() where zeroing the counter register
- * restarts the PIT, negating the shutdown. On platforms with the quirk,
- * platform specific code can set this to false.
- */
-bool i8253_clear_counter_on_shutdown __ro_after_init = true;
-
#ifdef CONFIG_CLKSRC_I8253
/*
* Since the PIT overflows every tick, its not very useful
@@ -108,21 +101,47 @@ int __init clocksource_i8253_init(void)
#endif
#ifdef CONFIG_CLKEVT_I8253
-static int pit_shutdown(struct clock_event_device *evt)
+void clockevent_i8253_disable(void)
{
- if (!clockevent_state_oneshot(evt) && !clockevent_state_periodic(evt))
- return 0;
-
raw_spin_lock(&i8253_lock);
+ /*
+ * Writing the MODE register should stop the counter, according to
+ * the datasheet. This appears to work on real hardware (well, on
+ * modern Intel and AMD boxes; I didn't dig the Pegasos out of the
+ * shed).
+ *
+ * However, some virtual implementations differ, and the MODE change
+ * doesn't have any effect until either the counter is written (KVM
+ * in-kernel PIT) or the next interrupt (QEMU). And in those cases,
+ * it may not stop the *count*, only the interrupts. Although in
+ * the virt case, that probably doesn't matter, as the value of the
+ * counter will only be calculated on demand if the guest reads it;
+ * it's the interrupts which cause steal time.
+ *
+ * Hyper-V apparently has a bug where even in mode 0, the IRQ keeps
+ * firing repeatedly if the counter is running. But it *does* do the
+ * right thing when the MODE register is written.
+ *
+ * So: write the MODE and then load the counter, which ensures that
+ * the IRQ is stopped on those buggy virt implementations. And then
+ * write the MODE again, which is the right way to stop it.
+ */
outb_p(0x30, PIT_MODE);
+ outb_p(0, PIT_CH0);
+ outb_p(0, PIT_CH0);
- if (i8253_clear_counter_on_shutdown) {
- outb_p(0, PIT_CH0);
- outb_p(0, PIT_CH0);
- }
+ outb_p(0x30, PIT_MODE);
raw_spin_unlock(&i8253_lock);
+}
+
+static int pit_shutdown(struct clock_event_device *evt)
+{
+ if (!clockevent_state_oneshot(evt) && !clockevent_state_periodic(evt))
+ return 0;
+
+ clockevent_i8253_disable();
return 0;
}
diff --git a/include/linux/i8253.h b/include/linux/i8253.h
index 8336b2f6f834..56c280eb2d4f 100644
--- a/include/linux/i8253.h
+++ b/include/linux/i8253.h
@@ -21,9 +21,9 @@
#define PIT_LATCH ((PIT_TICK_RATE + HZ/2) / HZ)
extern raw_spinlock_t i8253_lock;
-extern bool i8253_clear_counter_on_shutdown;
extern struct clock_event_device i8253_clockevent;
extern void clockevent_i8253_init(bool oneshot);
+extern void clockevent_i8253_disable(void);
extern void setup_pit_timer(void);