From f365d05506150398fe6b035918d6fd8b62f35b9f Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Tue, 23 Jan 2024 17:46:57 +0100 Subject: tick/sched: Add function description for tick_nohz_next_event() The return value of tick_nohz_next_event() is not obvious at the first glance. Add a kernel-doc compatible function description which also covers return values. Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240123164702.55612-4-anna-maria@linutronix.de --- kernel/time/tick-sched.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'kernel/time/tick-sched.c') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 01fb50c1b17e..7c9efe3d9d56 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -799,6 +799,16 @@ static inline bool local_timer_softirq_pending(void) return local_softirq_pending() & BIT(TIMER_SOFTIRQ); } +/** + * tick_nohz_next_event() - return the clock monotonic based next event + * @ts: pointer to tick_sched struct + * @cpu: CPU number + * + * Return: + * *%0 - When the next event is a maximum of TICK_NSEC in the future + * and the tick is not stopped yet + * *%next_event - Next event based on clock monotonic + */ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) { u64 basemono, next_tick, delta, expires; -- cgit v1.2.3-70-g09d2 From e2e1d724e948c87a31c18c34c6b6a193a9b2a0f0 Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Wed, 21 Feb 2024 10:05:31 +0100 Subject: timers: Move marking timer bases idle into tick_nohz_stop_tick() The timer base is marked idle when get_next_timer_interrupt() is executed. But the decision whether the tick will be stopped and whether the system is able to go idle is done later. When the timer bases is marked idle and a new first timer is enqueued remote an IPI is raised. Even if it is not required because the tick is not stopped and the timer base is evaluated again at the next tick. To prevent this, the timer base is marked idle in tick_nohz_stop_tick() and get_next_timer_interrupt() is streamlined by only looking for the next timer interrupt. All other work is postponed to timer_base_try_to_set_idle() which is called by tick_nohz_stop_tick(). timer_base_try_to_set_idle() never resets timer_base::is_idle state. This is done when the tick is restarted via tick_nohz_restart_sched_tick(). With this, tick_sched::tick_stopped and timer_base::is_idle are always in sync. So there is no longer the need to execute timer_clear_idle() in tick_nohz_idle_retain_tick(). This was required before, as tick_nohz_next_event() set timer_base::is_idle even if the tick would not be stopped. So timer_clear_idle() is only executed, when timer base is idle. So the check whether timer base is idle, is now no longer required as well. While at it fix some nearby whitespace damage as well. Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20240221090548.36600-4-anna-maria@linutronix.de --- kernel/time/tick-internal.h | 1 + kernel/time/tick-sched.c | 40 +++++++++++++++++++++--------- kernel/time/timer.c | 60 +++++++++++++++++++++++++++++++-------------- 3 files changed, 71 insertions(+), 30 deletions(-) (limited to 'kernel/time/tick-sched.c') diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 481b7ab65e2c..47df30b871e4 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -163,6 +163,7 @@ static inline void timers_update_nohz(void) { } DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases); extern u64 get_next_timer_interrupt(unsigned long basej, u64 basem); +u64 timer_base_try_to_set_idle(unsigned long basej, u64 basem, bool *idle); void timer_clear_idle(void); #define CLOCK_SET_WALL \ diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 7c9efe3d9d56..344b904f520f 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -859,11 +859,6 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) */ delta = next_tick - basemono; if (delta <= (u64)TICK_NSEC) { - /* - * Tell the timer code that the base is not idle, i.e. undo - * the effect of get_next_timer_interrupt(): - */ - timer_clear_idle(); /* * We've not stopped the tick yet, and there's a timer in the * next period, so no point in stopping it either, bail. @@ -899,12 +894,38 @@ out: static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu) { struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); + unsigned long basejiff = ts->last_jiffies; u64 basemono = ts->timer_expires_base; - u64 expires = ts->timer_expires; + bool timer_idle; + u64 expires; /* Make sure we won't be trying to stop it twice in a row. */ ts->timer_expires_base = 0; + /* + * Now the tick should be stopped definitely - so the timer base needs + * to be marked idle as well to not miss a newly queued timer. + */ + expires = timer_base_try_to_set_idle(basejiff, basemono, &timer_idle); + if (expires > ts->timer_expires) { + /* + * This path could only happen when the first timer was removed + * between calculating the possible sleep length and now (when + * high resolution mode is not active, timer could also be a + * hrtimer). + * + * We have to stick to the original calculated expiry value to + * not stop the tick for too long with a shallow C-state (which + * was programmed by cpuidle because of an early next expiration + * value). + */ + expires = ts->timer_expires; + } + + /* If the timer base is not idle, retain the not yet stopped tick. */ + if (!timer_idle) + return; + /* * If this CPU is the one which updates jiffies, then give up * the assignment and let it be taken by the CPU which runs @@ -1001,7 +1022,7 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) touch_softlockup_watchdog_sched(); /* Cancel the scheduled timer and restore the tick: */ - ts->tick_stopped = 0; + ts->tick_stopped = 0; tick_nohz_restart(ts, now); } @@ -1157,11 +1178,6 @@ void tick_nohz_idle_stop_tick(void) void tick_nohz_idle_retain_tick(void) { tick_nohz_retain_tick(this_cpu_ptr(&tick_cpu_sched)); - /* - * Undo the effect of get_next_timer_interrupt() called from - * tick_nohz_next_event(). - */ - timer_clear_idle(); } /** diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 9f0cdba4afe0..a4b8a58d05e5 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1956,19 +1956,22 @@ static u64 cmp_next_hrtimer_event(u64 basem, u64 expires) return DIV_ROUND_UP_ULL(nextevt, TICK_NSEC) * TICK_NSEC; } -static inline u64 __get_next_timer_interrupt(unsigned long basej, u64 basem) +static inline u64 __get_next_timer_interrupt(unsigned long basej, u64 basem, + bool *idle) { struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); unsigned long nextevt = basej + NEXT_TIMER_MAX_DELTA; u64 expires = KTIME_MAX; - bool was_idle; /* * Pretend that there is no timer pending if the cpu is offline. * Possible pending timers will be migrated later to an active cpu. */ - if (cpu_is_offline(smp_processor_id())) + if (cpu_is_offline(smp_processor_id())) { + if (idle) + *idle = true; return expires; + } raw_spin_lock(&base->lock); if (base->next_expiry_recalc) @@ -1998,17 +2001,26 @@ static inline u64 __get_next_timer_interrupt(unsigned long basej, u64 basem) __forward_timer_base(base, basej); /* - * Base is idle if the next event is more than a tick away. - * - * If the base is marked idle then any timer add operation must forward - * the base clk itself to keep granularity small. This idle logic is - * only maintained for the BASE_STD base, deferrable timers may still - * see large granularity skew (by design). + * Set base->is_idle only when caller is timer_base_try_to_set_idle() */ - was_idle = base->is_idle; - base->is_idle = time_after(nextevt, basej + 1); - if (was_idle != base->is_idle) - trace_timer_base_idle(base->is_idle, base->cpu); + if (idle) { + /* + * Base is idle if the next event is more than a tick away. + * + * If the base is marked idle then any timer add operation must + * forward the base clk itself to keep granularity small. This + * idle logic is only maintained for the BASE_STD base, + * deferrable timers may still see large granularity skew (by + * design). + */ + if (!base->is_idle) { + if (time_after(nextevt, basej + 1)) { + base->is_idle = true; + trace_timer_base_idle(true, base->cpu); + } + } + *idle = base->is_idle; + } raw_spin_unlock(&base->lock); @@ -2025,7 +2037,21 @@ static inline u64 __get_next_timer_interrupt(unsigned long basej, u64 basem) */ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) { - return __get_next_timer_interrupt(basej, basem); + return __get_next_timer_interrupt(basej, basem, NULL); +} + +/** + * timer_base_try_to_set_idle() - Try to set the idle state of the timer bases + * @basej: base time jiffies + * @basem: base time clock monotonic + * @idle: pointer to store the value of timer_base->is_idle + * + * Returns the tick aligned clock monotonic time of the next pending + * timer or KTIME_MAX if no timer is pending. + */ +u64 timer_base_try_to_set_idle(unsigned long basej, u64 basem, bool *idle) +{ + return __get_next_timer_interrupt(basej, basem, idle); } /** @@ -2043,10 +2069,8 @@ void timer_clear_idle(void) * sending the IPI a few instructions smaller for the cost of taking * the lock in the exit from idle path. */ - if (base->is_idle) { - base->is_idle = false; - trace_timer_base_idle(false, smp_processor_id()); - } + base->is_idle = false; + trace_timer_base_idle(false, smp_processor_id()); } #endif -- cgit v1.2.3-70-g09d2 From 73129cf4b69cd1aaa3dd5eb7900a9c349773f5ae Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Wed, 21 Feb 2024 10:05:32 +0100 Subject: timers: Optimization for timer_base_try_to_set_idle() When tick is stopped also the timer base is_idle flag is set. When reentering timer_base_try_to_set_idle() with the tick stopped, there is no need to check whether the timer base needs to be set idle again. When a timer was enqueued in the meantime, this is already handled by the tick_nohz_next_event() call which was executed before tick_nohz_stop_tick(). Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20240221090548.36600-5-anna-maria@linutronix.de --- kernel/time/tick-sched.c | 2 +- kernel/time/timer.c | 11 ++++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'kernel/time/tick-sched.c') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 344b904f520f..f6b613380229 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -896,7 +896,7 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu) struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); unsigned long basejiff = ts->last_jiffies; u64 basemono = ts->timer_expires_base; - bool timer_idle; + bool timer_idle = ts->tick_stopped; u64 expires; /* Make sure we won't be trying to stop it twice in a row. */ diff --git a/kernel/time/timer.c b/kernel/time/timer.c index a4b8a58d05e5..74cfe21f8fd9 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -2044,13 +2044,18 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) * timer_base_try_to_set_idle() - Try to set the idle state of the timer bases * @basej: base time jiffies * @basem: base time clock monotonic - * @idle: pointer to store the value of timer_base->is_idle + * @idle: pointer to store the value of timer_base->is_idle on return; + * *idle contains the information whether tick was already stopped * - * Returns the tick aligned clock monotonic time of the next pending - * timer or KTIME_MAX if no timer is pending. + * Returns the tick aligned clock monotonic time of the next pending timer or + * KTIME_MAX if no timer is pending. When tick was already stopped KTIME_MAX is + * returned as well. */ u64 timer_base_try_to_set_idle(unsigned long basej, u64 basem, bool *idle) { + if (*idle) + return KTIME_MAX; + return __get_next_timer_interrupt(basej, basem, idle); } -- cgit v1.2.3-70-g09d2 From 4c532939aa2e9345ee346bc69d3d12d56d5aa9aa Mon Sep 17 00:00:00 2001 From: "Richard Cochran (linutronix GmbH)" Date: Wed, 21 Feb 2024 10:05:44 +0100 Subject: tick/sched: Split out jiffies update helper function The logic to get the time of the last jiffies update will be needed by the timer pull model as well. Move the code into a global function in anticipation of the new caller. No functional change. Signed-off-by: Richard Cochran (linutronix GmbH) Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20240221090548.36600-17-anna-maria@linutronix.de --- kernel/time/tick-internal.h | 1 + kernel/time/tick-sched.c | 26 +++++++++++++++++++------- 2 files changed, 20 insertions(+), 7 deletions(-) (limited to 'kernel/time/tick-sched.c') diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 8b0c28edbd09..ccf39befde85 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -157,6 +157,7 @@ static inline void tick_nohz_init(void) { } #ifdef CONFIG_NO_HZ_COMMON extern unsigned long tick_nohz_active; extern void timers_update_nohz(void); +extern u64 get_jiffies_update(unsigned long *basej); # ifdef CONFIG_SMP extern struct static_key_false timers_migration_enabled; extern void fetch_next_timer_interrupt_remote(unsigned long basej, u64 basem, diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index f6b613380229..417bb7f880ca 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -799,6 +799,24 @@ static inline bool local_timer_softirq_pending(void) return local_softirq_pending() & BIT(TIMER_SOFTIRQ); } +/* + * Read jiffies and the time when jiffies were updated last + */ +u64 get_jiffies_update(unsigned long *basej) +{ + unsigned long basejiff; + unsigned int seq; + u64 basemono; + + do { + seq = read_seqcount_begin(&jiffies_seq); + basemono = last_jiffies_update; + basejiff = jiffies; + } while (read_seqcount_retry(&jiffies_seq, seq)); + *basej = basejiff; + return basemono; +} + /** * tick_nohz_next_event() - return the clock monotonic based next event * @ts: pointer to tick_sched struct @@ -813,14 +831,8 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) { u64 basemono, next_tick, delta, expires; unsigned long basejiff; - unsigned int seq; - /* Read jiffies and the time when jiffies were updated last */ - do { - seq = read_seqcount_begin(&jiffies_seq); - basemono = last_jiffies_update; - basejiff = jiffies; - } while (read_seqcount_retry(&jiffies_seq, seq)); + basemono = get_jiffies_update(&basejiff); ts->last_jiffies = basejiff; ts->timer_expires_base = basemono; -- cgit v1.2.3-70-g09d2 From ffb7e01c4e654d5c8bf2ce2a4830b826fa1f149e Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Sun, 25 Feb 2024 23:54:53 +0100 Subject: tick/nohz: Remove duplicate between tick_nohz_switch_to_nohz() and tick_setup_sched_timer() The ts->sched_timer initialization work of tick_nohz_switch_to_nohz() is almost the same as that of tick_setup_sched_timer(), so adjust the latter to get it reused by tick_nohz_switch_to_nohz(). This also makes the low resolution mode sched_timer benefit from the tick skew boot option. Signed-off-by: Peng Liu Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240225225508.11587-2-frederic@kernel.org --- kernel/time/hrtimer.c | 2 +- kernel/time/tick-sched.c | 39 ++++++++++++++++++--------------------- kernel/time/tick-sched.h | 2 +- 3 files changed, 20 insertions(+), 23 deletions(-) (limited to 'kernel/time/tick-sched.c') diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 1fd106af747d..95f1f351dcd9 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -747,7 +747,7 @@ static void hrtimer_switch_to_hres(void) base->hres_active = 1; hrtimer_resolution = HIGH_RES_NSEC; - tick_setup_sched_timer(); + tick_setup_sched_timer(NOHZ_MODE_HIGHRES); /* "Retrigger" the interrupt to get things going */ retrigger_next_event(NULL); } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 417bb7f880ca..d4901654148d 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1471,9 +1471,6 @@ static inline void tick_nohz_activate(struct tick_sched *ts, int mode) */ static void tick_nohz_switch_to_nohz(void) { - struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); - ktime_t next; - if (!tick_nohz_enabled) return; @@ -1482,16 +1479,9 @@ static void tick_nohz_switch_to_nohz(void) /* * Recycle the hrtimer in 'ts', so we can share the - * hrtimer_forward_now() function with the highres code. + * highres code. */ - hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); - /* Get the next period */ - next = tick_init_jiffy_update(); - - hrtimer_set_expires(&ts->sched_timer, next); - hrtimer_forward_now(&ts->sched_timer, TICK_NSEC); - tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1); - tick_nohz_activate(ts, NOHZ_MODE_LOWRES); + tick_setup_sched_timer(NOHZ_MODE_LOWRES); } static inline void tick_nohz_irq_enter(void) @@ -1570,7 +1560,11 @@ static enum hrtimer_restart tick_nohz_highres_handler(struct hrtimer *timer) return HRTIMER_RESTART; } +#else +#define tick_nohz_highres_handler NULL +#endif /* CONFIG_HIGH_RES_TIMERS */ +#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS static int sched_skew_tick; static int __init skew_tick(char *str) @@ -1583,15 +1577,17 @@ early_param("skew_tick", skew_tick); /** * tick_setup_sched_timer - setup the tick emulation timer + * @mode: tick_nohz_mode to setup for */ -void tick_setup_sched_timer(void) +void tick_setup_sched_timer(int mode) { struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); - ktime_t now = ktime_get(); /* Emulate tick processing via per-CPU hrtimers: */ hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); - ts->sched_timer.function = tick_nohz_highres_handler; + + if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS) && mode == NOHZ_MODE_HIGHRES) + ts->sched_timer.function = tick_nohz_highres_handler; /* Get the next period (per-CPU) */ hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update()); @@ -1604,13 +1600,14 @@ void tick_setup_sched_timer(void) hrtimer_add_expires_ns(&ts->sched_timer, offset); } - hrtimer_forward(&ts->sched_timer, now, TICK_NSEC); - hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED_HARD); - tick_nohz_activate(ts, NOHZ_MODE_HIGHRES); + hrtimer_forward_now(&ts->sched_timer, TICK_NSEC); + if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS) && mode == NOHZ_MODE_HIGHRES) + hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED_HARD); + else + tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1); + tick_nohz_activate(ts, mode); } -#endif /* HIGH_RES_TIMERS */ -#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS void tick_cancel_sched_timer(int cpu) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); @@ -1632,7 +1629,7 @@ void tick_cancel_sched_timer(int cpu) ts->idle_calls = idle_calls; ts->idle_sleeps = idle_sleeps; } -#endif +#endif /* CONFIG_NO_HZ_COMMON || CONFIG_HIGH_RES_TIMERS */ /* * Async notification about clocksource changes diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h index 5ed5a9d41d5a..35808bbb8a47 100644 --- a/kernel/time/tick-sched.h +++ b/kernel/time/tick-sched.h @@ -102,7 +102,7 @@ struct tick_sched { extern struct tick_sched *tick_get_tick_sched(int cpu); -extern void tick_setup_sched_timer(void); +extern void tick_setup_sched_timer(int mode); #if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS extern void tick_cancel_sched_timer(int cpu); #else -- cgit v1.2.3-70-g09d2 From 37263ba0c44b13b2025398ac81919df9f0a91368 Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Sun, 25 Feb 2024 23:54:54 +0100 Subject: tick/nohz: Remove duplicate between lowres and highres handlers tick_nohz_lowres_handler() does the same work as tick_nohz_highres_handler() plus the clockevent device reprogramming, so make the former reuse the latter and rename it accordingly. Signed-off-by: Peng Liu Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240225225508.11587-3-frederic@kernel.org --- kernel/time/tick-sched.c | 96 ++++++++++++++++++------------------------------ 1 file changed, 36 insertions(+), 60 deletions(-) (limited to 'kernel/time/tick-sched.c') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index d4901654148d..88c992f48126 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -255,6 +255,40 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) update_process_times(user_mode(regs)); profile_tick(CPU_PROFILING); } + +/* + * We rearm the timer until we get disabled by the idle code. + * Called with interrupts disabled. + */ +static enum hrtimer_restart tick_nohz_handler(struct hrtimer *timer) +{ + struct tick_sched *ts = container_of(timer, struct tick_sched, sched_timer); + struct pt_regs *regs = get_irq_regs(); + ktime_t now = ktime_get(); + + tick_sched_do_timer(ts, now); + + /* + * Do not call when we are not in IRQ context and have + * no valid 'regs' pointer + */ + if (regs) + tick_sched_handle(ts, regs); + else + ts->next_tick = 0; + + /* + * In dynticks mode, tick reprogram is deferred: + * - to the idle task if in dynticks-idle + * - to IRQ exit if in full-dynticks. + */ + if (unlikely(ts->tick_stopped)) + return HRTIMER_NORESTART; + + hrtimer_forward(timer, now, TICK_NSEC); + + return HRTIMER_RESTART; +} #endif #ifdef CONFIG_NO_HZ_FULL @@ -1429,31 +1463,15 @@ void tick_nohz_idle_exit(void) * at the clockevent level. hrtimer can't be used instead, because its * infrastructure actually relies on the tick itself as a backend in * low-resolution mode (see hrtimer_run_queues()). - * - * This low-resolution handler still makes use of some hrtimer APIs meanwhile - * for convenience with expiration calculation and forwarding. */ static void tick_nohz_lowres_handler(struct clock_event_device *dev) { struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); - struct pt_regs *regs = get_irq_regs(); - ktime_t now = ktime_get(); dev->next_event = KTIME_MAX; - tick_sched_do_timer(ts, now); - tick_sched_handle(ts, regs); - - /* - * In dynticks mode, tick reprogram is deferred: - * - to the idle task if in dynticks-idle - * - to IRQ exit if in full-dynticks. - */ - if (likely(!ts->tick_stopped)) { - hrtimer_forward(&ts->sched_timer, now, TICK_NSEC); + if (likely(tick_nohz_handler(&ts->sched_timer) == HRTIMER_RESTART)) tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1); - } - } static inline void tick_nohz_activate(struct tick_sched *ts, int mode) @@ -1522,48 +1540,6 @@ void tick_irq_enter(void) tick_nohz_irq_enter(); } -/* - * High resolution timer specific code - */ -#ifdef CONFIG_HIGH_RES_TIMERS -/* - * We rearm the timer until we get disabled by the idle code. - * Called with interrupts disabled. - */ -static enum hrtimer_restart tick_nohz_highres_handler(struct hrtimer *timer) -{ - struct tick_sched *ts = - container_of(timer, struct tick_sched, sched_timer); - struct pt_regs *regs = get_irq_regs(); - ktime_t now = ktime_get(); - - tick_sched_do_timer(ts, now); - - /* - * Do not call when we are not in IRQ context and have - * no valid 'regs' pointer - */ - if (regs) - tick_sched_handle(ts, regs); - else - ts->next_tick = 0; - - /* - * In dynticks mode, tick reprogram is deferred: - * - to the idle task if in dynticks-idle - * - to IRQ exit if in full-dynticks. - */ - if (unlikely(ts->tick_stopped)) - return HRTIMER_NORESTART; - - hrtimer_forward(timer, now, TICK_NSEC); - - return HRTIMER_RESTART; -} -#else -#define tick_nohz_highres_handler NULL -#endif /* CONFIG_HIGH_RES_TIMERS */ - #if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS static int sched_skew_tick; @@ -1587,7 +1563,7 @@ void tick_setup_sched_timer(int mode) hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS) && mode == NOHZ_MODE_HIGHRES) - ts->sched_timer.function = tick_nohz_highres_handler; + ts->sched_timer.function = tick_nohz_handler; /* Get the next period (per-CPU) */ hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update()); -- cgit v1.2.3-70-g09d2 From 3aedb7fcd88af25c215be098bf8ecb9ae8cb60ab Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 25 Feb 2024 23:54:55 +0100 Subject: tick/sched: Remove useless oneshot ifdeffery tick-sched.c is only built when CONFIG_TICK_ONESHOT=y, which is selected only if CONFIG_NO_HZ_COMMON=y or CONFIG_HIGH_RES_TIMERS=y. Therefore the related ifdeferry in this file is needless and can be removed. Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240225225508.11587-4-frederic@kernel.org --- kernel/time/tick-sched.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'kernel/time/tick-sched.c') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 88c992f48126..27aaecb2e50c 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -43,7 +43,6 @@ struct tick_sched *tick_get_tick_sched(int cpu) return &per_cpu(tick_cpu_sched, cpu); } -#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS) /* * The time when the last jiffy update happened. Write access must hold * jiffies_lock and jiffies_seq. tick_nohz_next_event() needs to get a @@ -289,7 +288,6 @@ static enum hrtimer_restart tick_nohz_handler(struct hrtimer *timer) return HRTIMER_RESTART; } -#endif #ifdef CONFIG_NO_HZ_FULL cpumask_var_t tick_nohz_full_mask; @@ -635,7 +633,7 @@ void __init tick_nohz_init(void) pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n", cpumask_pr_args(tick_nohz_full_mask)); } -#endif +#endif /* #ifdef CONFIG_NO_HZ_FULL */ /* * NOHZ - aka dynamic tick functionality @@ -1540,7 +1538,6 @@ void tick_irq_enter(void) tick_nohz_irq_enter(); } -#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS static int sched_skew_tick; static int __init skew_tick(char *str) @@ -1605,7 +1602,6 @@ void tick_cancel_sched_timer(int cpu) ts->idle_calls = idle_calls; ts->idle_sleeps = idle_sleeps; } -#endif /* CONFIG_NO_HZ_COMMON || CONFIG_HIGH_RES_TIMERS */ /* * Async notification about clocksource changes -- cgit v1.2.3-70-g09d2 From 27dc08096ce498ec8b87fb12ce4b9932c8111898 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 25 Feb 2024 23:54:56 +0100 Subject: tick: Use IS_ENABLED() whenever possible Avoid ifdeferry if it can be converted to IS_ENABLED() whenever possible Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240225225508.11587-5-frederic@kernel.org --- kernel/time/tick-common.c | 4 +--- kernel/time/tick-sched.c | 14 +++++--------- 2 files changed, 6 insertions(+), 12 deletions(-) (limited to 'kernel/time/tick-sched.c') diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index e9138cd7a0f5..0084e1ae2583 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -111,15 +111,13 @@ void tick_handle_periodic(struct clock_event_device *dev) tick_periodic(cpu); -#if defined(CONFIG_HIGH_RES_TIMERS) || defined(CONFIG_NO_HZ_COMMON) /* * The cpu might have transitioned to HIGHRES or NOHZ mode via * update_process_times() -> run_local_timers() -> * hrtimer_run_queues(). */ - if (dev->event_handler != tick_handle_periodic) + if (IS_ENABLED(CONFIG_TICK_ONESHOT) && dev->event_handler != tick_handle_periodic) return; -#endif if (!clockevent_state_oneshot(dev)) return; diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 27aaecb2e50c..4e34967edc0d 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -186,7 +186,6 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now) { int cpu = smp_processor_id(); -#ifdef CONFIG_NO_HZ_COMMON /* * Check if the do_timer duty was dropped. We don't care about * concurrency: This happens only when the CPU in charge went @@ -197,13 +196,13 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now) * If nohz_full is enabled, this should not happen because the * 'tick_do_timer_cpu' CPU never relinquishes. */ - if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) { + if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && + unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) { #ifdef CONFIG_NO_HZ_FULL WARN_ON_ONCE(tick_nohz_full_running); #endif tick_do_timer_cpu = cpu; } -#endif /* Check if jiffies need an update */ if (tick_do_timer_cpu == cpu) @@ -230,7 +229,6 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now) static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) { -#ifdef CONFIG_NO_HZ_COMMON /* * When we are idle and the tick is stopped, we have to touch * the watchdog as we might not schedule for a really long @@ -239,7 +237,7 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) * idle" jiffy stamp so the idle accounting adjustment we do * when we go busy again does not account too many ticks. */ - if (ts->tick_stopped) { + if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && ts->tick_stopped) { touch_softlockup_watchdog_sched(); if (is_idle_task(current)) ts->idle_jiffies++; @@ -250,7 +248,7 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) */ ts->next_tick = 0; } -#endif + update_process_times(user_mode(regs)); profile_tick(CPU_PROFILING); } @@ -1587,10 +1585,8 @@ void tick_cancel_sched_timer(int cpu) ktime_t idle_sleeptime, iowait_sleeptime; unsigned long idle_calls, idle_sleeps; -# ifdef CONFIG_HIGH_RES_TIMERS - if (ts->sched_timer.base) + if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS) && ts->sched_timer.base) hrtimer_cancel(&ts->sched_timer); -# endif idle_sleeptime = ts->idle_sleeptime; iowait_sleeptime = ts->iowait_sleeptime; -- cgit v1.2.3-70-g09d2 From 3650f49bfb954119a33613672abe4ca3bbbf6243 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 25 Feb 2024 23:54:57 +0100 Subject: tick/sched: Rename tick_nohz_stop_sched_tick() to tick_nohz_full_stop_tick() tick_nohz_stop_sched_tick() is only about NOHZ_full and not about dynticks-idle. Reflect that in the function name to avoid confusion. Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240225225508.11587-6-frederic@kernel.org --- kernel/time/tick-sched.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/time/tick-sched.c') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 4e34967edc0d..9f75f5621965 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1040,7 +1040,7 @@ static void tick_nohz_retain_tick(struct tick_sched *ts) } #ifdef CONFIG_NO_HZ_FULL -static void tick_nohz_stop_sched_tick(struct tick_sched *ts, int cpu) +static void tick_nohz_full_stop_tick(struct tick_sched *ts, int cpu) { if (tick_nohz_next_event(ts, cpu)) tick_nohz_stop_tick(ts, cpu); @@ -1075,7 +1075,7 @@ static void __tick_nohz_full_update_tick(struct tick_sched *ts, int cpu = smp_processor_id(); if (can_stop_full_tick(cpu, ts)) - tick_nohz_stop_sched_tick(ts, cpu); + tick_nohz_full_stop_tick(ts, cpu); else if (ts->tick_stopped) tick_nohz_restart_sched_tick(ts, now); #endif -- cgit v1.2.3-70-g09d2 From 60313c21c33abc08108bdd60390fa89563977e64 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 25 Feb 2024 23:54:58 +0100 Subject: tick/sched: Don't clear ts::next_tick again in can_stop_idle_tick() The tick sched structure is already cleared from tick_cancel_sched_timer(), so there is no need to clear that field again. Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240225225508.11587-7-frederic@kernel.org --- kernel/time/tick-sched.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'kernel/time/tick-sched.c') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 9f75f5621965..b17895de26b9 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1143,11 +1143,6 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) if (unlikely(!cpu_online(cpu))) { if (cpu == tick_do_timer_cpu) tick_do_timer_cpu = TICK_DO_TIMER_NONE; - /* - * Make sure the CPU doesn't get fooled by obsolete tick - * deadline if it comes back online later. - */ - ts->next_tick = 0; return false; } -- cgit v1.2.3-70-g09d2 From d9b1865c86aec7c515db5718e4820106c2c12db3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 25 Feb 2024 23:55:02 +0100 Subject: tick: Assume the tick can't be stopped in NOHZ_MODE_INACTIVE mode The full-nohz update function checks if the nohz mode is active before proceeding. It considers one exception though: if the tick is already stopped even though the nohz mode is inactive, it still moves on in order to update/restart the tick if needed. However in order for the tick to be stopped, the nohz_mode has to be either NOHZ_MODE_LOWRES or NOHZ_MODE_HIGHRES. Therefore it doesn't make sense to test if the tick is stopped before verifying NOHZ_MODE_INACTIVE mode. Remove the needless related condition. Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240225225508.11587-11-frederic@kernel.org --- kernel/time/tick-sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/time/tick-sched.c') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index b17895de26b9..b79f5403433b 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1086,7 +1086,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts) if (!tick_nohz_full_cpu(smp_processor_id())) return; - if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE) + if (ts->nohz_mode == NOHZ_MODE_INACTIVE) return; __tick_nohz_full_update_tick(ts, ktime_get()); -- cgit v1.2.3-70-g09d2 From a478ffb2ae234ee1ece2b84719762c54d304e2c7 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 25 Feb 2024 23:55:04 +0100 Subject: tick: Move individual bit features to debuggable mask accesses The individual bitfields of struct tick_sched must be modified from IRQs disabled places, otherwise local modifications can race due to them sharing the same memory storage. The recent move of the "got_idle_tick" bitfield to its own storage shows that the use of these bitfields, as pretty as they look, can be as much error prone. In order to avoid future issues of the like and make sure that those bitfields are safely accessed, move those flags to an explicit mask along with a mutator function performing the basic IRQs disabled sanity check. Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240225225508.11587-13-frederic@kernel.org --- kernel/time/tick-sched.c | 88 ++++++++++++++++++++++++++++++------------------ kernel/time/tick-sched.h | 23 ++++++++----- kernel/time/timer_list.c | 5 ++- 3 files changed, 73 insertions(+), 43 deletions(-) (limited to 'kernel/time/tick-sched.c') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index b79f5403433b..4aa7ce04a72c 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -180,6 +180,26 @@ static ktime_t tick_init_jiffy_update(void) return period; } +static inline int tick_sched_flag_test(struct tick_sched *ts, + unsigned long flag) +{ + return !!(ts->flags & flag); +} + +static inline void tick_sched_flag_set(struct tick_sched *ts, + unsigned long flag) +{ + lockdep_assert_irqs_disabled(); + ts->flags |= flag; +} + +static inline void tick_sched_flag_clear(struct tick_sched *ts, + unsigned long flag) +{ + lockdep_assert_irqs_disabled(); + ts->flags &= ~flag; +} + #define MAX_STALLED_JIFFIES 5 static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now) @@ -223,7 +243,7 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now) } } - if (ts->inidle) + if (tick_sched_flag_test(ts, TS_FLAG_INIDLE)) ts->got_idle_tick = 1; } @@ -237,7 +257,8 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) * idle" jiffy stamp so the idle accounting adjustment we do * when we go busy again does not account too many ticks. */ - if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && ts->tick_stopped) { + if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && + tick_sched_flag_test(ts, TS_FLAG_STOPPED)) { touch_softlockup_watchdog_sched(); if (is_idle_task(current)) ts->idle_jiffies++; @@ -279,7 +300,7 @@ static enum hrtimer_restart tick_nohz_handler(struct hrtimer *timer) * - to the idle task if in dynticks-idle * - to IRQ exit if in full-dynticks. */ - if (unlikely(ts->tick_stopped)) + if (unlikely(tick_sched_flag_test(ts, TS_FLAG_STOPPED))) return HRTIMER_NORESTART; hrtimer_forward(timer, now, TICK_NSEC); @@ -559,7 +580,7 @@ void __tick_nohz_task_switch(void) ts = this_cpu_ptr(&tick_cpu_sched); - if (ts->tick_stopped) { + if (tick_sched_flag_test(ts, TS_FLAG_STOPPED)) { if (atomic_read(¤t->tick_dep_mask) || atomic_read(¤t->signal->tick_dep_mask)) tick_nohz_full_kick(); @@ -656,14 +677,14 @@ bool tick_nohz_tick_stopped(void) { struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); - return ts->tick_stopped; + return tick_sched_flag_test(ts, TS_FLAG_STOPPED); } bool tick_nohz_tick_stopped_cpu(int cpu) { struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu); - return ts->tick_stopped; + return tick_sched_flag_test(ts, TS_FLAG_STOPPED); } /** @@ -693,7 +714,7 @@ static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now) { ktime_t delta; - if (WARN_ON_ONCE(!ts->idle_active)) + if (WARN_ON_ONCE(!tick_sched_flag_test(ts, TS_FLAG_IDLE_ACTIVE))) return; delta = ktime_sub(now, ts->idle_entrytime); @@ -705,7 +726,7 @@ static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now) ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); ts->idle_entrytime = now; - ts->idle_active = 0; + tick_sched_flag_clear(ts, TS_FLAG_IDLE_ACTIVE); write_seqcount_end(&ts->idle_sleeptime_seq); sched_clock_idle_wakeup_event(); @@ -715,7 +736,7 @@ static void tick_nohz_start_idle(struct tick_sched *ts) { write_seqcount_begin(&ts->idle_sleeptime_seq); ts->idle_entrytime = ktime_get(); - ts->idle_active = 1; + tick_sched_flag_set(ts, TS_FLAG_IDLE_ACTIVE); write_seqcount_end(&ts->idle_sleeptime_seq); sched_clock_idle_sleep_event(); @@ -737,7 +758,7 @@ static u64 get_cpu_sleep_time_us(struct tick_sched *ts, ktime_t *sleeptime, do { seq = read_seqcount_begin(&ts->idle_sleeptime_seq); - if (ts->idle_active && compute_delta) { + if (tick_sched_flag_test(ts, TS_FLAG_IDLE_ACTIVE) && compute_delta) { ktime_t delta = ktime_sub(now, ts->idle_entrytime); idle = ktime_add(*sleeptime, delta); @@ -905,7 +926,7 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) * We've not stopped the tick yet, and there's a timer in the * next period, so no point in stopping it either, bail. */ - if (!ts->tick_stopped) { + if (!tick_sched_flag_test(ts, TS_FLAG_STOPPED)) { ts->timer_expires = 0; goto out; } @@ -918,7 +939,8 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) */ delta = timekeeping_max_deferment(); if (cpu != tick_do_timer_cpu && - (tick_do_timer_cpu != TICK_DO_TIMER_NONE || !ts->do_timer_last)) + (tick_do_timer_cpu != TICK_DO_TIMER_NONE || + !tick_sched_flag_test(ts, TS_FLAG_DO_TIMER_LAST))) delta = KTIME_MAX; /* Calculate the next expiry time */ @@ -938,7 +960,7 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu) struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); unsigned long basejiff = ts->last_jiffies; u64 basemono = ts->timer_expires_base; - bool timer_idle = ts->tick_stopped; + bool timer_idle = tick_sched_flag_test(ts, TS_FLAG_STOPPED); u64 expires; /* Make sure we won't be trying to stop it twice in a row. */ @@ -978,13 +1000,13 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu) */ if (cpu == tick_do_timer_cpu) { tick_do_timer_cpu = TICK_DO_TIMER_NONE; - ts->do_timer_last = 1; + tick_sched_flag_set(ts, TS_FLAG_DO_TIMER_LAST); } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) { - ts->do_timer_last = 0; + tick_sched_flag_clear(ts, TS_FLAG_DO_TIMER_LAST); } /* Skip reprogram of event if it's not changed */ - if (ts->tick_stopped && (expires == ts->next_tick)) { + if (tick_sched_flag_test(ts, TS_FLAG_STOPPED) && (expires == ts->next_tick)) { /* Sanity check: make sure clockevent is actually programmed */ if (expires == KTIME_MAX || ts->next_tick == hrtimer_get_expires(&ts->sched_timer)) return; @@ -1002,12 +1024,12 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu) * call we save the current tick time, so we can restart the * scheduler tick in tick_nohz_restart_sched_tick(). */ - if (!ts->tick_stopped) { + if (!tick_sched_flag_test(ts, TS_FLAG_STOPPED)) { calc_load_nohz_start(); quiet_vmstat(); ts->last_tick = hrtimer_get_expires(&ts->sched_timer); - ts->tick_stopped = 1; + tick_sched_flag_set(ts, TS_FLAG_STOPPED); trace_tick_stop(1, TICK_DEP_MASK_NONE); } @@ -1064,7 +1086,7 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) touch_softlockup_watchdog_sched(); /* Cancel the scheduled timer and restore the tick: */ - ts->tick_stopped = 0; + tick_sched_flag_clear(ts, TS_FLAG_STOPPED); tick_nohz_restart(ts, now); } @@ -1076,7 +1098,7 @@ static void __tick_nohz_full_update_tick(struct tick_sched *ts, if (can_stop_full_tick(cpu, ts)) tick_nohz_full_stop_tick(ts, cpu); - else if (ts->tick_stopped) + else if (tick_sched_flag_test(ts, TS_FLAG_STOPPED)) tick_nohz_restart_sched_tick(ts, now); #endif } @@ -1196,14 +1218,14 @@ void tick_nohz_idle_stop_tick(void) ts->idle_calls++; if (expires > 0LL) { - int was_stopped = ts->tick_stopped; + int was_stopped = tick_sched_flag_test(ts, TS_FLAG_STOPPED); tick_nohz_stop_tick(ts, cpu); ts->idle_sleeps++; ts->idle_expires = expires; - if (!was_stopped && ts->tick_stopped) { + if (!was_stopped && tick_sched_flag_test(ts, TS_FLAG_STOPPED)) { ts->idle_jiffies = ts->last_jiffies; nohz_balance_enter_idle(cpu); } @@ -1234,7 +1256,7 @@ void tick_nohz_idle_enter(void) WARN_ON_ONCE(ts->timer_expires_base); - ts->inidle = 1; + tick_sched_flag_set(ts, TS_FLAG_INIDLE); tick_nohz_start_idle(ts); local_irq_enable(); @@ -1263,7 +1285,7 @@ void tick_nohz_irq_exit(void) { struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); - if (ts->inidle) + if (tick_sched_flag_test(ts, TS_FLAG_INIDLE)) tick_nohz_start_idle(ts); else tick_nohz_full_update_tick(ts); @@ -1317,7 +1339,7 @@ ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next) ktime_t now = ts->idle_entrytime; ktime_t next_event; - WARN_ON_ONCE(!ts->inidle); + WARN_ON_ONCE(!tick_sched_flag_test(ts, TS_FLAG_INIDLE)); *delta_next = ktime_sub(dev->next_event, now); @@ -1389,7 +1411,7 @@ void tick_nohz_idle_restart_tick(void) { struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); - if (ts->tick_stopped) { + if (tick_sched_flag_test(ts, TS_FLAG_STOPPED)) { ktime_t now = ktime_get(); tick_nohz_restart_sched_tick(ts, now); tick_nohz_account_idle_time(ts, now); @@ -1430,12 +1452,12 @@ void tick_nohz_idle_exit(void) local_irq_disable(); - WARN_ON_ONCE(!ts->inidle); + WARN_ON_ONCE(!tick_sched_flag_test(ts, TS_FLAG_INIDLE)); WARN_ON_ONCE(ts->timer_expires_base); - ts->inidle = 0; - idle_active = ts->idle_active; - tick_stopped = ts->tick_stopped; + tick_sched_flag_clear(ts, TS_FLAG_INIDLE); + idle_active = tick_sched_flag_test(ts, TS_FLAG_IDLE_ACTIVE); + tick_stopped = tick_sched_flag_test(ts, TS_FLAG_STOPPED); if (idle_active || tick_stopped) now = ktime_get(); @@ -1498,10 +1520,10 @@ static inline void tick_nohz_irq_enter(void) struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); ktime_t now; - if (!ts->idle_active && !ts->tick_stopped) + if (!tick_sched_flag_test(ts, TS_FLAG_STOPPED | TS_FLAG_IDLE_ACTIVE)) return; now = ktime_get(); - if (ts->idle_active) + if (tick_sched_flag_test(ts, TS_FLAG_IDLE_ACTIVE)) tick_nohz_stop_idle(ts, now); /* * If all CPUs are idle we may need to update a stale jiffies value. @@ -1510,7 +1532,7 @@ static inline void tick_nohz_irq_enter(void) * rare case (typically stop machine). So we must make sure we have a * last resort. */ - if (ts->tick_stopped) + if (tick_sched_flag_test(ts, TS_FLAG_STOPPED)) tick_nohz_update_jiffies(now); } diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h index 3b555e0fa937..07a4c0144c47 100644 --- a/kernel/time/tick-sched.h +++ b/kernel/time/tick-sched.h @@ -20,14 +20,22 @@ enum tick_nohz_mode { NOHZ_MODE_HIGHRES, }; +/* The CPU is in the tick idle mode */ +#define TS_FLAG_INIDLE BIT(0) +/* The idle tick has been stopped */ +#define TS_FLAG_STOPPED BIT(1) +/* + * Indicator that the CPU is actively in the tick idle mode; + * it is reset during irq handling phases. + */ +#define TS_FLAG_IDLE_ACTIVE BIT(2) +/* CPU was the last one doing do_timer before going idle */ +#define TS_FLAG_DO_TIMER_LAST BIT(3) + /** * struct tick_sched - sched tick emulation and no idle tick control/stats * - * @inidle: Indicator that the CPU is in the tick idle mode - * @tick_stopped: Indicator that the idle tick has been stopped - * @idle_active: Indicator that the CPU is actively in the tick idle mode; - * it is reset during irq handling phases. - * @do_timer_last: CPU was the last one doing do_timer before going idle + * @flags: State flags gathering the TS_FLAG_* features * @got_idle_tick: Tick timer function has run with @inidle set * @stalled_jiffies: Number of stalled jiffies detected across ticks * @last_tick_jiffies: Value of jiffies seen on last tick @@ -57,10 +65,7 @@ enum tick_nohz_mode { */ struct tick_sched { /* Common flags */ - unsigned int inidle : 1; - unsigned int tick_stopped : 1; - unsigned int idle_active : 1; - unsigned int do_timer_last : 1; + unsigned long flags; /* Tick handling: jiffies stall check */ unsigned int stalled_jiffies; diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index ed7d6ad694fb..38f81d836fc5 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -147,11 +147,14 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now) # define P_ns(x) \ SEQ_printf(m, " .%-15s: %Lu nsecs\n", #x, \ (unsigned long long)(ktime_to_ns(ts->x))) +# define P_flag(x, f) \ + SEQ_printf(m, " .%-15s: %d\n", #x, !!(ts->flags & (f))) + { struct tick_sched *ts = tick_get_tick_sched(cpu); P(nohz_mode); P_ns(last_tick); - P(tick_stopped); + P_flag(tick_stopped, TS_FLAG_STOPPED); P(idle_jiffies); P(idle_calls); P(idle_sleeps); -- cgit v1.2.3-70-g09d2 From 7988e5ae2be70b110db9d4b8ec163bd42e67d3be Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 25 Feb 2024 23:55:05 +0100 Subject: tick: Split nohz and highres features from nohz_mode The nohz mode field tells about low resolution nohz mode or high resolution nohz mode but it doesn't tell about high resolution non-nohz mode. In order to retrieve the latter state, tick_cancel_sched_timer() must fiddle with struct hrtimer's internals to guess if the tick has been initialized in high resolution. Move instead the nohz mode field information into the tick flags and provide two new bits: one to know if the tick is in nohz mode and another one to know if the tick is in high resolution. The combination of those two flags provides all the needed informations to determine which of the three tick modes is running. Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240225225508.11587-14-frederic@kernel.org --- kernel/time/hrtimer.c | 2 +- kernel/time/tick-sched.c | 32 +++++++++++++++++--------------- kernel/time/tick-sched.h | 13 +++++-------- kernel/time/timer_list.c | 5 +++-- 4 files changed, 26 insertions(+), 26 deletions(-) (limited to 'kernel/time/tick-sched.c') diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 3e95474199ac..70625dff62ce 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -747,7 +747,7 @@ static void hrtimer_switch_to_hres(void) base->hres_active = 1; hrtimer_resolution = HIGH_RES_NSEC; - tick_setup_sched_timer(NOHZ_MODE_HIGHRES); + tick_setup_sched_timer(true); /* "Retrigger" the interrupt to get things going */ retrigger_next_event(NULL); } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 4aa7ce04a72c..dcb9f0394182 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -831,7 +831,7 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) /* Forward the time to expire in the future */ hrtimer_forward(&ts->sched_timer, now, TICK_NSEC); - if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { + if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES)) { hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED_HARD); } else { @@ -1040,14 +1040,14 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu) * the tick timer. */ if (unlikely(expires == KTIME_MAX)) { - if (ts->nohz_mode == NOHZ_MODE_HIGHRES) + if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES)) hrtimer_cancel(&ts->sched_timer); else tick_program_event(KTIME_MAX, 1); return; } - if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { + if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES)) { hrtimer_start(&ts->sched_timer, expires, HRTIMER_MODE_ABS_PINNED_HARD); } else { @@ -1108,7 +1108,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts) if (!tick_nohz_full_cpu(smp_processor_id())) return; - if (ts->nohz_mode == NOHZ_MODE_INACTIVE) + if (!tick_sched_flag_test(ts, TS_FLAG_NOHZ)) return; __tick_nohz_full_update_tick(ts, ktime_get()); @@ -1168,7 +1168,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) return false; } - if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) + if (unlikely(!tick_sched_flag_test(ts, TS_FLAG_NOHZ))) return false; if (need_resched()) @@ -1487,11 +1487,11 @@ static void tick_nohz_lowres_handler(struct clock_event_device *dev) tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1); } -static inline void tick_nohz_activate(struct tick_sched *ts, int mode) +static inline void tick_nohz_activate(struct tick_sched *ts) { if (!tick_nohz_enabled) return; - ts->nohz_mode = mode; + tick_sched_flag_set(ts, TS_FLAG_NOHZ); /* One update is enough */ if (!test_and_set_bit(0, &tick_nohz_active)) timers_update_nohz(); @@ -1512,7 +1512,7 @@ static void tick_nohz_switch_to_nohz(void) * Recycle the hrtimer in 'ts', so we can share the * highres code. */ - tick_setup_sched_timer(NOHZ_MODE_LOWRES); + tick_setup_sched_timer(false); } static inline void tick_nohz_irq_enter(void) @@ -1540,7 +1540,7 @@ static inline void tick_nohz_irq_enter(void) static inline void tick_nohz_switch_to_nohz(void) { } static inline void tick_nohz_irq_enter(void) { } -static inline void tick_nohz_activate(struct tick_sched *ts, int mode) { } +static inline void tick_nohz_activate(struct tick_sched *ts) { } #endif /* CONFIG_NO_HZ_COMMON */ @@ -1567,15 +1567,17 @@ early_param("skew_tick", skew_tick); * tick_setup_sched_timer - setup the tick emulation timer * @mode: tick_nohz_mode to setup for */ -void tick_setup_sched_timer(int mode) +void tick_setup_sched_timer(bool hrtimer) { struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); /* Emulate tick processing via per-CPU hrtimers: */ hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); - if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS) && mode == NOHZ_MODE_HIGHRES) + if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS) && hrtimer) { + tick_sched_flag_set(ts, TS_FLAG_HIGHRES); ts->sched_timer.function = tick_nohz_handler; + } /* Get the next period (per-CPU) */ hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update()); @@ -1589,11 +1591,11 @@ void tick_setup_sched_timer(int mode) } hrtimer_forward_now(&ts->sched_timer, TICK_NSEC); - if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS) && mode == NOHZ_MODE_HIGHRES) + if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS) && hrtimer) hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED_HARD); else tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1); - tick_nohz_activate(ts, mode); + tick_nohz_activate(ts); } void tick_cancel_sched_timer(int cpu) @@ -1602,7 +1604,7 @@ void tick_cancel_sched_timer(int cpu) ktime_t idle_sleeptime, iowait_sleeptime; unsigned long idle_calls, idle_sleeps; - if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS) && ts->sched_timer.base) + if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES)) hrtimer_cancel(&ts->sched_timer); idle_sleeptime = ts->idle_sleeptime; @@ -1652,7 +1654,7 @@ int tick_check_oneshot_change(int allow_nohz) if (!test_and_clear_bit(0, &ts->check_clocks)) return 0; - if (ts->nohz_mode != NOHZ_MODE_INACTIVE) + if (tick_sched_flag_test(ts, TS_FLAG_NOHZ)) return 0; if (!timekeeping_valid_for_hres() || !tick_is_oneshot_available()) diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h index 07a4c0144c47..bbe72a078985 100644 --- a/kernel/time/tick-sched.h +++ b/kernel/time/tick-sched.h @@ -14,12 +14,6 @@ struct tick_device { enum tick_device_mode mode; }; -enum tick_nohz_mode { - NOHZ_MODE_INACTIVE, - NOHZ_MODE_LOWRES, - NOHZ_MODE_HIGHRES, -}; - /* The CPU is in the tick idle mode */ #define TS_FLAG_INIDLE BIT(0) /* The idle tick has been stopped */ @@ -31,6 +25,10 @@ enum tick_nohz_mode { #define TS_FLAG_IDLE_ACTIVE BIT(2) /* CPU was the last one doing do_timer before going idle */ #define TS_FLAG_DO_TIMER_LAST BIT(3) +/* NO_HZ is enabled */ +#define TS_FLAG_NOHZ BIT(4) +/* High resolution tick mode */ +#define TS_FLAG_HIGHRES BIT(5) /** * struct tick_sched - sched tick emulation and no idle tick control/stats @@ -84,7 +82,6 @@ struct tick_sched { ktime_t idle_entrytime; /* Tick stop */ - enum tick_nohz_mode nohz_mode; unsigned long last_jiffies; u64 timer_expires_base; u64 timer_expires; @@ -107,7 +104,7 @@ struct tick_sched { extern struct tick_sched *tick_get_tick_sched(int cpu); -extern void tick_setup_sched_timer(int mode); +extern void tick_setup_sched_timer(bool hrtimer); #if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS extern void tick_cancel_sched_timer(int cpu); #else diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 38f81d836fc5..1c311c46da50 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -152,7 +152,8 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now) { struct tick_sched *ts = tick_get_tick_sched(cpu); - P(nohz_mode); + P_flag(nohz, TS_FLAG_NOHZ); + P_flag(highres, TS_FLAG_HIGHRES); P_ns(last_tick); P_flag(tick_stopped, TS_FLAG_STOPPED); P(idle_jiffies); @@ -259,7 +260,7 @@ static void timer_list_show_tickdevices_header(struct seq_file *m) static inline void timer_list_header(struct seq_file *m, u64 now) { - SEQ_printf(m, "Timer List Version: v0.9\n"); + SEQ_printf(m, "Timer List Version: v0.10\n"); SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES); SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now); SEQ_printf(m, "\n"); -- cgit v1.2.3-70-g09d2 From 3f69d04e146c6e14ccdd4e7b37d93f789229202a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 25 Feb 2024 23:55:06 +0100 Subject: tick: Shut down low-res tick from dying CPU The timekeeping duty is handed over from the outgoing CPU within stop machine. This works well if CONFIG_NO_HZ_COMMON=n or the tick is in high-res mode. However in low-res dynticks mode, the tick isn't cancelled until the clockevent is shut down, which can happen later. The tick may therefore fire again once IRQs are re-enabled on stop machine and until IRQs are disabled for good upon the last call to idle. That's so many opportunities for a timekeeper to go idle and the outgoing CPU to take over that duty. This is why tick_nohz_idle_stop_tick() is called one last time on idle if the CPU is seen offline: so that the timekeeping duty is handed over again in case the CPU has re-taken the duty. This means there are two timekeeping handovers on CPU down hotplug with different undocumented constraints and purposes: 1) A handover on stop machine for !dynticks || highres. All online CPUs are guaranteed to be non-idle and the timekeeping duty can be safely handed-over. The hrtimer tick is cancelled so it is guaranteed that in dynticks mode the outgoing CPU won't take again the duty. 2) A handover on last idle call for dynticks && lowres. Setting the duty to TICK_DO_TIMER_NONE makes sure that a CPU will take over the timekeeping. Prepare for consolidating the handover to a single place (the first one) with shutting down the low-res tick as well from tick_cancel_sched_timer() as well. This will simplify the handover and unify the tick cancellation between high-res and low-res. Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240225225508.11587-15-frederic@kernel.org --- kernel/time/tick-common.c | 3 ++- kernel/time/tick-sched.c | 32 +++++++++++++++++++++++++------- kernel/time/tick-sched.h | 4 ++-- 3 files changed, 29 insertions(+), 10 deletions(-) (limited to 'kernel/time/tick-sched.c') diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 522414089c0d..9cd09eea06d6 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -410,7 +410,8 @@ int tick_cpu_dying(unsigned int dying_cpu) if (tick_do_timer_cpu == dying_cpu) tick_do_timer_cpu = cpumask_first(cpu_online_mask); - tick_cancel_sched_timer(dying_cpu); + /* Make sure the CPU won't try to retake the timekeeping duty */ + tick_sched_timer_dying(dying_cpu); /* Remove CPU from timer broadcasting */ tick_offline_cpu(dying_cpu); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index dcb9f0394182..89d16b8ea2c4 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -308,6 +308,14 @@ static enum hrtimer_restart tick_nohz_handler(struct hrtimer *timer) return HRTIMER_RESTART; } +static void tick_sched_timer_cancel(struct tick_sched *ts) +{ + if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES)) + hrtimer_cancel(&ts->sched_timer); + else if (tick_sched_flag_test(ts, TS_FLAG_NOHZ)) + tick_program_event(KTIME_MAX, 1); +} + #ifdef CONFIG_NO_HZ_FULL cpumask_var_t tick_nohz_full_mask; EXPORT_SYMBOL_GPL(tick_nohz_full_mask); @@ -1040,10 +1048,7 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu) * the tick timer. */ if (unlikely(expires == KTIME_MAX)) { - if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES)) - hrtimer_cancel(&ts->sched_timer); - else - tick_program_event(KTIME_MAX, 1); + tick_sched_timer_cancel(ts); return; } @@ -1598,14 +1603,27 @@ void tick_setup_sched_timer(bool hrtimer) tick_nohz_activate(ts); } -void tick_cancel_sched_timer(int cpu) +/* + * Shut down the tick and make sure the CPU won't try to retake the timekeeping + * duty before disabling IRQs in idle for the last time. + */ +void tick_sched_timer_dying(int cpu) { + struct tick_device *td = &per_cpu(tick_cpu_device, cpu); struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); + struct clock_event_device *dev = td->evtdev; ktime_t idle_sleeptime, iowait_sleeptime; unsigned long idle_calls, idle_sleeps; - if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES)) - hrtimer_cancel(&ts->sched_timer); + /* This must happen before hrtimers are migrated! */ + tick_sched_timer_cancel(ts); + + /* + * If the clockevents doesn't support CLOCK_EVT_STATE_ONESHOT_STOPPED, + * make sure not to call low-res tick handler. + */ + if (tick_sched_flag_test(ts, TS_FLAG_NOHZ)) + dev->event_handler = clockevents_handle_noop; idle_sleeptime = ts->idle_sleeptime; iowait_sleeptime = ts->iowait_sleeptime; diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h index bbe72a078985..58d8d1c49dd3 100644 --- a/kernel/time/tick-sched.h +++ b/kernel/time/tick-sched.h @@ -106,9 +106,9 @@ extern struct tick_sched *tick_get_tick_sched(int cpu); extern void tick_setup_sched_timer(bool hrtimer); #if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS -extern void tick_cancel_sched_timer(int cpu); +extern void tick_sched_timer_dying(int cpu); #else -static inline void tick_cancel_sched_timer(int cpu) { } +static inline void tick_sched_timer_dying(int cpu) { } #endif #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST -- cgit v1.2.3-70-g09d2 From 500f8f9bced86f0c0f2482773bd64a1b7ec9c4e1 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 25 Feb 2024 23:55:07 +0100 Subject: tick: Assume timekeeping is correctly handed over upon last offline idle call The timekeeping duty is handed over from the outgoing CPU on stop machine, then the oneshot tick is stopped right after. Therefore it's guaranteed that the current CPU isn't the timekeeper upon its last call to idle. Besides, calling tick_nohz_idle_stop_tick() while the dying CPU goes into idle suggests that the tick is going to be stopped while it is actually stopped already from the appropriate CPU hotplug state. Remove the confusing call and the obsolete case handling and convert it to a sanity check that verifies the above assumption. Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240225225508.11587-16-frederic@kernel.org --- include/linux/tick.h | 2 ++ kernel/cpu.c | 1 + kernel/sched/idle.c | 1 - kernel/time/tick-common.c | 4 ++++ kernel/time/tick-sched.c | 13 +------------ 5 files changed, 8 insertions(+), 13 deletions(-) (limited to 'kernel/time/tick-sched.c') diff --git a/include/linux/tick.h b/include/linux/tick.h index c7840ae8ebaf..44fddfa93e18 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -29,8 +29,10 @@ static inline void tick_cleanup_dead_cpu(int cpu) { } #if defined(CONFIG_GENERIC_CLOCKEVENTS) && defined(CONFIG_HOTPLUG_CPU) extern int tick_cpu_dying(unsigned int cpu); +extern void tick_assert_timekeeping_handover(void); #else #define tick_cpu_dying NULL +static inline void tick_assert_timekeeping_handover(void) { } #endif #if defined(CONFIG_GENERIC_CLOCKEVENTS) && defined(CONFIG_SUSPEND) diff --git a/kernel/cpu.c b/kernel/cpu.c index 5a8ad4f5ccf3..7e84a7b0675e 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1399,6 +1399,7 @@ void cpuhp_report_idle_dead(void) struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state); BUG_ON(st->state != CPUHP_AP_OFFLINE); + tick_assert_timekeeping_handover(); rcutree_report_cpu_dead(); st->state = CPUHP_AP_IDLE_DEAD; /* diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 31231925f1ec..b15d40cad7ea 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -291,7 +291,6 @@ static void do_idle(void) local_irq_disable(); if (cpu_is_offline(cpu)) { - tick_nohz_idle_stop_tick(); cpuhp_report_idle_dead(); arch_cpu_idle_dead(); } diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 9cd09eea06d6..fb0fdec8719a 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -396,6 +396,10 @@ int tick_broadcast_oneshot_control(enum tick_broadcast_state state) EXPORT_SYMBOL_GPL(tick_broadcast_oneshot_control); #ifdef CONFIG_HOTPLUG_CPU +void tick_assert_timekeeping_handover(void) +{ + WARN_ON_ONCE(tick_do_timer_cpu == smp_processor_id()); +} /* * Stop the tick and transfer the timekeeping job away from a dying cpu. */ diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 89d16b8ea2c4..269e21590df5 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1160,18 +1160,7 @@ static bool report_idle_softirq(void) static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) { - /* - * If this CPU is offline and it is the one which updates - * jiffies, then give up the assignment and let it be taken by - * the CPU which runs the tick timer next. If we don't drop - * this here, the jiffies might be stale and do_timer() never - * gets invoked. - */ - if (unlikely(!cpu_online(cpu))) { - if (cpu == tick_do_timer_cpu) - tick_do_timer_cpu = TICK_DO_TIMER_NONE; - return false; - } + WARN_ON_ONCE(cpu_is_offline(cpu)); if (unlikely(!tick_sched_flag_test(ts, TS_FLAG_NOHZ))) return false; -- cgit v1.2.3-70-g09d2