From 27dc08096ce498ec8b87fb12ce4b9932c8111898 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Sun, 25 Feb 2024 23:54:56 +0100
Subject: tick: Use IS_ENABLED() whenever possible

Avoid ifdeffery wherever it can be converted to IS_ENABLED().

Signed-off-by: Frederic Weisbecker
Signed-off-by: Thomas Gleixner
Reviewed-by: Thomas Gleixner
Link: https://lore.kernel.org/r/20240225225508.11587-5-frederic@kernel.org
---
 kernel/time/tick-common.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'kernel/time/tick-common.c')

diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index e9138cd7a0f5..0084e1ae2583 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -111,15 +111,13 @@ void tick_handle_periodic(struct clock_event_device *dev)
 
 	tick_periodic(cpu);
 
-#if defined(CONFIG_HIGH_RES_TIMERS) || defined(CONFIG_NO_HZ_COMMON)
 	/*
 	 * The cpu might have transitioned to HIGHRES or NOHZ mode via
 	 * update_process_times() -> run_local_timers() ->
 	 * hrtimer_run_queues().
 	 */
-	if (dev->event_handler != tick_handle_periodic)
+	if (IS_ENABLED(CONFIG_TICK_ONESHOT) && dev->event_handler != tick_handle_periodic)
 		return;
-#endif
 
 	if (!clockevent_state_oneshot(dev))
 		return;
-- 
cgit v1.2.3-70-g09d2
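As a side note for readers unfamiliar with the macro: unlike #ifdef, IS_ENABLED() keeps the guarded branch visible to the compiler and relies on constant folding to remove it, so the code keeps getting build coverage in all configurations. Below is a minimal, self-contained sketch of the trick behind it, re-created in simplified form from include/linux/kconfig.h -- the real macro also handles =m options, and the test harness here is purely illustrative.

/* Simplified re-creation of IS_ENABLED() from include/linux/kconfig.h.
 * The real kernel macro additionally handles CONFIG_FOO=m; this sketch
 * only covers the =y / unset cases. */
#include <stdio.h>

#define __ARG_PLACEHOLDER_1 0,
#define __take_second_arg(__ignored, val, ...) val
#define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0)
#define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val)
#define __is_defined(x) ___is_defined(x)
#define IS_ENABLED(option) __is_defined(option)

#define CONFIG_TICK_ONESHOT 1	/* comment out to emulate =n */

int main(void)
{
	/*
	 * IS_ENABLED() expands to a constant 0 or 1, so the dead branch
	 * is eliminated by the optimizer but still parsed and
	 * type-checked -- the advantage over #ifdef.
	 */
	if (IS_ENABLED(CONFIG_TICK_ONESHOT))
		puts("oneshot-capable path compiled in");
	else
		puts("oneshot-capable path compiled out");
	return 0;
}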
From 3ad6eb0683a1edbb4bb117b85d61f17a879155a1 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Sun, 25 Feb 2024 23:54:59 +0100
Subject: tick: Start centralizing tick related CPU hotplug operations

During the CPU offlining process, the various timer tick features are
shut down from scattered places, sometimes from teardown callbacks on
stop machine, sometimes through explicit calls, sometimes from the
control CPU after the CPU died. The reason why these shutdown
operations are spread around is not always clear and it makes the tick
lifecycle hard to follow.

The tick should be shut down in order from highest to lowest level:

On stop machine from the dying CPU (high-level):

 1) Hand over the timekeeping duty (tick_handover_do_timer())
 2) Cancel the tick implementation called by the clockevent callback
    (tick_cancel_sched_timer())
 3) Shut down broadcasting (tick_offline_cpu() / tick_broadcast_offline())

On stop machine from the dying CPU (low-level):

 4) Shut down clockevents drivers (CPUHP_AP_*_TIMER_STARTING states)

From the control CPU after the CPU died (low-level):

 5) Shutdown/unregister/cleanup clockevents for the dead CPU
    (tick_cleanup_dead_cpu())

Instead the current order is 2, 4 (both from CPU hotplug states), then
1 and 3 through direct calls. This layout and order don't make much
sense.

Sort out this situation by creating a new TICK shutdown CPU hotplug
state and start by introducing the timekeeping duty hand-over there.
The state must precede hrtimers migration because the tick hrtimer
will be stopped from it in a further patch.

Signed-off-by: Frederic Weisbecker
Signed-off-by: Thomas Gleixner
Reviewed-by: Thomas Gleixner
Link: https://lore.kernel.org/r/20240225225508.11587-8-frederic@kernel.org
---
 include/linux/cpuhotplug.h |  1 +
 include/linux/tick.h       |  8 ++++++--
 kernel/cpu.c               |  8 +++++---
 kernel/time/tick-common.c  | 17 +++++++++++------
 4 files changed, 23 insertions(+), 11 deletions(-)

(limited to 'kernel/time/tick-common.c')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 7651904c6db5..35e78ddb2b37 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -184,6 +184,7 @@ enum cpuhp_state {
 	CPUHP_AP_ARM64_ISNDEP_STARTING,
 	CPUHP_AP_SMPCFD_DYING,
 	CPUHP_AP_HRTIMERS_DYING,
+	CPUHP_AP_TICK_DYING,
 	CPUHP_AP_X86_TBOOT_DYING,
 	CPUHP_AP_ARM_CACHE_B15_RAC_DYING,
 	CPUHP_AP_ONLINE,
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 716d17f31c45..afff4c207bd8 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -19,16 +19,20 @@ extern void __init tick_init(void);
 extern void tick_suspend_local(void);
 /* Should be core only, but XEN resume magic and ARM BL switcher require it */
 extern void tick_resume_local(void);
-extern void tick_handover_do_timer(void);
 extern void tick_cleanup_dead_cpu(int cpu);
 #else /* CONFIG_GENERIC_CLOCKEVENTS */
 static inline void tick_init(void) { }
 static inline void tick_suspend_local(void) { }
 static inline void tick_resume_local(void) { }
-static inline void tick_handover_do_timer(void) { }
 static inline void tick_cleanup_dead_cpu(int cpu) { }
 #endif /* !CONFIG_GENERIC_CLOCKEVENTS */
 
+#if defined(CONFIG_GENERIC_CLOCKEVENTS) && defined(CONFIG_HOTPLUG_CPU)
+extern int tick_cpu_dying(unsigned int cpu);
+#else
+#define tick_cpu_dying NULL
+#endif
+
 #if defined(CONFIG_GENERIC_CLOCKEVENTS) && defined(CONFIG_SUSPEND)
 extern void tick_freeze(void);
 extern void tick_unfreeze(void);
diff --git a/kernel/cpu.c b/kernel/cpu.c
index e6ec3ba4950b..263508073da8 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1324,8 +1324,6 @@ static int take_cpu_down(void *_param)
 	 */
 	cpuhp_invoke_callback_range_nofail(false, cpu, st, target);
 
-	/* Give up timekeeping duties */
-	tick_handover_do_timer();
 	/* Remove CPU from timer broadcasting */
 	tick_offline_cpu(cpu);
 	/* Park the stopper thread */
@@ -2205,7 +2203,11 @@ static struct cpuhp_step cpuhp_hp_states[] = {
 		.startup.single		= NULL,
 		.teardown.single	= hrtimers_cpu_dying,
 	},
-
+	[CPUHP_AP_TICK_DYING] = {
+		.name			= "tick:dying",
+		.startup.single		= NULL,
+		.teardown.single	= tick_cpu_dying,
+	},
 	/* Entry state on starting. Interrupts enabled from here on. Transient
 	 * state for synchronsization */
 	[CPUHP_AP_ONLINE] = {
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 0084e1ae2583..a89ef450fda7 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -397,15 +397,20 @@ EXPORT_SYMBOL_GPL(tick_broadcast_oneshot_control);
 
 #ifdef CONFIG_HOTPLUG_CPU
 /*
- * Transfer the do_timer job away from a dying cpu.
- *
- * Called with interrupts disabled. No locking required. If
- * tick_do_timer_cpu is owned by this cpu, nothing can change it.
+ * Stop the tick and transfer the timekeeping job away from a dying cpu.
  */
-void tick_handover_do_timer(void)
+int tick_cpu_dying(unsigned int dying_cpu)
 {
-	if (tick_do_timer_cpu == smp_processor_id())
+	/*
+	 * If the current CPU is the timekeeper, it's the only one that
+	 * can safely hand over its duty. Also all online CPUs are in
+	 * stop machine, guaranteed not to be idle, therefore it's safe
+	 * to pick any online successor.
+	 */
+	if (tick_do_timer_cpu == dying_cpu)
 		tick_do_timer_cpu = cpumask_first(cpu_online_mask);
+
+	return 0;
 }
 
 /*
-- 
cgit v1.2.3-70-g09d2
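For orientation, the shape and contract of such a teardown callback can be sketched as follows. This is a hypothetical example modeled on tick_cpu_dying() above; the function name and body are illustrative, not part of this series.

/* Hypothetical "dying" CPUHP teardown callback, modeled on
 * tick_cpu_dying(). It runs on the outgoing CPU, inside stop machine,
 * with interrupts disabled. */
#include <linux/cpuhotplug.h>

static int example_cpu_dying(unsigned int dying_cpu)
{
	/*
	 * Every other online CPU is spinning in stop machine and thus
	 * guaranteed non-idle, so per-CPU duties can be handed to any
	 * of them without locking.
	 */

	/* ... hand over or shut down per-CPU state here ... */

	/* Callbacks invoked through the _nofail range must succeed. */
	return 0;
}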
From f04e51220ad5cf35540f67f3ca15c8617c1f0bef Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Sun, 25 Feb 2024 23:55:00 +0100
Subject: tick: Move tick cancellation up to CPUHP_AP_TICK_DYING

The tick hrtimer is cancelled right before hrtimers are migrated. This
is done from the hrtimer subsystem, even though it shouldn't have to
know about its actual users.

Move the tick hrtimer cancellation instead to the relevant CPU hotplug
state, which aims at centralizing high-level tick shutdown operations,
so that the related flow is easy to follow.

Signed-off-by: Frederic Weisbecker
Signed-off-by: Thomas Gleixner
Reviewed-by: Thomas Gleixner
Link: https://lore.kernel.org/r/20240225225508.11587-9-frederic@kernel.org
---
 kernel/time/hrtimer.c     | 2 --
 kernel/time/tick-common.c | 2 ++
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel/time/tick-common.c')

diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 95f1f351dcd9..3e95474199ac 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -2229,8 +2229,6 @@ int hrtimers_cpu_dying(unsigned int dying_cpu)
 	int i, ncpu = cpumask_any_and(cpu_active_mask, housekeeping_cpumask(HK_TYPE_TIMER));
 	struct hrtimer_cpu_base *old_base, *new_base;
 
-	tick_cancel_sched_timer(dying_cpu);
-
 	old_base = this_cpu_ptr(&hrtimer_bases);
 	new_base = &per_cpu(hrtimer_bases, ncpu);
 
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index a89ef450fda7..b4af8c743b73 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -410,6 +410,8 @@ int tick_cpu_dying(unsigned int dying_cpu)
 	if (tick_do_timer_cpu == dying_cpu)
 		tick_do_timer_cpu = cpumask_first(cpu_online_mask);
 
+	tick_cancel_sched_timer(dying_cpu);
+
 	return 0;
 }
 
-- 
cgit v1.2.3-70-g09d2
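The correctness of this move rests on hotplug state ordering: on unplug, teardown callbacks run from the highest state downwards, so CPUHP_AP_TICK_DYING -- declared after CPUHP_AP_HRTIMERS_DYING in the enum -- is torn down first, and the tick hrtimer is already cancelled by the time hrtimers_cpu_dying() migrates the queues. A standalone sketch of that walk order, with illustrative stand-in names:

/* Standalone illustration of CPU hotplug teardown ordering: states
 * declared later in the enum are torn down earlier on unplug. All
 * names are illustrative stand-ins for the kernel's. */
#include <stdio.h>

enum example_cpuhp_state {
	EX_HRTIMERS_DYING,	/* like CPUHP_AP_HRTIMERS_DYING */
	EX_TICK_DYING,		/* like CPUHP_AP_TICK_DYING */
	EX_NR_STATES,
};

static const char * const teardown[EX_NR_STATES] = {
	[EX_HRTIMERS_DYING]	= "hrtimers_cpu_dying(): migrate hrtimers",
	[EX_TICK_DYING]		= "tick_cpu_dying(): cancel tick, hand over duty",
};

int main(void)
{
	/* Offlining walks the states in reverse declaration order. */
	for (int st = EX_NR_STATES - 1; st >= 0; st--)
		printf("teardown: %s\n", teardown[st]);
	return 0;
}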
From ef8969bb552c1c75e997a42d3e2c576b6ed4025a Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Sun, 25 Feb 2024 23:55:01 +0100
Subject: tick: Move broadcast cancellation up to CPUHP_AP_TICK_DYING

The broadcast shutdown code is executed through a random explicit call
within stop machine from the outgoing CPU. However the tick broadcast
layer sits between the tick callback and the clockevent drivers,
therefore it makes more sense to shut it down after the tick callback
and before the clockevent drivers.

Move it instead to the common tick shutdown CPU hotplug state, where
the related operations can be ordered from highest to lowest level.

Signed-off-by: Frederic Weisbecker
Signed-off-by: Thomas Gleixner
Reviewed-by: Thomas Gleixner
Link: https://lore.kernel.org/r/20240225225508.11587-10-frederic@kernel.org
---
 include/linux/tick.h        | 6 ------
 kernel/cpu.c                | 2 --
 kernel/time/tick-common.c   | 3 +++
 kernel/time/tick-internal.h | 2 ++
 4 files changed, 5 insertions(+), 8 deletions(-)

(limited to 'kernel/time/tick-common.c')

diff --git a/include/linux/tick.h b/include/linux/tick.h
index afff4c207bd8..c7840ae8ebaf 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -73,12 +73,6 @@ extern void tick_broadcast_control(enum tick_broadcast_mode mode);
 static inline void tick_broadcast_control(enum tick_broadcast_mode mode) { }
 #endif /* BROADCAST */
 
-#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_HOTPLUG_CPU)
-extern void tick_offline_cpu(unsigned int cpu);
-#else
-static inline void tick_offline_cpu(unsigned int cpu) { }
-#endif
-
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
 extern int tick_broadcast_oneshot_control(enum tick_broadcast_state state);
 #else
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 263508073da8..5a8ad4f5ccf3 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1324,8 +1324,6 @@ static int take_cpu_down(void *_param)
 	 */
 	cpuhp_invoke_callback_range_nofail(false, cpu, st, target);
 
-	/* Remove CPU from timer broadcasting */
-	tick_offline_cpu(cpu);
 	/* Park the stopper thread */
 	stop_machine_park(cpu);
 	return 0;
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index b4af8c743b73..522414089c0d 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -412,6 +412,9 @@ int tick_cpu_dying(unsigned int dying_cpu)
 
 	tick_cancel_sched_timer(dying_cpu);
 
+	/* Remove CPU from timer broadcasting */
+	tick_offline_cpu(dying_cpu);
+
 	return 0;
 }
 
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index a3243c4ac45f..5f2105e637bd 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -142,8 +142,10 @@ static inline bool tick_broadcast_oneshot_available(void) { return tick_oneshot
 #endif /* !(BROADCAST && ONESHOT) */
 
 #if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_HOTPLUG_CPU)
+extern void tick_offline_cpu(unsigned int cpu);
 extern void tick_broadcast_offline(unsigned int cpu);
 #else
+static inline void tick_offline_cpu(unsigned int cpu) { }
 static inline void tick_broadcast_offline(unsigned int cpu) { }
 #endif
 
-- 
cgit v1.2.3-70-g09d2
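For orientation, here is tick_cpu_dying() as it reads in kernel/time/tick-common.c after this patch, with the three high-level shutdown steps from the earlier changelog now gathered in order. The "step N" comments are added here for illustration and are not in the source; the rest is from the series.

/* tick_cpu_dying() after this change. */
int tick_cpu_dying(unsigned int dying_cpu)
{
	/*
	 * If the current CPU is the timekeeper, it's the only one that
	 * can safely hand over its duty. Also all online CPUs are in
	 * stop machine, guaranteed not to be idle, therefore it's safe
	 * to pick any online successor.
	 */
	if (tick_do_timer_cpu == dying_cpu)			/* step 1 */
		tick_do_timer_cpu = cpumask_first(cpu_online_mask);

	tick_cancel_sched_timer(dying_cpu);			/* step 2 */

	/* Remove CPU from timer broadcasting */
	tick_offline_cpu(dying_cpu);				/* step 3 */

	return 0;
}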
From 3f69d04e146c6e14ccdd4e7b37d93f789229202a Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Sun, 25 Feb 2024 23:55:06 +0100
Subject: tick: Shut down low-res tick from dying CPU

The timekeeping duty is handed over from the outgoing CPU within stop
machine. This works well if CONFIG_NO_HZ_COMMON=n or the tick is in
high-res mode. However in low-res dynticks mode the tick isn't
cancelled until the clockevent is shut down, which can happen later.
The tick may therefore fire again once IRQs are re-enabled on stop
machine and keep firing until IRQs are disabled for good upon the last
call to idle.

That leaves plenty of opportunities for the timekeeper to go idle and
for the outgoing CPU to take over that duty again. This is why
tick_nohz_idle_stop_tick() is called one last time on idle if the CPU
is seen offline: so that the timekeeping duty is handed over once more
in case the outgoing CPU had re-taken it.

This means there are two timekeeping handovers on CPU down hotplug,
with different undocumented constraints and purposes:

1) A handover on stop machine for !dynticks || highres. All online
   CPUs are guaranteed to be non-idle and the timekeeping duty can be
   safely handed over. The hrtimer tick is cancelled, so it is
   guaranteed that in dynticks mode the outgoing CPU won't take the
   duty again.

2) A handover on the last idle call for dynticks && lowres. Setting
   the duty to TICK_DO_TIMER_NONE makes sure that some CPU will take
   over the timekeeping.

Prepare for consolidating the handover in a single place (the first
one) by shutting down the low-res tick from tick_cancel_sched_timer()
as well. This will simplify the handover and unify the tick
cancellation between high-res and low-res.

Signed-off-by: Frederic Weisbecker
Signed-off-by: Thomas Gleixner
Reviewed-by: Thomas Gleixner
Link: https://lore.kernel.org/r/20240225225508.11587-15-frederic@kernel.org
---
 kernel/time/tick-common.c |  3 ++-
 kernel/time/tick-sched.c  | 32 +++++++++++++++++++++++++-------
 kernel/time/tick-sched.h  |  4 ++--
 3 files changed, 29 insertions(+), 10 deletions(-)

(limited to 'kernel/time/tick-common.c')

diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 522414089c0d..9cd09eea06d6 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -410,7 +410,8 @@ int tick_cpu_dying(unsigned int dying_cpu)
 	if (tick_do_timer_cpu == dying_cpu)
 		tick_do_timer_cpu = cpumask_first(cpu_online_mask);
 
-	tick_cancel_sched_timer(dying_cpu);
+	/* Make sure the CPU won't try to retake the timekeeping duty */
+	tick_sched_timer_dying(dying_cpu);
 
 	/* Remove CPU from timer broadcasting */
 	tick_offline_cpu(dying_cpu);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index dcb9f0394182..89d16b8ea2c4 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -308,6 +308,14 @@ static enum hrtimer_restart tick_nohz_handler(struct hrtimer *timer)
 	return HRTIMER_RESTART;
 }
 
+static void tick_sched_timer_cancel(struct tick_sched *ts)
+{
+	if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES))
+		hrtimer_cancel(&ts->sched_timer);
+	else if (tick_sched_flag_test(ts, TS_FLAG_NOHZ))
+		tick_program_event(KTIME_MAX, 1);
+}
+
 #ifdef CONFIG_NO_HZ_FULL
 cpumask_var_t tick_nohz_full_mask;
 EXPORT_SYMBOL_GPL(tick_nohz_full_mask);
@@ -1040,10 +1048,7 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
 	 * the tick timer.
 	 */
 	if (unlikely(expires == KTIME_MAX)) {
-		if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES))
-			hrtimer_cancel(&ts->sched_timer);
-		else
-			tick_program_event(KTIME_MAX, 1);
+		tick_sched_timer_cancel(ts);
 		return;
 	}
 
@@ -1598,14 +1603,27 @@ void tick_setup_sched_timer(bool hrtimer)
 	tick_nohz_activate(ts);
 }
 
-void tick_cancel_sched_timer(int cpu)
+/*
+ * Shut down the tick and make sure the CPU won't try to retake the timekeeping
+ * duty before disabling IRQs in idle for the last time.
+ */
+void tick_sched_timer_dying(int cpu)
 {
+	struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
 	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+	struct clock_event_device *dev = td->evtdev;
 	ktime_t idle_sleeptime, iowait_sleeptime;
 	unsigned long idle_calls, idle_sleeps;
 
-	if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES))
-		hrtimer_cancel(&ts->sched_timer);
+	/* This must happen before hrtimers are migrated! */
+	tick_sched_timer_cancel(ts);
+
+	/*
+	 * If the clockevents doesn't support CLOCK_EVT_STATE_ONESHOT_STOPPED,
+	 * make sure not to call low-res tick handler.
+	 */
+	if (tick_sched_flag_test(ts, TS_FLAG_NOHZ))
+		dev->event_handler = clockevents_handle_noop;
 
 	idle_sleeptime = ts->idle_sleeptime;
 	iowait_sleeptime = ts->iowait_sleeptime;
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index bbe72a078985..58d8d1c49dd3 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -106,9 +106,9 @@ extern struct tick_sched *tick_get_tick_sched(int cpu);
 extern void tick_setup_sched_timer(bool hrtimer);
 
 #if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
-extern void tick_cancel_sched_timer(int cpu);
+extern void tick_sched_timer_dying(int cpu);
 #else
-static inline void tick_cancel_sched_timer(int cpu) { }
+static inline void tick_sched_timer_dying(int cpu) { }
 #endif
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
-- 
cgit v1.2.3-70-g09d2
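The defensive part of tick_sched_timer_dying() is worth highlighting: programming a KTIME_MAX expiry only silences clockevent devices that support CLOCK_EVT_STATE_ONESHOT_STOPPED, and a device without that state may still fire one more time. A condensed, illustrative sketch of the resulting two-step pattern follows; the function name is hypothetical, while tick_program_event() and clockevents_handle_noop() are the kernel helpers used in the diff above.

/* Condensed, illustrative version of the low-res shutdown done by
 * tick_sched_timer_dying() above; assumes tick-internal.h context. */
static void example_lowres_tick_shutdown(struct clock_event_device *dev)
{
	/*
	 * Request "no next event". Devices supporting
	 * CLOCK_EVT_STATE_ONESHOT_STOPPED park here for good.
	 */
	tick_program_event(KTIME_MAX, 1);

	/*
	 * A device without that state may still fire once more, so any
	 * late interrupt must land in an empty handler instead of the
	 * (now dead) low-res tick handler.
	 */
	dev->event_handler = clockevents_handle_noop;
}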
From 500f8f9bced86f0c0f2482773bd64a1b7ec9c4e1 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Sun, 25 Feb 2024 23:55:07 +0100
Subject: tick: Assume timekeeping is correctly handed over upon last offline
 idle call

The timekeeping duty is handed over from the outgoing CPU on stop
machine, then the oneshot tick is stopped right after. Therefore it's
guaranteed that the current CPU isn't the timekeeper upon its last
call to idle.

Besides, calling tick_nohz_idle_stop_tick() while the dying CPU goes
into idle suggests that the tick is going to be stopped, while it is
actually stopped already from the appropriate CPU hotplug state.

Remove the confusing call and the obsolete case handling, and convert
it to a sanity check that verifies the above assumption.

Signed-off-by: Frederic Weisbecker
Signed-off-by: Thomas Gleixner
Reviewed-by: Thomas Gleixner
Link: https://lore.kernel.org/r/20240225225508.11587-16-frederic@kernel.org
---
 include/linux/tick.h      |  2 ++
 kernel/cpu.c              |  1 +
 kernel/sched/idle.c       |  1 -
 kernel/time/tick-common.c |  4 ++++
 kernel/time/tick-sched.c  | 13 +------------
 5 files changed, 8 insertions(+), 13 deletions(-)

(limited to 'kernel/time/tick-common.c')

diff --git a/include/linux/tick.h b/include/linux/tick.h
index c7840ae8ebaf..44fddfa93e18 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -29,8 +29,10 @@ static inline void tick_cleanup_dead_cpu(int cpu) { }
 
 #if defined(CONFIG_GENERIC_CLOCKEVENTS) && defined(CONFIG_HOTPLUG_CPU)
 extern int tick_cpu_dying(unsigned int cpu);
+extern void tick_assert_timekeeping_handover(void);
 #else
 #define tick_cpu_dying NULL
+static inline void tick_assert_timekeeping_handover(void) { }
 #endif
 
 #if defined(CONFIG_GENERIC_CLOCKEVENTS) && defined(CONFIG_SUSPEND)
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 5a8ad4f5ccf3..7e84a7b0675e 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1399,6 +1399,7 @@ void cpuhp_report_idle_dead(void)
 	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
 
 	BUG_ON(st->state != CPUHP_AP_OFFLINE);
+	tick_assert_timekeeping_handover();
 	rcutree_report_cpu_dead();
 	st->state = CPUHP_AP_IDLE_DEAD;
 	/*
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 31231925f1ec..b15d40cad7ea 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -291,7 +291,6 @@ static void do_idle(void)
 		local_irq_disable();
 
 		if (cpu_is_offline(cpu)) {
-			tick_nohz_idle_stop_tick();
 			cpuhp_report_idle_dead();
 			arch_cpu_idle_dead();
 		}
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 9cd09eea06d6..fb0fdec8719a 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -396,6 +396,10 @@ int tick_broadcast_oneshot_control(enum tick_broadcast_state state)
 EXPORT_SYMBOL_GPL(tick_broadcast_oneshot_control);
 
 #ifdef CONFIG_HOTPLUG_CPU
+void tick_assert_timekeeping_handover(void)
+{
+	WARN_ON_ONCE(tick_do_timer_cpu == smp_processor_id());
+}
 /*
  * Stop the tick and transfer the timekeeping job away from a dying cpu.
  */
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 89d16b8ea2c4..269e21590df5 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -1160,18 +1160,7 @@ static bool report_idle_softirq(void)
 
 static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
 {
-	/*
-	 * If this CPU is offline and it is the one which updates
-	 * jiffies, then give up the assignment and let it be taken by
-	 * the CPU which runs the tick timer next. If we don't drop
-	 * this here, the jiffies might be stale and do_timer() never
-	 * gets invoked.
-	 */
-	if (unlikely(!cpu_online(cpu))) {
-		if (cpu == tick_do_timer_cpu)
-			tick_do_timer_cpu = TICK_DO_TIMER_NONE;
-		return false;
-	}
+	WARN_ON_ONCE(cpu_is_offline(cpu));
 
 	if (unlikely(!tick_sched_flag_test(ts, TS_FLAG_NOHZ)))
 		return false;
-- 
cgit v1.2.3-70-g09d2
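To wrap up, the following standalone userspace model mimics the end state of the series: one ordered handover on stop machine, and a mere assertion on the last idle call. Every function here is a stand-in for the kernel one with the same role, not kernel code.

/* Userspace model of the final CPU-offline tick flow; stand-ins only. */
#include <assert.h>
#include <stdio.h>

static int tick_do_timer_cpu;

/* Models tick_cpu_dying(): runs "in stop machine" on the dying CPU. */
static int model_tick_cpu_dying(int dying_cpu, int first_online_cpu)
{
	if (tick_do_timer_cpu == dying_cpu)
		tick_do_timer_cpu = first_online_cpu;	/* 1) handover */
	printf("CPU%d: cancel tick\n", dying_cpu);	/* 2) tick */
	printf("CPU%d: broadcast offline\n", dying_cpu);/* 3) broadcast */
	return 0;
}

/* Models tick_assert_timekeeping_handover() on the last idle call. */
static void model_report_idle_dead(int cpu)
{
	assert(tick_do_timer_cpu != cpu);	/* handover already done */
}

int main(void)
{
	tick_do_timer_cpu = 3;			/* CPU3 is the timekeeper */
	model_tick_cpu_dying(3, 0);		/* offline CPU3 */
	model_report_idle_dead(3);		/* sanity check only */
	printf("timekeeper is now CPU%d\n", tick_do_timer_cpu);
	return 0;
}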