diff options
-rw-r--r-- | arch/Kconfig | 15 | ||||
-rw-r--r-- | include/linux/cpuhotplug.h | 12 | ||||
-rw-r--r-- | kernel/cpu.c | 193 | ||||
-rw-r--r-- | kernel/smpboot.c | 2 |
4 files changed, 221 insertions, 1 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 205fd23e0cad..f55c5fcbea38 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -34,6 +34,21 @@ config ARCH_HAS_SUBPAGE_FAULTS config HOTPLUG_SMT bool +# Selected by HOTPLUG_CORE_SYNC_DEAD or HOTPLUG_CORE_SYNC_FULL +config HOTPLUG_CORE_SYNC + bool + +# Basic CPU dead synchronization selected by architecture +config HOTPLUG_CORE_SYNC_DEAD + bool + select HOTPLUG_CORE_SYNC + +# Full CPU synchronization with alive state selected by architecture +config HOTPLUG_CORE_SYNC_FULL + bool + select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU + select HOTPLUG_CORE_SYNC + config GENERIC_ENTRY bool diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 0f1001dca0e0..5def71f81ec5 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -517,4 +517,16 @@ void cpuhp_online_idle(enum cpuhp_state state); static inline void cpuhp_online_idle(enum cpuhp_state state) { } #endif +void cpuhp_ap_sync_alive(void); +void arch_cpuhp_sync_state_poll(void); +void arch_cpuhp_cleanup_kick_cpu(unsigned int cpu); + +#ifdef CONFIG_HOTPLUG_CORE_SYNC_DEAD +void cpuhp_ap_report_dead(void); +void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu); +#else +static inline void cpuhp_ap_report_dead(void) { } +static inline void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) { } +#endif + #endif diff --git a/kernel/cpu.c b/kernel/cpu.c index df8f137f0271..64b624291316 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -17,6 +17,7 @@ #include <linux/cpu.h> #include <linux/oom.h> #include <linux/rcupdate.h> +#include <linux/delay.h> #include <linux/export.h> #include <linux/bug.h> #include <linux/kthread.h> @@ -59,6 +60,7 @@ * @last: For multi-instance rollback, remember how far we got * @cb_state: The state for a single callback (install/uninstall) * @result: Result of the operation + * @ap_sync_state: State for AP synchronization * @done_up: Signal completion to the issuer of the task for cpu-up * @done_down: Signal completion to the issuer of the task for cpu-down */ @@ -76,6 +78,7 @@ struct cpuhp_cpu_state { struct hlist_node *last; enum cpuhp_state cb_state; int result; + atomic_t ap_sync_state; struct completion done_up; struct completion done_down; #endif @@ -276,6 +279,182 @@ static bool cpuhp_is_atomic_state(enum cpuhp_state state) return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE; } +/* Synchronization state management */ +enum cpuhp_sync_state { + SYNC_STATE_DEAD, + SYNC_STATE_KICKED, + SYNC_STATE_SHOULD_DIE, + SYNC_STATE_ALIVE, + SYNC_STATE_SHOULD_ONLINE, + SYNC_STATE_ONLINE, +}; + +#ifdef CONFIG_HOTPLUG_CORE_SYNC +/** + * cpuhp_ap_update_sync_state - Update synchronization state during bringup/teardown + * @state: The synchronization state to set + * + * No synchronization point. Just update of the synchronization state, but implies + * a full barrier so that the AP changes are visible before the control CPU proceeds. + */ +static inline void cpuhp_ap_update_sync_state(enum cpuhp_sync_state state) +{ + atomic_t *st = this_cpu_ptr(&cpuhp_state.ap_sync_state); + + (void)atomic_xchg(st, state); +} + +void __weak arch_cpuhp_sync_state_poll(void) { cpu_relax(); } + +static bool cpuhp_wait_for_sync_state(unsigned int cpu, enum cpuhp_sync_state state, + enum cpuhp_sync_state next_state) +{ + atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu); + ktime_t now, end, start = ktime_get(); + int sync; + + end = start + 10ULL * NSEC_PER_SEC; + + sync = atomic_read(st); + while (1) { + if (sync == state) { + if (!atomic_try_cmpxchg(st, &sync, next_state)) + continue; + return true; + } + + now = ktime_get(); + if (now > end) { + /* Timeout. Leave the state unchanged */ + return false; + } else if (now - start < NSEC_PER_MSEC) { + /* Poll for one millisecond */ + arch_cpuhp_sync_state_poll(); + } else { + usleep_range_state(USEC_PER_MSEC, 2 * USEC_PER_MSEC, TASK_UNINTERRUPTIBLE); + } + sync = atomic_read(st); + } + return true; +} +#else /* CONFIG_HOTPLUG_CORE_SYNC */ +static inline void cpuhp_ap_update_sync_state(enum cpuhp_sync_state state) { } +#endif /* !CONFIG_HOTPLUG_CORE_SYNC */ + +#ifdef CONFIG_HOTPLUG_CORE_SYNC_DEAD +/** + * cpuhp_ap_report_dead - Update synchronization state to DEAD + * + * No synchronization point. Just update of the synchronization state. + */ +void cpuhp_ap_report_dead(void) +{ + cpuhp_ap_update_sync_state(SYNC_STATE_DEAD); +} + +void __weak arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) { } + +/* + * Late CPU shutdown synchronization point. Cannot use cpuhp_state::done_down + * because the AP cannot issue complete() at this stage. + */ +static void cpuhp_bp_sync_dead(unsigned int cpu) +{ + atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu); + int sync = atomic_read(st); + + do { + /* CPU can have reported dead already. Don't overwrite that! */ + if (sync == SYNC_STATE_DEAD) + break; + } while (!atomic_try_cmpxchg(st, &sync, SYNC_STATE_SHOULD_DIE)); + + if (cpuhp_wait_for_sync_state(cpu, SYNC_STATE_DEAD, SYNC_STATE_DEAD)) { + /* CPU reached dead state. Invoke the cleanup function */ + arch_cpuhp_cleanup_dead_cpu(cpu); + return; + } + + /* No further action possible. Emit message and give up. */ + pr_err("CPU%u failed to report dead state\n", cpu); +} +#else /* CONFIG_HOTPLUG_CORE_SYNC_DEAD */ +static inline void cpuhp_bp_sync_dead(unsigned int cpu) { } +#endif /* !CONFIG_HOTPLUG_CORE_SYNC_DEAD */ + +#ifdef CONFIG_HOTPLUG_CORE_SYNC_FULL +/** + * cpuhp_ap_sync_alive - Synchronize AP with the control CPU once it is alive + * + * Updates the AP synchronization state to SYNC_STATE_ALIVE and waits + * for the BP to release it. + */ +void cpuhp_ap_sync_alive(void) +{ + atomic_t *st = this_cpu_ptr(&cpuhp_state.ap_sync_state); + + cpuhp_ap_update_sync_state(SYNC_STATE_ALIVE); + + /* Wait for the control CPU to release it. */ + while (atomic_read(st) != SYNC_STATE_SHOULD_ONLINE) + cpu_relax(); +} + +static bool cpuhp_can_boot_ap(unsigned int cpu) +{ + atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu); + int sync = atomic_read(st); + +again: + switch (sync) { + case SYNC_STATE_DEAD: + /* CPU is properly dead */ + break; + case SYNC_STATE_KICKED: + /* CPU did not come up in previous attempt */ + break; + case SYNC_STATE_ALIVE: + /* CPU is stuck cpuhp_ap_sync_alive(). */ + break; + default: + /* CPU failed to report online or dead and is in limbo state. */ + return false; + } + + /* Prepare for booting */ + if (!atomic_try_cmpxchg(st, &sync, SYNC_STATE_KICKED)) + goto again; + + return true; +} + +void __weak arch_cpuhp_cleanup_kick_cpu(unsigned int cpu) { } + +/* + * Early CPU bringup synchronization point. Cannot use cpuhp_state::done_up + * because the AP cannot issue complete() so early in the bringup. + */ +static int cpuhp_bp_sync_alive(unsigned int cpu) +{ + int ret = 0; + + if (!IS_ENABLED(CONFIG_HOTPLUG_CORE_SYNC_FULL)) + return 0; + + if (!cpuhp_wait_for_sync_state(cpu, SYNC_STATE_ALIVE, SYNC_STATE_SHOULD_ONLINE)) { + pr_err("CPU%u failed to report alive state\n", cpu); + ret = -EIO; + } + + /* Let the architecture cleanup the kick alive mechanics. */ + arch_cpuhp_cleanup_kick_cpu(cpu); + return ret; +} +#else /* CONFIG_HOTPLUG_CORE_SYNC_FULL */ +static inline int cpuhp_bp_sync_alive(unsigned int cpu) { return 0; } +static inline bool cpuhp_can_boot_ap(unsigned int cpu) { return true; } +#endif /* !CONFIG_HOTPLUG_CORE_SYNC_FULL */ + /* Serializes the updates to cpu_online_mask, cpu_present_mask */ static DEFINE_MUTEX(cpu_add_remove_lock); bool cpuhp_tasks_frozen; @@ -588,6 +767,9 @@ static int bringup_cpu(unsigned int cpu) struct task_struct *idle = idle_thread_get(cpu); int ret; + if (!cpuhp_can_boot_ap(cpu)) + return -EAGAIN; + /* * Reset stale stack state from the last time this CPU was online. */ @@ -610,6 +792,10 @@ static int bringup_cpu(unsigned int cpu) if (ret) goto out_unlock; + ret = cpuhp_bp_sync_alive(cpu); + if (ret) + goto out_unlock; + ret = bringup_wait_for_ap_online(cpu); if (ret) goto out_unlock; @@ -1113,6 +1299,8 @@ static int takedown_cpu(unsigned int cpu) /* This actually kills the CPU. */ __cpu_die(cpu); + cpuhp_bp_sync_dead(cpu); + tick_cleanup_dead_cpu(cpu); rcutree_migrate_callbacks(cpu); return 0; @@ -1359,8 +1547,10 @@ void cpuhp_online_idle(enum cpuhp_state state) if (state != CPUHP_AP_ONLINE_IDLE) return; + cpuhp_ap_update_sync_state(SYNC_STATE_ONLINE); + /* - * Unpart the stopper thread before we start the idle loop (and start + * Unpark the stopper thread before we start the idle loop (and start * scheduling); this ensures the stopper task is always available. */ stop_machine_unpark(smp_processor_id()); @@ -2737,6 +2927,7 @@ void __init boot_cpu_hotplug_init(void) { #ifdef CONFIG_SMP cpumask_set_cpu(smp_processor_id(), &cpus_booted_once_mask); + atomic_set(this_cpu_ptr(&cpuhp_state.ap_sync_state), SYNC_STATE_ONLINE); #endif this_cpu_write(cpuhp_state.state, CPUHP_ONLINE); this_cpu_write(cpuhp_state.target, CPUHP_ONLINE); diff --git a/kernel/smpboot.c b/kernel/smpboot.c index 2c7396da470c..3dcfd3f04ed0 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -326,6 +326,7 @@ void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread) } EXPORT_SYMBOL_GPL(smpboot_unregister_percpu_thread); +#ifndef CONFIG_HOTPLUG_CORE_SYNC static DEFINE_PER_CPU(atomic_t, cpu_hotplug_state) = ATOMIC_INIT(CPU_POST_DEAD); /* @@ -488,3 +489,4 @@ bool cpu_report_death(void) } #endif /* #ifdef CONFIG_HOTPLUG_CPU */ +#endif /* !CONFIG_HOTPLUG_CORE_SYNC */ |