From a75a6068dac25d4022ebcd82192ed6345407843c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 10 Sep 2015 15:07:50 +0200 Subject: cpu/hotplug: Read_lock(tasklist_lock) doesn't need to disable irqs check_for_tasks() doesn't need to disable irqs, recursive read_lock() from interrupt is fine. While at it, s/do_each_thread/for_each_process_thread/. Signed-off-by: Oleg Nesterov Reviewed-by: Kirill Tkhai Reviewed-by: Srikar Dronamraju Cc: Kirill Tkhai Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150910130750.GA20055@redhat.com Signed-off-by: Ingo Molnar --- kernel/cpu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/cpu.c') diff --git a/kernel/cpu.c b/kernel/cpu.c index 82cf9dff4295..050c63472f03 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -304,8 +304,8 @@ static inline void check_for_tasks(int dead_cpu) { struct task_struct *g, *p; - read_lock_irq(&tasklist_lock); - do_each_thread(g, p) { + read_lock(&tasklist_lock); + for_each_process_thread(g, p) { if (!p->on_rq) continue; /* @@ -320,8 +320,8 @@ static inline void check_for_tasks(int dead_cpu) pr_warn("Task %s (pid=%d) is on cpu %d (state=%ld, flags=%x)\n", p->comm, task_pid_nr(p), dead_cpu, p->state, p->flags); - } while_each_thread(g, p); - read_unlock_irq(&tasklist_lock); + } + read_unlock(&tasklist_lock); } struct take_cpu_down_param { -- cgit v1.2.3-70-g09d2 From 233e7f267e580fefdeb36628b7efe8bfe056d27c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 8 Oct 2015 16:51:31 +0200 Subject: stop_machine: Ensure that a queued callback will be called before cpu_stop_park() cpu_stop_queue_work() checks stopper->enabled before it queues the work, but ->enabled == T can only guarantee cpu_stop_signal_done() if we race with cpu_down(). This is not enough for stop_two_cpus() or stop_machine(), they will deadlock if multi_cpu_stop() won't be called by one of the target CPU's. 
stop_machine/stop_cpus are fine, they rely on stop_cpus_mutex. But stop_two_cpus() has to check cpu_active() to avoid the same race with hotplug, and this check is very unobvious and probably not even correct if we race with cpu_up(). Change cpu_down() path to clear ->enabled before cpu_stopper_thread() flushes the pending ->works and returns with KTHREAD_SHOULD_PARK set. Note also that smpboot_thread_call() calls cpu_stop_unpark() which sets enabled == T at CPU_ONLINE stage, so this CPU can't go away until cpu_stopper_thread() is called at least once. This all means that if cpu_stop_queue_work() succeeds, we know that work->fn() will be called. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Mike Galbraith Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: Tejun Heo Cc: Thomas Gleixner Cc: heiko.carstens@de.ibm.com Link: http://lkml.kernel.org/r/20151008145131.GA18139@redhat.com Signed-off-by: Ingo Molnar --- include/linux/stop_machine.h | 1 + kernel/cpu.c | 2 +- kernel/stop_machine.c | 23 +++++++++++++---------- 3 files changed, 15 insertions(+), 11 deletions(-) (limited to 'kernel/cpu.c') diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index 414d924318ce..7b76362b381c 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -33,6 +33,7 @@ void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg, struct cpu_stop_work *work_buf); int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg); int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg); +void stop_machine_park(int cpu); #else /* CONFIG_SMP */ diff --git a/kernel/cpu.c b/kernel/cpu.c index 050c63472f03..c85df2775b73 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -344,7 +344,7 @@ static int take_cpu_down(void *_param) /* Give up timekeeping duties */ tick_handover_do_timer(); /* Park the stopper thread */ - kthread_park(current); + 
stop_machine_park((long)param->hcpu); return 0; } diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 12484e5d5c88..6a402098d4ab 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -452,6 +452,18 @@ repeat: } } +void stop_machine_park(int cpu) +{ + struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); + /* + * Lockless. cpu_stopper_thread() will take stopper->lock and flush + * the pending works before it parks, until then it is fine to queue + * the new works. + */ + stopper->enabled = false; + kthread_park(stopper->thread); +} + extern void sched_set_stop_task(int cpu, struct task_struct *stop); static void cpu_stop_create(unsigned int cpu) @@ -462,17 +474,8 @@ static void cpu_stop_create(unsigned int cpu) static void cpu_stop_park(unsigned int cpu) { struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); - struct cpu_stop_work *work, *tmp; - unsigned long flags; - /* drain remaining works */ - spin_lock_irqsave(&stopper->lock, flags); - list_for_each_entry_safe(work, tmp, &stopper->works, list) { - list_del_init(&work->list); - cpu_stop_signal_done(work->done, false); - } - stopper->enabled = false; - spin_unlock_irqrestore(&stopper->lock, flags); + WARN_ON(!list_empty(&stopper->works)); } static void cpu_stop_unpark(unsigned int cpu) -- cgit v1.2.3-70-g09d2 From c00166d87e730088d919814020e96ffed129d0d1 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 9 Oct 2015 18:00:49 +0200 Subject: stop_machine: Kill smp_hotplug_thread->pre_unpark, introduce stop_machine_unpark() 1. Change smpboot_unpark_thread() to check ->selfparking, just like smpboot_park_thread() does. 2. Introduce stop_machine_unpark() which sets ->enabled and calls kthread_unpark(). 3. Change smpboot_thread_call() and cpu_stop_init() to call stop_machine_unpark() by hand. This way: - IMO the ->selfparking logic becomes more consistent. - We can kill the smp_hotplug_thread->pre_unpark() method. - We can easily unpark the stopper thread earlier. 
Say, we can move stop_machine_unpark() from smpboot_thread_call() to sched_cpu_active() as Peter suggests. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Mike Galbraith Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: Tejun Heo Cc: Thomas Gleixner Cc: heiko.carstens@de.ibm.com Link: http://lkml.kernel.org/r/20151009160049.GA10166@redhat.com Signed-off-by: Ingo Molnar --- include/linux/smpboot.h | 4 ---- include/linux/stop_machine.h | 1 + kernel/cpu.c | 1 + kernel/smpboot.c | 5 ++--- kernel/stop_machine.c | 10 +++++++++- 5 files changed, 13 insertions(+), 8 deletions(-) (limited to 'kernel/cpu.c') diff --git a/include/linux/smpboot.h b/include/linux/smpboot.h index e6109a6cd8f6..12910cf19869 100644 --- a/include/linux/smpboot.h +++ b/include/linux/smpboot.h @@ -24,9 +24,6 @@ struct smpboot_thread_data; * parked (cpu offline) * @unpark: Optional unpark function, called when the thread is * unparked (cpu online) - * @pre_unpark: Optional unpark function, called before the thread is - * unparked (cpu online). This is not guaranteed to be - * called on the target cpu of the thread. Careful! * @cpumask: Internal state. To update which threads are unparked, * call smpboot_update_cpumask_percpu_thread(). * @selfparking: Thread is not parked by the park function. 
@@ -42,7 +39,6 @@ struct smp_hotplug_thread { void (*cleanup)(unsigned int cpu, bool online); void (*park)(unsigned int cpu); void (*unpark)(unsigned int cpu); - void (*pre_unpark)(unsigned int cpu); cpumask_var_t cpumask; bool selfparking; const char *thread_comm; diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index 7b76362b381c..0adedca24c5b 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -34,6 +34,7 @@ void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg, int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg); int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg); void stop_machine_park(int cpu); +void stop_machine_unpark(int cpu); #else /* CONFIG_SMP */ diff --git a/kernel/cpu.c b/kernel/cpu.c index c85df2775b73..6467521e1e15 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -475,6 +475,7 @@ static int smpboot_thread_call(struct notifier_block *nfb, case CPU_DOWN_FAILED: case CPU_ONLINE: + stop_machine_unpark(cpu); smpboot_unpark_threads(cpu); break; diff --git a/kernel/smpboot.c b/kernel/smpboot.c index a818cbc73e14..d264f59bff56 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -222,9 +222,8 @@ static void smpboot_unpark_thread(struct smp_hotplug_thread *ht, unsigned int cp { struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu); - if (ht->pre_unpark) - ht->pre_unpark(cpu); - kthread_unpark(tsk); + if (!ht->selfparking) + kthread_unpark(tsk); } void smpboot_unpark_threads(unsigned int cpu) diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 91fbb109de6c..59096a55089f 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -513,6 +513,14 @@ static void cpu_stop_unpark(unsigned int cpu) spin_unlock_irq(&stopper->lock); } +void stop_machine_unpark(int cpu) +{ + struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); + + cpu_stop_unpark(cpu); + kthread_unpark(stopper->thread); +} + static struct smp_hotplug_thread 
cpu_stop_threads = { .store = &cpu_stopper.thread, .thread_should_run = cpu_stop_should_run, @@ -521,7 +529,6 @@ static struct smp_hotplug_thread cpu_stop_threads = { .create = cpu_stop_create, .setup = cpu_stop_unpark, .park = cpu_stop_park, - .pre_unpark = cpu_stop_unpark, .selfparking = true, }; @@ -537,6 +544,7 @@ static int __init cpu_stop_init(void) } BUG_ON(smpboot_register_percpu_thread(&cpu_stop_threads)); + stop_machine_unpark(raw_smp_processor_id()); stop_machine_initialized = true; return 0; } -- cgit v1.2.3-70-g09d2 From 07f06cb3b5f6bd21374a48dbefdb431d71d53974 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 9 Oct 2015 18:00:54 +0200 Subject: sched: Start stopper early Ensure the stopper thread is active 'early', because the load balancer pretty much assumes that it's available. And when 'online && active' the load-balancer is fully available. Not only the numa balancing stop_two_cpus() caller relies on it, but also the self migration stuff does, and at CPU_ONLINE time the cpu really is 'free' to run anything. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Mike Galbraith Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: Tejun Heo Cc: Thomas Gleixner Cc: heiko.carstens@de.ibm.com Link: http://lkml.kernel.org/r/20151009160054.GA10176@redhat.com Signed-off-by: Ingo Molnar --- kernel/cpu.c | 1 - kernel/sched/core.c | 12 +++++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'kernel/cpu.c') diff --git a/kernel/cpu.c b/kernel/cpu.c index 6467521e1e15..c85df2775b73 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -475,7 +475,6 @@ static int smpboot_thread_call(struct notifier_block *nfb, case CPU_DOWN_FAILED: case CPU_ONLINE: - stop_machine_unpark(cpu); smpboot_unpark_threads(cpu); break; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index f45a7c70f264..7ee8caea1195 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5545,21 +5545,27 @@ static void set_cpu_rq_start_time(void) static int sched_cpu_active(struct notifier_block *nfb, unsigned long action, void *hcpu) { + int cpu = (long)hcpu; + switch (action & ~CPU_TASKS_FROZEN) { case CPU_STARTING: set_cpu_rq_start_time(); return NOTIFY_OK; + case CPU_ONLINE: /* * At this point a starting CPU has marked itself as online via * set_cpu_online(). But it might not yet have marked itself * as active, which is essential from here on. - * - * Thus, fall-through and help the starting CPU along. */ + set_cpu_active(cpu, true); + stop_machine_unpark(cpu); + return NOTIFY_OK; + case CPU_DOWN_FAILED: - set_cpu_active((long)hcpu, true); + set_cpu_active(cpu, true); return NOTIFY_OK; + default: return NOTIFY_DONE; } -- cgit v1.2.3-70-g09d2