diff options
author | Ingo Molnar <mingo@kernel.org> | 2014-07-05 11:06:10 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2014-07-05 11:06:10 +0200 |
commit | 51da9830d7a58c8f77127c622ee57d453c88af09 (patch) | |
tree | 85bd2caae0344f77f0afd5f9617a45855000b821 /kernel | |
parent | 5d5e2b1bcbdc996e72815c03fdc5ea82c4642397 (diff) | |
parent | d490b3e2c23369c6adfa183d18d9a24ced247797 (diff) |
Merge branch 'timers/nohz' into sched/core
Merge these two, because upcoming patches will touch both areas.
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | kernel/irq_work.c | 76 |
| -rw-r--r-- | kernel/sched/core.c | 22 |
| -rw-r--r-- | kernel/sched/sched.h | 12 |
| -rw-r--r-- | kernel/smp.c | 9 |
| -rw-r--r-- | kernel/time/tick-sched.c | 10 |
5 files changed, 84 insertions, 45 deletions
diff --git a/kernel/irq_work.c b/kernel/irq_work.c index a82170e2fa78..4b0a890a304a 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -16,11 +16,12 @@ #include <linux/tick.h> #include <linux/cpu.h> #include <linux/notifier.h> +#include <linux/smp.h> #include <asm/processor.h> -static DEFINE_PER_CPU(struct llist_head, irq_work_list); -static DEFINE_PER_CPU(int, irq_work_raised); +static DEFINE_PER_CPU(struct llist_head, raised_list); +static DEFINE_PER_CPU(struct llist_head, lazy_list); /* * Claim the entry so that no one else will poke at it. @@ -55,12 +56,34 @@ void __weak arch_irq_work_raise(void) */ } +#ifdef CONFIG_SMP /* - * Enqueue the irq_work @entry unless it's already pending + * Enqueue the irq_work @work on @cpu unless it's already pending * somewhere. * * Can be re-enqueued while the callback is still in progress. */ +bool irq_work_queue_on(struct irq_work *work, int cpu) +{ + /* All work should have been flushed before going offline */ + WARN_ON_ONCE(cpu_is_offline(cpu)); + + /* Arch remote IPI send/receive backend aren't NMI safe */ + WARN_ON_ONCE(in_nmi()); + + /* Only queue if not already pending */ + if (!irq_work_claim(work)) + return false; + + if (llist_add(&work->llnode, &per_cpu(raised_list, cpu))) + arch_send_call_function_single_ipi(cpu); + + return true; +} +EXPORT_SYMBOL_GPL(irq_work_queue_on); +#endif + +/* Enqueue the irq work @work on the current CPU */ bool irq_work_queue(struct irq_work *work) { /* Only queue if not already pending */ @@ -70,15 +93,13 @@ bool irq_work_queue(struct irq_work *work) /* Queue the entry and raise the IPI if needed. */ preempt_disable(); - llist_add(&work->llnode, &__get_cpu_var(irq_work_list)); - - /* - * If the work is not "lazy" or the tick is stopped, raise the irq - * work interrupt (if supported by the arch), otherwise, just wait - * for the next tick. 
- */ - if (!(work->flags & IRQ_WORK_LAZY) || tick_nohz_tick_stopped()) { - if (!this_cpu_cmpxchg(irq_work_raised, 0, 1)) + /* If the work is "lazy", handle it from next tick if any */ + if (work->flags & IRQ_WORK_LAZY) { + if (llist_add(&work->llnode, &__get_cpu_var(lazy_list)) && + tick_nohz_tick_stopped()) + arch_irq_work_raise(); + } else { + if (llist_add(&work->llnode, &__get_cpu_var(raised_list))) arch_irq_work_raise(); } @@ -90,10 +111,11 @@ EXPORT_SYMBOL_GPL(irq_work_queue); bool irq_work_needs_cpu(void) { - struct llist_head *this_list; + struct llist_head *raised, *lazy; - this_list = &__get_cpu_var(irq_work_list); - if (llist_empty(this_list)) + raised = &__get_cpu_var(raised_list); + lazy = &__get_cpu_var(lazy_list); + if (llist_empty(raised) && llist_empty(lazy)) return false; /* All work should have been flushed before going offline */ @@ -102,28 +124,18 @@ bool irq_work_needs_cpu(void) return true; } -static void __irq_work_run(void) +static void irq_work_run_list(struct llist_head *list) { unsigned long flags; struct irq_work *work; - struct llist_head *this_list; struct llist_node *llnode; + BUG_ON(!irqs_disabled()); - /* - * Reset the "raised" state right before we check the list because - * an NMI may enqueue after we find the list empty from the runner. - */ - __this_cpu_write(irq_work_raised, 0); - barrier(); - - this_list = &__get_cpu_var(irq_work_list); - if (llist_empty(this_list)) + if (llist_empty(list)) return; - BUG_ON(!irqs_disabled()); - - llnode = llist_del_all(this_list); + llnode = llist_del_all(list); while (llnode != NULL) { work = llist_entry(llnode, struct irq_work, llnode); @@ -148,6 +160,12 @@ static void __irq_work_run(void) } } +static void __irq_work_run(void) +{ + irq_work_run_list(&__get_cpu_var(raised_list)); + irq_work_run_list(&__get_cpu_var(lazy_list)); +} + /* * Run the irq_work entries on this cpu. Requires to be ran from hardirq * context with local IRQs disabled. 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3bdf01b494fe..7f3063c153d8 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -684,10 +684,16 @@ static void wake_up_idle_cpu(int cpu) static bool wake_up_full_nohz_cpu(int cpu) { + /* + * We just need the target to call irq_exit() and re-evaluate + * the next tick. The nohz full kick at least implies that. + * If needed we can still optimize that later with an + * empty IRQ. + */ if (tick_nohz_full_cpu(cpu)) { if (cpu != smp_processor_id() || tick_nohz_tick_stopped()) - smp_send_reschedule(cpu); + tick_nohz_full_kick_cpu(cpu); return true; } @@ -734,10 +740,11 @@ bool sched_can_stop_tick(void) rq = this_rq(); - /* Make sure rq->nr_running update is visible after the IPI */ - smp_rmb(); - - /* More than one running task need preemption */ + /* + * More than one running task need preemption. + * nr_running update is assumed to be visible + * after IPI is sent from wakers. + */ if (rq->nr_running > 1) return false; @@ -1568,9 +1575,7 @@ void scheduler_ipi(void) */ preempt_fold_need_resched(); - if (llist_empty(&this_rq()->wake_list) - && !tick_nohz_full_cpu(smp_processor_id()) - && !got_nohz_idle_kick()) + if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick()) return; /* @@ -1587,7 +1592,6 @@ void scheduler_ipi(void) * somewhat pessimize the simple resched case. */ irq_enter(); - tick_nohz_full_check(); sched_ttwu_pending(); /* diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 31cc02ebc54e..eb8567610295 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1221,9 +1221,15 @@ static inline void add_nr_running(struct rq *rq, unsigned count) #ifdef CONFIG_NO_HZ_FULL if (prev_nr < 2 && rq->nr_running >= 2) { if (tick_nohz_full_cpu(rq->cpu)) { - /* Order rq->nr_running write against the IPI */ - smp_wmb(); - smp_send_reschedule(rq->cpu); + /* + * Tick is needed if more than one task runs on a CPU. + * Send the target an IPI to kick it out of nohz mode. 
+ * + * We assume that IPI implies full memory barrier and the + * new value of rq->nr_running is visible on reception + * from the target. + */ + tick_nohz_full_kick_cpu(rq->cpu); } } #endif diff --git a/kernel/smp.c b/kernel/smp.c index 306f8180b0d5..a1812d184aed 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -3,6 +3,7 @@ * * (C) Jens Axboe <jens.axboe@oracle.com> 2008 */ +#include <linux/irq_work.h> #include <linux/rcupdate.h> #include <linux/rculist.h> #include <linux/kernel.h> @@ -210,6 +211,14 @@ void generic_smp_call_function_single_interrupt(void) csd->func(csd->info); csd_unlock(csd); } + + /* + * Handle irq works queued remotely by irq_work_queue_on(). + * Smp functions above are typically synchronous so they + * better run first since some other CPUs may be busy waiting + * for them. + */ + irq_work_run(); } /* diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 6558b7ac112d..3d63944a3eca 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -224,13 +224,15 @@ static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { }; /* - * Kick the current CPU if it's full dynticks in order to force it to + * Kick the CPU if it's full dynticks in order to force it to * re-evaluate its dependency on the tick and restart it if necessary. */ -void tick_nohz_full_kick(void) +void tick_nohz_full_kick_cpu(int cpu) { - if (tick_nohz_full_cpu(smp_processor_id())) - irq_work_queue(&__get_cpu_var(nohz_full_kick_work)); + if (!tick_nohz_full_cpu(cpu)) + return; + + irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu); } static void nohz_full_kick_ipi(void *info) |