summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cpu.c1162
-rw-r--r--kernel/events/uprobes.c1
-rw-r--r--kernel/irq/Kconfig4
-rw-r--r--kernel/irq/Makefile1
-rw-r--r--kernel/irq/chip.c4
-rw-r--r--kernel/irq/handle.c6
-rw-r--r--kernel/irq/internals.h7
-rw-r--r--kernel/irq/ipi.c326
-rw-r--r--kernel/irq/irqdesc.c21
-rw-r--r--kernel/irq/irqdomain.c11
-rw-r--r--kernel/irq/manage.c8
-rw-r--r--kernel/irq/proc.c2
-rw-r--r--kernel/irq/spurious.c4
-rw-r--r--kernel/rcu/rcutorture.c14
-rw-r--r--kernel/rcu/tiny_plugin.h15
-rw-r--r--kernel/rcu/tree.c214
-rw-r--r--kernel/rcu/tree.h42
-rw-r--r--kernel/rcu/tree_plugin.h27
-rw-r--r--kernel/rcu/update.c1
-rw-r--r--kernel/sched/core.c10
-rw-r--r--kernel/sched/idle.c9
-rw-r--r--kernel/smp.c1
-rw-r--r--kernel/smpboot.c6
-rw-r--r--kernel/smpboot.h6
-rw-r--r--kernel/time/clocksource.c52
-rw-r--r--kernel/time/jiffies.c2
-rw-r--r--kernel/time/timekeeping.c286
27 files changed, 1890 insertions, 352 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 5b9d39633ce9..6ea42e8da861 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -22,13 +22,88 @@
#include <linux/lockdep.h>
#include <linux/tick.h>
#include <linux/irq.h>
+#include <linux/smpboot.h>
+
#include <trace/events/power.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/cpuhp.h>
#include "smpboot.h"
+/**
+ * cpuhp_cpu_state - Per cpu hotplug state storage
+ * @state: The current cpu state
+ * @target: The target state
+ * @thread: Pointer to the hotplug thread
+ * @should_run: Thread should execute
+ * @cb_stat: The state for a single callback (install/uninstall)
+ * @cb: Single callback function (install/uninstall)
+ * @result: Result of the operation
+ * @done: Signal completion to the issuer of the task
+ */
+struct cpuhp_cpu_state {
+ enum cpuhp_state state;
+ enum cpuhp_state target;
+#ifdef CONFIG_SMP
+ struct task_struct *thread;
+ bool should_run;
+ enum cpuhp_state cb_state;
+ int (*cb)(unsigned int cpu);
+ int result;
+ struct completion done;
+#endif
+};
+
+static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state);
+
+/**
+ * cpuhp_step - Hotplug state machine step
+ * @name: Name of the step
+ * @startup: Startup function of the step
+ * @teardown: Teardown function of the step
+ * @skip_onerr: Do not invoke the functions on error rollback
+ * Will go away once the notifiers are gone
+ * @cant_stop: Bringup/teardown can't be stopped at this step
+ */
+struct cpuhp_step {
+ const char *name;
+ int (*startup)(unsigned int cpu);
+ int (*teardown)(unsigned int cpu);
+ bool skip_onerr;
+ bool cant_stop;
+};
+
+static DEFINE_MUTEX(cpuhp_state_mutex);
+static struct cpuhp_step cpuhp_bp_states[];
+static struct cpuhp_step cpuhp_ap_states[];
+
+/**
+ * cpuhp_invoke_callback _ Invoke the callbacks for a given state
+ * @cpu: The cpu for which the callback should be invoked
+ * @step: The step in the state machine
+ * @cb: The callback function to invoke
+ *
+ * Called from cpu hotplug and from the state register machinery
+ */
+static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state step,
+ int (*cb)(unsigned int))
+{
+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+ int ret = 0;
+
+ if (cb) {
+ trace_cpuhp_enter(cpu, st->target, step, cb);
+ ret = cb(cpu);
+ trace_cpuhp_exit(cpu, st->state, step, ret);
+ }
+ return ret;
+}
+
#ifdef CONFIG_SMP
/* Serializes the updates to cpu_online_mask, cpu_present_mask */
static DEFINE_MUTEX(cpu_add_remove_lock);
+bool cpuhp_tasks_frozen;
+EXPORT_SYMBOL_GPL(cpuhp_tasks_frozen);
/*
* The following two APIs (cpu_maps_update_begin/done) must be used when
@@ -207,31 +282,281 @@ int __register_cpu_notifier(struct notifier_block *nb)
return raw_notifier_chain_register(&cpu_chain, nb);
}
-static int __cpu_notify(unsigned long val, void *v, int nr_to_call,
+static int __cpu_notify(unsigned long val, unsigned int cpu, int nr_to_call,
int *nr_calls)
{
+ unsigned long mod = cpuhp_tasks_frozen ? CPU_TASKS_FROZEN : 0;
+ void *hcpu = (void *)(long)cpu;
+
int ret;
- ret = __raw_notifier_call_chain(&cpu_chain, val, v, nr_to_call,
+ ret = __raw_notifier_call_chain(&cpu_chain, val | mod, hcpu, nr_to_call,
nr_calls);
return notifier_to_errno(ret);
}
-static int cpu_notify(unsigned long val, void *v)
+static int cpu_notify(unsigned long val, unsigned int cpu)
{
- return __cpu_notify(val, v, -1, NULL);
+ return __cpu_notify(val, cpu, -1, NULL);
}
-#ifdef CONFIG_HOTPLUG_CPU
+/* Notifier wrappers for transitioning to state machine */
+static int notify_prepare(unsigned int cpu)
+{
+ int nr_calls = 0;
+ int ret;
+
+ ret = __cpu_notify(CPU_UP_PREPARE, cpu, -1, &nr_calls);
+ if (ret) {
+ nr_calls--;
+ printk(KERN_WARNING "%s: attempt to bring up CPU %u failed\n",
+ __func__, cpu);
+ __cpu_notify(CPU_UP_CANCELED, cpu, nr_calls, NULL);
+ }
+ return ret;
+}
+
+static int notify_online(unsigned int cpu)
+{
+ cpu_notify(CPU_ONLINE, cpu);
+ return 0;
+}
+
+static int notify_starting(unsigned int cpu)
+{
+ cpu_notify(CPU_STARTING, cpu);
+ return 0;
+}
+
+static int bringup_wait_for_ap(unsigned int cpu)
+{
+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+
+ wait_for_completion(&st->done);
+ return st->result;
+}
+
+static int bringup_cpu(unsigned int cpu)
+{
+ struct task_struct *idle = idle_thread_get(cpu);
+ int ret;
+
+ /* Arch-specific enabling code. */
+ ret = __cpu_up(cpu, idle);
+ if (ret) {
+ cpu_notify(CPU_UP_CANCELED, cpu);
+ return ret;
+ }
+ ret = bringup_wait_for_ap(cpu);
+ BUG_ON(!cpu_online(cpu));
+ return ret;
+}
+
+/*
+ * Hotplug state machine related functions
+ */
+static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st,
+ struct cpuhp_step *steps)
+{
+ for (st->state++; st->state < st->target; st->state++) {
+ struct cpuhp_step *step = steps + st->state;
+
+ if (!step->skip_onerr)
+ cpuhp_invoke_callback(cpu, st->state, step->startup);
+ }
+}
+
+static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
+ struct cpuhp_step *steps, enum cpuhp_state target)
+{
+ enum cpuhp_state prev_state = st->state;
+ int ret = 0;
+
+ for (; st->state > target; st->state--) {
+ struct cpuhp_step *step = steps + st->state;
+
+ ret = cpuhp_invoke_callback(cpu, st->state, step->teardown);
+ if (ret) {
+ st->target = prev_state;
+ undo_cpu_down(cpu, st, steps);
+ break;
+ }
+ }
+ return ret;
+}
+
+static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st,
+ struct cpuhp_step *steps)
+{
+ for (st->state--; st->state > st->target; st->state--) {
+ struct cpuhp_step *step = steps + st->state;
+
+ if (!step->skip_onerr)
+ cpuhp_invoke_callback(cpu, st->state, step->teardown);
+ }
+}
+
+static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
+ struct cpuhp_step *steps, enum cpuhp_state target)
+{
+ enum cpuhp_state prev_state = st->state;
+ int ret = 0;
+
+ while (st->state < target) {
+ struct cpuhp_step *step;
+
+ st->state++;
+ step = steps + st->state;
+ ret = cpuhp_invoke_callback(cpu, st->state, step->startup);
+ if (ret) {
+ st->target = prev_state;
+ undo_cpu_up(cpu, st, steps);
+ break;
+ }
+ }
+ return ret;
+}
+
+/*
+ * The cpu hotplug threads manage the bringup and teardown of the cpus
+ */
+static void cpuhp_create(unsigned int cpu)
+{
+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+
+ init_completion(&st->done);
+}
+
+static int cpuhp_should_run(unsigned int cpu)
+{
+ struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
+
+ return st->should_run;
+}
+
+/* Execute the teardown callbacks. Used to be CPU_DOWN_PREPARE */
+static int cpuhp_ap_offline(unsigned int cpu, struct cpuhp_cpu_state *st)
+{
+ enum cpuhp_state target = max((int)st->target, CPUHP_TEARDOWN_CPU);
+
+ return cpuhp_down_callbacks(cpu, st, cpuhp_ap_states, target);
+}
+
+/* Execute the online startup callbacks. Used to be CPU_ONLINE */
+static int cpuhp_ap_online(unsigned int cpu, struct cpuhp_cpu_state *st)
+{
+ return cpuhp_up_callbacks(cpu, st, cpuhp_ap_states, st->target);
+}
+
+/*
+ * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke
+ * callbacks when a state gets [un]installed at runtime.
+ */
+static void cpuhp_thread_fun(unsigned int cpu)
+{
+ struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
+ int ret = 0;
+
+ /*
+ * Paired with the mb() in cpuhp_kick_ap_work and
+ * cpuhp_invoke_ap_callback, so the work set is consistent visible.
+ */
+ smp_mb();
+ if (!st->should_run)
+ return;
+
+ st->should_run = false;
+
+ /* Single callback invocation for [un]install ? */
+ if (st->cb) {
+ if (st->cb_state < CPUHP_AP_ONLINE) {
+ local_irq_disable();
+ ret = cpuhp_invoke_callback(cpu, st->cb_state, st->cb);
+ local_irq_enable();
+ } else {
+ ret = cpuhp_invoke_callback(cpu, st->cb_state, st->cb);
+ }
+ } else {
+ /* Cannot happen .... */
+ BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);
+
+ /* Regular hotplug work */
+ if (st->state < st->target)
+ ret = cpuhp_ap_online(cpu, st);
+ else if (st->state > st->target)
+ ret = cpuhp_ap_offline(cpu, st);
+ }
+ st->result = ret;
+ complete(&st->done);
+}
-static void cpu_notify_nofail(unsigned long val, void *v)
+/* Invoke a single callback on a remote cpu */
+static int cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state,
+ int (*cb)(unsigned int))
{
- BUG_ON(cpu_notify(val, v));
+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+
+ if (!cpu_online(cpu))
+ return 0;
+
+ st->cb_state = state;
+ st->cb = cb;
+ /*
+ * Make sure the above stores are visible before should_run becomes
+ * true. Paired with the mb() above in cpuhp_thread_fun()
+ */
+ smp_mb();
+ st->should_run = true;
+ wake_up_process(st->thread);
+ wait_for_completion(&st->done);
+ return st->result;
}
+
+/* Regular hotplug invocation of the AP hotplug thread */
+static void __cpuhp_kick_ap_work(struct cpuhp_cpu_state *st)
+{
+ st->result = 0;
+ st->cb = NULL;
+ /*
+ * Make sure the above stores are visible before should_run becomes
+ * true. Paired with the mb() above in cpuhp_thread_fun()
+ */
+ smp_mb();
+ st->should_run = true;
+ wake_up_process(st->thread);
+}
+
+static int cpuhp_kick_ap_work(unsigned int cpu)
+{
+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+ enum cpuhp_state state = st->state;
+
+ trace_cpuhp_enter(cpu, st->target, state, cpuhp_kick_ap_work);
+ __cpuhp_kick_ap_work(st);
+ wait_for_completion(&st->done);
+ trace_cpuhp_exit(cpu, st->state, state, st->result);
+ return st->result;
+}
+
+static struct smp_hotplug_thread cpuhp_threads = {
+ .store = &cpuhp_state.thread,
+ .create = &cpuhp_create,
+ .thread_should_run = cpuhp_should_run,
+ .thread_fn = cpuhp_thread_fun,
+ .thread_comm = "cpuhp/%u",
+ .selfparking = true,
+};
+
+void __init cpuhp_threads_init(void)
+{
+ BUG_ON(smpboot_register_percpu_thread(&cpuhp_threads));
+ kthread_unpark(this_cpu_read(cpuhp_state.thread));
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
EXPORT_SYMBOL(register_cpu_notifier);
EXPORT_SYMBOL(__register_cpu_notifier);
-
void unregister_cpu_notifier(struct notifier_block *nb)
{
cpu_maps_update_begin();
@@ -311,57 +636,60 @@ static inline void check_for_tasks(int dead_cpu)
read_unlock(&tasklist_lock);
}
-struct take_cpu_down_param {
- unsigned long mod;
- void *hcpu;
-};
+static void cpu_notify_nofail(unsigned long val, unsigned int cpu)
+{
+ BUG_ON(cpu_notify(val, cpu));
+}
+
+static int notify_down_prepare(unsigned int cpu)
+{
+ int err, nr_calls = 0;
+
+ err = __cpu_notify(CPU_DOWN_PREPARE, cpu, -1, &nr_calls);
+ if (err) {
+ nr_calls--;
+ __cpu_notify(CPU_DOWN_FAILED, cpu, nr_calls, NULL);
+ pr_warn("%s: attempt to take down CPU %u failed\n",
+ __func__, cpu);
+ }
+ return err;
+}
+
+static int notify_dying(unsigned int cpu)
+{
+ cpu_notify(CPU_DYING, cpu);
+ return 0;
+}
/* Take this CPU down. */
static int take_cpu_down(void *_param)
{
- struct take_cpu_down_param *param = _param;
- int err;
+ struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
+ enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
+ int err, cpu = smp_processor_id();
/* Ensure this CPU doesn't handle any more interrupts. */
err = __cpu_disable();
if (err < 0)
return err;
- cpu_notify(CPU_DYING | param->mod, param->hcpu);
+ /* Invoke the former CPU_DYING callbacks */
+ for (; st->state > target; st->state--) {
+ struct cpuhp_step *step = cpuhp_ap_states + st->state;
+
+ cpuhp_invoke_callback(cpu, st->state, step->teardown);
+ }
/* Give up timekeeping duties */
tick_handover_do_timer();
/* Park the stopper thread */
- stop_machine_park((long)param->hcpu);
+ stop_machine_park(cpu);
return 0;
}
-/* Requires cpu_add_remove_lock to be held */
-static int _cpu_down(unsigned int cpu, int tasks_frozen)
+static int takedown_cpu(unsigned int cpu)
{
- int err, nr_calls = 0;
- void *hcpu = (void *)(long)cpu;
- unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
- struct take_cpu_down_param tcd_param = {
- .mod = mod,
- .hcpu = hcpu,
- };
-
- if (num_online_cpus() == 1)
- return -EBUSY;
-
- if (!cpu_online(cpu))
- return -EINVAL;
-
- cpu_hotplug_begin();
-
- err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
- if (err) {
- nr_calls--;
- __cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL);
- pr_warn("%s: attempt to take down CPU %u failed\n",
- __func__, cpu);
- goto out_release;
- }
+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+ int err;
/*
* By now we've cleared cpu_active_mask, wait for all preempt-disabled
@@ -378,6 +706,8 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen)
else
synchronize_rcu();
+ /* Park the smpboot threads */
+ kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
smpboot_park_threads(cpu);
/*
@@ -389,12 +719,12 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen)
/*
* So now all preempt/rcu users must observe !cpu_active().
*/
- err = stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
+ err = stop_machine(take_cpu_down, NULL, cpumask_of(cpu));
if (err) {
/* CPU didn't die: tell everyone. Can't complain. */
- cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu);
+ cpu_notify_nofail(CPU_DOWN_FAILED, cpu);
irq_unlock_sparse();
- goto out_release;
+ return err;
}
BUG_ON(cpu_online(cpu));
@@ -405,10 +735,8 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen)
*
* Wait for the stop thread to go away.
*/
- while (!per_cpu(cpu_dead_idle, cpu))
- cpu_relax();
- smp_mb(); /* Read from cpu_dead_idle before __cpu_die(). */
- per_cpu(cpu_dead_idle, cpu) = false;
+ wait_for_completion(&st->done);
+ BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);
/* Interrupts are moved away from the dying cpu, reenable alloc/free */
irq_unlock_sparse();
@@ -417,20 +745,104 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen)
/* This actually kills the CPU. */
__cpu_die(cpu);
- /* CPU is completely dead: tell everyone. Too late to complain. */
tick_cleanup_dead_cpu(cpu);
- cpu_notify_nofail(CPU_DEAD | mod, hcpu);
+ return 0;
+}
+static int notify_dead(unsigned int cpu)
+{
+ cpu_notify_nofail(CPU_DEAD, cpu);
check_for_tasks(cpu);
+ return 0;
+}
-out_release:
+static void cpuhp_complete_idle_dead(void *arg)
+{
+ struct cpuhp_cpu_state *st = arg;
+
+ complete(&st->done);
+}
+
+void cpuhp_report_idle_dead(void)
+{
+ struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
+
+ BUG_ON(st->state != CPUHP_AP_OFFLINE);
+ rcu_report_dead(smp_processor_id());
+ st->state = CPUHP_AP_IDLE_DEAD;
+ /*
+ * We cannot call complete after rcu_report_dead() so we delegate it
+ * to an online cpu.
+ */
+ smp_call_function_single(cpumask_first(cpu_online_mask),
+ cpuhp_complete_idle_dead, st, 0);
+}
+
+#else
+#define notify_down_prepare NULL
+#define takedown_cpu NULL
+#define notify_dead NULL
+#define notify_dying NULL
+#endif
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+/* Requires cpu_add_remove_lock to be held */
+static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
+ enum cpuhp_state target)
+{
+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+ int prev_state, ret = 0;
+ bool hasdied = false;
+
+ if (num_online_cpus() == 1)
+ return -EBUSY;
+
+ if (!cpu_present(cpu))
+ return -EINVAL;
+
+ cpu_hotplug_begin();
+
+ cpuhp_tasks_frozen = tasks_frozen;
+
+ prev_state = st->state;
+ st->target = target;
+ /*
+ * If the current CPU state is in the range of the AP hotplug thread,
+ * then we need to kick the thread.
+ */
+ if (st->state > CPUHP_TEARDOWN_CPU) {
+ ret = cpuhp_kick_ap_work(cpu);
+ /*
+ * The AP side has done the error rollback already. Just
+ * return the error code..
+ */
+ if (ret)
+ goto out;
+
+ /*
+ * We might have stopped still in the range of the AP hotplug
+ * thread. Nothing to do anymore.
+ */
+ if (st->state > CPUHP_TEARDOWN_CPU)
+ goto out;
+ }
+ /*
+ * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
+ * to do the further cleanups.
+ */
+ ret = cpuhp_down_callbacks(cpu, st, cpuhp_bp_states, target);
+
+ hasdied = prev_state != st->state && st->state == CPUHP_OFFLINE;
+out:
cpu_hotplug_done();
- if (!err)
- cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu);
- return err;
+ /* This post dead nonsense must die */
+ if (!ret && hasdied)
+ cpu_notify_nofail(CPU_POST_DEAD, cpu);
+ return ret;
}
-int cpu_down(unsigned int cpu)
+static int do_cpu_down(unsigned int cpu, enum cpuhp_state target)
{
int err;
@@ -441,100 +853,131 @@ int cpu_down(unsigned int cpu)
goto out;
}
- err = _cpu_down(cpu, 0);
+ err = _cpu_down(cpu, 0, target);
out:
cpu_maps_update_done();
return err;
}
+int cpu_down(unsigned int cpu)
+{
+ return do_cpu_down(cpu, CPUHP_OFFLINE);
+}
EXPORT_SYMBOL(cpu_down);
#endif /*CONFIG_HOTPLUG_CPU*/
-/*
- * Unpark per-CPU smpboot kthreads at CPU-online time.
+/**
+ * notify_cpu_starting(cpu) - call the CPU_STARTING notifiers
+ * @cpu: cpu that just started
+ *
+ * This function calls the cpu_chain notifiers with CPU_STARTING.
+ * It must be called by the arch code on the new cpu, before the new cpu
+ * enables interrupts and before the "boot" cpu returns from __cpu_up().
*/
-static int smpboot_thread_call(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
+void notify_cpu_starting(unsigned int cpu)
{
- int cpu = (long)hcpu;
-
- switch (action & ~CPU_TASKS_FROZEN) {
+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+ enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
- case CPU_DOWN_FAILED:
- case CPU_ONLINE:
- smpboot_unpark_threads(cpu);
- break;
+ while (st->state < target) {
+ struct cpuhp_step *step;
- default:
- break;
+ st->state++;
+ step = cpuhp_ap_states + st->state;
+ cpuhp_invoke_callback(cpu, st->state, step->startup);
}
-
- return NOTIFY_OK;
}
-static struct notifier_block smpboot_thread_notifier = {
- .notifier_call = smpboot_thread_call,
- .priority = CPU_PRI_SMPBOOT,
-};
-
-void smpboot_thread_init(void)
+/*
+ * Called from the idle task. We need to set active here, so we can kick off
+ * the stopper thread and unpark the smpboot threads. If the target state is
+ * beyond CPUHP_AP_ONLINE_IDLE we kick cpuhp thread and let it bring up the
+ * cpu further.
+ */
+void cpuhp_online_idle(enum cpuhp_state state)
{
- register_cpu_notifier(&smpboot_thread_notifier);
+ struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
+ unsigned int cpu = smp_processor_id();
+
+ /* Happens for the boot cpu */
+ if (state != CPUHP_AP_ONLINE_IDLE)
+ return;
+
+ st->state = CPUHP_AP_ONLINE_IDLE;
+
+ /* The cpu is marked online, set it active now */
+ set_cpu_active(cpu, true);
+ /* Unpark the stopper thread and the hotplug thread of this cpu */
+ stop_machine_unpark(cpu);
+ kthread_unpark(st->thread);
+
+ /* Should we go further up ? */
+ if (st->target > CPUHP_AP_ONLINE_IDLE)
+ __cpuhp_kick_ap_work(st);
+ else
+ complete(&st->done);
}
/* Requires cpu_add_remove_lock to be held */
-static int _cpu_up(unsigned int cpu, int tasks_frozen)
+static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
{
- int ret, nr_calls = 0;
- void *hcpu = (void *)(long)cpu;
- unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
struct task_struct *idle;
+ int ret = 0;
cpu_hotplug_begin();
- if (cpu_online(cpu) || !cpu_present(cpu)) {
+ if (!cpu_present(cpu)) {
ret = -EINVAL;
goto out;
}
- idle = idle_thread_get(cpu);
- if (IS_ERR(idle)) {
- ret = PTR_ERR(idle);
- goto out;
- }
-
- ret = smpboot_create_threads(cpu);
- if (ret)
+ /*
+ * The caller of do_cpu_up might have raced with another
+ * caller. Ignore it for now.
+ */
+ if (st->state >= target)
goto out;
- ret = __cpu_notify(CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls);
- if (ret) {
- nr_calls--;
- pr_warn("%s: attempt to bring up CPU %u failed\n",
- __func__, cpu);
- goto out_notify;
+ if (st->state == CPUHP_OFFLINE) {
+ /* Let it fail before we try to bring the cpu up */
+ idle = idle_thread_get(cpu);
+ if (IS_ERR(idle)) {
+ ret = PTR_ERR(idle);
+ goto out;
+ }
}
- /* Arch-specific enabling code. */
- ret = __cpu_up(cpu, idle);
-
- if (ret != 0)
- goto out_notify;
- BUG_ON(!cpu_online(cpu));
+ cpuhp_tasks_frozen = tasks_frozen;
- /* Now call notifier in preparation. */
- cpu_notify(CPU_ONLINE | mod, hcpu);
+ st->target = target;
+ /*
+ * If the current CPU state is in the range of the AP hotplug thread,
+ * then we need to kick the thread once more.
+ */
+ if (st->state > CPUHP_BRINGUP_CPU) {
+ ret = cpuhp_kick_ap_work(cpu);
+ /*
+ * The AP side has done the error rollback already. Just
+ * return the error code..
+ */
+ if (ret)
+ goto out;
+ }
-out_notify:
- if (ret != 0)
- __cpu_notify(CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL);
+ /*
+ * Try to reach the target state. We max out on the BP at
+ * CPUHP_BRINGUP_CPU. After that the AP hotplug thread is
+ * responsible for bringing it up to the target state.
+ */
+ target = min((int)target, CPUHP_BRINGUP_CPU);
+ ret = cpuhp_up_callbacks(cpu, st, cpuhp_bp_states, target);
out:
cpu_hotplug_done();
-
return ret;
}
-int cpu_up(unsigned int cpu)
+static int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
{
int err = 0;
@@ -558,12 +1001,16 @@ int cpu_up(unsigned int cpu)
goto out;
}
- err = _cpu_up(cpu, 0);
-
+ err = _cpu_up(cpu, 0, target);
out:
cpu_maps_update_done();
return err;
}
+
+int cpu_up(unsigned int cpu)
+{
+ return do_cpu_up(cpu, CPUHP_ONLINE);
+}
EXPORT_SYMBOL_GPL(cpu_up);
#ifdef CONFIG_PM_SLEEP_SMP
@@ -586,7 +1033,7 @@ int disable_nonboot_cpus(void)
if (cpu == first_cpu)
continue;
trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
- error = _cpu_down(cpu, 1);
+ error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
if (!error)
cpumask_set_cpu(cpu, frozen_cpus);
@@ -636,7 +1083,7 @@ void enable_nonboot_cpus(void)
for_each_cpu(cpu, frozen_cpus) {
trace_suspend_resume(TPS("CPU_ON"), cpu, true);
- error = _cpu_up(cpu, 1);
+ error = _cpu_up(cpu, 1, CPUHP_ONLINE);
trace_suspend_resume(TPS("CPU_ON"), cpu, false);
if (!error) {
pr_info("CPU%d is up\n", cpu);
@@ -709,26 +1156,463 @@ core_initcall(cpu_hotplug_pm_sync_init);
#endif /* CONFIG_PM_SLEEP_SMP */
+#endif /* CONFIG_SMP */
+
+/* Boot processor state steps */
+static struct cpuhp_step cpuhp_bp_states[] = {
+ [CPUHP_OFFLINE] = {
+ .name = "offline",
+ .startup = NULL,
+ .teardown = NULL,
+ },
+#ifdef CONFIG_SMP
+ [CPUHP_CREATE_THREADS]= {
+ .name = "threads:create",
+ .startup = smpboot_create_threads,
+ .teardown = NULL,
+ .cant_stop = true,
+ },
+ /*
+ * Preparatory and dead notifiers. Will be replaced once the notifiers
+ * are converted to states.
+ */
+ [CPUHP_NOTIFY_PREPARE] = {
+ .name = "notify:prepare",
+ .startup = notify_prepare,
+ .teardown = notify_dead,
+ .skip_onerr = true,
+ .cant_stop = true,
+ },
+ /* Kicks the plugged cpu into life */
+ [CPUHP_BRINGUP_CPU] = {
+ .name = "cpu:bringup",
+ .startup = bringup_cpu,
+ .teardown = NULL,
+ .cant_stop = true,
+ },
+ /*
+ * Handled on controll processor until the plugged processor manages
+ * this itself.
+ */
+ [CPUHP_TEARDOWN_CPU] = {
+ .name = "cpu:teardown",
+ .startup = NULL,
+ .teardown = takedown_cpu,
+ .cant_stop = true,
+ },
+#endif
+};
+
+/* Application processor state steps */
+static struct cpuhp_step cpuhp_ap_states[] = {
+#ifdef CONFIG_SMP
+ /* Final state before CPU kills itself */
+ [CPUHP_AP_IDLE_DEAD] = {
+ .name = "idle:dead",
+ },
+ /*
+ * Last state before CPU enters the idle loop to die. Transient state
+ * for synchronization.
+ */
+ [CPUHP_AP_OFFLINE] = {
+ .name = "ap:offline",
+ .cant_stop = true,
+ },
+ /*
+ * Low level startup/teardown notifiers. Run with interrupts
+ * disabled. Will be removed once the notifiers are converted to
+ * states.
+ */
+ [CPUHP_AP_NOTIFY_STARTING] = {
+ .name = "notify:starting",
+ .startup = notify_starting,
+ .teardown = notify_dying,
+ .skip_onerr = true,
+ .cant_stop = true,
+ },
+ /* Entry state on starting. Interrupts enabled from here on. Transient
+ * state for synchronsization */
+ [CPUHP_AP_ONLINE] = {
+ .name = "ap:online",
+ },
+ /* Handle smpboot threads park/unpark */
+ [CPUHP_AP_SMPBOOT_THREADS] = {
+ .name = "smpboot:threads",
+ .startup = smpboot_unpark_threads,
+ .teardown = NULL,
+ },
+ /*
+ * Online/down_prepare notifiers. Will be removed once the notifiers
+ * are converted to states.
+ */
+ [CPUHP_AP_NOTIFY_ONLINE] = {
+ .name = "notify:online",
+ .startup = notify_online,
+ .teardown = notify_down_prepare,
+ },
+#endif
+ /*
+ * The dynamically registered state space is here
+ */
+
+ /* CPU is fully up and running. */
+ [CPUHP_ONLINE] = {
+ .name = "online",
+ .startup = NULL,
+ .teardown = NULL,
+ },
+};
+
+/* Sanity check for callbacks */
+static int cpuhp_cb_check(enum cpuhp_state state)
+{
+ if (state <= CPUHP_OFFLINE || state >= CPUHP_ONLINE)
+ return -EINVAL;
+ return 0;
+}
+
+static bool cpuhp_is_ap_state(enum cpuhp_state state)
+{
+ /*
+ * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
+ * purposes as that state is handled explicitely in cpu_down.
+ */
+ return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
+}
+
+static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
+{
+ struct cpuhp_step *sp;
+
+ sp = cpuhp_is_ap_state(state) ? cpuhp_ap_states : cpuhp_bp_states;
+ return sp + state;
+}
+
+static void cpuhp_store_callbacks(enum cpuhp_state state,
+ const char *name,
+ int (*startup)(unsigned int cpu),
+ int (*teardown)(unsigned int cpu))
+{
+ /* (Un)Install the callbacks for further cpu hotplug operations */
+ struct cpuhp_step *sp;
+
+ mutex_lock(&cpuhp_state_mutex);
+ sp = cpuhp_get_step(state);
+ sp->startup = startup;
+ sp->teardown = teardown;
+ sp->name = name;
+ mutex_unlock(&cpuhp_state_mutex);
+}
+
+static void *cpuhp_get_teardown_cb(enum cpuhp_state state)
+{
+ return cpuhp_get_step(state)->teardown;
+}
+
+/*
+ * Call the startup/teardown function for a step either on the AP or
+ * on the current CPU.
+ */
+static int cpuhp_issue_call(int cpu, enum cpuhp_state state,
+ int (*cb)(unsigned int), bool bringup)
+{
+ int ret;
+
+ if (!cb)
+ return 0;
+ /*
+ * The non AP bound callbacks can fail on bringup. On teardown
+ * e.g. module removal we crash for now.
+ */
+#ifdef CONFIG_SMP
+ if (cpuhp_is_ap_state(state))
+ ret = cpuhp_invoke_ap_callback(cpu, state, cb);
+ else
+ ret = cpuhp_invoke_callback(cpu, state, cb);
+#else
+ ret = cpuhp_invoke_callback(cpu, state, cb);
+#endif
+ BUG_ON(ret && !bringup);
+ return ret;
+}
+
+/*
+ * Called from __cpuhp_setup_state on a recoverable failure.
+ *
+ * Note: The teardown callbacks for rollback are not allowed to fail!
+ */
+static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state,
+ int (*teardown)(unsigned int cpu))
+{
+ int cpu;
+
+ if (!teardown)
+ return;
+
+ /* Roll back the already executed steps on the other cpus */
+ for_each_present_cpu(cpu) {
+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+ int cpustate = st->state;
+
+ if (cpu >= failedcpu)
+ break;
+
+ /* Did we invoke the startup call on that cpu ? */
+ if (cpustate >= state)
+ cpuhp_issue_call(cpu, state, teardown, false);
+ }
+}
+
+/*
+ * Returns a free for dynamic slot assignment of the Online state. The states
+ * are protected by the cpuhp_slot_states mutex and an empty slot is identified
+ * by having no name assigned.
+ */
+static int cpuhp_reserve_state(enum cpuhp_state state)
+{
+ enum cpuhp_state i;
+
+ mutex_lock(&cpuhp_state_mutex);
+ for (i = CPUHP_AP_ONLINE_DYN; i <= CPUHP_AP_ONLINE_DYN_END; i++) {
+ if (cpuhp_ap_states[i].name)
+ continue;
+
+ cpuhp_ap_states[i].name = "Reserved";
+ mutex_unlock(&cpuhp_state_mutex);
+ return i;
+ }
+ mutex_unlock(&cpuhp_state_mutex);
+ WARN(1, "No more dynamic states available for CPU hotplug\n");
+ return -ENOSPC;
+}
+
/**
- * notify_cpu_starting(cpu) - call the CPU_STARTING notifiers
- * @cpu: cpu that just started
+ * __cpuhp_setup_state - Setup the callbacks for an hotplug machine state
+ * @state: The state to setup
+ * @invoke: If true, the startup function is invoked for cpus where
+ * cpu state >= @state
+ * @startup: startup callback function
+ * @teardown: teardown callback function
*
- * This function calls the cpu_chain notifiers with CPU_STARTING.
- * It must be called by the arch code on the new cpu, before the new cpu
- * enables interrupts and before the "boot" cpu returns from __cpu_up().
+ * Returns 0 if successful, otherwise a proper error code
*/
-void notify_cpu_starting(unsigned int cpu)
+int __cpuhp_setup_state(enum cpuhp_state state,
+ const char *name, bool invoke,
+ int (*startup)(unsigned int cpu),
+ int (*teardown)(unsigned int cpu))
{
- unsigned long val = CPU_STARTING;
+ int cpu, ret = 0;
+ int dyn_state = 0;
-#ifdef CONFIG_PM_SLEEP_SMP
- if (frozen_cpus != NULL && cpumask_test_cpu(cpu, frozen_cpus))
- val = CPU_STARTING_FROZEN;
-#endif /* CONFIG_PM_SLEEP_SMP */
- cpu_notify(val, (void *)(long)cpu);
+ if (cpuhp_cb_check(state) || !name)
+ return -EINVAL;
+
+ get_online_cpus();
+
+ /* currently assignments for the ONLINE state are possible */
+ if (state == CPUHP_AP_ONLINE_DYN) {
+ dyn_state = 1;
+ ret = cpuhp_reserve_state(state);
+ if (ret < 0)
+ goto out;
+ state = ret;
+ }
+
+ cpuhp_store_callbacks(state, name, startup, teardown);
+
+ if (!invoke || !startup)
+ goto out;
+
+ /*
+ * Try to call the startup callback for each present cpu
+ * depending on the hotplug state of the cpu.
+ */
+ for_each_present_cpu(cpu) {
+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+ int cpustate = st->state;
+
+ if (cpustate < state)
+ continue;
+
+ ret = cpuhp_issue_call(cpu, state, startup, true);
+ if (ret) {
+ cpuhp_rollback_install(cpu, state, teardown);
+ cpuhp_store_callbacks(state, NULL, NULL, NULL);
+ goto out;
+ }
+ }
+out:
+ put_online_cpus();
+ if (!ret && dyn_state)
+ return state;
+ return ret;
}
+EXPORT_SYMBOL(__cpuhp_setup_state);
-#endif /* CONFIG_SMP */
+/**
+ * __cpuhp_remove_state - Remove the callbacks for an hotplug machine state
+ * @state: The state to remove
+ * @invoke: If true, the teardown function is invoked for cpus where
+ * cpu state >= @state
+ *
+ * The teardown callback is currently not allowed to fail. Think
+ * about module removal!
+ */
+void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
+{
+ int (*teardown)(unsigned int cpu) = cpuhp_get_teardown_cb(state);
+ int cpu;
+
+ BUG_ON(cpuhp_cb_check(state));
+
+ get_online_cpus();
+
+ if (!invoke || !teardown)
+ goto remove;
+
+ /*
+ * Call the teardown callback for each present cpu depending
+ * on the hotplug state of the cpu. This function is not
+ * allowed to fail currently!
+ */
+ for_each_present_cpu(cpu) {
+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+ int cpustate = st->state;
+
+ if (cpustate >= state)
+ cpuhp_issue_call(cpu, state, teardown, false);
+ }
+remove:
+ cpuhp_store_callbacks(state, NULL, NULL, NULL);
+ put_online_cpus();
+}
+EXPORT_SYMBOL(__cpuhp_remove_state);
+
+#if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU)
+static ssize_t show_cpuhp_state(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
+
+ return sprintf(buf, "%d\n", st->state);
+}
+static DEVICE_ATTR(state, 0444, show_cpuhp_state, NULL);
+
+static ssize_t write_cpuhp_target(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
+ struct cpuhp_step *sp;
+ int target, ret;
+
+ ret = kstrtoint(buf, 10, &target);
+ if (ret)
+ return ret;
+
+#ifdef CONFIG_CPU_HOTPLUG_STATE_CONTROL
+ if (target < CPUHP_OFFLINE || target > CPUHP_ONLINE)
+ return -EINVAL;
+#else
+ if (target != CPUHP_OFFLINE && target != CPUHP_ONLINE)
+ return -EINVAL;
+#endif
+
+ ret = lock_device_hotplug_sysfs();
+ if (ret)
+ return ret;
+
+ mutex_lock(&cpuhp_state_mutex);
+ sp = cpuhp_get_step(target);
+ ret = !sp->name || sp->cant_stop ? -EINVAL : 0;
+ mutex_unlock(&cpuhp_state_mutex);
+ if (ret)
+ return ret;
+
+ if (st->state < target)
+ ret = do_cpu_up(dev->id, target);
+ else
+ ret = do_cpu_down(dev->id, target);
+
+ unlock_device_hotplug();
+ return ret ? ret : count;
+}
+
+static ssize_t show_cpuhp_target(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
+
+ return sprintf(buf, "%d\n", st->target);
+}
+static DEVICE_ATTR(target, 0644, show_cpuhp_target, write_cpuhp_target);
+
+static struct attribute *cpuhp_cpu_attrs[] = {
+ &dev_attr_state.attr,
+ &dev_attr_target.attr,
+ NULL
+};
+
+static struct attribute_group cpuhp_cpu_attr_group = {
+ .attrs = cpuhp_cpu_attrs,
+ .name = "hotplug",
+ NULL
+};
+
+static ssize_t show_cpuhp_states(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ ssize_t cur, res = 0;
+ int i;
+
+ mutex_lock(&cpuhp_state_mutex);
+ for (i = CPUHP_OFFLINE; i <= CPUHP_ONLINE; i++) {
+ struct cpuhp_step *sp = cpuhp_get_step(i);
+
+ if (sp->name) {
+ cur = sprintf(buf, "%3d: %s\n", i, sp->name);
+ buf += cur;
+ res += cur;
+ }
+ }
+ mutex_unlock(&cpuhp_state_mutex);
+ return res;
+}
+static DEVICE_ATTR(states, 0444, show_cpuhp_states, NULL);
+
+static struct attribute *cpuhp_cpu_root_attrs[] = {
+ &dev_attr_states.attr,
+ NULL
+};
+
+static struct attribute_group cpuhp_cpu_root_attr_group = {
+ .attrs = cpuhp_cpu_root_attrs,
+ .name = "hotplug",
+ NULL
+};
+
+static int __init cpuhp_sysfs_init(void)
+{
+ int cpu, ret;
+
+ ret = sysfs_create_group(&cpu_subsys.dev_root->kobj,
+ &cpuhp_cpu_root_attr_group);
+ if (ret)
+ return ret;
+
+ for_each_possible_cpu(cpu) {
+ struct device *dev = get_cpu_device(cpu);
+
+ if (!dev)
+ continue;
+ ret = sysfs_create_group(&dev->kobj, &cpuhp_cpu_attr_group);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+device_initcall(cpuhp_sysfs_init);
+#endif
/*
* cpu_bit_bitmap[] is a special, "compressed" data structure that
@@ -789,3 +1673,25 @@ void init_cpu_online(const struct cpumask *src)
{
cpumask_copy(&__cpu_online_mask, src);
}
+
+/*
+ * Activate the first processor.
+ */
+void __init boot_cpu_init(void)
+{
+ int cpu = smp_processor_id();
+
+ /* Mark the boot cpu "present", "online" etc for SMP and UP case */
+ set_cpu_online(cpu, true);
+ set_cpu_active(cpu, true);
+ set_cpu_present(cpu, true);
+ set_cpu_possible(cpu, true);
+}
+
+/*
+ * Must be called _AFTER_ setting up the per_cpu areas
+ */
+void __init boot_cpu_state_init(void)
+{
+ per_cpu_ptr(&cpuhp_state, smp_processor_id())->state = CPUHP_ONLINE;
+}
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 0167679182c0..5f6ce931f1ea 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1178,6 +1178,7 @@ static struct xol_area *__create_xol_area(unsigned long vaddr)
goto free_area;
area->xol_mapping.name = "[uprobes]";
+ area->xol_mapping.fault = NULL;
area->xol_mapping.pages = area->pages;
area->pages[0] = alloc_page(GFP_HIGHUSER);
if (!area->pages[0])
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 3b48dab80164..3bbfd6a9c475 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -64,6 +64,10 @@ config IRQ_DOMAIN_HIERARCHY
bool
select IRQ_DOMAIN
+# Generic IRQ IPI support
+config GENERIC_IRQ_IPI
+ bool
+
# Generic MSI interrupt support
config GENERIC_MSI_IRQ
bool
diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile
index 2fc9cbdf35b6..2ee42e95a3ce 100644
--- a/kernel/irq/Makefile
+++ b/kernel/irq/Makefile
@@ -8,3 +8,4 @@ obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o
obj-$(CONFIG_GENERIC_IRQ_MIGRATION) += cpuhotplug.o
obj-$(CONFIG_PM_SLEEP) += pm.o
obj-$(CONFIG_GENERIC_MSI_IRQ) += msi.o
+obj-$(CONFIG_GENERIC_IRQ_IPI) += ipi.o
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 5797909f4e5b..2f9f2b0e79f2 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -961,6 +961,7 @@ void irq_chip_mask_parent(struct irq_data *data)
data = data->parent_data;
data->chip->irq_mask(data);
}
+EXPORT_SYMBOL_GPL(irq_chip_mask_parent);
/**
* irq_chip_unmask_parent - Unmask the parent interrupt
@@ -971,6 +972,7 @@ void irq_chip_unmask_parent(struct irq_data *data)
data = data->parent_data;
data->chip->irq_unmask(data);
}
+EXPORT_SYMBOL_GPL(irq_chip_unmask_parent);
/**
* irq_chip_eoi_parent - Invoke EOI on the parent interrupt
@@ -981,6 +983,7 @@ void irq_chip_eoi_parent(struct irq_data *data)
data = data->parent_data;
data->chip->irq_eoi(data);
}
+EXPORT_SYMBOL_GPL(irq_chip_eoi_parent);
/**
* irq_chip_set_affinity_parent - Set affinity on the parent interrupt
@@ -1016,6 +1019,7 @@ int irq_chip_set_type_parent(struct irq_data *data, unsigned int type)
return -ENOSYS;
}
+EXPORT_SYMBOL_GPL(irq_chip_set_type_parent);
/**
* irq_chip_retrigger_hierarchy - Retrigger an interrupt in hardware
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 57bff7857e87..a15b5485b446 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -136,10 +136,9 @@ irqreturn_t handle_irq_event_percpu(struct irq_desc *desc)
{
irqreturn_t retval = IRQ_NONE;
unsigned int flags = 0, irq = desc->irq_data.irq;
- struct irqaction *action = desc->action;
+ struct irqaction *action;
- /* action might have become NULL since we dropped the lock */
- while (action) {
+ for_each_action_of_desc(desc, action) {
irqreturn_t res;
trace_irq_handler_entry(irq, action);
@@ -173,7 +172,6 @@ irqreturn_t handle_irq_event_percpu(struct irq_desc *desc)
}
retval |= res;
- action = action->next;
}
add_interrupt_randomness(irq, flags);
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index fcab63c66905..09be2c903c6d 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -131,6 +131,9 @@ static inline void chip_bus_sync_unlock(struct irq_desc *desc)
#define IRQ_GET_DESC_CHECK_GLOBAL (_IRQ_DESC_CHECK)
#define IRQ_GET_DESC_CHECK_PERCPU (_IRQ_DESC_CHECK | _IRQ_DESC_PERCPU)
+#define for_each_action_of_desc(desc, act) \
+ for (act = desc->act; act; act = act->next)
+
struct irq_desc *
__irq_get_desc_lock(unsigned int irq, unsigned long *flags, bool bus,
unsigned int check);
@@ -160,6 +163,8 @@ irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags)
__irq_put_desc_unlock(desc, flags, false);
}
+#define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors)
+
/*
* Manipulation functions for irq_data.state
*/
@@ -188,6 +193,8 @@ static inline bool irqd_has_set(struct irq_data *d, unsigned int mask)
return __irqd_to_state(d) & mask;
}
+#undef __irqd_to_state
+
static inline void kstat_incr_irqs_this_cpu(struct irq_desc *desc)
{
__this_cpu_inc(*desc->kstat_irqs);
diff --git a/kernel/irq/ipi.c b/kernel/irq/ipi.c
new file mode 100644
index 000000000000..c37f34b00a11
--- /dev/null
+++ b/kernel/irq/ipi.c
@@ -0,0 +1,326 @@
+/*
+ * linux/kernel/irq/ipi.c
+ *
+ * Copyright (C) 2015 Imagination Technologies Ltd
+ * Author: Qais Yousef <qais.yousef@imgtec.com>
+ *
+ * This file contains driver APIs to the IPI subsystem.
+ */
+
+#define pr_fmt(fmt) "genirq/ipi: " fmt
+
+#include <linux/irqdomain.h>
+#include <linux/irq.h>
+
+/**
+ * irq_reserve_ipi() - Setup an IPI to destination cpumask
+ * @domain: IPI domain
+ * @dest: cpumask of cpus which can receive the IPI
+ *
+ * Allocate a virq that can be used to send IPI to any CPU in dest mask.
+ *
+ * On success it'll return linux irq number and 0 on failure
+ */
+unsigned int irq_reserve_ipi(struct irq_domain *domain,
+ const struct cpumask *dest)
+{
+ unsigned int nr_irqs, offset;
+ struct irq_data *data;
+ int virq, i;
+
+ if (!domain ||!irq_domain_is_ipi(domain)) {
+ pr_warn("Reservation on a non IPI domain\n");
+ return 0;
+ }
+
+ if (!cpumask_subset(dest, cpu_possible_mask)) {
+ pr_warn("Reservation is not in possible_cpu_mask\n");
+ return 0;
+ }
+
+ nr_irqs = cpumask_weight(dest);
+ if (!nr_irqs) {
+ pr_warn("Reservation for empty destination mask\n");
+ return 0;
+ }
+
+ if (irq_domain_is_ipi_single(domain)) {
+ /*
+ * If the underlying implementation uses a single HW irq on
+ * all cpus then we only need a single Linux irq number for
+ * it. We have no restrictions vs. the destination mask. The
+ * underlying implementation can deal with holes nicely.
+ */
+ nr_irqs = 1;
+ offset = 0;
+ } else {
+ unsigned int next;
+
+ /*
+ * The IPI requires a seperate HW irq on each CPU. We require
+ * that the destination mask is consecutive. If an
+ * implementation needs to support holes, it can reserve
+ * several IPI ranges.
+ */
+ offset = cpumask_first(dest);
+ /*
+ * Find a hole and if found look for another set bit after the
+ * hole. For now we don't support this scenario.
+ */
+ next = cpumask_next_zero(offset, dest);
+ if (next < nr_cpu_ids)
+ next = cpumask_next(next, dest);
+ if (next < nr_cpu_ids) {
+ pr_warn("Destination mask has holes\n");
+ return 0;
+ }
+ }
+
+ virq = irq_domain_alloc_descs(-1, nr_irqs, 0, NUMA_NO_NODE);
+ if (virq <= 0) {
+ pr_warn("Can't reserve IPI, failed to alloc descs\n");
+ return 0;
+ }
+
+ virq = __irq_domain_alloc_irqs(domain, virq, nr_irqs, NUMA_NO_NODE,
+ (void *) dest, true);
+
+ if (virq <= 0) {
+ pr_warn("Can't reserve IPI, failed to alloc hw irqs\n");
+ goto free_descs;
+ }
+
+ for (i = 0; i < nr_irqs; i++) {
+ data = irq_get_irq_data(virq + i);
+ cpumask_copy(data->common->affinity, dest);
+ data->common->ipi_offset = offset;
+ }
+ return virq;
+
+free_descs:
+ irq_free_descs(virq, nr_irqs);
+ return 0;
+}
+
+/**
+ * irq_destroy_ipi() - unreserve an IPI that was previously allocated
+ * @irq: linux irq number to be destroyed
+ *
+ * Return the IPIs allocated with irq_reserve_ipi() to the system destroying
+ * all virqs associated with them.
+ */
+void irq_destroy_ipi(unsigned int irq)
+{
+ struct irq_data *data = irq_get_irq_data(irq);
+ struct cpumask *ipimask = data ? irq_data_get_affinity_mask(data) : NULL;
+ struct irq_domain *domain;
+ unsigned int nr_irqs;
+
+ if (!irq || !data || !ipimask)
+ return;
+
+ domain = data->domain;
+ if (WARN_ON(domain == NULL))
+ return;
+
+ if (!irq_domain_is_ipi(domain)) {
+ pr_warn("Trying to destroy a non IPI domain!\n");
+ return;
+ }
+
+ if (irq_domain_is_ipi_per_cpu(domain))
+ nr_irqs = cpumask_weight(ipimask);
+ else
+ nr_irqs = 1;
+
+ irq_domain_free_irqs(irq, nr_irqs);
+}
+
+/**
+ * ipi_get_hwirq - Get the hwirq associated with an IPI to a cpu
+ * @irq: linux irq number
+ * @cpu: the target cpu
+ *
+ * When dealing with coprocessors IPI, we need to inform the coprocessor of
+ * the hwirq it needs to use to receive and send IPIs.
+ *
+ * Returns hwirq value on success and INVALID_HWIRQ on failure.
+ */
+irq_hw_number_t ipi_get_hwirq(unsigned int irq, unsigned int cpu)
+{
+ struct irq_data *data = irq_get_irq_data(irq);
+ struct cpumask *ipimask = data ? irq_data_get_affinity_mask(data) : NULL;
+
+ if (!data || !ipimask || cpu > nr_cpu_ids)
+ return INVALID_HWIRQ;
+
+ if (!cpumask_test_cpu(cpu, ipimask))
+ return INVALID_HWIRQ;
+
+ /*
+ * Get the real hardware irq number if the underlying implementation
+ * uses a seperate irq per cpu. If the underlying implementation uses
+ * a single hardware irq for all cpus then the IPI send mechanism
+ * needs to take care of the cpu destinations.
+ */
+ if (irq_domain_is_ipi_per_cpu(data->domain))
+ data = irq_get_irq_data(irq + cpu - data->common->ipi_offset);
+
+ return data ? irqd_to_hwirq(data) : INVALID_HWIRQ;
+}
+EXPORT_SYMBOL_GPL(ipi_get_hwirq);
+
+static int ipi_send_verify(struct irq_chip *chip, struct irq_data *data,
+ const struct cpumask *dest, unsigned int cpu)
+{
+ struct cpumask *ipimask = irq_data_get_affinity_mask(data);
+
+ if (!chip || !ipimask)
+ return -EINVAL;
+
+ if (!chip->ipi_send_single && !chip->ipi_send_mask)
+ return -EINVAL;
+
+ if (cpu > nr_cpu_ids)
+ return -EINVAL;
+
+ if (dest) {
+ if (!cpumask_subset(dest, ipimask))
+ return -EINVAL;
+ } else {
+ if (!cpumask_test_cpu(cpu, ipimask))
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/**
+ * __ipi_send_single - send an IPI to a target Linux SMP CPU
+ * @desc: pointer to irq_desc of the IRQ
+ * @cpu: destination CPU, must in the destination mask passed to
+ * irq_reserve_ipi()
+ *
+ * This function is for architecture or core code to speed up IPI sending. Not
+ * usable from driver code.
+ *
+ * Returns zero on success and negative error number on failure.
+ */
+int __ipi_send_single(struct irq_desc *desc, unsigned int cpu)
+{
+ struct irq_data *data = irq_desc_get_irq_data(desc);
+ struct irq_chip *chip = irq_data_get_irq_chip(data);
+
+#ifdef DEBUG
+ /*
+ * Minimise the overhead by omitting the checks for Linux SMP IPIs.
+ * Since the callers should be arch or core code which is generally
+ * trusted, only check for errors when debugging.
+ */
+ if (WARN_ON_ONCE(ipi_send_verify(chip, data, NULL, cpu)))
+ return -EINVAL;
+#endif
+ if (!chip->ipi_send_single) {
+ chip->ipi_send_mask(data, cpumask_of(cpu));
+ return 0;
+ }
+
+ /* FIXME: Store this information in irqdata flags */
+ if (irq_domain_is_ipi_per_cpu(data->domain) &&
+ cpu != data->common->ipi_offset) {
+ /* use the correct data for that cpu */
+ unsigned irq = data->irq + cpu - data->common->ipi_offset;
+
+ data = irq_get_irq_data(irq);
+ }
+ chip->ipi_send_single(data, cpu);
+ return 0;
+}
+
+/**
+ * ipi_send_mask - send an IPI to target Linux SMP CPU(s)
+ * @desc: pointer to irq_desc of the IRQ
+ * @dest: dest CPU(s), must be a subset of the mask passed to
+ * irq_reserve_ipi()
+ *
+ * This function is for architecture or core code to speed up IPI sending. Not
+ * usable from driver code.
+ *
+ * Returns zero on success and negative error number on failure.
+ */
+int __ipi_send_mask(struct irq_desc *desc, const struct cpumask *dest)
+{
+ struct irq_data *data = irq_desc_get_irq_data(desc);
+ struct irq_chip *chip = irq_data_get_irq_chip(data);
+ unsigned int cpu;
+
+#ifdef DEBUG
+ /*
+ * Minimise the overhead by omitting the checks for Linux SMP IPIs.
+ * Since the callers should be arch or core code which is generally
+ * trusted, only check for errors when debugging.
+ */
+ if (WARN_ON_ONCE(ipi_send_verify(chip, data, dest, 0)))
+ return -EINVAL;
+#endif
+ if (chip->ipi_send_mask) {
+ chip->ipi_send_mask(data, dest);
+ return 0;
+ }
+
+ if (irq_domain_is_ipi_per_cpu(data->domain)) {
+ unsigned int base = data->irq;
+
+ for_each_cpu(cpu, dest) {
+ unsigned irq = base + cpu - data->common->ipi_offset;
+
+ data = irq_get_irq_data(irq);
+ chip->ipi_send_single(data, cpu);
+ }
+ } else {
+ for_each_cpu(cpu, dest)
+ chip->ipi_send_single(data, cpu);
+ }
+ return 0;
+}
+
+/**
+ * ipi_send_single - Send an IPI to a single CPU
+ * @virq: linux irq number from irq_reserve_ipi()
+ * @cpu: destination CPU, must in the destination mask passed to
+ * irq_reserve_ipi()
+ *
+ * Returns zero on success and negative error number on failure.
+ */
+int ipi_send_single(unsigned int virq, unsigned int cpu)
+{
+ struct irq_desc *desc = irq_to_desc(virq);
+ struct irq_data *data = desc ? irq_desc_get_irq_data(desc) : NULL;
+ struct irq_chip *chip = data ? irq_data_get_irq_chip(data) : NULL;
+
+ if (WARN_ON_ONCE(ipi_send_verify(chip, data, NULL, cpu)))
+ return -EINVAL;
+
+ return __ipi_send_single(desc, cpu);
+}
+EXPORT_SYMBOL_GPL(ipi_send_single);
+
+/**
+ * ipi_send_mask - Send an IPI to target CPU(s)
+ * @virq: linux irq number from irq_reserve_ipi()
+ * @dest: dest CPU(s), must be a subset of the mask passed to
+ * irq_reserve_ipi()
+ *
+ * Returns zero on success and negative error number on failure.
+ */
+int ipi_send_mask(unsigned int virq, const struct cpumask *dest)
+{
+ struct irq_desc *desc = irq_to_desc(virq);
+ struct irq_data *data = desc ? irq_desc_get_irq_data(desc) : NULL;
+ struct irq_chip *chip = data ? irq_data_get_irq_chip(data) : NULL;
+
+ if (WARN_ON_ONCE(ipi_send_verify(chip, data, dest, 0)))
+ return -EINVAL;
+
+ return __ipi_send_mask(desc, dest);
+}
+EXPORT_SYMBOL_GPL(ipi_send_mask);
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 0409da0bcc33..0ccd028817d7 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -24,10 +24,27 @@
static struct lock_class_key irq_desc_lock_class;
#if defined(CONFIG_SMP)
+static int __init irq_affinity_setup(char *str)
+{
+ zalloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
+ cpulist_parse(str, irq_default_affinity);
+ /*
+ * Set at least the boot cpu. We don't want to end up with
+ * bugreports caused by random comandline masks
+ */
+ cpumask_set_cpu(smp_processor_id(), irq_default_affinity);
+ return 1;
+}
+__setup("irqaffinity=", irq_affinity_setup);
+
static void __init init_irq_default_affinity(void)
{
- alloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
- cpumask_setall(irq_default_affinity);
+#ifdef CONFIG_CPUMASK_OFFSTACK
+ if (!irq_default_affinity)
+ zalloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
+#endif
+ if (cpumask_empty(irq_default_affinity))
+ cpumask_setall(irq_default_affinity);
}
#else
static void __init init_irq_default_affinity(void)
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 3e56d2f03e24..3a519a01118b 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -23,8 +23,6 @@ static DEFINE_MUTEX(irq_domain_mutex);
static DEFINE_MUTEX(revmap_trees_mutex);
static struct irq_domain *irq_default_domain;
-static int irq_domain_alloc_descs(int virq, unsigned int nr_irqs,
- irq_hw_number_t hwirq, int node);
static void irq_domain_check_hierarchy(struct irq_domain *domain);
struct irqchip_fwid {
@@ -840,8 +838,8 @@ const struct irq_domain_ops irq_domain_simple_ops = {
};
EXPORT_SYMBOL_GPL(irq_domain_simple_ops);
-static int irq_domain_alloc_descs(int virq, unsigned int cnt,
- irq_hw_number_t hwirq, int node)
+int irq_domain_alloc_descs(int virq, unsigned int cnt, irq_hw_number_t hwirq,
+ int node)
{
unsigned int hint;
@@ -895,6 +893,7 @@ struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent,
return domain;
}
+EXPORT_SYMBOL_GPL(irq_domain_create_hierarchy);
static void irq_domain_insert_irq(int virq)
{
@@ -1045,6 +1044,7 @@ int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, unsigned int virq,
return 0;
}
+EXPORT_SYMBOL_GPL(irq_domain_set_hwirq_and_chip);
/**
* irq_domain_set_info - Set the complete data for a @virq in @domain
@@ -1078,6 +1078,7 @@ void irq_domain_reset_irq_data(struct irq_data *irq_data)
irq_data->chip = &no_irq_chip;
irq_data->chip_data = NULL;
}
+EXPORT_SYMBOL_GPL(irq_domain_reset_irq_data);
/**
* irq_domain_free_irqs_common - Clear irq_data and free the parent
@@ -1275,6 +1276,7 @@ int irq_domain_alloc_irqs_parent(struct irq_domain *domain,
nr_irqs, arg);
return -ENOSYS;
}
+EXPORT_SYMBOL_GPL(irq_domain_alloc_irqs_parent);
/**
* irq_domain_free_irqs_parent - Free interrupts from parent domain
@@ -1292,6 +1294,7 @@ void irq_domain_free_irqs_parent(struct irq_domain *domain,
irq_domain_free_irqs_recursive(domain->parent, irq_base,
nr_irqs);
}
+EXPORT_SYMBOL_GPL(irq_domain_free_irqs_parent);
/**
* irq_domain_activate_irq - Call domain_ops->activate recursively to activate
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 841187239adc..3ddd2297ee95 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -144,13 +144,11 @@ int irq_can_set_affinity(unsigned int irq)
*/
void irq_set_thread_affinity(struct irq_desc *desc)
{
- struct irqaction *action = desc->action;
+ struct irqaction *action;
- while (action) {
+ for_each_action_of_desc(desc, action)
if (action->thread)
set_bit(IRQTF_AFFINITY, &action->thread_flags);
- action = action->next;
- }
}
#ifdef CONFIG_GENERIC_PENDING_IRQ
@@ -994,7 +992,7 @@ void irq_wake_thread(unsigned int irq, void *dev_id)
return;
raw_spin_lock_irqsave(&desc->lock, flags);
- for (action = desc->action; action; action = action->next) {
+ for_each_action_of_desc(desc, action) {
if (action->dev_id == dev_id) {
if (action->thread)
__irq_wake_thread(desc, action);
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index a2c02fd5d6d0..4e1b94726818 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -291,7 +291,7 @@ static int name_unique(unsigned int irq, struct irqaction *new_action)
int ret = 1;
raw_spin_lock_irqsave(&desc->lock, flags);
- for (action = desc->action ; action; action = action->next) {
+ for_each_action_of_desc(desc, action) {
if ((action != new_action) && action->name &&
!strcmp(new_action->name, action->name)) {
ret = 0;
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 32144175458d..5707f97a3e6a 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -211,14 +211,12 @@ static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret)
* desc->lock here. See synchronize_irq().
*/
raw_spin_lock_irqsave(&desc->lock, flags);
- action = desc->action;
- while (action) {
+ for_each_action_of_desc(desc, action) {
printk(KERN_ERR "[<%p>] %pf", action->handler, action->handler);
if (action->thread_fn)
printk(KERN_CONT " threaded [<%p>] %pf",
action->thread_fn, action->thread_fn);
printk(KERN_CONT "\n");
- action = action->next;
}
raw_spin_unlock_irqrestore(&desc->lock, flags);
}
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 4d5cc6aa7e1e..250ea67c1615 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -930,12 +930,14 @@ rcu_torture_writer(void *arg)
int nsynctypes = 0;
VERBOSE_TOROUT_STRING("rcu_torture_writer task started");
- pr_alert("%s" TORTURE_FLAG
- " Grace periods expedited from boot/sysfs for %s,\n",
- torture_type, cur_ops->name);
- pr_alert("%s" TORTURE_FLAG
- " Testing of dynamic grace-period expediting diabled.\n",
- torture_type);
+ if (!can_expedite) {
+ pr_alert("%s" TORTURE_FLAG
+ " Grace periods expedited from boot/sysfs for %s,\n",
+ torture_type, cur_ops->name);
+ pr_alert("%s" TORTURE_FLAG
+ " Disabled dynamic grace-period expediting.\n",
+ torture_type);
+ }
/* Initialize synctype[] array. If none set, take default. */
if (!gp_cond1 && !gp_exp1 && !gp_normal1 && !gp_sync1)
diff --git a/kernel/rcu/tiny_plugin.h b/kernel/rcu/tiny_plugin.h
index e492a5253e0f..196f0302e2f4 100644
--- a/kernel/rcu/tiny_plugin.h
+++ b/kernel/rcu/tiny_plugin.h
@@ -23,7 +23,7 @@
*/
#include <linux/kthread.h>
-#include <linux/module.h>
+#include <linux/init.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
@@ -122,18 +122,7 @@ free_out:
debugfs_remove_recursive(rcudir);
return 1;
}
-
-static void __exit rcutiny_trace_cleanup(void)
-{
- debugfs_remove_recursive(rcudir);
-}
-
-module_init(rcutiny_trace_init);
-module_exit(rcutiny_trace_cleanup);
-
-MODULE_AUTHOR("Paul E. McKenney");
-MODULE_DESCRIPTION("Read-Copy Update tracing for tiny implementation");
-MODULE_LICENSE("GPL");
+device_initcall(rcutiny_trace_init);
static void check_cpu_stall(struct rcu_ctrlblk *rcp)
{
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 9fd5b628a88d..9a535a86e732 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -108,7 +108,6 @@ RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh);
static struct rcu_state *const rcu_state_p;
-static struct rcu_data __percpu *const rcu_data_p;
LIST_HEAD(rcu_struct_flavors);
/* Dump rcu_node combining tree at boot to verify correct setup. */
@@ -1083,13 +1082,12 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp,
rcu_sysidle_check_cpu(rdp, isidle, maxj);
if ((rdp->dynticks_snap & 0x1) == 0) {
trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
- return 1;
- } else {
if (ULONG_CMP_LT(READ_ONCE(rdp->gpnum) + ULONG_MAX / 4,
rdp->mynode->gpnum))
WRITE_ONCE(rdp->gpwrap, true);
- return 0;
+ return 1;
}
+ return 0;
}
/*
@@ -1173,15 +1171,16 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
smp_mb(); /* ->cond_resched_completed before *rcrmp. */
WRITE_ONCE(*rcrmp,
READ_ONCE(*rcrmp) + rdp->rsp->flavor_mask);
- resched_cpu(rdp->cpu); /* Force CPU into scheduler. */
- rdp->rsp->jiffies_resched += 5; /* Enable beating. */
- } else if (ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) {
- /* Time to beat on that CPU again! */
- resched_cpu(rdp->cpu); /* Force CPU into scheduler. */
- rdp->rsp->jiffies_resched += 5; /* Re-enable beating. */
}
+ rdp->rsp->jiffies_resched += 5; /* Re-enable beating. */
}
+ /* And if it has been a really long time, kick the CPU as well. */
+ if (ULONG_CMP_GE(jiffies,
+ rdp->rsp->gp_start + 2 * jiffies_till_sched_qs) ||
+ ULONG_CMP_GE(jiffies, rdp->rsp->gp_start + jiffies_till_sched_qs))
+ resched_cpu(rdp->cpu); /* Force CPU into scheduler. */
+
return 0;
}
@@ -1246,7 +1245,7 @@ static void rcu_dump_cpu_stacks(struct rcu_state *rsp)
if (rnp->qsmask & (1UL << cpu))
dump_cpu_task(rnp->grplo + cpu);
}
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
}
@@ -1266,12 +1265,12 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
raw_spin_lock_irqsave_rcu_node(rnp, flags);
delta = jiffies - READ_ONCE(rsp->jiffies_stall);
if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) {
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return;
}
WRITE_ONCE(rsp->jiffies_stall,
jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
/*
* OK, time to rat on our buddy...
@@ -1292,7 +1291,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
ndetected++;
}
}
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
print_cpu_stall_info_end();
@@ -1357,7 +1356,7 @@ static void print_cpu_stall(struct rcu_state *rsp)
if (ULONG_CMP_GE(jiffies, READ_ONCE(rsp->jiffies_stall)))
WRITE_ONCE(rsp->jiffies_stall,
jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
/*
* Attempt to revive the RCU machinery by forcing a context switch.
@@ -1595,7 +1594,7 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
}
unlock_out:
if (rnp != rnp_root)
- raw_spin_unlock(&rnp_root->lock);
+ raw_spin_unlock_rcu_node(rnp_root);
out:
if (c_out != NULL)
*c_out = c;
@@ -1814,7 +1813,7 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp)
return;
}
needwake = __note_gp_changes(rsp, rnp, rdp);
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
if (needwake)
rcu_gp_kthread_wake(rsp);
}
@@ -1839,7 +1838,7 @@ static bool rcu_gp_init(struct rcu_state *rsp)
raw_spin_lock_irq_rcu_node(rnp);
if (!READ_ONCE(rsp->gp_flags)) {
/* Spurious wakeup, tell caller to go back to sleep. */
- raw_spin_unlock_irq(&rnp->lock);
+ raw_spin_unlock_irq_rcu_node(rnp);
return false;
}
WRITE_ONCE(rsp->gp_flags, 0); /* Clear all flags: New grace period. */
@@ -1849,7 +1848,7 @@ static bool rcu_gp_init(struct rcu_state *rsp)
* Grace period already in progress, don't start another.
* Not supposed to be able to happen.
*/
- raw_spin_unlock_irq(&rnp->lock);
+ raw_spin_unlock_irq_rcu_node(rnp);
return false;
}
@@ -1858,7 +1857,7 @@ static bool rcu_gp_init(struct rcu_state *rsp)
/* Record GP times before starting GP, hence smp_store_release(). */
smp_store_release(&rsp->gpnum, rsp->gpnum + 1);
trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start"));
- raw_spin_unlock_irq(&rnp->lock);
+ raw_spin_unlock_irq_rcu_node(rnp);
/*
* Apply per-leaf buffered online and offline operations to the
@@ -1872,7 +1871,7 @@ static bool rcu_gp_init(struct rcu_state *rsp)
if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
!rnp->wait_blkd_tasks) {
/* Nothing to do on this leaf rcu_node structure. */
- raw_spin_unlock_irq(&rnp->lock);
+ raw_spin_unlock_irq_rcu_node(rnp);
continue;
}
@@ -1906,7 +1905,7 @@ static bool rcu_gp_init(struct rcu_state *rsp)
rcu_cleanup_dead_rnp(rnp);
}
- raw_spin_unlock_irq(&rnp->lock);
+ raw_spin_unlock_irq_rcu_node(rnp);
}
/*
@@ -1937,7 +1936,7 @@ static bool rcu_gp_init(struct rcu_state *rsp)
trace_rcu_grace_period_init(rsp->name, rnp->gpnum,
rnp->level, rnp->grplo,
rnp->grphi, rnp->qsmask);
- raw_spin_unlock_irq(&rnp->lock);
+ raw_spin_unlock_irq_rcu_node(rnp);
cond_resched_rcu_qs();
WRITE_ONCE(rsp->gp_activity, jiffies);
}
@@ -1995,7 +1994,7 @@ static void rcu_gp_fqs(struct rcu_state *rsp, bool first_time)
raw_spin_lock_irq_rcu_node(rnp);
WRITE_ONCE(rsp->gp_flags,
READ_ONCE(rsp->gp_flags) & ~RCU_GP_FLAG_FQS);
- raw_spin_unlock_irq(&rnp->lock);
+ raw_spin_unlock_irq_rcu_node(rnp);
}
}
@@ -2025,7 +2024,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
* safe for us to drop the lock in order to mark the grace
* period as completed in all of the rcu_node structures.
*/
- raw_spin_unlock_irq(&rnp->lock);
+ raw_spin_unlock_irq_rcu_node(rnp);
/*
* Propagate new ->completed value to rcu_node structures so
@@ -2047,7 +2046,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
/* smp_mb() provided by prior unlock-lock pair. */
nocb += rcu_future_gp_cleanup(rsp, rnp);
sq = rcu_nocb_gp_get(rnp);
- raw_spin_unlock_irq(&rnp->lock);
+ raw_spin_unlock_irq_rcu_node(rnp);
rcu_nocb_gp_cleanup(sq);
cond_resched_rcu_qs();
WRITE_ONCE(rsp->gp_activity, jiffies);
@@ -2070,7 +2069,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
READ_ONCE(rsp->gpnum),
TPS("newreq"));
}
- raw_spin_unlock_irq(&rnp->lock);
+ raw_spin_unlock_irq_rcu_node(rnp);
}
/*
@@ -2236,18 +2235,20 @@ static bool rcu_start_gp(struct rcu_state *rsp)
}
/*
- * Report a full set of quiescent states to the specified rcu_state
- * data structure. This involves cleaning up after the prior grace
- * period and letting rcu_start_gp() start up the next grace period
- * if one is needed. Note that the caller must hold rnp->lock, which
- * is released before return.
+ * Report a full set of quiescent states to the specified rcu_state data
+ * structure. Invoke rcu_gp_kthread_wake() to awaken the grace-period
+ * kthread if another grace period is required. Whether we wake
+ * the grace-period kthread or it awakens itself for the next round
+ * of quiescent-state forcing, that kthread will clean up after the
+ * just-completed grace period. Note that the caller must hold rnp->lock,
+ * which is released before return.
*/
static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
__releases(rcu_get_root(rsp)->lock)
{
WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);
- raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(rsp), flags);
swake_up(&rsp->gp_wq); /* Memory barrier implied by swake_up() path. */
}
@@ -2277,7 +2278,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
* Our bit has already been cleared, or the
* relevant grace period is already over, so done.
*/
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return;
}
WARN_ON_ONCE(oldmask); /* Any child must be all zeroed! */
@@ -2289,7 +2290,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
/* Other bits still set at this level, so done. */
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return;
}
mask = rnp->grpmask;
@@ -2299,7 +2300,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
break;
}
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
rnp_c = rnp;
rnp = rnp->parent;
raw_spin_lock_irqsave_rcu_node(rnp, flags);
@@ -2331,7 +2332,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_state *rsp,
if (rcu_state_p == &rcu_sched_state || rsp != rcu_state_p ||
rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return; /* Still need more quiescent states! */
}
@@ -2348,19 +2349,14 @@ static void rcu_report_unblock_qs_rnp(struct rcu_state *rsp,
/* Report up the rest of the hierarchy, tracking current ->gpnum. */
gps = rnp->gpnum;
mask = rnp->grpmask;
- raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+ raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
raw_spin_lock_rcu_node(rnp_p); /* irqs already disabled. */
rcu_report_qs_rnp(mask, rsp, rnp_p, gps, flags);
}
/*
* Record a quiescent state for the specified CPU to that CPU's rcu_data
- * structure. This must be either called from the specified CPU, or
- * called when the specified CPU is known to be offline (and when it is
- * also known that no other CPU is concurrently trying to help the offline
- * CPU). The lastcomp argument is used to make sure we are still in the
- * grace period of interest. We don't want to end the current grace period
- * based on quiescent states detected in an earlier grace period!
+ * structure. This must be called from the specified CPU.
*/
static void
rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
@@ -2385,14 +2381,14 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
*/
rdp->cpu_no_qs.b.norm = true; /* need qs for new gp. */
rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr);
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return;
}
mask = rdp->grpmask;
if ((rnp->qsmask & mask) == 0) {
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
} else {
- rdp->core_needs_qs = 0;
+ rdp->core_needs_qs = false;
/*
* This GP can't end until cpu checks in, so all of our
@@ -2601,36 +2597,15 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
rnp->qsmaskinit &= ~mask;
rnp->qsmask &= ~mask;
if (rnp->qsmaskinit) {
- raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+ raw_spin_unlock_rcu_node(rnp);
+ /* irqs remain disabled. */
return;
}
- raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+ raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
}
}
/*
- * The CPU is exiting the idle loop into the arch_cpu_idle_dead()
- * function. We now remove it from the rcu_node tree's ->qsmaskinit
- * bit masks.
- */
-static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp)
-{
- unsigned long flags;
- unsigned long mask;
- struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
- struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */
-
- if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
- return;
-
- /* Remove outgoing CPU from mask in the leaf rcu_node structure. */
- mask = rdp->grpmask;
- raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
- rnp->qsmaskinitnext &= ~mask;
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
-}
-
-/*
* The CPU has been completely removed, and some other CPU is reporting
* this fact from process context. Do the remainder of the cleanup,
* including orphaning the outgoing CPU's RCU callbacks, and also
@@ -2861,7 +2836,7 @@ static void force_qs_rnp(struct rcu_state *rsp,
rcu_report_qs_rnp(mask, rsp, rnp, rnp->gpnum, flags);
} else {
/* Nothing to do here, so just drop the lock. */
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
}
}
@@ -2897,11 +2872,11 @@ static void force_quiescent_state(struct rcu_state *rsp)
raw_spin_unlock(&rnp_old->fqslock);
if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
rsp->n_force_qs_lh++;
- raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rnp_old, flags);
return; /* Someone beat us to it. */
}
WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);
- raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rnp_old, flags);
swake_up(&rsp->gp_wq); /* Memory barrier implied by swake_up() path. */
}
@@ -2927,7 +2902,7 @@ __rcu_process_callbacks(struct rcu_state *rsp)
if (cpu_needs_another_gp(rsp, rdp)) {
raw_spin_lock_rcu_node(rcu_get_root(rsp)); /* irqs disabled. */
needwake = rcu_start_gp(rsp);
- raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(rsp), flags);
if (needwake)
rcu_gp_kthread_wake(rsp);
} else {
@@ -3018,7 +2993,7 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
raw_spin_lock_rcu_node(rnp_root);
needwake = rcu_start_gp(rsp);
- raw_spin_unlock(&rnp_root->lock);
+ raw_spin_unlock_rcu_node(rnp_root);
if (needwake)
rcu_gp_kthread_wake(rsp);
} else {
@@ -3438,14 +3413,14 @@ static void sync_exp_reset_tree_hotplug(struct rcu_state *rsp)
rcu_for_each_leaf_node(rsp, rnp) {
raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (rnp->expmaskinit == rnp->expmaskinitnext) {
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
continue; /* No new CPUs, nothing to do. */
}
/* Update this node's mask, track old value for propagation. */
oldmask = rnp->expmaskinit;
rnp->expmaskinit = rnp->expmaskinitnext;
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
/* If was already nonzero, nothing to propagate. */
if (oldmask)
@@ -3460,7 +3435,7 @@ static void sync_exp_reset_tree_hotplug(struct rcu_state *rsp)
if (rnp_up->expmaskinit)
done = true;
rnp_up->expmaskinit |= mask;
- raw_spin_unlock_irqrestore(&rnp_up->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rnp_up, flags);
if (done)
break;
mask = rnp_up->grpmask;
@@ -3483,7 +3458,7 @@ static void __maybe_unused sync_exp_reset_tree(struct rcu_state *rsp)
raw_spin_lock_irqsave_rcu_node(rnp, flags);
WARN_ON_ONCE(rnp->expmask);
rnp->expmask = rnp->expmaskinit;
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
}
@@ -3524,11 +3499,11 @@ static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
if (!rnp->expmask)
rcu_initiate_boost(rnp, flags);
else
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
break;
}
if (rnp->parent == NULL) {
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
if (wake) {
smp_mb(); /* EGP done before wake_up(). */
swake_up(&rsp->expedited_wq);
@@ -3536,7 +3511,7 @@ static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
break;
}
mask = rnp->grpmask;
- raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
+ raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled */
rnp = rnp->parent;
raw_spin_lock_rcu_node(rnp); /* irqs already disabled */
WARN_ON_ONCE(!(rnp->expmask & mask));
@@ -3571,7 +3546,7 @@ static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp,
raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (!(rnp->expmask & mask)) {
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return;
}
rnp->expmask &= ~mask;
@@ -3732,7 +3707,7 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
*/
if (rcu_preempt_has_tasks(rnp))
rnp->exp_tasks = rnp->blkd_tasks.next;
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
/* IPI the remaining CPUs for expedited quiescent state. */
mask = 1;
@@ -3749,7 +3724,7 @@ retry_ipi:
raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (cpu_online(cpu) &&
(rnp->expmask & mask)) {
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
schedule_timeout_uninterruptible(1);
if (cpu_online(cpu) &&
(rnp->expmask & mask))
@@ -3758,7 +3733,7 @@ retry_ipi:
}
if (!(rnp->expmask & mask))
mask_ofl_ipi &= ~mask;
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
/* Report quiescent states for those that went offline. */
mask_ofl_test |= mask_ofl_ipi;
@@ -4165,7 +4140,7 @@ static void rcu_init_new_rnp(struct rcu_node *rnp_leaf)
return;
raw_spin_lock_rcu_node(rnp); /* Interrupts already disabled. */
rnp->qsmaskinit |= mask;
- raw_spin_unlock(&rnp->lock); /* Interrupts remain disabled. */
+ raw_spin_unlock_rcu_node(rnp); /* Interrupts remain disabled. */
}
}
@@ -4189,7 +4164,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
rdp->rsp = rsp;
mutex_init(&rdp->exp_funnel_mutex);
rcu_boot_init_nocb_percpu_data(rdp);
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
/*
@@ -4217,7 +4192,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
rcu_sysidle_init_percpu_data(rdp->dynticks);
atomic_set(&rdp->dynticks->dynticks,
(atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
- raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+ raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
/*
* Add CPU to leaf rcu_node pending-online bitmask. Any needed
@@ -4238,7 +4213,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
rdp->rcu_qs_ctr_snap = per_cpu(rcu_qs_ctr, cpu);
rdp->core_needs_qs = false;
trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl"));
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
static void rcu_prepare_cpu(int cpu)
@@ -4249,6 +4224,46 @@ static void rcu_prepare_cpu(int cpu)
rcu_init_percpu_data(cpu, rsp);
}
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * The CPU is exiting the idle loop into the arch_cpu_idle_dead()
+ * function. We now remove it from the rcu_node tree's ->qsmaskinit
+ * bit masks.
+ * The CPU is exiting the idle loop into the arch_cpu_idle_dead()
+ * function. We now remove it from the rcu_node tree's ->qsmaskinit
+ * bit masks.
+ */
+static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp)
+{
+ unsigned long flags;
+ unsigned long mask;
+ struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
+ struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */
+
+ if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
+ return;
+
+ /* Remove outgoing CPU from mask in the leaf rcu_node structure. */
+ mask = rdp->grpmask;
+ raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
+ rnp->qsmaskinitnext &= ~mask;
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+}
+
+void rcu_report_dead(unsigned int cpu)
+{
+ struct rcu_state *rsp;
+
+ /* QS for any half-done expedited RCU-sched GP. */
+ preempt_disable();
+ rcu_report_exp_rdp(&rcu_sched_state,
+ this_cpu_ptr(rcu_sched_state.rda), true);
+ preempt_enable();
+ for_each_rcu_flavor(rsp)
+ rcu_cleanup_dying_idle_cpu(cpu, rsp);
+}
+#endif
+
/*
* Handle CPU online/offline notification events.
*/
@@ -4280,17 +4295,6 @@ int rcu_cpu_notify(struct notifier_block *self,
for_each_rcu_flavor(rsp)
rcu_cleanup_dying_cpu(rsp);
break;
- case CPU_DYING_IDLE:
- /* QS for any half-done expedited RCU-sched GP. */
- preempt_disable();
- rcu_report_exp_rdp(&rcu_sched_state,
- this_cpu_ptr(rcu_sched_state.rda), true);
- preempt_enable();
-
- for_each_rcu_flavor(rsp) {
- rcu_cleanup_dying_idle_cpu(cpu, rsp);
- }
- break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:
case CPU_UP_CANCELED:
@@ -4360,7 +4364,7 @@ static int __init rcu_spawn_gp_kthread(void)
sp.sched_priority = kthread_prio;
sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
}
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
wake_up_process(t);
}
rcu_spawn_nocb_kthreads();
@@ -4451,8 +4455,8 @@ static void __init rcu_init_one(struct rcu_state *rsp)
cpustride *= levelspread[i];
rnp = rsp->level[i];
for (j = 0; j < levelcnt[i]; j++, rnp++) {
- raw_spin_lock_init(&rnp->lock);
- lockdep_set_class_and_name(&rnp->lock,
+ raw_spin_lock_init(&ACCESS_PRIVATE(rnp, lock));
+ lockdep_set_class_and_name(&ACCESS_PRIVATE(rnp, lock),
&rcu_node_class[i], buf[i]);
raw_spin_lock_init(&rnp->fqslock);
lockdep_set_class_and_name(&rnp->fqslock,
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index bbd235d0e71f..df668c0f9e64 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -150,8 +150,9 @@ struct rcu_dynticks {
* Definition for node within the RCU grace-period-detection hierarchy.
*/
struct rcu_node {
- raw_spinlock_t lock; /* Root rcu_node's lock protects some */
- /* rcu_state fields as well as following. */
+ raw_spinlock_t __private lock; /* Root rcu_node's lock protects */
+ /* some rcu_state fields as well as */
+ /* following. */
unsigned long gpnum; /* Current grace period for this node. */
/* This will either be equal to or one */
/* behind the root rcu_node's gpnum. */
@@ -682,7 +683,7 @@ static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll)
#endif /* #else #ifdef CONFIG_PPC */
/*
- * Wrappers for the rcu_node::lock acquire.
+ * Wrappers for the rcu_node::lock acquire and release.
*
* Because the rcu_nodes form a tree, the tree traversal locking will observe
* different lock values, this in turn means that an UNLOCK of one level
@@ -691,29 +692,48 @@ static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll)
*
* In order to restore full ordering between tree levels, augment the regular
* lock acquire functions with smp_mb__after_unlock_lock().
+ *
+ * As ->lock of struct rcu_node is a __private field, therefore one should use
+ * these wrappers rather than directly call raw_spin_{lock,unlock}* on ->lock.
*/
static inline void raw_spin_lock_rcu_node(struct rcu_node *rnp)
{
- raw_spin_lock(&rnp->lock);
+ raw_spin_lock(&ACCESS_PRIVATE(rnp, lock));
smp_mb__after_unlock_lock();
}
+static inline void raw_spin_unlock_rcu_node(struct rcu_node *rnp)
+{
+ raw_spin_unlock(&ACCESS_PRIVATE(rnp, lock));
+}
+
static inline void raw_spin_lock_irq_rcu_node(struct rcu_node *rnp)
{
- raw_spin_lock_irq(&rnp->lock);
+ raw_spin_lock_irq(&ACCESS_PRIVATE(rnp, lock));
smp_mb__after_unlock_lock();
}
-#define raw_spin_lock_irqsave_rcu_node(rnp, flags) \
-do { \
- typecheck(unsigned long, flags); \
- raw_spin_lock_irqsave(&(rnp)->lock, flags); \
- smp_mb__after_unlock_lock(); \
+static inline void raw_spin_unlock_irq_rcu_node(struct rcu_node *rnp)
+{
+ raw_spin_unlock_irq(&ACCESS_PRIVATE(rnp, lock));
+}
+
+#define raw_spin_lock_irqsave_rcu_node(rnp, flags) \
+do { \
+ typecheck(unsigned long, flags); \
+ raw_spin_lock_irqsave(&ACCESS_PRIVATE(rnp, lock), flags); \
+ smp_mb__after_unlock_lock(); \
+} while (0)
+
+#define raw_spin_unlock_irqrestore_rcu_node(rnp, flags) \
+do { \
+ typecheck(unsigned long, flags); \
+ raw_spin_unlock_irqrestore(&ACCESS_PRIVATE(rnp, lock), flags); \
} while (0)
static inline bool raw_spin_trylock_rcu_node(struct rcu_node *rnp)
{
- bool locked = raw_spin_trylock(&rnp->lock);
+ bool locked = raw_spin_trylock(&ACCESS_PRIVATE(rnp, lock));
if (locked)
smp_mb__after_unlock_lock();
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 080bd202d360..efdf7b61ce12 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -235,7 +235,7 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
rnp->gp_tasks = &t->rcu_node_entry;
if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD))
rnp->exp_tasks = &t->rcu_node_entry;
- raw_spin_unlock(&rnp->lock); /* rrupts remain disabled. */
+ raw_spin_unlock_rcu_node(rnp); /* interrupts remain disabled. */
/*
* Report the quiescent state for the expedited GP. This expedited
@@ -489,7 +489,7 @@ void rcu_read_unlock_special(struct task_struct *t)
!!rnp->gp_tasks);
rcu_report_unblock_qs_rnp(rcu_state_p, rnp, flags);
} else {
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
/* Unboost if we were boosted. */
@@ -518,14 +518,14 @@ static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (!rcu_preempt_blocked_readers_cgp(rnp)) {
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return;
}
t = list_entry(rnp->gp_tasks->prev,
struct task_struct, rcu_node_entry);
list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry)
sched_show_task(t);
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
/*
@@ -807,7 +807,6 @@ void exit_rcu(void)
#else /* #ifdef CONFIG_PREEMPT_RCU */
static struct rcu_state *const rcu_state_p = &rcu_sched_state;
-static struct rcu_data __percpu *const rcu_data_p = &rcu_sched_data;
/*
* Tell them what RCU they are running.
@@ -991,7 +990,7 @@ static int rcu_boost(struct rcu_node *rnp)
* might exit their RCU read-side critical sections on their own.
*/
if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) {
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return 0;
}
@@ -1028,7 +1027,7 @@ static int rcu_boost(struct rcu_node *rnp)
*/
t = container_of(tb, struct task_struct, rcu_node_entry);
rt_mutex_init_proxy_locked(&rnp->boost_mtx, t);
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
/* Lock only for side effect: boosts task t's priority. */
rt_mutex_lock(&rnp->boost_mtx);
rt_mutex_unlock(&rnp->boost_mtx); /* Then keep lockdep happy. */
@@ -1088,7 +1087,7 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) {
rnp->n_balk_exp_gp_tasks++;
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return;
}
if (rnp->exp_tasks != NULL ||
@@ -1098,13 +1097,13 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
ULONG_CMP_GE(jiffies, rnp->boost_time))) {
if (rnp->exp_tasks == NULL)
rnp->boost_tasks = rnp->gp_tasks;
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
t = rnp->boost_kthread_task;
if (t)
rcu_wake_cond(t, rnp->boost_kthread_status);
} else {
rcu_initiate_boost_trace(rnp);
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
}
@@ -1172,7 +1171,7 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
return PTR_ERR(t);
raw_spin_lock_irqsave_rcu_node(rnp, flags);
rnp->boost_kthread_task = t;
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
sp.sched_priority = kthread_prio;
sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */
@@ -1308,7 +1307,7 @@ static void rcu_prepare_kthreads(int cpu)
static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
__releases(rnp->lock)
{
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
static void invoke_rcu_callbacks_kthread(void)
@@ -1559,7 +1558,7 @@ static void rcu_prepare_for_idle(void)
rnp = rdp->mynode;
raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
needwake = rcu_accelerate_cbs(rsp, rnp, rdp);
- raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+ raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
if (needwake)
rcu_gp_kthread_wake(rsp);
}
@@ -2064,7 +2063,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
raw_spin_lock_irqsave_rcu_node(rnp, flags);
needwake = rcu_start_future_gp(rnp, rdp, &c);
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
if (needwake)
rcu_gp_kthread_wake(rdp->rsp);
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 76b94e19430b..ca828b41c938 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -128,6 +128,7 @@ bool rcu_gp_is_normal(void)
{
return READ_ONCE(rcu_normal);
}
+EXPORT_SYMBOL_GPL(rcu_gp_is_normal);
static atomic_t rcu_expedited_nesting =
ATOMIC_INIT(IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT) ? 1 : 0);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e5725b931bee..ea8f49ae0062 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5434,16 +5434,6 @@ static int sched_cpu_active(struct notifier_block *nfb,
set_cpu_rq_start_time();
return NOTIFY_OK;
- case CPU_ONLINE:
- /*
- * At this point a starting CPU has marked itself as online via
- * set_cpu_online(). But it might not yet have marked itself
- * as active, which is essential from here on.
- */
- set_cpu_active(cpu, true);
- stop_machine_unpark(cpu);
- return NOTIFY_OK;
-
case CPU_DOWN_FAILED:
set_cpu_active(cpu, true);
return NOTIFY_OK;
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 544a7133cbd1..bd12c6c714ec 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -4,6 +4,7 @@
#include <linux/sched.h>
#include <linux/cpu.h>
#include <linux/cpuidle.h>
+#include <linux/cpuhotplug.h>
#include <linux/tick.h>
#include <linux/mm.h>
#include <linux/stackprotector.h>
@@ -193,8 +194,6 @@ exit_idle:
rcu_idle_exit();
}
-DEFINE_PER_CPU(bool, cpu_dead_idle);
-
/*
* Generic idle loop implementation
*
@@ -221,10 +220,7 @@ static void cpu_idle_loop(void)
rmb();
if (cpu_is_offline(smp_processor_id())) {
- rcu_cpu_notify(NULL, CPU_DYING_IDLE,
- (void *)(long)smp_processor_id());
- smp_mb(); /* all activity before dead. */
- this_cpu_write(cpu_dead_idle, true);
+ cpuhp_report_idle_dead();
arch_cpu_idle_dead();
}
@@ -291,5 +287,6 @@ void cpu_startup_entry(enum cpuhp_state state)
boot_init_stack_canary();
#endif
arch_cpu_idle_prepare();
+ cpuhp_online_idle(state);
cpu_idle_loop();
}
diff --git a/kernel/smp.c b/kernel/smp.c
index 300d29391e07..74165443c240 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -568,6 +568,7 @@ void __init smp_init(void)
unsigned int cpu;
idle_threads_init();
+ cpuhp_threads_init();
/* FIXME: This should be done in userspace --RR */
for_each_present_cpu(cpu) {
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index d264f59bff56..13bc43d1fb22 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -226,7 +226,7 @@ static void smpboot_unpark_thread(struct smp_hotplug_thread *ht, unsigned int cp
kthread_unpark(tsk);
}
-void smpboot_unpark_threads(unsigned int cpu)
+int smpboot_unpark_threads(unsigned int cpu)
{
struct smp_hotplug_thread *cur;
@@ -235,6 +235,7 @@ void smpboot_unpark_threads(unsigned int cpu)
if (cpumask_test_cpu(cpu, cur->cpumask))
smpboot_unpark_thread(cur, cpu);
mutex_unlock(&smpboot_threads_lock);
+ return 0;
}
static void smpboot_park_thread(struct smp_hotplug_thread *ht, unsigned int cpu)
@@ -245,7 +246,7 @@ static void smpboot_park_thread(struct smp_hotplug_thread *ht, unsigned int cpu)
kthread_park(tsk);
}
-void smpboot_park_threads(unsigned int cpu)
+int smpboot_park_threads(unsigned int cpu)
{
struct smp_hotplug_thread *cur;
@@ -253,6 +254,7 @@ void smpboot_park_threads(unsigned int cpu)
list_for_each_entry_reverse(cur, &hotplug_threads, list)
smpboot_park_thread(cur, cpu);
mutex_unlock(&smpboot_threads_lock);
+ return 0;
}
static void smpboot_destroy_threads(struct smp_hotplug_thread *ht)
diff --git a/kernel/smpboot.h b/kernel/smpboot.h
index 72415a0eb955..485b81cfab34 100644
--- a/kernel/smpboot.h
+++ b/kernel/smpboot.h
@@ -14,7 +14,9 @@ static inline void idle_threads_init(void) { }
#endif
int smpboot_create_threads(unsigned int cpu);
-void smpboot_park_threads(unsigned int cpu);
-void smpboot_unpark_threads(unsigned int cpu);
+int smpboot_park_threads(unsigned int cpu);
+int smpboot_unpark_threads(unsigned int cpu);
+
+void __init cpuhp_threads_init(void);
#endif
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 664de539299b..56ece145a814 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -323,13 +323,42 @@ static void clocksource_enqueue_watchdog(struct clocksource *cs)
/* cs is a watchdog. */
if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
+ }
+ spin_unlock_irqrestore(&watchdog_lock, flags);
+}
+
+static void clocksource_select_watchdog(bool fallback)
+{
+ struct clocksource *cs, *old_wd;
+ unsigned long flags;
+
+ spin_lock_irqsave(&watchdog_lock, flags);
+ /* save current watchdog */
+ old_wd = watchdog;
+ if (fallback)
+ watchdog = NULL;
+
+ list_for_each_entry(cs, &clocksource_list, list) {
+ /* cs is a clocksource to be watched. */
+ if (cs->flags & CLOCK_SOURCE_MUST_VERIFY)
+ continue;
+
+ /* Skip current if we were requested for a fallback. */
+ if (fallback && cs == old_wd)
+ continue;
+
/* Pick the best watchdog. */
- if (!watchdog || cs->rating > watchdog->rating) {
+ if (!watchdog || cs->rating > watchdog->rating)
watchdog = cs;
- /* Reset watchdog cycles */
- clocksource_reset_watchdog();
- }
}
+ /* If we failed to find a fallback restore the old one. */
+ if (!watchdog)
+ watchdog = old_wd;
+
+ /* If we changed the watchdog we need to reset cycles. */
+ if (watchdog != old_wd)
+ clocksource_reset_watchdog();
+
/* Check if the watchdog timer needs to be started. */
clocksource_start_watchdog();
spin_unlock_irqrestore(&watchdog_lock, flags);
@@ -404,6 +433,7 @@ static void clocksource_enqueue_watchdog(struct clocksource *cs)
cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
}
+static void clocksource_select_watchdog(bool fallback) { }
static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { }
static inline void clocksource_resume_watchdog(void) { }
static inline int __clocksource_watchdog_kthread(void) { return 0; }
@@ -736,6 +766,7 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
clocksource_enqueue(cs);
clocksource_enqueue_watchdog(cs);
clocksource_select();
+ clocksource_select_watchdog(false);
mutex_unlock(&clocksource_mutex);
return 0;
}
@@ -758,6 +789,7 @@ void clocksource_change_rating(struct clocksource *cs, int rating)
mutex_lock(&clocksource_mutex);
__clocksource_change_rating(cs, rating);
clocksource_select();
+ clocksource_select_watchdog(false);
mutex_unlock(&clocksource_mutex);
}
EXPORT_SYMBOL(clocksource_change_rating);
@@ -767,12 +799,12 @@ EXPORT_SYMBOL(clocksource_change_rating);
*/
static int clocksource_unbind(struct clocksource *cs)
{
- /*
- * I really can't convince myself to support this on hardware
- * designed by lobotomized monkeys.
- */
- if (clocksource_is_watchdog(cs))
- return -EBUSY;
+ if (clocksource_is_watchdog(cs)) {
+ /* Select and try to install a replacement watchdog. */
+ clocksource_select_watchdog(true);
+ if (clocksource_is_watchdog(cs))
+ return -EBUSY;
+ }
if (cs == curr_clocksource) {
/* Select and try to install a replacement clock source */
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index 347fecf86a3f..555e21f7b966 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -68,7 +68,7 @@ static struct clocksource clocksource_jiffies = {
.name = "jiffies",
.rating = 1, /* lowest valid rating*/
.read = jiffies_read,
- .mask = 0xffffffff, /*32bits*/
+ .mask = CLOCKSOURCE_MASK(32),
.mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */
.shift = JIFFIES_SHIFT,
.max_cycles = 10,
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 34b4cedfa80d..9c629bbed572 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -233,6 +233,7 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
u64 tmp, ntpinterval;
struct clocksource *old_clock;
+ ++tk->cs_was_changed_seq;
old_clock = tk->tkr_mono.clock;
tk->tkr_mono.clock = clock;
tk->tkr_mono.read = clock->read;
@@ -298,17 +299,34 @@ u32 (*arch_gettimeoffset)(void) = default_arch_gettimeoffset;
static inline u32 arch_gettimeoffset(void) { return 0; }
#endif
+static inline s64 timekeeping_delta_to_ns(struct tk_read_base *tkr,
+ cycle_t delta)
+{
+ s64 nsec;
+
+ nsec = delta * tkr->mult + tkr->xtime_nsec;
+ nsec >>= tkr->shift;
+
+ /* If arch requires, add in get_arch_timeoffset() */
+ return nsec + arch_gettimeoffset();
+}
+
static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
{
cycle_t delta;
- s64 nsec;
delta = timekeeping_get_delta(tkr);
+ return timekeeping_delta_to_ns(tkr, delta);
+}
- nsec = (delta * tkr->mult + tkr->xtime_nsec) >> tkr->shift;
+static inline s64 timekeeping_cycles_to_ns(struct tk_read_base *tkr,
+ cycle_t cycles)
+{
+ cycle_t delta;
- /* If arch requires, add in get_arch_timeoffset() */
- return nsec + arch_gettimeoffset();
+ /* calculate the delta since the last update_wall_time */
+ delta = clocksource_delta(cycles, tkr->cycle_last, tkr->mask);
+ return timekeeping_delta_to_ns(tkr, delta);
}
/**
@@ -857,44 +875,262 @@ time64_t __ktime_get_real_seconds(void)
return tk->xtime_sec;
}
+/**
+ * ktime_get_snapshot - snapshots the realtime/monotonic raw clocks with counter
+ * @systime_snapshot: pointer to struct receiving the system time snapshot
+ */
+void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot)
+{
+ struct timekeeper *tk = &tk_core.timekeeper;
+ unsigned long seq;
+ ktime_t base_raw;
+ ktime_t base_real;
+ s64 nsec_raw;
+ s64 nsec_real;
+ cycle_t now;
-#ifdef CONFIG_NTP_PPS
+ WARN_ON_ONCE(timekeeping_suspended);
+
+ do {
+ seq = read_seqcount_begin(&tk_core.seq);
+
+ now = tk->tkr_mono.read(tk->tkr_mono.clock);
+ systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq;
+ systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq;
+ base_real = ktime_add(tk->tkr_mono.base,
+ tk_core.timekeeper.offs_real);
+ base_raw = tk->tkr_raw.base;
+ nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono, now);
+ nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw, now);
+ } while (read_seqcount_retry(&tk_core.seq, seq));
+
+ systime_snapshot->cycles = now;
+ systime_snapshot->real = ktime_add_ns(base_real, nsec_real);
+ systime_snapshot->raw = ktime_add_ns(base_raw, nsec_raw);
+}
+EXPORT_SYMBOL_GPL(ktime_get_snapshot);
+
+/* Scale base by mult/div checking for overflow */
+static int scale64_check_overflow(u64 mult, u64 div, u64 *base)
+{
+ u64 tmp, rem;
+
+ tmp = div64_u64_rem(*base, div, &rem);
+
+ if (((int)sizeof(u64)*8 - fls64(mult) < fls64(tmp)) ||
+ ((int)sizeof(u64)*8 - fls64(mult) < fls64(rem)))
+ return -EOVERFLOW;
+ tmp *= mult;
+ rem *= mult;
+
+ do_div(rem, div);
+ *base = tmp + rem;
+ return 0;
+}
/**
- * ktime_get_raw_and_real_ts64 - get day and raw monotonic time in timespec format
- * @ts_raw: pointer to the timespec to be set to raw monotonic time
- * @ts_real: pointer to the timespec to be set to the time of day
+ * adjust_historical_crosststamp - adjust crosstimestamp previous to current interval
+ * @history: Snapshot representing start of history
+ * @partial_history_cycles: Cycle offset into history (fractional part)
+ * @total_history_cycles: Total history length in cycles
+ * @discontinuity: True indicates clock was set on history period
+ * @ts: Cross timestamp that should be adjusted using
+ * partial/total ratio
*
- * This function reads both the time of day and raw monotonic time at the
- * same time atomically and stores the resulting timestamps in timespec
- * format.
+ * Helper function used by get_device_system_crosststamp() to correct the
+ * crosstimestamp corresponding to the start of the current interval to the
+ * system counter value (timestamp point) provided by the driver. The
+ * total_history_* quantities are the total history starting at the provided
+ * reference point and ending at the start of the current interval. The cycle
+ * count between the driver timestamp point and the start of the current
+ * interval is partial_history_cycles.
*/
-void ktime_get_raw_and_real_ts64(struct timespec64 *ts_raw, struct timespec64 *ts_real)
+static int adjust_historical_crosststamp(struct system_time_snapshot *history,
+ cycle_t partial_history_cycles,
+ cycle_t total_history_cycles,
+ bool discontinuity,
+ struct system_device_crosststamp *ts)
{
struct timekeeper *tk = &tk_core.timekeeper;
- unsigned long seq;
- s64 nsecs_raw, nsecs_real;
+ u64 corr_raw, corr_real;
+ bool interp_forward;
+ int ret;
- WARN_ON_ONCE(timekeeping_suspended);
+ if (total_history_cycles == 0 || partial_history_cycles == 0)
+ return 0;
+
+ /* Interpolate shortest distance from beginning or end of history */
+ interp_forward = partial_history_cycles > total_history_cycles/2 ?
+ true : false;
+ partial_history_cycles = interp_forward ?
+ total_history_cycles - partial_history_cycles :
+ partial_history_cycles;
+
+ /*
+ * Scale the monotonic raw time delta by:
+ * partial_history_cycles / total_history_cycles
+ */
+ corr_raw = (u64)ktime_to_ns(
+ ktime_sub(ts->sys_monoraw, history->raw));
+ ret = scale64_check_overflow(partial_history_cycles,
+ total_history_cycles, &corr_raw);
+ if (ret)
+ return ret;
+
+ /*
+ * If there is a discontinuity in the history, scale monotonic raw
+ * correction by:
+ * mult(real)/mult(raw) yielding the realtime correction
+ * Otherwise, calculate the realtime correction similar to monotonic
+ * raw calculation
+ */
+ if (discontinuity) {
+ corr_real = mul_u64_u32_div
+ (corr_raw, tk->tkr_mono.mult, tk->tkr_raw.mult);
+ } else {
+ corr_real = (u64)ktime_to_ns(
+ ktime_sub(ts->sys_realtime, history->real));
+ ret = scale64_check_overflow(partial_history_cycles,
+ total_history_cycles, &corr_real);
+ if (ret)
+ return ret;
+ }
+
+ /* Fixup monotonic raw and real time time values */
+ if (interp_forward) {
+ ts->sys_monoraw = ktime_add_ns(history->raw, corr_raw);
+ ts->sys_realtime = ktime_add_ns(history->real, corr_real);
+ } else {
+ ts->sys_monoraw = ktime_sub_ns(ts->sys_monoraw, corr_raw);
+ ts->sys_realtime = ktime_sub_ns(ts->sys_realtime, corr_real);
+ }
+
+ return 0;
+}
+
+/*
+ * cycle_between - true if test occurs chronologically between before and after
+ */
+static bool cycle_between(cycle_t before, cycle_t test, cycle_t after)
+{
+ if (test > before && test < after)
+ return true;
+ if (test < before && before > after)
+ return true;
+ return false;
+}
+
+/**
+ * get_device_system_crosststamp - Synchronously capture system/device timestamp
+ * @get_time_fn: Callback to get simultaneous device time and
+ * system counter from the device driver
+ * @ctx: Context passed to get_time_fn()
+ * @history_begin: Historical reference point used to interpolate system
+ * time when counter provided by the driver is before the current interval
+ * @xtstamp: Receives simultaneously captured system and device time
+ *
+ * Reads a timestamp from a device and correlates it to system time
+ */
+int get_device_system_crosststamp(int (*get_time_fn)
+ (ktime_t *device_time,
+ struct system_counterval_t *sys_counterval,
+ void *ctx),
+ void *ctx,
+ struct system_time_snapshot *history_begin,
+ struct system_device_crosststamp *xtstamp)
+{
+ struct system_counterval_t system_counterval;
+ struct timekeeper *tk = &tk_core.timekeeper;
+ cycle_t cycles, now, interval_start;
+ unsigned int clock_was_set_seq = 0;
+ ktime_t base_real, base_raw;
+ s64 nsec_real, nsec_raw;
+ u8 cs_was_changed_seq;
+ unsigned long seq;
+ bool do_interp;
+ int ret;
do {
seq = read_seqcount_begin(&tk_core.seq);
+ /*
+ * Try to synchronously capture device time and a system
+ * counter value calling back into the device driver
+ */
+ ret = get_time_fn(&xtstamp->device, &system_counterval, ctx);
+ if (ret)
+ return ret;
+
+ /*
+ * Verify that the clocksource associated with the captured
+ * system counter value is the same as the currently installed
+ * timekeeper clocksource
+ */
+ if (tk->tkr_mono.clock != system_counterval.cs)
+ return -ENODEV;
+ cycles = system_counterval.cycles;
- *ts_raw = tk->raw_time;
- ts_real->tv_sec = tk->xtime_sec;
- ts_real->tv_nsec = 0;
+ /*
+ * Check whether the system counter value provided by the
+ * device driver is on the current timekeeping interval.
+ */
+ now = tk->tkr_mono.read(tk->tkr_mono.clock);
+ interval_start = tk->tkr_mono.cycle_last;
+ if (!cycle_between(interval_start, cycles, now)) {
+ clock_was_set_seq = tk->clock_was_set_seq;
+ cs_was_changed_seq = tk->cs_was_changed_seq;
+ cycles = interval_start;
+ do_interp = true;
+ } else {
+ do_interp = false;
+ }
- nsecs_raw = timekeeping_get_ns(&tk->tkr_raw);
- nsecs_real = timekeeping_get_ns(&tk->tkr_mono);
+ base_real = ktime_add(tk->tkr_mono.base,
+ tk_core.timekeeper.offs_real);
+ base_raw = tk->tkr_raw.base;
+ nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono,
+ system_counterval.cycles);
+ nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw,
+ system_counterval.cycles);
} while (read_seqcount_retry(&tk_core.seq, seq));
- timespec64_add_ns(ts_raw, nsecs_raw);
- timespec64_add_ns(ts_real, nsecs_real);
-}
-EXPORT_SYMBOL(ktime_get_raw_and_real_ts64);
+ xtstamp->sys_realtime = ktime_add_ns(base_real, nsec_real);
+ xtstamp->sys_monoraw = ktime_add_ns(base_raw, nsec_raw);
-#endif /* CONFIG_NTP_PPS */
+ /*
+ * Interpolate if necessary, adjusting back from the start of the
+ * current interval
+ */
+ if (do_interp) {
+ cycle_t partial_history_cycles, total_history_cycles;
+ bool discontinuity;
+
+ /*
+ * Check that the counter value occurs after the provided
+ * history reference and that the history doesn't cross a
+ * clocksource change
+ */
+ if (!history_begin ||
+ !cycle_between(history_begin->cycles,
+ system_counterval.cycles, cycles) ||
+ history_begin->cs_was_changed_seq != cs_was_changed_seq)
+ return -EINVAL;
+ partial_history_cycles = cycles - system_counterval.cycles;
+ total_history_cycles = cycles - history_begin->cycles;
+ discontinuity =
+ history_begin->clock_was_set_seq != clock_was_set_seq;
+
+ ret = adjust_historical_crosststamp(history_begin,
+ partial_history_cycles,
+ total_history_cycles,
+ discontinuity, xtstamp);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(get_device_system_crosststamp);
/**
* do_gettimeofday - Returns the time of day in a timeval