From 0453b435df0d69dd0d8c42eb9b3015aaf0d8a032 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 27 Mar 2013 02:18:34 +0100 Subject: nohz: Force boot CPU outside full dynticks range The timekeeping job must be able to run early on boot because there may be some pre-SMP (and thus pre-initcalls ) components that rely on it. The IO-APIC is one such users as it tests the timer health by watching jiffies progression. Given that it happens before we know the initial online set, we can't rely on it to select a timekeeper. We need one before SMP time otherwise we simply crash on boot. To fix this and keep things simple for now, force the boot CPU outside of the full dynticks range in any case and do this early on kernel parameter parsing time. We might want a trickier solution later, expecially for aSMP architectures that need to assign housekeeping tasks to arbitrary low power CPUs. But it's still first pass KISS time for now. Reviewed-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- Documentation/kernel-parameters.txt | 4 +-- kernel/time/tick-sched.c | 54 +++++++++++-------------------------- 2 files changed, 17 insertions(+), 41 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 82365dde00a8..887b29708447 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1916,8 +1916,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. nohz_full= [KNL,BOOT] In kernels built with CONFIG_NO_HZ_FULL=y, set the specified list of CPUs whose tick will be stopped - whenever possible. You need to keep at least one online - CPU outside the range to maintain the timekeeping. + whenever possible. The boot CPU will be forced outside + the range to maintain the timekeeping. noiotrap [SH] Disables trapped I/O port accesses. diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 369b5769fc97..2bac5ea2c9af 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -158,11 +158,21 @@ int tick_nohz_full_cpu(int cpu) /* Parse the boot-time nohz CPU list from the kernel parameters. */ static int __init tick_nohz_full_setup(char *str) { + int cpu; + alloc_bootmem_cpumask_var(&nohz_full_mask); - if (cpulist_parse(str, nohz_full_mask) < 0) + if (cpulist_parse(str, nohz_full_mask) < 0) { pr_warning("NOHZ: Incorrect nohz_full cpumask\n"); - else - have_nohz_full_mask = true; + return 1; + } + + cpu = smp_processor_id(); + if (cpumask_test_cpu(cpu, nohz_full_mask)) { + pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu); + cpumask_clear_cpu(cpu, nohz_full_mask); + } + have_nohz_full_mask = true; + return 1; } __setup("nohz_full=", tick_nohz_full_setup); @@ -195,42 +205,8 @@ static char __initdata nohz_full_buf[NR_CPUS + 1]; static int __init init_tick_nohz_full(void) { - cpumask_var_t online_nohz; - int cpu; - - if (!have_nohz_full_mask) - return 0; - - cpu_notifier(tick_nohz_cpu_down_callback, 0); - - if (!zalloc_cpumask_var(&online_nohz, GFP_KERNEL)) { - pr_warning("NO_HZ: Not enough memory to check full nohz mask\n"); - return -ENOMEM; - } - - /* - * CPUs can probably not be concurrently offlined on initcall time. - * But we are paranoid, aren't we? - */ - get_online_cpus(); - - /* Ensure we keep a CPU outside the dynticks range for timekeeping */ - cpumask_and(online_nohz, cpu_online_mask, nohz_full_mask); - if (cpumask_equal(online_nohz, cpu_online_mask)) { - pr_warning("NO_HZ: Must keep at least one online CPU " - "out of nohz_full range\n"); - /* - * We know the current CPU doesn't have its tick stopped. - * Let's use it for the timekeeping duty. - */ - preempt_disable(); - cpu = smp_processor_id(); - pr_warning("NO_HZ: Clearing %d from nohz_full range\n", cpu); - cpumask_clear_cpu(cpu, nohz_full_mask); - preempt_enable(); - } - put_online_cpus(); - free_cpumask_var(online_nohz); + if (have_nohz_full_mask) + cpu_notifier(tick_nohz_cpu_down_callback, 0); cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask); pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf); -- cgit v1.2.3-70-g09d2 From d1e43fa5f8bb25f83a86a29f11fcfb57ed4d7566 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 26 Mar 2013 23:47:24 +0100 Subject: nohz: Ensure full dynticks CPUs are RCU nocbs We need full dynticks CPU to also be RCU nocb so that we don't have to keep the tick to handle RCU callbacks. Make sure the range passed to nohz_full= boot parameter is a subset of rcu_nocbs= The CPUs that fail to meet this requirement will be excluded from the nohz_full range. This is checked early in boot time, before any CPU has the opportunity to stop its tick. Suggested-by: Steven Rostedt Reviewed-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- Documentation/kernel-parameters.txt | 2 ++ include/linux/rcupdate.h | 7 +++++++ include/linux/tick.h | 2 ++ init/main.c | 1 + kernel/rcutree.c | 6 +++--- kernel/rcutree.h | 1 - kernel/rcutree_plugin.h | 13 ++++--------- kernel/time/tick-sched.c | 22 ++++++++++++++++------ 8 files changed, 35 insertions(+), 19 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 887b29708447..4865e9bfd08d 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1918,6 +1918,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. the specified list of CPUs whose tick will be stopped whenever possible. The boot CPU will be forced outside the range to maintain the timekeeping. + The CPUs in this range must also be included in the + rcu_nocbs= set. noiotrap [SH] Disables trapped I/O port accesses. diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index b758ce17b309..8e0948c872fc 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -999,4 +999,11 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) #define kfree_rcu(ptr, rcu_head) \ __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head)) +#ifdef CONFIG_RCU_NOCB_CPU +extern bool rcu_is_nocb_cpu(int cpu); +#else +static inline bool rcu_is_nocb_cpu(int cpu) { return false; } +#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ + + #endif /* __LINUX_RCUPDATE_H */ diff --git a/include/linux/tick.h b/include/linux/tick.h index b4e3b0c9639e..0b6873cbf512 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -158,8 +158,10 @@ static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } # endif /* !CONFIG_NO_HZ_COMMON */ #ifdef CONFIG_NO_HZ_FULL +extern void tick_nohz_init(void); extern int tick_nohz_full_cpu(int cpu); #else +static inline void tick_nohz_init(void) { } static inline int tick_nohz_full_cpu(int cpu) { return 0; } #endif diff --git a/init/main.c b/init/main.c index 63534a141b4e..2acb5bbde99b 100644 --- a/init/main.c +++ b/init/main.c @@ -547,6 +547,7 @@ asmlinkage void __init start_kernel(void) idr_init_cache(); perf_event_init(); rcu_init(); + tick_nohz_init(); radix_tree_init(); /* init some links before init_ISA_irqs() */ early_irq_init(); diff --git a/kernel/rcutree.c b/kernel/rcutree.c index f5ab50235cba..1d4ceff793a4 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1695,7 +1695,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) { /* No-CBs CPUs do not have orphanable callbacks. */ - if (is_nocb_cpu(rdp->cpu)) + if (rcu_is_nocb_cpu(rdp->cpu)) return; /* @@ -2757,10 +2757,10 @@ static void _rcu_barrier(struct rcu_state *rsp) * corresponding CPU's preceding callbacks have been invoked. */ for_each_possible_cpu(cpu) { - if (!cpu_online(cpu) && !is_nocb_cpu(cpu)) + if (!cpu_online(cpu) && !rcu_is_nocb_cpu(cpu)) continue; rdp = per_cpu_ptr(rsp->rda, cpu); - if (is_nocb_cpu(cpu)) { + if (rcu_is_nocb_cpu(cpu)) { _rcu_barrier_trace(rsp, "OnlineNoCB", cpu, rsp->n_barrier_done); atomic_inc(&rsp->barrier_cpu_count); diff --git a/kernel/rcutree.h b/kernel/rcutree.h index f993c0ac47db..38acc49da2c6 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -529,7 +529,6 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu); static void print_cpu_stall_info_end(void); static void zero_cpu_stall_ticks(struct rcu_data *rdp); static void increment_cpu_stall_ticks(void); -static bool is_nocb_cpu(int cpu); static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, bool lazy); static bool rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index a5745e9b5d5a..0cd91cc18db4 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -2167,7 +2167,7 @@ static int __init parse_rcu_nocb_poll(char *arg) early_param("rcu_nocb_poll", parse_rcu_nocb_poll); /* Is the specified CPU a no-CPUs CPU? */ -static bool is_nocb_cpu(int cpu) +bool rcu_is_nocb_cpu(int cpu) { if (have_rcu_nocb_mask) return cpumask_test_cpu(cpu, rcu_nocb_mask); @@ -2225,7 +2225,7 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, bool lazy) { - if (!is_nocb_cpu(rdp->cpu)) + if (!rcu_is_nocb_cpu(rdp->cpu)) return 0; __call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy); return 1; @@ -2242,7 +2242,7 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, long qll = rsp->qlen_lazy; /* If this is not a no-CBs CPU, tell the caller to do it the old way. */ - if (!is_nocb_cpu(smp_processor_id())) + if (!rcu_is_nocb_cpu(smp_processor_id())) return 0; rsp->qlen = 0; rsp->qlen_lazy = 0; @@ -2282,7 +2282,7 @@ static bool nocb_cpu_expendable(int cpu) * If there are no no-CB CPUs or if this CPU is not a no-CB CPU, * then offlining this CPU is harmless. Let it happen. */ - if (!have_rcu_nocb_mask || is_nocb_cpu(cpu)) + if (!have_rcu_nocb_mask || rcu_is_nocb_cpu(cpu)) return 1; /* If no memory, play it safe and keep the CPU around. */ @@ -2464,11 +2464,6 @@ static void __init rcu_init_nocb(void) #else /* #ifdef CONFIG_RCU_NOCB_CPU */ -static bool is_nocb_cpu(int cpu) -{ - return false; -} - static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, bool lazy) { diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 2bac5ea2c9af..d71a5f2bd7b2 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -203,17 +203,27 @@ static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb, */ static char __initdata nohz_full_buf[NR_CPUS + 1]; -static int __init init_tick_nohz_full(void) +void __init tick_nohz_init(void) { - if (have_nohz_full_mask) - cpu_notifier(tick_nohz_cpu_down_callback, 0); + int cpu; + + if (!have_nohz_full_mask) + return; + + cpu_notifier(tick_nohz_cpu_down_callback, 0); + + /* Make sure full dynticks CPU are also RCU nocbs */ + for_each_cpu(cpu, nohz_full_mask) { + if (!rcu_is_nocb_cpu(cpu)) { + pr_warning("NO_HZ: CPU %d is not RCU nocb: " + "cleared from nohz_full range", cpu); + cpumask_clear_cpu(cpu, nohz_full_mask); + } + } cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask); pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf); - - return 0; } -core_initcall(init_tick_nohz_full); #else #define have_nohz_full_mask (0) #endif -- cgit v1.2.3-70-g09d2 From f98823ac758ba1aa77c6e3f8ad4ef3ad84ee0a7c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 27 Mar 2013 01:17:22 +0100 Subject: nohz: New option to default all CPUs in full dynticks range Provide a new kernel config that defaults all CPUs to be part of the full dynticks range, except the boot one for timekeeping. This default setting is overriden by the nohz_full= boot option if passed by the user. This is helpful for those who don't need a finegrained range of full dynticks CPU and also for automated testing. Suggested-by: Ingo Molnar Reviewed-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- kernel/time/Kconfig | 10 ++++++++++ kernel/time/tick-sched.c | 23 +++++++++++++++++++++-- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 358d601a4fec..99c3f13dd478 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -128,6 +128,16 @@ config NO_HZ_FULL endchoice +config NO_HZ_FULL_ALL + bool "Full dynticks system on all CPUs by default" + depends on NO_HZ_FULL + help + If the user doesn't pass the nohz_full boot option to + define the range of full dynticks CPUs, consider that all + CPUs in the system are full dynticks by default. + Note the boot CPU will still be kept outside the range to + handle the timekeeping duty. + config NO_HZ bool "Old Idle dynticks config" depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index d71a5f2bd7b2..a76e09044f9f 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -203,12 +203,31 @@ static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb, */ static char __initdata nohz_full_buf[NR_CPUS + 1]; +static int tick_nohz_init_all(void) +{ + int err = -1; + +#ifdef CONFIG_NO_HZ_FULL_ALL + if (!alloc_cpumask_var(&nohz_full_mask, GFP_KERNEL)) { + pr_err("NO_HZ: Can't allocate full dynticks cpumask\n"); + return err; + } + err = 0; + cpumask_setall(nohz_full_mask); + cpumask_clear_cpu(smp_processor_id(), nohz_full_mask); + have_nohz_full_mask = true; +#endif + return err; +} + void __init tick_nohz_init(void) { int cpu; - if (!have_nohz_full_mask) - return; + if (!have_nohz_full_mask) { + if (tick_nohz_init_all() < 0) + return; + } cpu_notifier(tick_nohz_cpu_down_callback, 0); -- cgit v1.2.3-70-g09d2