From 90b14889d2f9b29d7e5b4b2d36251c13ce3dd13f Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 3 Nov 2016 15:49:58 +0100 Subject: kernel/printk: Convert to hotplug state machine Install the callbacks via the state machine. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Cc: Andrew Morton Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20161103145021.28528-3-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- kernel/printk/printk.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index de08fc90baaf..4487ffcd42d5 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2185,27 +2185,18 @@ void resume_console(void) /** * console_cpu_notify - print deferred console messages after CPU hotplug - * @self: notifier struct - * @action: CPU hotplug event - * @hcpu: unused + * @cpu: unused * * If printk() is called from a CPU that is not online yet, the messages * will be spooled but will not show up on the console. This function is * called when a new CPU comes online (or fails to come up), and ensures * that any such output gets printed. */ -static int console_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - switch (action) { - case CPU_ONLINE: - case CPU_DEAD: - case CPU_DOWN_FAILED: - case CPU_UP_CANCELED: - console_lock(); - console_unlock(); - } - return NOTIFY_OK; +static int console_cpu_notify(unsigned int cpu) +{ + console_lock(); + console_unlock(); + return 0; } /** @@ -2843,6 +2834,7 @@ EXPORT_SYMBOL(unregister_console); static int __init printk_late_init(void) { struct console *con; + int ret; for_each_console(con) { if (!keep_bootcon && con->flags & CON_BOOT) { @@ -2857,7 +2849,12 @@ static int __init printk_late_init(void) unregister_console(con); } } - hotcpu_notifier(console_cpu_notify, 0); + ret = cpuhp_setup_state_nocalls(CPUHP_PRINTK_DEAD, "printk:dead", NULL, + console_cpu_notify); + WARN_ON(ret < 0); + ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "printk:online", + console_cpu_notify, NULL); + WARN_ON(ret < 0); return 0; } late_initcall(printk_late_init); -- cgit v1.2.3-70-g09d2 From f97960fbddd98e1b7bfba469b8510b49a62b4bc5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 17 Nov 2016 17:31:55 +0100 Subject: kernel/printk: Block cpuhotplug callback when tasks are frozen The recent conversion of the console hotplug notifier to the state machine missed the fact, that the notifier only operated on the non frozen transitions. As a consequence the console_lock/unlock() pair is also invoked during suspend, which results in a lockdep warning. Restore the previous state by making the lock/unlock conditional on !tasks_frozen. Fixes: 90b14889d2f9 ("kernel/printk: Convert to hotplug state machine") Reported-and-tested-by: Borislav Petkov Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1611171729320.3645@nanos Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Sebastian Andrzej Siewior --- kernel/printk/printk.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 4487ffcd42d5..37893da9bae8 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2194,8 +2194,10 @@ void resume_console(void) */ static int console_cpu_notify(unsigned int cpu) { - console_lock(); - console_unlock(); + if (!cpuhp_tasks_frozen) { + console_lock(); + console_unlock(); + } return 0; } -- cgit v1.2.3-70-g09d2 From 31eff2434db542763a00074a8368d7bd78d14ea1 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 17 Nov 2016 19:35:34 +0100 Subject: sched/nohz: Convert to hotplug state machine Install the callbacks via the state machine. Signed-off-by: Sebastian Andrzej Siewior Cc: rt@linuxtronix.de Link: http://lkml.kernel.org/r/20161117183541.8588-14-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- kernel/time/tick-sched.c | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) (limited to 'kernel') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 3bcb61b52f6c..71496a20e670 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -390,24 +390,16 @@ static int __init tick_nohz_full_setup(char *str) } __setup("nohz_full=", tick_nohz_full_setup); -static int tick_nohz_cpu_down_callback(struct notifier_block *nfb, - unsigned long action, - void *hcpu) +static int tick_nohz_cpu_down(unsigned int cpu) { - unsigned int cpu = (unsigned long)hcpu; - - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_DOWN_PREPARE: - /* - * The boot CPU handles housekeeping duty (unbound timers, - * workqueues, timekeeping, ...) on behalf of full dynticks - * CPUs. It must remain online when nohz full is enabled. - */ - if (tick_nohz_full_running && tick_do_timer_cpu == cpu) - return NOTIFY_BAD; - break; - } - return NOTIFY_OK; + /* + * The boot CPU handles housekeeping duty (unbound timers, + * workqueues, timekeeping, ...) on behalf of full dynticks + * CPUs. It must remain online when nohz full is enabled. + */ + if (tick_nohz_full_running && tick_do_timer_cpu == cpu) + return -EBUSY; + return 0; } static int tick_nohz_init_all(void) @@ -428,7 +420,7 @@ static int tick_nohz_init_all(void) void __init tick_nohz_init(void) { - int cpu; + int cpu, ret; if (!tick_nohz_full_running) { if (tick_nohz_init_all() < 0) @@ -469,7 +461,10 @@ void __init tick_nohz_init(void) for_each_cpu(cpu, tick_nohz_full_mask) context_tracking_cpu_set(cpu); - cpu_notifier(tick_nohz_cpu_down_callback, 0); + ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, + "kernel/nohz:predown", NULL, + tick_nohz_cpu_down); + WARN_ON(ret < 0); pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n", cpumask_pr_args(tick_nohz_full_mask)); -- cgit v1.2.3-70-g09d2 From b32614c03413f8a6025d8677c2b7c0ee976e63d4 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Sun, 27 Nov 2016 00:13:34 +0100 Subject: tracing/rb: Convert to hotplug state machine Install the callbacks via the state machine. The notifier in struct ring_buffer is replaced by the multi instance interface. Upon __ring_buffer_alloc() invocation, cpuhp_state_add_instance() will invoke the trace_rb_cpu_prepare() on each CPU. This callback may now fail. This means __ring_buffer_alloc() will fail and cleanup (like previously) and during a CPU up event this failure will not allow the CPU to come up. Signed-off-by: Sebastian Andrzej Siewior Cc: Steven Rostedt Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20161126231350.10321-7-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 1 + include/linux/ring_buffer.h | 6 ++ kernel/trace/ring_buffer.c | 135 +++++++++++++++----------------------------- kernel/trace/trace.c | 15 ++++- 4 files changed, 66 insertions(+), 91 deletions(-) (limited to 'kernel') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index e3771fb959c0..18bcfeb2463e 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -62,6 +62,7 @@ enum cpuhp_state { CPUHP_TOPOLOGY_PREPARE, CPUHP_NET_IUCV_PREPARE, CPUHP_ARM_BL_PREPARE, + CPUHP_TRACE_RB_PREPARE, CPUHP_TIMERS_DEAD, CPUHP_NOTF_ERR_INJ_PREPARE, CPUHP_MIPS_SOC_PREPARE, diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 4acc552e9279..b6d4568795a7 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -198,4 +198,10 @@ enum ring_buffer_flags { RB_FL_OVERWRITE = 1 << 0, }; +#ifdef CONFIG_RING_BUFFER +int trace_rb_cpu_prepare(unsigned int cpu, struct hlist_node *node); +#else +#define trace_rb_cpu_prepare NULL +#endif + #endif /* _LINUX_RING_BUFFER_H */ diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 9c143739b8d7..a7a055f167c7 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -479,9 +479,7 @@ struct ring_buffer { struct ring_buffer_per_cpu **buffers; -#ifdef CONFIG_HOTPLUG_CPU - struct notifier_block cpu_notify; -#endif + struct hlist_node node; u64 (*clock)(void); struct rb_irq_work irq_work; @@ -1274,11 +1272,6 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) kfree(cpu_buffer); } -#ifdef CONFIG_HOTPLUG_CPU -static int rb_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu); -#endif - /** * __ring_buffer_alloc - allocate a new ring_buffer * @size: the size in bytes per cpu that is needed. @@ -1296,6 +1289,7 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, long nr_pages; int bsize; int cpu; + int ret; /* keep it in its own cache line */ buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()), @@ -1318,17 +1312,6 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, if (nr_pages < 2) nr_pages = 2; - /* - * In case of non-hotplug cpu, if the ring-buffer is allocated - * in early initcall, it will not be notified of secondary cpus. - * In that off case, we need to allocate for all possible cpus. - */ -#ifdef CONFIG_HOTPLUG_CPU - cpu_notifier_register_begin(); - cpumask_copy(buffer->cpumask, cpu_online_mask); -#else - cpumask_copy(buffer->cpumask, cpu_possible_mask); -#endif buffer->cpus = nr_cpu_ids; bsize = sizeof(void *) * nr_cpu_ids; @@ -1337,19 +1320,15 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, if (!buffer->buffers) goto fail_free_cpumask; - for_each_buffer_cpu(buffer, cpu) { - buffer->buffers[cpu] = - rb_allocate_cpu_buffer(buffer, nr_pages, cpu); - if (!buffer->buffers[cpu]) - goto fail_free_buffers; - } + cpu = raw_smp_processor_id(); + cpumask_set_cpu(cpu, buffer->cpumask); + buffer->buffers[cpu] = rb_allocate_cpu_buffer(buffer, nr_pages, cpu); + if (!buffer->buffers[cpu]) + goto fail_free_buffers; -#ifdef CONFIG_HOTPLUG_CPU - buffer->cpu_notify.notifier_call = rb_cpu_notify; - buffer->cpu_notify.priority = 0; - __register_cpu_notifier(&buffer->cpu_notify); - cpu_notifier_register_done(); -#endif + ret = cpuhp_state_add_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node); + if (ret < 0) + goto fail_free_buffers; mutex_init(&buffer->mutex); @@ -1364,9 +1343,6 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, fail_free_cpumask: free_cpumask_var(buffer->cpumask); -#ifdef CONFIG_HOTPLUG_CPU - cpu_notifier_register_done(); -#endif fail_free_buffer: kfree(buffer); @@ -1383,18 +1359,11 @@ ring_buffer_free(struct ring_buffer *buffer) { int cpu; -#ifdef CONFIG_HOTPLUG_CPU - cpu_notifier_register_begin(); - __unregister_cpu_notifier(&buffer->cpu_notify); -#endif + cpuhp_state_remove_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node); for_each_buffer_cpu(buffer, cpu) rb_free_cpu_buffer(buffer->buffers[cpu]); -#ifdef CONFIG_HOTPLUG_CPU - cpu_notifier_register_done(); -#endif - kfree(buffer->buffers); free_cpumask_var(buffer->cpumask); @@ -4633,62 +4602,48 @@ int ring_buffer_read_page(struct ring_buffer *buffer, } EXPORT_SYMBOL_GPL(ring_buffer_read_page); -#ifdef CONFIG_HOTPLUG_CPU -static int rb_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) +/* + * We only allocate new buffers, never free them if the CPU goes down. + * If we were to free the buffer, then the user would lose any trace that was in + * the buffer. + */ +int trace_rb_cpu_prepare(unsigned int cpu, struct hlist_node *node) { - struct ring_buffer *buffer = - container_of(self, struct ring_buffer, cpu_notify); - long cpu = (long)hcpu; + struct ring_buffer *buffer; long nr_pages_same; int cpu_i; unsigned long nr_pages; - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - if (cpumask_test_cpu(cpu, buffer->cpumask)) - return NOTIFY_OK; - - nr_pages = 0; - nr_pages_same = 1; - /* check if all cpu sizes are same */ - for_each_buffer_cpu(buffer, cpu_i) { - /* fill in the size from first enabled cpu */ - if (nr_pages == 0) - nr_pages = buffer->buffers[cpu_i]->nr_pages; - if (nr_pages != buffer->buffers[cpu_i]->nr_pages) { - nr_pages_same = 0; - break; - } - } - /* allocate minimum pages, user can later expand it */ - if (!nr_pages_same) - nr_pages = 2; - buffer->buffers[cpu] = - rb_allocate_cpu_buffer(buffer, nr_pages, cpu); - if (!buffer->buffers[cpu]) { - WARN(1, "failed to allocate ring buffer on CPU %ld\n", - cpu); - return NOTIFY_OK; + buffer = container_of(node, struct ring_buffer, node); + if (cpumask_test_cpu(cpu, buffer->cpumask)) + return 0; + + nr_pages = 0; + nr_pages_same = 1; + /* check if all cpu sizes are same */ + for_each_buffer_cpu(buffer, cpu_i) { + /* fill in the size from first enabled cpu */ + if (nr_pages == 0) + nr_pages = buffer->buffers[cpu_i]->nr_pages; + if (nr_pages != buffer->buffers[cpu_i]->nr_pages) { + nr_pages_same = 0; + break; } - smp_wmb(); - cpumask_set_cpu(cpu, buffer->cpumask); - break; - case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: - /* - * Do nothing. - * If we were to free the buffer, then the user would - * lose any trace that was in the buffer. - */ - break; - default: - break; } - return NOTIFY_OK; + /* allocate minimum pages, user can later expand it */ + if (!nr_pages_same) + nr_pages = 2; + buffer->buffers[cpu] = + rb_allocate_cpu_buffer(buffer, nr_pages, cpu); + if (!buffer->buffers[cpu]) { + WARN(1, "failed to allocate ring buffer on CPU %u\n", + cpu); + return -ENOMEM; + } + smp_wmb(); + cpumask_set_cpu(cpu, buffer->cpumask); + return 0; } -#endif #ifdef CONFIG_RING_BUFFER_STARTUP_TEST /* diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8696ce6bf2f6..465d56febc5b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7659,10 +7659,21 @@ __init static int tracer_alloc_buffers(void) raw_spin_lock_init(&global_trace.start_lock); + /* + * The prepare callbacks allocates some memory for the ring buffer. We + * don't free the buffer if the if the CPU goes down. If we were to free + * the buffer, then the user would lose any trace that was in the + * buffer. The memory will be removed once the "instance" is removed. + */ + ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE, + "trace/RB:preapre", trace_rb_cpu_prepare, + NULL); + if (ret < 0) + goto out_free_cpumask; /* Used for event triggers */ temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE); if (!temp_buffer) - goto out_free_cpumask; + goto out_rm_hp_state; if (trace_create_savedcmd() < 0) goto out_free_temp_buffer; @@ -7723,6 +7734,8 @@ out_free_savedcmd: free_saved_cmdlines_buffer(savedcmd); out_free_temp_buffer: ring_buffer_free(temp_buffer); +out_rm_hp_state: + cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE); out_free_cpumask: free_cpumask_var(global_trace.tracing_cpumask); out_free_buffer_mask: -- cgit v1.2.3-70-g09d2 From b18cc3de00ec3442cf40ac60787dbe0703b99e24 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 7 Dec 2016 14:31:33 +0100 Subject: tracing/rb: Init the CPU mask on allocation Before commit b32614c03413 ("tracing/rb: Convert to hotplug state machine") the allocated cpumask was initialized to the mask of online or possible CPUs. After the CPU hotplug changes the buffer initialization moved to trace_rb_cpu_prepare() but the cpumask is allocated with alloc_cpumask() and therefor has random content. As a consequence the cpu buffers are not initialized and a later access dereferences a NULL pointer. Use zalloc_cpumask() instead so trace_rb_cpu_prepare() initializes the buffers properly. Fixes: b32614c03413 ("tracing/rb: Convert to hotplug state machine") Reported-by: Tetsuo Handa Signed-off-by: Sebastian Andrzej Siewior Cc: rostedt@goodmis.org Link: http://lkml.kernel.org/r/20161207133133.hzkcqfllxcdi3joz@linutronix.de Signed-off-by: Thomas Gleixner --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index a7a055f167c7..89a2611a1635 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1297,7 +1297,7 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, if (!buffer) return NULL; - if (!alloc_cpumask_var(&buffer->cpumask, GFP_KERNEL)) + if (!zalloc_cpumask_var(&buffer->cpumask, GFP_KERNEL)) goto fail_free_buffer; nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); -- cgit v1.2.3-70-g09d2