diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-02-21 12:04:41 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-02-21 12:04:41 -0800 |
commit | d089f48fba28db14d0fe7753248f2575a9ddfc73 (patch) | |
tree | a3821c02dd38342193459e41ba453c058f75e3d2 /kernel/rcu/tree_stall.h | |
parent | 3f6ec19f2d05d800bbc42d95dece433da7697864 (diff) | |
parent | 2b392cb11c0db645ba81a08b6a2e96c56ec1fc64 (diff) |
Merge tag 'core-rcu-2021-02-17' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RCU updates from Ingo Molnar:
"These are the latest RCU updates for v5.12:
- Documentation updates.
- Miscellaneous fixes.
- kfree_rcu() updates: Addition of mem_dump_obj() to provide
allocator return addresses to more easily locate bugs. This has a
couple of RCU-related commits, but is mostly MM. Was pulled in with
akpm's agreement.
- Per-callback-batch tracking of numbers of callbacks, which enables
better debugging information and smarter reactions to large numbers
of callbacks.
- The first round of changes to allow CPUs to be runtime switched
from and to callback-offloaded state.
- CONFIG_PREEMPT_RT-related changes.
- RCU CPU stall warning updates.
- Addition of polling grace-period APIs for SRCU.
- Torture-test and torture-test scripting updates, including a
"torture everything" script that runs rcutorture, locktorture,
scftorture, rcuscale, and refscale. Plus does an allmodconfig
build.
- nolibc fixes for the torture tests"
* tag 'core-rcu-2021-02-17' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (130 commits)
percpu_ref: Dump mem_dump_obj() info upon reference-count underflow
rcu: Make call_rcu() print mem_dump_obj() info for double-freed callback
mm: Make mem_obj_dump() vmalloc() dumps include start and length
mm: Make mem_dump_obj() handle vmalloc() memory
mm: Make mem_dump_obj() handle NULL and zero-sized pointers
mm: Add mem_dump_obj() to print source of memory block
tools/rcutorture: Fix position of -lgcc in mkinitrd.sh
tools/nolibc: Fix position of -lgcc in the documented example
tools/nolibc: Emit detailed error for missing alternate syscall number definitions
tools/nolibc: Remove incorrect definitions of __ARCH_WANT_*
tools/nolibc: Get timeval, timespec and timezone from linux/time.h
tools/nolibc: Implement poll() based on ppoll()
tools/nolibc: Implement fork() based on clone()
tools/nolibc: Make getpgrp() fall back to getpgid(0)
tools/nolibc: Make dup2() rely on dup3() when available
tools/nolibc: Add the definition for dup()
rcutorture: Add rcutree.use_softirq=0 to RUDE01 and TASKS01
torture: Maintain torture-specific set of CPUs-online books
torture: Clean up after torture-test CPU hotplugging
rcutorture: Make object_debug also double call_rcu() heap object
...
Diffstat (limited to 'kernel/rcu/tree_stall.h')
-rw-r--r-- | kernel/rcu/tree_stall.h | 60 |
1 files changed, 57 insertions, 3 deletions
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 70d48c52fabc..475b26171b20 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -266,6 +266,7 @@ static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags) struct task_struct *t; struct task_struct *ts[8]; + lockdep_assert_irqs_disabled(); if (!rcu_preempt_blocked_readers_cgp(rnp)) return 0; pr_err("\tTasks blocked on level-%d rcu_node (CPUs %d-%d):", @@ -290,6 +291,7 @@ static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags) ".q"[rscr.rs.b.need_qs], ".e"[rscr.rs.b.exp_hint], ".l"[rscr.on_blkd_list]); + lockdep_assert_irqs_disabled(); put_task_struct(t); ndetected++; } @@ -333,9 +335,12 @@ static void rcu_dump_cpu_stacks(void) rcu_for_each_leaf_node(rnp) { raw_spin_lock_irqsave_rcu_node(rnp, flags); for_each_leaf_node_possible_cpu(rnp, cpu) - if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) - if (!trigger_single_cpu_backtrace(cpu)) + if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) { + if (cpu_is_offline(cpu)) + pr_err("Offline CPU %d blocking current GP.\n", cpu); + else if (!trigger_single_cpu_backtrace(cpu)) dump_cpu_task(cpu); + } raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } } @@ -449,25 +454,66 @@ static void print_cpu_stall_info(int cpu) /* Complain about starvation of grace-period kthread. */ static void rcu_check_gp_kthread_starvation(void) { + int cpu; struct task_struct *gpk = rcu_state.gp_kthread; unsigned long j; if (rcu_is_gp_kthread_starving(&j)) { + cpu = gpk ? task_cpu(gpk) : -1; pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx ->cpu=%d\n", rcu_state.name, j, (long)rcu_seq_current(&rcu_state.gp_seq), data_race(rcu_state.gp_flags), gp_state_getname(rcu_state.gp_state), rcu_state.gp_state, - gpk ? gpk->state : ~0, gpk ? task_cpu(gpk) : -1); + gpk ? gpk->state : ~0, cpu); if (gpk) { pr_err("\tUnless %s kthread gets sufficient CPU time, OOM is now expected behavior.\n", rcu_state.name); pr_err("RCU grace-period kthread stack dump:\n"); sched_show_task(gpk); + if (cpu >= 0) { + if (cpu_is_offline(cpu)) { + pr_err("RCU GP kthread last ran on offline CPU %d.\n", cpu); + } else { + pr_err("Stack dump where RCU GP kthread last ran:\n"); + if (!trigger_single_cpu_backtrace(cpu)) + dump_cpu_task(cpu); + } + } wake_up_process(gpk); } } } +/* Complain about missing wakeups from expired fqs wait timer */ +static void rcu_check_gp_kthread_expired_fqs_timer(void) +{ + struct task_struct *gpk = rcu_state.gp_kthread; + short gp_state; + unsigned long jiffies_fqs; + int cpu; + + /* + * Order reads of .gp_state and .jiffies_force_qs. + * Matching smp_wmb() is present in rcu_gp_fqs_loop(). + */ + gp_state = smp_load_acquire(&rcu_state.gp_state); + jiffies_fqs = READ_ONCE(rcu_state.jiffies_force_qs); + + if (gp_state == RCU_GP_WAIT_FQS && + time_after(jiffies, jiffies_fqs + RCU_STALL_MIGHT_MIN) && + gpk && !READ_ONCE(gpk->on_rq)) { + cpu = task_cpu(gpk); + pr_err("%s kthread timer wakeup didn't happen for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx\n", + rcu_state.name, (jiffies - jiffies_fqs), + (long)rcu_seq_current(&rcu_state.gp_seq), + data_race(rcu_state.gp_flags), + gp_state_getname(RCU_GP_WAIT_FQS), RCU_GP_WAIT_FQS, + gpk->state); + pr_err("\tPossible timer handling issue on cpu=%d timer-softirq=%u\n", + cpu, kstat_softirqs_cpu(TIMER_SOFTIRQ, cpu)); + } +} + static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps) { int cpu; @@ -478,6 +524,8 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps) struct rcu_node *rnp; long totqlen = 0; + lockdep_assert_irqs_disabled(); + /* Kick and suppress, if so configured. */ rcu_stall_kick_kthreads(); if (rcu_stall_is_suppressed()) @@ -499,6 +547,7 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps) } } ndetected += rcu_print_task_stall(rnp, flags); // Releases rnp->lock. + lockdep_assert_irqs_disabled(); } for_each_possible_cpu(cpu) @@ -529,6 +578,7 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps) WRITE_ONCE(rcu_state.jiffies_stall, jiffies + 3 * rcu_jiffies_till_stall_check() + 3); + rcu_check_gp_kthread_expired_fqs_timer(); rcu_check_gp_kthread_starvation(); panic_on_rcu_stall(); @@ -544,6 +594,8 @@ static void print_cpu_stall(unsigned long gps) struct rcu_node *rnp = rcu_get_root(); long totqlen = 0; + lockdep_assert_irqs_disabled(); + /* Kick and suppress, if so configured. */ rcu_stall_kick_kthreads(); if (rcu_stall_is_suppressed()) @@ -564,6 +616,7 @@ static void print_cpu_stall(unsigned long gps) jiffies - gps, (long)rcu_seq_current(&rcu_state.gp_seq), totqlen); + rcu_check_gp_kthread_expired_fqs_timer(); rcu_check_gp_kthread_starvation(); rcu_dump_cpu_stacks(); @@ -598,6 +651,7 @@ static void check_cpu_stall(struct rcu_data *rdp) unsigned long js; struct rcu_node *rnp; + lockdep_assert_irqs_disabled(); if ((rcu_stall_is_suppressed() && !READ_ONCE(rcu_kick_kthreads)) || !rcu_gp_in_progress()) return; |