From afc256e131bb0e1ecb5e2b1df310b20fa7bd714d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 2 Oct 2024 17:03:55 +0200 Subject: locking/spinlocks: Make __raw_* lock ops static If CONFIG_GENERIC_LOCKBREAK=y and CONFIG_DEBUG_LOCK_ALLOC=n (e.g. sh/sdk7786_defconfig): kernel/locking/spinlock.c:68:17: warning: no previous prototype for '__raw_spin_lock' [-Wmissing-prototypes] kernel/locking/spinlock.c:80:26: warning: no previous prototype for '__raw_spin_lock_irqsave' [-Wmissing-prototypes] kernel/locking/spinlock.c:98:17: warning: no previous prototype for '__raw_spin_lock_irq' [-Wmissing-prototypes] kernel/locking/spinlock.c:103:17: warning: no previous prototype for '__raw_spin_lock_bh' [-Wmissing-prototypes] kernel/locking/spinlock.c:68:17: warning: no previous prototype for '__raw_read_lock' [-Wmissing-prototypes] kernel/locking/spinlock.c:80:26: warning: no previous prototype for '__raw_read_lock_irqsave' [-Wmissing-prototypes] kernel/locking/spinlock.c:98:17: warning: no previous prototype for '__raw_read_lock_irq' [-Wmissing-prototypes] kernel/locking/spinlock.c:103:17: warning: no previous prototype for '__raw_read_lock_bh' [-Wmissing-prototypes] kernel/locking/spinlock.c:68:17: warning: no previous prototype for '__raw_write_lock' [-Wmissing-prototypes] kernel/locking/spinlock.c:80:26: warning: no previous prototype for '__raw_write_lock_irqsave' [-Wmissing-prototypes] kernel/locking/spinlock.c:98:17: warning: no previous prototype for '__raw_write_lock_irq' [-Wmissing-prototypes] kernel/locking/spinlock.c:103:17: warning: no previous prototype for '__raw_write_lock_bh' [-Wmissing-prototypes] All __raw_* lock ops are internal functions without external callers. Hence fix this by making them static. Note that if CONFIG_GENERIC_LOCKBREAK=y, no lock ops are inlined, as all of CONFIG_INLINE_*_LOCK* depend on !GENERIC_LOCKBREAK. Signed-off-by: Geert Uytterhoeven Signed-off-by: Peter Zijlstra (Intel) Acked-by: Waiman Long Link: https://lkml.kernel.org/r/7201d7fb408375c6c4df541270d787b1b4a32354.1727879348.git.geert+renesas@glider.be --- kernel/locking/spinlock.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c index 438c6086d540..7685defd7c52 100644 --- a/kernel/locking/spinlock.c +++ b/kernel/locking/spinlock.c @@ -65,7 +65,7 @@ EXPORT_PER_CPU_SYMBOL(__mmiowb_state); * towards that other CPU that it should break the lock ASAP. */ #define BUILD_LOCK_OPS(op, locktype) \ -void __lockfunc __raw_##op##_lock(locktype##_t *lock) \ +static void __lockfunc __raw_##op##_lock(locktype##_t *lock) \ { \ for (;;) { \ preempt_disable(); \ @@ -77,7 +77,7 @@ void __lockfunc __raw_##op##_lock(locktype##_t *lock) \ } \ } \ \ -unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock) \ +static unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock) \ { \ unsigned long flags; \ \ @@ -95,12 +95,12 @@ unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock) \ return flags; \ } \ \ -void __lockfunc __raw_##op##_lock_irq(locktype##_t *lock) \ +static void __lockfunc __raw_##op##_lock_irq(locktype##_t *lock) \ { \ _raw_##op##_lock_irqsave(lock); \ } \ \ -void __lockfunc __raw_##op##_lock_bh(locktype##_t *lock) \ +static void __lockfunc __raw_##op##_lock_bh(locktype##_t *lock) \ { \ unsigned long flags; \ \ -- cgit v1.2.3-70-g09d2 From 823a566221a5639f6c69424897218e5d6431a970 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Wed, 9 Oct 2024 11:20:31 +0200 Subject: locking/ww_mutex: Adjust to lockdep nest_lock requirements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When using mutex_acquire_nest() with a nest_lock, lockdep refcounts the number of acquired lockdep_maps of mutexes of the same class, and also keeps a pointer to the first acquired lockdep_map of a class. That pointer is then used for various comparison-, printing- and checking purposes, but there is no mechanism to actively ensure that lockdep_map stays in memory. Instead, a warning is printed if the lockdep_map is freed and there are still held locks of the same lock class, even if the lockdep_map itself has been released. In the context of WW/WD transactions that means that if a user unlocks and frees a ww_mutex from within an ongoing ww transaction, and that mutex happens to be the first ww_mutex grabbed in the transaction, such a warning is printed and there might be a risk of a UAF. Note that this is only problem when lockdep is enabled and affects only dereferences of struct lockdep_map. Adjust to this by adding a fake lockdep_map to the acquired context and make sure it is the first acquired lockdep map of the associated ww_mutex class. Then hold it for the duration of the WW/WD transaction. This has the side effect that trying to lock a ww mutex *without* a ww_acquire_context but where a such context has been acquire, we'd see a lockdep splat. The test-ww_mutex.c selftest attempts to do that, so modify that particular test to not acquire a ww_acquire_context if it is not going to be used. Signed-off-by: Thomas Hellström Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20241009092031.6356-1-thomas.hellstrom@linux.intel.com --- include/linux/ww_mutex.h | 14 ++++++++++++++ kernel/locking/test-ww_mutex.c | 8 +++++--- 2 files changed, 19 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index bb763085479a..a401a2f31a77 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -65,6 +65,16 @@ struct ww_acquire_ctx { #endif #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; + /** + * @first_lock_dep_map: fake lockdep_map for first locked ww_mutex. + * + * lockdep requires the lockdep_map for the first locked ww_mutex + * in a ww transaction to remain in memory until all ww_mutexes of + * the transaction have been unlocked. Ensure this by keeping a + * fake locked ww_mutex lockdep map between ww_acquire_init() and + * ww_acquire_fini(). + */ + struct lockdep_map first_lock_dep_map; #endif #ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH unsigned int deadlock_inject_interval; @@ -146,7 +156,10 @@ static inline void ww_acquire_init(struct ww_acquire_ctx *ctx, debug_check_no_locks_freed((void *)ctx, sizeof(*ctx)); lockdep_init_map(&ctx->dep_map, ww_class->acquire_name, &ww_class->acquire_key, 0); + lockdep_init_map(&ctx->first_lock_dep_map, ww_class->mutex_name, + &ww_class->mutex_key, 0); mutex_acquire(&ctx->dep_map, 0, 0, _RET_IP_); + mutex_acquire_nest(&ctx->first_lock_dep_map, 0, 0, &ctx->dep_map, _RET_IP_); #endif #ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH ctx->deadlock_inject_interval = 1; @@ -185,6 +198,7 @@ static inline void ww_acquire_done(struct ww_acquire_ctx *ctx) static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx) { #ifdef CONFIG_DEBUG_LOCK_ALLOC + mutex_release(&ctx->first_lock_dep_map, _THIS_IP_); mutex_release(&ctx->dep_map, _THIS_IP_); #endif #ifdef DEBUG_WW_MUTEXES diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c index 10a5736a21c2..5d58b2c0ef98 100644 --- a/kernel/locking/test-ww_mutex.c +++ b/kernel/locking/test-ww_mutex.c @@ -62,7 +62,8 @@ static int __test_mutex(unsigned int flags) int ret; ww_mutex_init(&mtx.mutex, &ww_class); - ww_acquire_init(&ctx, &ww_class); + if (flags & TEST_MTX_CTX) + ww_acquire_init(&ctx, &ww_class); INIT_WORK_ONSTACK(&mtx.work, test_mutex_work); init_completion(&mtx.ready); @@ -90,7 +91,8 @@ static int __test_mutex(unsigned int flags) ret = wait_for_completion_timeout(&mtx.done, TIMEOUT); } ww_mutex_unlock(&mtx.mutex); - ww_acquire_fini(&ctx); + if (flags & TEST_MTX_CTX) + ww_acquire_fini(&ctx); if (ret) { pr_err("%s(flags=%x): mutual exclusion failure\n", @@ -679,7 +681,7 @@ static int __init test_ww_mutex_init(void) if (ret) return ret; - ret = stress(2047, hweight32(STRESS_ALL)*ncpus, STRESS_ALL); + ret = stress(2046, hweight32(STRESS_ALL)*ncpus, STRESS_ALL); if (ret) return ret; -- cgit v1.2.3-70-g09d2 From 19298f48694987fac843261c84e24834c255b451 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Thu, 10 Oct 2024 09:10:04 +0200 Subject: futex: Use atomic64_inc_return() in get_inode_sequence_number() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use atomic64_inc_return(&ref) instead of atomic64_add_return(1, &ref) to use optimized implementation and ease register pressure around the primitive for targets that implement optimized variant. Signed-off-by: Uros Bizjak Signed-off-by: Thomas Gleixner Reviewed-by: André Almeida Link: https://lore.kernel.org/all/20241010071023.21913-1-ubizjak@gmail.com --- kernel/futex/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/futex/core.c b/kernel/futex/core.c index 136768ae2637..3146730e55f7 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -181,7 +181,7 @@ static u64 get_inode_sequence_number(struct inode *inode) return old; for (;;) { - u64 new = atomic64_add_return(1, &i_seq); + u64 new = atomic64_inc_return(&i_seq); if (WARN_ON_ONCE(!new)) continue; -- cgit v1.2.3-70-g09d2 From 87347f148061b48c3495fb61dcbad384760da9cf Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Thu, 10 Oct 2024 09:10:05 +0200 Subject: futex: Use atomic64_try_cmpxchg_relaxed() in get_inode_sequence_number() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Optimize get_inode_sequence_number() to use simpler and faster: !atomic64_try_cmpxchg_relaxed(*ptr, &old, new) instead of: atomic64_cmpxchg relaxed(*ptr, old, new) != old The x86 CMPXCHG instruction returns success in ZF flag, so this change saves a compare after cmpxchg. The generated code improves from: 3da: 31 c0 xor %eax,%eax 3dc: f0 48 0f b1 8a 38 01 lock cmpxchg %rcx,0x138(%rdx) 3e3: 00 00 3e5: 48 85 c0 test %rax,%rax 3e8: 48 0f 44 c1 cmove %rcx,%rax to: 3da: 31 c0 xor %eax,%eax 3dc: f0 48 0f b1 8a 38 01 lock cmpxchg %rcx,0x138(%rdx) 3e3: 00 00 3e5: 48 0f 44 c1 cmove %rcx,%rax Signed-off-by: Uros Bizjak Signed-off-by: Thomas Gleixner Reviewed-by: André Almeida Link: https://lore.kernel.org/all/20241010071023.21913-2-ubizjak@gmail.com --- kernel/futex/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/futex/core.c b/kernel/futex/core.c index 3146730e55f7..692912bf1252 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -185,8 +185,8 @@ static u64 get_inode_sequence_number(struct inode *inode) if (WARN_ON_ONCE(!new)) continue; - old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new); - if (old) + old = 0; + if (!atomic64_try_cmpxchg_relaxed(&inode->i_sequence, &old, new)) return old; return new; } -- cgit v1.2.3-70-g09d2 From 0784181b44af831a3fa52e1e5ff77c388d699dba Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 26 Sep 2024 16:17:37 +0100 Subject: lockdep: Add lockdep_cleanup_dead_cpu() Add a function to check that an offline CPU has left the tracing infrastructure in a sane state. Commit 9bb69ba4c177 ("ACPI: processor_idle: use raw_safe_halt() in acpi_idle_play_dead()") fixed an issue where the acpi_idle_play_dead() function called safe_halt() instead of raw_safe_halt(), which had the side-effect of setting the hardirqs_enabled flag for the offline CPU. On x86 this triggered warnings from lockdep_assert_irqs_disabled() when the CPU was brought back online again later. These warnings were too early for the exception to be handled correctly, leading to a triple-fault. Add lockdep_cleanup_dead_cpu() to check for this kind of failure mode, print the events leading up to it, and correct it so that the CPU can come online again correctly. Re-introducing the original bug now merely results in this warning instead: [ 61.556652] smpboot: CPU 1 is now offline [ 61.556769] CPU 1 left hardirqs enabled! [ 61.556915] irq event stamp: 128149 [ 61.556965] hardirqs last enabled at (128149): [] acpi_idle_play_dead+0x46/0x70 [ 61.557055] hardirqs last disabled at (128148): [] do_idle+0x90/0xe0 [ 61.557117] softirqs last enabled at (128078): [] __do_softirq+0x31c/0x423 [ 61.557199] softirqs last disabled at (128065): [] __irq_exit_rcu+0x91/0x100 [boqun: Capitalize the title and reword the message a bit] Signed-off-by: David Woodhouse Reviewed-by: Thomas Gleixner Signed-off-by: Boqun Feng Link: https://lore.kernel.org/r/f7bd2b3b999051bb3ef4be34526a9262008285f5.camel@infradead.org --- include/linux/irqflags.h | 6 ++++++ kernel/cpu.c | 1 + kernel/locking/lockdep.c | 24 ++++++++++++++++++++++++ 3 files changed, 31 insertions(+) (limited to 'kernel') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 3f003d5fde53..57b074e0cfbb 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -18,6 +18,8 @@ #include #include +struct task_struct; + /* Currently lockdep_softirqs_on/off is used only by lockdep */ #ifdef CONFIG_PROVE_LOCKING extern void lockdep_softirqs_on(unsigned long ip); @@ -25,12 +27,16 @@ extern void lockdep_hardirqs_on_prepare(void); extern void lockdep_hardirqs_on(unsigned long ip); extern void lockdep_hardirqs_off(unsigned long ip); + extern void lockdep_cleanup_dead_cpu(unsigned int cpu, + struct task_struct *idle); #else static inline void lockdep_softirqs_on(unsigned long ip) { } static inline void lockdep_softirqs_off(unsigned long ip) { } static inline void lockdep_hardirqs_on_prepare(void) { } static inline void lockdep_hardirqs_on(unsigned long ip) { } static inline void lockdep_hardirqs_off(unsigned long ip) { } + static inline void lockdep_cleanup_dead_cpu(unsigned int cpu, + struct task_struct *idle) {} #endif #ifdef CONFIG_TRACE_IRQFLAGS diff --git a/kernel/cpu.c b/kernel/cpu.c index d293d52a3e00..c4aaf73dec9e 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1338,6 +1338,7 @@ static int takedown_cpu(unsigned int cpu) cpuhp_bp_sync_dead(cpu); + lockdep_cleanup_dead_cpu(cpu, idle_thread_get(cpu)); tick_cleanup_dead_cpu(cpu); /* diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 536bd471557f..6fd4af217e71 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -4586,6 +4586,30 @@ void lockdep_softirqs_off(unsigned long ip) debug_atomic_inc(redundant_softirqs_off); } +/** + * lockdep_cleanup_dead_cpu - Ensure CPU lockdep state is cleanly stopped + * + * @cpu: index of offlined CPU + * @idle: task pointer for offlined CPU's idle thread + * + * Invoked after the CPU is dead. Ensures that the tracing infrastructure + * is left in a suitable state for the CPU to be subsequently brought + * online again. + */ +void lockdep_cleanup_dead_cpu(unsigned int cpu, struct task_struct *idle) +{ + if (unlikely(!debug_locks)) + return; + + if (unlikely(per_cpu(hardirqs_enabled, cpu))) { + pr_warn("CPU %u left hardirqs enabled!", cpu); + if (idle) + print_irqtrace_events(idle); + /* Clean it up for when the CPU comes online again. */ + per_cpu(hardirqs_enabled, cpu) = 0; + } +} + static int mark_usage(struct task_struct *curr, struct held_lock *hlock, int check) { -- cgit v1.2.3-70-g09d2 From e48bf7ca6056297664eb260fa88cae8e50d9b698 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Mon, 7 Oct 2024 08:54:57 +0200 Subject: lockdep: Use info level for lockdep initial info messages All those: Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar ... MAX_LOCKDEP_SUBCLASSES: 8 ... MAX_LOCK_DEPTH: 48 ... MAX_LOCKDEP_KEYS: 8192 and so on are dumped with the KERN_WARNING level. It is due to missing KERN_* annotation. Use pr_info() instead of bare printk() to dump the info with the info level. Signed-off-by: Jiri Slaby (SUSE) Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Will Deacon Cc: Waiman Long Cc: Boqun Feng Reviewed-by: Waiman Long Signed-off-by: Boqun Feng Link: https://lore.kernel.org/r/20241007065457.20128-1-jirislaby@kernel.org --- kernel/locking/lockdep.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'kernel') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 6fd4af217e71..2d8ec0351ef9 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -6600,17 +6600,17 @@ EXPORT_SYMBOL_GPL(lockdep_unregister_key); void __init lockdep_init(void) { - printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n"); + pr_info("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n"); - printk("... MAX_LOCKDEP_SUBCLASSES: %lu\n", MAX_LOCKDEP_SUBCLASSES); - printk("... MAX_LOCK_DEPTH: %lu\n", MAX_LOCK_DEPTH); - printk("... MAX_LOCKDEP_KEYS: %lu\n", MAX_LOCKDEP_KEYS); - printk("... CLASSHASH_SIZE: %lu\n", CLASSHASH_SIZE); - printk("... MAX_LOCKDEP_ENTRIES: %lu\n", MAX_LOCKDEP_ENTRIES); - printk("... MAX_LOCKDEP_CHAINS: %lu\n", MAX_LOCKDEP_CHAINS); - printk("... CHAINHASH_SIZE: %lu\n", CHAINHASH_SIZE); + pr_info("... MAX_LOCKDEP_SUBCLASSES: %lu\n", MAX_LOCKDEP_SUBCLASSES); + pr_info("... MAX_LOCK_DEPTH: %lu\n", MAX_LOCK_DEPTH); + pr_info("... MAX_LOCKDEP_KEYS: %lu\n", MAX_LOCKDEP_KEYS); + pr_info("... CLASSHASH_SIZE: %lu\n", CLASSHASH_SIZE); + pr_info("... MAX_LOCKDEP_ENTRIES: %lu\n", MAX_LOCKDEP_ENTRIES); + pr_info("... MAX_LOCKDEP_CHAINS: %lu\n", MAX_LOCKDEP_CHAINS); + pr_info("... CHAINHASH_SIZE: %lu\n", CHAINHASH_SIZE); - printk(" memory used by lock dependency info: %zu kB\n", + pr_info(" memory used by lock dependency info: %zu kB\n", (sizeof(lock_classes) + sizeof(lock_classes_in_use) + sizeof(classhash_table) + @@ -6628,12 +6628,12 @@ void __init lockdep_init(void) ); #if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) - printk(" memory used for stack traces: %zu kB\n", + pr_info(" memory used for stack traces: %zu kB\n", (sizeof(stack_trace) + sizeof(stack_trace_hash)) / 1024 ); #endif - printk(" per task-struct memory footprint: %zu bytes\n", + pr_info(" per task-struct memory footprint: %zu bytes\n", sizeof(((struct task_struct *)NULL)->held_locks)); } -- cgit v1.2.3-70-g09d2 From 2628cbd03924b91a360f72117a9b9c78cfd050e7 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Fri, 9 Aug 2024 09:48:02 +0800 Subject: locking/pvqspinlock: Convert fields of 'enum vcpu_state' to uppercase Convert the fields of 'enum vcpu_state' to uppercase for better readability. No functional changes intended. Acked-by: Waiman Long Signed-off-by: Qiuxu Zhuo Signed-off-by: Boqun Feng Link: https://lore.kernel.org/r/20240809014802.15320-1-qiuxu.zhuo@intel.com --- kernel/locking/qspinlock_paravirt.h | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'kernel') diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index ac2e22502741..dc1cb90e3644 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -38,13 +38,13 @@ #define PV_PREV_CHECK_MASK 0xff /* - * Queue node uses: vcpu_running & vcpu_halted. - * Queue head uses: vcpu_running & vcpu_hashed. + * Queue node uses: VCPU_RUNNING & VCPU_HALTED. + * Queue head uses: VCPU_RUNNING & VCPU_HASHED. */ enum vcpu_state { - vcpu_running = 0, - vcpu_halted, /* Used only in pv_wait_node */ - vcpu_hashed, /* = pv_hash'ed + vcpu_halted */ + VCPU_RUNNING = 0, + VCPU_HALTED, /* Used only in pv_wait_node */ + VCPU_HASHED, /* = pv_hash'ed + VCPU_HALTED */ }; struct pv_node { @@ -266,7 +266,7 @@ pv_wait_early(struct pv_node *prev, int loop) if ((loop & PV_PREV_CHECK_MASK) != 0) return false; - return READ_ONCE(prev->state) != vcpu_running; + return READ_ONCE(prev->state) != VCPU_RUNNING; } /* @@ -279,7 +279,7 @@ static void pv_init_node(struct mcs_spinlock *node) BUILD_BUG_ON(sizeof(struct pv_node) > sizeof(struct qnode)); pn->cpu = smp_processor_id(); - pn->state = vcpu_running; + pn->state = VCPU_RUNNING; } /* @@ -308,26 +308,26 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev) /* * Order pn->state vs pn->locked thusly: * - * [S] pn->state = vcpu_halted [S] next->locked = 1 + * [S] pn->state = VCPU_HALTED [S] next->locked = 1 * MB MB - * [L] pn->locked [RmW] pn->state = vcpu_hashed + * [L] pn->locked [RmW] pn->state = VCPU_HASHED * * Matches the cmpxchg() from pv_kick_node(). */ - smp_store_mb(pn->state, vcpu_halted); + smp_store_mb(pn->state, VCPU_HALTED); if (!READ_ONCE(node->locked)) { lockevent_inc(pv_wait_node); lockevent_cond_inc(pv_wait_early, wait_early); - pv_wait(&pn->state, vcpu_halted); + pv_wait(&pn->state, VCPU_HALTED); } /* - * If pv_kick_node() changed us to vcpu_hashed, retain that + * If pv_kick_node() changed us to VCPU_HASHED, retain that * value so that pv_wait_head_or_lock() knows to not also try * to hash this lock. */ - cmpxchg(&pn->state, vcpu_halted, vcpu_running); + cmpxchg(&pn->state, VCPU_HALTED, VCPU_RUNNING); /* * If the locked flag is still not set after wakeup, it is a @@ -357,7 +357,7 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev) static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node) { struct pv_node *pn = (struct pv_node *)node; - u8 old = vcpu_halted; + u8 old = VCPU_HALTED; /* * If the vCPU is indeed halted, advance its state to match that of * pv_wait_node(). If OTOH this fails, the vCPU was running and will @@ -374,7 +374,7 @@ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node) * subsequent writes. */ smp_mb__before_atomic(); - if (!try_cmpxchg_relaxed(&pn->state, &old, vcpu_hashed)) + if (!try_cmpxchg_relaxed(&pn->state, &old, VCPU_HASHED)) return; /* @@ -407,7 +407,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node) * If pv_kick_node() already advanced our state, we don't need to * insert ourselves into the hash table anymore. */ - if (READ_ONCE(pn->state) == vcpu_hashed) + if (READ_ONCE(pn->state) == VCPU_HASHED) lp = (struct qspinlock **)1; /* @@ -420,7 +420,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node) * Set correct vCPU state to be used by queue node wait-early * mechanism. */ - WRITE_ONCE(pn->state, vcpu_running); + WRITE_ONCE(pn->state, VCPU_RUNNING); /* * Set the pending bit in the active lock spinning loop to @@ -460,7 +460,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node) goto gotlock; } } - WRITE_ONCE(pn->state, vcpu_hashed); + WRITE_ONCE(pn->state, VCPU_HASHED); lockevent_inc(pv_wait_head); lockevent_cond_inc(pv_wait_again, waitcnt); pv_wait(&lock->locked, _Q_SLOW_VAL); -- cgit v1.2.3-70-g09d2 From 168660b826a77fda28235e0b0b3027041d6a5240 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 12 Aug 2024 12:39:04 +0200 Subject: locking/rt: Add sparse annotation for RCU. Every lock, that becomes a sleeping lock on PREEMPT_RT, starts a RCU read side critical section. There is no sparse annotation for this and sparse complains about unbalanced locking. Add __acquires/ __releases for the RCU lock. This covers all but the trylock functions. A __cond_acquires() annotation didn't work. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20240812104200.2239232-4-bigeasy@linutronix.de --- kernel/locking/spinlock_rt.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/locking/spinlock_rt.c b/kernel/locking/spinlock_rt.c index 38e292454fcc..d1cf8b2b6dca 100644 --- a/kernel/locking/spinlock_rt.c +++ b/kernel/locking/spinlock_rt.c @@ -51,7 +51,7 @@ static __always_inline void __rt_spin_lock(spinlock_t *lock) migrate_disable(); } -void __sched rt_spin_lock(spinlock_t *lock) +void __sched rt_spin_lock(spinlock_t *lock) __acquires(RCU) { spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); __rt_spin_lock(lock); @@ -75,7 +75,7 @@ void __sched rt_spin_lock_nest_lock(spinlock_t *lock, EXPORT_SYMBOL(rt_spin_lock_nest_lock); #endif -void __sched rt_spin_unlock(spinlock_t *lock) +void __sched rt_spin_unlock(spinlock_t *lock) __releases(RCU) { spin_release(&lock->dep_map, _RET_IP_); migrate_enable(); @@ -225,7 +225,7 @@ int __sched rt_write_trylock(rwlock_t *rwlock) } EXPORT_SYMBOL(rt_write_trylock); -void __sched rt_read_lock(rwlock_t *rwlock) +void __sched rt_read_lock(rwlock_t *rwlock) __acquires(RCU) { rtlock_might_resched(); rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_); @@ -235,7 +235,7 @@ void __sched rt_read_lock(rwlock_t *rwlock) } EXPORT_SYMBOL(rt_read_lock); -void __sched rt_write_lock(rwlock_t *rwlock) +void __sched rt_write_lock(rwlock_t *rwlock) __acquires(RCU) { rtlock_might_resched(); rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_); @@ -246,7 +246,7 @@ void __sched rt_write_lock(rwlock_t *rwlock) EXPORT_SYMBOL(rt_write_lock); #ifdef CONFIG_DEBUG_LOCK_ALLOC -void __sched rt_write_lock_nested(rwlock_t *rwlock, int subclass) +void __sched rt_write_lock_nested(rwlock_t *rwlock, int subclass) __acquires(RCU) { rtlock_might_resched(); rwlock_acquire(&rwlock->dep_map, subclass, 0, _RET_IP_); @@ -257,7 +257,7 @@ void __sched rt_write_lock_nested(rwlock_t *rwlock, int subclass) EXPORT_SYMBOL(rt_write_lock_nested); #endif -void __sched rt_read_unlock(rwlock_t *rwlock) +void __sched rt_read_unlock(rwlock_t *rwlock) __releases(RCU) { rwlock_release(&rwlock->dep_map, _RET_IP_); migrate_enable(); @@ -266,7 +266,7 @@ void __sched rt_read_unlock(rwlock_t *rwlock) } EXPORT_SYMBOL(rt_read_unlock); -void __sched rt_write_unlock(rwlock_t *rwlock) +void __sched rt_write_unlock(rwlock_t *rwlock) __releases(RCU) { rwlock_release(&rwlock->dep_map, _RET_IP_); rcu_read_unlock(); -- cgit v1.2.3-70-g09d2 From 77abd3b7d9bf384306872b6201b1dfeb1e899892 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 12 Aug 2024 12:39:05 +0200 Subject: locking/rt: Annotate unlock followed by lock for sparse. rt_mutex_slowlock_block() and rtlock_slowlock_locked() both unlock lock::wait_lock and then lock it later. This is unusual and sparse complains about it. Add __releases() + __acquires() annotation to mark that it is expected. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20240812104200.2239232-5-bigeasy@linutronix.de --- kernel/locking/rtmutex.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index ebebd0eec7f6..d3b72c2f983f 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1601,6 +1601,7 @@ static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock, unsigned int state, struct hrtimer_sleeper *timeout, struct rt_mutex_waiter *waiter) + __releases(&lock->wait_lock) __acquires(&lock->wait_lock) { struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex); struct task_struct *owner; @@ -1805,6 +1806,7 @@ static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock, * @lock: The underlying RT mutex */ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock) + __releases(&lock->wait_lock) __acquires(&lock->wait_lock) { struct rt_mutex_waiter waiter; struct task_struct *owner; -- cgit v1.2.3-70-g09d2 From d12b802f183667d4c28589314c99c380a458d57e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Oct 2024 11:26:06 +0200 Subject: locking/rtmutex: Fix misleading comment Going through the RCU-boost and rtmutex code, I ran into this utterly confusing comment. Fix it to avoid confusing future readers. [ tglx: Wordsmithed the comment ] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Reviewed-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/all/20241008092606.GJ33184@noisy.programming.kicks-ass.net --- kernel/locking/rtmutex_api.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/locking/rtmutex_api.c b/kernel/locking/rtmutex_api.c index a6974d044593..7e79258feb27 100644 --- a/kernel/locking/rtmutex_api.c +++ b/kernel/locking/rtmutex_api.c @@ -175,10 +175,10 @@ bool __sched __rt_mutex_futex_unlock(struct rt_mutex_base *lock, } /* - * We've already deboosted, mark_wakeup_next_waiter() will - * retain preempt_disabled when we drop the wait_lock, to - * avoid inversion prior to the wakeup. preempt_disable() - * therein pairs with rt_mutex_postunlock(). + * mark_wakeup_next_waiter() deboosts and retains preemption + * disabled when dropping the wait_lock, to avoid inversion prior + * to the wakeup. preempt_disable() therein pairs with the + * preempt_enable() in rt_mutex_postunlock(). */ mark_wakeup_next_waiter(wqh, lock); -- cgit v1.2.3-70-g09d2 From 0d75e0c420e52b4057a2de274054a5274209a2ae Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Tue, 1 Oct 2024 13:45:57 +0200 Subject: locking/osq_lock: Use atomic_try_cmpxchg_release() in osq_unlock() Replace this pattern in osq_unlock(): atomic_cmpxchg(*ptr, old, new) == old ... with the simpler and faster: atomic_try_cmpxchg(*ptr, &old, new) The x86 CMPXCHG instruction returns success in the ZF flag, so this change saves a compare after the CMPXCHG. The code in the fast path of osq_unlock() improves from: 11b: 31 c9 xor %ecx,%ecx 11d: 8d 50 01 lea 0x1(%rax),%edx 120: 89 d0 mov %edx,%eax 122: f0 0f b1 0f lock cmpxchg %ecx,(%rdi) 126: 39 c2 cmp %eax,%edx 128: 75 05 jne 12f <...> to: 12b: 31 d2 xor %edx,%edx 12d: 83 c0 01 add $0x1,%eax 130: f0 0f b1 17 lock cmpxchg %edx,(%rdi) 134: 75 05 jne 13b <...> Signed-off-by: Uros Bizjak Signed-off-by: Peter Zijlstra (Intel) Acked-by: Waiman Long Link: https://lore.kernel.org/r/20241001114606.820277-1-ubizjak@gmail.com --- kernel/locking/osq_lock.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c index 75a6f6133866..b4233dc2c2b0 100644 --- a/kernel/locking/osq_lock.c +++ b/kernel/locking/osq_lock.c @@ -215,8 +215,7 @@ void osq_unlock(struct optimistic_spin_queue *lock) /* * Fast path for the uncontended case. */ - if (likely(atomic_cmpxchg_release(&lock->tail, curr, - OSQ_UNLOCKED_VAL) == curr)) + if (atomic_try_cmpxchg_release(&lock->tail, &curr, OSQ_UNLOCKED_VAL)) return; /* -- cgit v1.2.3-70-g09d2 From 1139c71df5ca29a36f08e3a08c7cee160db21ec1 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 4 Nov 2024 16:43:05 +0100 Subject: time/sched_clock: Swap update_clock_read_data() latch writes Swap the writes to the odd and even copies to make the writer critical section look like all other seqcount_latch writers. Signed-off-by: Marco Elver Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20241104161910.780003-2-elver@google.com --- kernel/time/sched_clock.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index 68d6c1190ac7..85595fcf6aa2 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -119,9 +119,6 @@ unsigned long long notrace sched_clock(void) */ static void update_clock_read_data(struct clock_read_data *rd) { - /* update the backup (odd) copy with the new data */ - cd.read_data[1] = *rd; - /* steer readers towards the odd copy */ raw_write_seqcount_latch(&cd.seq); @@ -130,6 +127,9 @@ static void update_clock_read_data(struct clock_read_data *rd) /* switch readers back to the even copy */ raw_write_seqcount_latch(&cd.seq); + + /* update the backup (odd) copy with the new data */ + cd.read_data[1] = *rd; } /* -- cgit v1.2.3-70-g09d2 From 8ab40fc2b9086b915e46890bb9252dc7692f1da0 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 4 Nov 2024 16:43:06 +0100 Subject: time/sched_clock: Broaden sched_clock()'s instrumentation coverage Most of sched_clock()'s implementation is ineligible for instrumentation due to relying on sched_clock_noinstr(). Split the implementation off into an __always_inline function __sched_clock(), which is then used by the noinstr and instrumentable version, to allow more of sched_clock() to be covered by various instrumentation. This will allow instrumentation with the various sanitizers (KASAN, KCSAN, KMSAN, UBSAN). For KCSAN, we know that raw seqcount_latch usage without annotations will result in false positive reports: tell it that all of __sched_clock() is "atomic" for the latch reader; later changes in this series will take care of the writers. Co-developed-by: "Peter Zijlstra (Intel)" Signed-off-by: "Peter Zijlstra (Intel)" Signed-off-by: Marco Elver Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20241104161910.780003-3-elver@google.com --- kernel/time/sched_clock.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index 85595fcf6aa2..29bdf309dae8 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -80,7 +80,7 @@ notrace int sched_clock_read_retry(unsigned int seq) return raw_read_seqcount_latch_retry(&cd.seq, seq); } -unsigned long long noinstr sched_clock_noinstr(void) +static __always_inline unsigned long long __sched_clock(void) { struct clock_read_data *rd; unsigned int seq; @@ -98,11 +98,23 @@ unsigned long long noinstr sched_clock_noinstr(void) return res; } +unsigned long long noinstr sched_clock_noinstr(void) +{ + return __sched_clock(); +} + unsigned long long notrace sched_clock(void) { unsigned long long ns; preempt_disable_notrace(); - ns = sched_clock_noinstr(); + /* + * All of __sched_clock() is a seqcount_latch reader critical section, + * but relies on the raw helpers which are uninstrumented. For KCSAN, + * mark all accesses in __sched_clock() as atomic. + */ + kcsan_nestable_atomic_begin(); + ns = __sched_clock(); + kcsan_nestable_atomic_end(); preempt_enable_notrace(); return ns; } -- cgit v1.2.3-70-g09d2 From 93190bc35d6d4364a4d8c38ac8961dabecbff4ed Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 4 Nov 2024 16:43:08 +0100 Subject: seqlock, treewide: Switch to non-raw seqcount_latch interface Switch all instrumentable users of the seqcount_latch interface over to the non-raw interface. Co-developed-by: "Peter Zijlstra (Intel)" Signed-off-by: "Peter Zijlstra (Intel)" Signed-off-by: Marco Elver Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20241104161910.780003-5-elver@google.com --- arch/x86/kernel/tsc.c | 5 +++-- include/linux/rbtree_latch.h | 20 +++++++++++--------- kernel/printk/printk.c | 9 +++++---- kernel/time/sched_clock.c | 12 +++++++----- kernel/time/timekeeping.c | 12 +++++++----- 5 files changed, 33 insertions(+), 25 deletions(-) (limited to 'kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index dfe6847fd99e..67aeaba4ba9c 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -174,10 +174,11 @@ static void __set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long ts c2n = per_cpu_ptr(&cyc2ns, cpu); - raw_write_seqcount_latch(&c2n->seq); + write_seqcount_latch_begin(&c2n->seq); c2n->data[0] = data; - raw_write_seqcount_latch(&c2n->seq); + write_seqcount_latch(&c2n->seq); c2n->data[1] = data; + write_seqcount_latch_end(&c2n->seq); } static void set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_now) diff --git a/include/linux/rbtree_latch.h b/include/linux/rbtree_latch.h index 6a0999c26c7c..2f630eb8307e 100644 --- a/include/linux/rbtree_latch.h +++ b/include/linux/rbtree_latch.h @@ -14,7 +14,7 @@ * * If we need to allow unconditional lookups (say as required for NMI context * usage) we need a more complex setup; this data structure provides this by - * employing the latch technique -- see @raw_write_seqcount_latch -- to + * employing the latch technique -- see @write_seqcount_latch_begin -- to * implement a latched RB-tree which does allow for unconditional lookups by * virtue of always having (at least) one stable copy of the tree. * @@ -132,7 +132,7 @@ __lt_find(void *key, struct latch_tree_root *ltr, int idx, * @ops: operators defining the node order * * It inserts @node into @root in an ordered fashion such that we can always - * observe one complete tree. See the comment for raw_write_seqcount_latch(). + * observe one complete tree. See the comment for write_seqcount_latch_begin(). * * The inserts use rcu_assign_pointer() to publish the element such that the * tree structure is stored before we can observe the new @node. @@ -145,10 +145,11 @@ latch_tree_insert(struct latch_tree_node *node, struct latch_tree_root *root, const struct latch_tree_ops *ops) { - raw_write_seqcount_latch(&root->seq); + write_seqcount_latch_begin(&root->seq); __lt_insert(node, root, 0, ops->less); - raw_write_seqcount_latch(&root->seq); + write_seqcount_latch(&root->seq); __lt_insert(node, root, 1, ops->less); + write_seqcount_latch_end(&root->seq); } /** @@ -159,7 +160,7 @@ latch_tree_insert(struct latch_tree_node *node, * * Removes @node from the trees @root in an ordered fashion such that we can * always observe one complete tree. See the comment for - * raw_write_seqcount_latch(). + * write_seqcount_latch_begin(). * * It is assumed that @node will observe one RCU quiescent state before being * reused of freed. @@ -172,10 +173,11 @@ latch_tree_erase(struct latch_tree_node *node, struct latch_tree_root *root, const struct latch_tree_ops *ops) { - raw_write_seqcount_latch(&root->seq); + write_seqcount_latch_begin(&root->seq); __lt_erase(node, root, 0); - raw_write_seqcount_latch(&root->seq); + write_seqcount_latch(&root->seq); __lt_erase(node, root, 1); + write_seqcount_latch_end(&root->seq); } /** @@ -204,9 +206,9 @@ latch_tree_find(void *key, struct latch_tree_root *root, unsigned int seq; do { - seq = raw_read_seqcount_latch(&root->seq); + seq = read_seqcount_latch(&root->seq); node = __lt_find(key, root, seq & 1, ops->comp); - } while (raw_read_seqcount_latch_retry(&root->seq, seq)); + } while (read_seqcount_latch_retry(&root->seq, seq)); return node; } diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index beb808f4c367..19911c8fa7b6 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -560,10 +560,11 @@ bool printk_percpu_data_ready(void) /* Must be called under syslog_lock. */ static void latched_seq_write(struct latched_seq *ls, u64 val) { - raw_write_seqcount_latch(&ls->latch); + write_seqcount_latch_begin(&ls->latch); ls->val[0] = val; - raw_write_seqcount_latch(&ls->latch); + write_seqcount_latch(&ls->latch); ls->val[1] = val; + write_seqcount_latch_end(&ls->latch); } /* Can be called from any context. */ @@ -574,10 +575,10 @@ static u64 latched_seq_read_nolock(struct latched_seq *ls) u64 val; do { - seq = raw_read_seqcount_latch(&ls->latch); + seq = read_seqcount_latch(&ls->latch); idx = seq & 0x1; val = ls->val[idx]; - } while (raw_read_seqcount_latch_retry(&ls->latch, seq)); + } while (read_seqcount_latch_retry(&ls->latch, seq)); return val; } diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index 29bdf309dae8..fcca4e72f1ef 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -71,13 +71,13 @@ static __always_inline u64 cyc_to_ns(u64 cyc, u32 mult, u32 shift) notrace struct clock_read_data *sched_clock_read_begin(unsigned int *seq) { - *seq = raw_read_seqcount_latch(&cd.seq); + *seq = read_seqcount_latch(&cd.seq); return cd.read_data + (*seq & 1); } notrace int sched_clock_read_retry(unsigned int seq) { - return raw_read_seqcount_latch_retry(&cd.seq, seq); + return read_seqcount_latch_retry(&cd.seq, seq); } static __always_inline unsigned long long __sched_clock(void) @@ -132,16 +132,18 @@ unsigned long long notrace sched_clock(void) static void update_clock_read_data(struct clock_read_data *rd) { /* steer readers towards the odd copy */ - raw_write_seqcount_latch(&cd.seq); + write_seqcount_latch_begin(&cd.seq); /* now its safe for us to update the normal (even) copy */ cd.read_data[0] = *rd; /* switch readers back to the even copy */ - raw_write_seqcount_latch(&cd.seq); + write_seqcount_latch(&cd.seq); /* update the backup (odd) copy with the new data */ cd.read_data[1] = *rd; + + write_seqcount_latch_end(&cd.seq); } /* @@ -279,7 +281,7 @@ void __init generic_sched_clock_init(void) */ static u64 notrace suspended_sched_clock_read(void) { - unsigned int seq = raw_read_seqcount_latch(&cd.seq); + unsigned int seq = read_seqcount_latch(&cd.seq); return cd.read_data[seq & 1].epoch_cyc; } diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 7e6f409bf311..18752983e834 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -411,7 +411,7 @@ static inline u64 timekeeping_get_ns(const struct tk_read_base *tkr) * We want to use this from any context including NMI and tracing / * instrumenting the timekeeping code itself. * - * Employ the latch technique; see @raw_write_seqcount_latch. + * Employ the latch technique; see @write_seqcount_latch. * * So if a NMI hits the update of base[0] then it will use base[1] * which is still consistent. In the worst case this can result is a @@ -424,16 +424,18 @@ static void update_fast_timekeeper(const struct tk_read_base *tkr, struct tk_read_base *base = tkf->base; /* Force readers off to base[1] */ - raw_write_seqcount_latch(&tkf->seq); + write_seqcount_latch_begin(&tkf->seq); /* Update base[0] */ memcpy(base, tkr, sizeof(*base)); /* Force readers back to base[0] */ - raw_write_seqcount_latch(&tkf->seq); + write_seqcount_latch(&tkf->seq); /* Update base[1] */ memcpy(base + 1, base, sizeof(*base)); + + write_seqcount_latch_end(&tkf->seq); } static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf) @@ -443,11 +445,11 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf) u64 now; do { - seq = raw_read_seqcount_latch(&tkf->seq); + seq = read_seqcount_latch(&tkf->seq); tkr = tkf->base + (seq & 0x01); now = ktime_to_ns(tkr->base); now += __timekeeping_get_ns(tkr); - } while (raw_read_seqcount_latch_retry(&tkf->seq, seq)); + } while (read_seqcount_latch_retry(&tkf->seq, seq)); return now; } -- cgit v1.2.3-70-g09d2