diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-11-26 16:02:40 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-11-26 16:02:40 -0800 |
commit | 168829ad09ca9cdfdc664b2110d0e3569932c12d (patch) | |
tree | 1b6351ab5766a272dec1fc08f77272a199bba978 /include/linux | |
parent | 1ae78780eda54023a0fb49ee743dbba39da148e0 (diff) | |
parent | 500543c53a54134ced386aed85cd93cf1363f981 (diff) |
Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull locking updates from Ingo Molnar:
"The main changes in this cycle were:
- A comprehensive rewrite of the robust/PI futex code's exit handling
to fix various exit races. (Thomas Gleixner et al)
- Rework the generic REFCOUNT_FULL implementation using
atomic_fetch_* operations so that the performance impact of the
cmpxchg() loops is mitigated for common refcount operations.
With these performance improvements the generic implementation of
refcount_t should be good enough for everybody - and this got
confirmed by performance testing, so remove ARCH_HAS_REFCOUNT and
REFCOUNT_FULL entirely, leaving the generic implementation enabled
unconditionally. (Will Deacon)
- Other misc changes, fixes, cleanups"
* 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (27 commits)
lkdtm: Remove references to CONFIG_REFCOUNT_FULL
locking/refcount: Remove unused 'refcount_error_report()' function
locking/refcount: Consolidate implementations of refcount_t
locking/refcount: Consolidate REFCOUNT_{MAX,SATURATED} definitions
locking/refcount: Move saturation warnings out of line
locking/refcount: Improve performance of generic REFCOUNT_FULL code
locking/refcount: Move the bulk of the REFCOUNT_FULL implementation into the <linux/refcount.h> header
locking/refcount: Remove unused refcount_*_checked() variants
locking/refcount: Ensure integer operands are treated as signed
locking/refcount: Define constants for saturation and max refcount values
futex: Prevent exit livelock
futex: Provide distinct return value when owner is exiting
futex: Add mutex around futex exit
futex: Provide state handling for exec() as well
futex: Sanitize exit state handling
futex: Mark the begin of futex exit explicitly
futex: Set task::futex_state to DEAD right after handling futex exit
futex: Split futex_mm_release() for exit/exec
exit/exec: Seperate mm_release()
futex: Replace PF_EXITPIDONE with a state
...
Diffstat (limited to 'include/linux')
-rw-r--r-- | include/linux/compat.h | 2 | ||||
-rw-r--r-- | include/linux/futex.h | 40 | ||||
-rw-r--r-- | include/linux/jbd2.h | 2 | ||||
-rw-r--r-- | include/linux/kernel.h | 7 | ||||
-rw-r--r-- | include/linux/lockdep.h | 21 | ||||
-rw-r--r-- | include/linux/percpu-rwsem.h | 4 | ||||
-rw-r--r-- | include/linux/rcupdate.h | 2 | ||||
-rw-r--r-- | include/linux/refcount.h | 269 | ||||
-rw-r--r-- | include/linux/rwlock_api_smp.h | 16 | ||||
-rw-r--r-- | include/linux/sched.h | 3 | ||||
-rw-r--r-- | include/linux/sched/mm.h | 6 | ||||
-rw-r--r-- | include/linux/seqlock.h | 4 | ||||
-rw-r--r-- | include/linux/spinlock_api_smp.h | 8 | ||||
-rw-r--r-- | include/linux/ww_mutex.h | 2 |
14 files changed, 291 insertions, 95 deletions
diff --git a/include/linux/compat.h b/include/linux/compat.h index 16dafd9f4b86..c4c389c7e1b4 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -410,8 +410,6 @@ struct compat_kexec_segment; struct compat_mq_attr; struct compat_msgbuf; -extern void compat_exit_robust_list(struct task_struct *curr); - #define BITS_PER_COMPAT_LONG (8*sizeof(compat_long_t)) #define BITS_TO_COMPAT_LONGS(bits) DIV_ROUND_UP(bits, BITS_PER_COMPAT_LONG) diff --git a/include/linux/futex.h b/include/linux/futex.h index ccaef0097785..5cc3fed27d4c 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -2,7 +2,9 @@ #ifndef _LINUX_FUTEX_H #define _LINUX_FUTEX_H +#include <linux/sched.h> #include <linux/ktime.h> + #include <uapi/linux/futex.h> struct inode; @@ -48,15 +50,35 @@ union futex_key { #define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } } #ifdef CONFIG_FUTEX -extern void exit_robust_list(struct task_struct *curr); +enum { + FUTEX_STATE_OK, + FUTEX_STATE_EXITING, + FUTEX_STATE_DEAD, +}; -long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, - u32 __user *uaddr2, u32 val2, u32 val3); -#else -static inline void exit_robust_list(struct task_struct *curr) +static inline void futex_init_task(struct task_struct *tsk) { + tsk->robust_list = NULL; +#ifdef CONFIG_COMPAT + tsk->compat_robust_list = NULL; +#endif + INIT_LIST_HEAD(&tsk->pi_state_list); + tsk->pi_state_cache = NULL; + tsk->futex_state = FUTEX_STATE_OK; + mutex_init(&tsk->futex_exit_mutex); } +void futex_exit_recursive(struct task_struct *tsk); +void futex_exit_release(struct task_struct *tsk); +void futex_exec_release(struct task_struct *tsk); + +long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, + u32 __user *uaddr2, u32 val2, u32 val3); +#else +static inline void futex_init_task(struct task_struct *tsk) { } +static inline void futex_exit_recursive(struct task_struct *tsk) { } +static inline void futex_exit_release(struct task_struct *tsk) { } +static inline void futex_exec_release(struct task_struct *tsk) { } static inline long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, u32 __user *uaddr2, u32 val2, u32 val3) @@ -65,12 +87,4 @@ static inline long do_futex(u32 __user *uaddr, int op, u32 val, } #endif -#ifdef CONFIG_FUTEX_PI -extern void exit_pi_state_list(struct task_struct *curr); -#else -static inline void exit_pi_state_list(struct task_struct *curr) -{ -} -#endif - #endif diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 603fbc4e2f70..564793c24d12 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1170,7 +1170,7 @@ struct journal_s #define jbd2_might_wait_for_commit(j) \ do { \ rwsem_acquire(&j->j_trans_commit_map, 0, 0, _THIS_IP_); \ - rwsem_release(&j->j_trans_commit_map, 1, _THIS_IP_); \ + rwsem_release(&j->j_trans_commit_map, _THIS_IP_); \ } while (0) /* journal feature predicate functions */ diff --git a/include/linux/kernel.h b/include/linux/kernel.h index d83d403dac2e..09f759228e3f 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -328,13 +328,6 @@ extern int oops_may_print(void); void do_exit(long error_code) __noreturn; void complete_and_exit(struct completion *, long) __noreturn; -#ifdef CONFIG_ARCH_HAS_REFCOUNT -void refcount_error_report(struct pt_regs *regs, const char *err); -#else -static inline void refcount_error_report(struct pt_regs *regs, const char *err) -{ } -#endif - /* Internal, do not use. */ int __must_check _kstrtoul(const char *s, unsigned int base, unsigned long *res); int __must_check _kstrtol(const char *s, unsigned int base, long *res); diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index b8a835fd611b..c50d01ef1414 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -349,8 +349,7 @@ extern void lock_acquire(struct lockdep_map *lock, unsigned int subclass, int trylock, int read, int check, struct lockdep_map *nest_lock, unsigned long ip); -extern void lock_release(struct lockdep_map *lock, int nested, - unsigned long ip); +extern void lock_release(struct lockdep_map *lock, unsigned long ip); /* * Same "read" as for lock_acquire(), except -1 means any. @@ -428,7 +427,7 @@ static inline void lockdep_set_selftest_task(struct task_struct *task) } # define lock_acquire(l, s, t, r, c, n, i) do { } while (0) -# define lock_release(l, n, i) do { } while (0) +# define lock_release(l, i) do { } while (0) # define lock_downgrade(l, i) do { } while (0) # define lock_set_class(l, n, k, s, i) do { } while (0) # define lock_set_subclass(l, s, i) do { } while (0) @@ -591,42 +590,42 @@ static inline void print_irqtrace_events(struct task_struct *curr) #define spin_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) #define spin_acquire_nest(l, s, t, n, i) lock_acquire_exclusive(l, s, t, n, i) -#define spin_release(l, n, i) lock_release(l, n, i) +#define spin_release(l, i) lock_release(l, i) #define rwlock_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) #define rwlock_acquire_read(l, s, t, i) lock_acquire_shared_recursive(l, s, t, NULL, i) -#define rwlock_release(l, n, i) lock_release(l, n, i) +#define rwlock_release(l, i) lock_release(l, i) #define seqcount_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) #define seqcount_acquire_read(l, s, t, i) lock_acquire_shared_recursive(l, s, t, NULL, i) -#define seqcount_release(l, n, i) lock_release(l, n, i) +#define seqcount_release(l, i) lock_release(l, i) #define mutex_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) #define mutex_acquire_nest(l, s, t, n, i) lock_acquire_exclusive(l, s, t, n, i) -#define mutex_release(l, n, i) lock_release(l, n, i) +#define mutex_release(l, i) lock_release(l, i) #define rwsem_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) #define rwsem_acquire_nest(l, s, t, n, i) lock_acquire_exclusive(l, s, t, n, i) #define rwsem_acquire_read(l, s, t, i) lock_acquire_shared(l, s, t, NULL, i) -#define rwsem_release(l, n, i) lock_release(l, n, i) +#define rwsem_release(l, i) lock_release(l, i) #define lock_map_acquire(l) lock_acquire_exclusive(l, 0, 0, NULL, _THIS_IP_) #define lock_map_acquire_read(l) lock_acquire_shared_recursive(l, 0, 0, NULL, _THIS_IP_) #define lock_map_acquire_tryread(l) lock_acquire_shared_recursive(l, 0, 1, NULL, _THIS_IP_) -#define lock_map_release(l) lock_release(l, 1, _THIS_IP_) +#define lock_map_release(l) lock_release(l, _THIS_IP_) #ifdef CONFIG_PROVE_LOCKING # define might_lock(lock) \ do { \ typecheck(struct lockdep_map *, &(lock)->dep_map); \ lock_acquire(&(lock)->dep_map, 0, 0, 0, 1, NULL, _THIS_IP_); \ - lock_release(&(lock)->dep_map, 0, _THIS_IP_); \ + lock_release(&(lock)->dep_map, _THIS_IP_); \ } while (0) # define might_lock_read(lock) \ do { \ typecheck(struct lockdep_map *, &(lock)->dep_map); \ lock_acquire(&(lock)->dep_map, 0, 0, 1, 1, NULL, _THIS_IP_); \ - lock_release(&(lock)->dep_map, 0, _THIS_IP_); \ + lock_release(&(lock)->dep_map, _THIS_IP_); \ } while (0) #define lockdep_assert_irqs_enabled() do { \ diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index 3998cdf9cd14..ad2ca2a89d5b 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -93,7 +93,7 @@ static inline void percpu_up_read(struct percpu_rw_semaphore *sem) __percpu_up_read(sem); /* Unconditional memory barrier */ preempt_enable(); - rwsem_release(&sem->rw_sem.dep_map, 1, _RET_IP_); + rwsem_release(&sem->rw_sem.dep_map, _RET_IP_); } extern void percpu_down_write(struct percpu_rw_semaphore *); @@ -118,7 +118,7 @@ extern void percpu_free_rwsem(struct percpu_rw_semaphore *); static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem, bool read, unsigned long ip) { - lock_release(&sem->rw_sem.dep_map, 1, ip); + lock_release(&sem->rw_sem.dep_map, ip); #ifdef CONFIG_RWSEM_SPIN_ON_OWNER if (!read) atomic_long_set(&sem->rw_sem.owner, RWSEM_OWNER_UNKNOWN); diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 185dd9736863..0b7506330c87 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -210,7 +210,7 @@ static inline void rcu_lock_acquire(struct lockdep_map *map) static inline void rcu_lock_release(struct lockdep_map *map) { - lock_release(map, 1, _THIS_IP_); + lock_release(map, _THIS_IP_); } extern struct lockdep_map rcu_lock_map; diff --git a/include/linux/refcount.h b/include/linux/refcount.h index e28cce21bad6..0ac50cf62d06 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -1,9 +1,88 @@ /* SPDX-License-Identifier: GPL-2.0 */ +/* + * Variant of atomic_t specialized for reference counts. + * + * The interface matches the atomic_t interface (to aid in porting) but only + * provides the few functions one should use for reference counting. + * + * Saturation semantics + * ==================== + * + * refcount_t differs from atomic_t in that the counter saturates at + * REFCOUNT_SATURATED and will not move once there. This avoids wrapping the + * counter and causing 'spurious' use-after-free issues. In order to avoid the + * cost associated with introducing cmpxchg() loops into all of the saturating + * operations, we temporarily allow the counter to take on an unchecked value + * and then explicitly set it to REFCOUNT_SATURATED on detecting that underflow + * or overflow has occurred. Although this is racy when multiple threads + * access the refcount concurrently, by placing REFCOUNT_SATURATED roughly + * equidistant from 0 and INT_MAX we minimise the scope for error: + * + * INT_MAX REFCOUNT_SATURATED UINT_MAX + * 0 (0x7fff_ffff) (0xc000_0000) (0xffff_ffff) + * +--------------------------------+----------------+----------------+ + * <---------- bad value! ----------> + * + * (in a signed view of the world, the "bad value" range corresponds to + * a negative counter value). + * + * As an example, consider a refcount_inc() operation that causes the counter + * to overflow: + * + * int old = atomic_fetch_add_relaxed(r); + * // old is INT_MAX, refcount now INT_MIN (0x8000_0000) + * if (old < 0) + * atomic_set(r, REFCOUNT_SATURATED); + * + * If another thread also performs a refcount_inc() operation between the two + * atomic operations, then the count will continue to edge closer to 0. If it + * reaches a value of 1 before /any/ of the threads reset it to the saturated + * value, then a concurrent refcount_dec_and_test() may erroneously free the + * underlying object. Given the precise timing details involved with the + * round-robin scheduling of each thread manipulating the refcount and the need + * to hit the race multiple times in succession, there doesn't appear to be a + * practical avenue of attack even if using refcount_add() operations with + * larger increments. + * + * Memory ordering + * =============== + * + * Memory ordering rules are slightly relaxed wrt regular atomic_t functions + * and provide only what is strictly required for refcounts. + * + * The increments are fully relaxed; these will not provide ordering. The + * rationale is that whatever is used to obtain the object we're increasing the + * reference count on will provide the ordering. For locked data structures, + * its the lock acquire, for RCU/lockless data structures its the dependent + * load. + * + * Do note that inc_not_zero() provides a control dependency which will order + * future stores against the inc, this ensures we'll never modify the object + * if we did not in fact acquire a reference. + * + * The decrements will provide release order, such that all the prior loads and + * stores will be issued before, it also provides a control dependency, which + * will order us against the subsequent free(). + * + * The control dependency is against the load of the cmpxchg (ll/sc) that + * succeeded. This means the stores aren't fully ordered, but this is fine + * because the 1->0 transition indicates no concurrency. + * + * Note that the allocator is responsible for ordering things between free() + * and alloc(). + * + * The decrements dec_and_test() and sub_and_test() also provide acquire + * ordering on success. + * + */ + #ifndef _LINUX_REFCOUNT_H #define _LINUX_REFCOUNT_H #include <linux/atomic.h> +#include <linux/bug.h> #include <linux/compiler.h> +#include <linux/limits.h> #include <linux/spinlock_types.h> struct mutex; @@ -12,7 +91,7 @@ struct mutex; * struct refcount_t - variant of atomic_t specialized for reference counts * @refs: atomic_t counter field * - * The counter saturates at UINT_MAX and will not move once + * The counter saturates at REFCOUNT_SATURATED and will not move once * there. This avoids wrapping the counter and causing 'spurious' * use-after-free bugs. */ @@ -21,13 +100,25 @@ typedef struct refcount_struct { } refcount_t; #define REFCOUNT_INIT(n) { .refs = ATOMIC_INIT(n), } +#define REFCOUNT_MAX INT_MAX +#define REFCOUNT_SATURATED (INT_MIN / 2) + +enum refcount_saturation_type { + REFCOUNT_ADD_NOT_ZERO_OVF, + REFCOUNT_ADD_OVF, + REFCOUNT_ADD_UAF, + REFCOUNT_SUB_UAF, + REFCOUNT_DEC_LEAK, +}; + +void refcount_warn_saturate(refcount_t *r, enum refcount_saturation_type t); /** * refcount_set - set a refcount's value * @r: the refcount * @n: value to which the refcount will be set */ -static inline void refcount_set(refcount_t *r, unsigned int n) +static inline void refcount_set(refcount_t *r, int n) { atomic_set(&r->refs, n); } @@ -43,70 +134,168 @@ static inline unsigned int refcount_read(const refcount_t *r) return atomic_read(&r->refs); } -extern __must_check bool refcount_add_not_zero_checked(unsigned int i, refcount_t *r); -extern void refcount_add_checked(unsigned int i, refcount_t *r); - -extern __must_check bool refcount_inc_not_zero_checked(refcount_t *r); -extern void refcount_inc_checked(refcount_t *r); - -extern __must_check bool refcount_sub_and_test_checked(unsigned int i, refcount_t *r); - -extern __must_check bool refcount_dec_and_test_checked(refcount_t *r); -extern void refcount_dec_checked(refcount_t *r); - -#ifdef CONFIG_REFCOUNT_FULL - -#define refcount_add_not_zero refcount_add_not_zero_checked -#define refcount_add refcount_add_checked - -#define refcount_inc_not_zero refcount_inc_not_zero_checked -#define refcount_inc refcount_inc_checked +/** + * refcount_add_not_zero - add a value to a refcount unless it is 0 + * @i: the value to add to the refcount + * @r: the refcount + * + * Will saturate at REFCOUNT_SATURATED and WARN. + * + * Provides no memory ordering, it is assumed the caller has guaranteed the + * object memory to be stable (RCU, etc.). It does provide a control dependency + * and thereby orders future stores. See the comment on top. + * + * Use of this function is not recommended for the normal reference counting + * use case in which references are taken and released one at a time. In these + * cases, refcount_inc(), or one of its variants, should instead be used to + * increment a reference count. + * + * Return: false if the passed refcount is 0, true otherwise + */ +static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r) +{ + int old = refcount_read(r); -#define refcount_sub_and_test refcount_sub_and_test_checked + do { + if (!old) + break; + } while (!atomic_try_cmpxchg_relaxed(&r->refs, &old, old + i)); -#define refcount_dec_and_test refcount_dec_and_test_checked -#define refcount_dec refcount_dec_checked + if (unlikely(old < 0 || old + i < 0)) + refcount_warn_saturate(r, REFCOUNT_ADD_NOT_ZERO_OVF); -#else -# ifdef CONFIG_ARCH_HAS_REFCOUNT -# include <asm/refcount.h> -# else -static inline __must_check bool refcount_add_not_zero(unsigned int i, refcount_t *r) -{ - return atomic_add_unless(&r->refs, i, 0); + return old; } -static inline void refcount_add(unsigned int i, refcount_t *r) +/** + * refcount_add - add a value to a refcount + * @i: the value to add to the refcount + * @r: the refcount + * + * Similar to atomic_add(), but will saturate at REFCOUNT_SATURATED and WARN. + * + * Provides no memory ordering, it is assumed the caller has guaranteed the + * object memory to be stable (RCU, etc.). It does provide a control dependency + * and thereby orders future stores. See the comment on top. + * + * Use of this function is not recommended for the normal reference counting + * use case in which references are taken and released one at a time. In these + * cases, refcount_inc(), or one of its variants, should instead be used to + * increment a reference count. + */ +static inline void refcount_add(int i, refcount_t *r) { - atomic_add(i, &r->refs); + int old = atomic_fetch_add_relaxed(i, &r->refs); + + if (unlikely(!old)) + refcount_warn_saturate(r, REFCOUNT_ADD_UAF); + else if (unlikely(old < 0 || old + i < 0)) + refcount_warn_saturate(r, REFCOUNT_ADD_OVF); } +/** + * refcount_inc_not_zero - increment a refcount unless it is 0 + * @r: the refcount to increment + * + * Similar to atomic_inc_not_zero(), but will saturate at REFCOUNT_SATURATED + * and WARN. + * + * Provides no memory ordering, it is assumed the caller has guaranteed the + * object memory to be stable (RCU, etc.). It does provide a control dependency + * and thereby orders future stores. See the comment on top. + * + * Return: true if the increment was successful, false otherwise + */ static inline __must_check bool refcount_inc_not_zero(refcount_t *r) { - return atomic_add_unless(&r->refs, 1, 0); + return refcount_add_not_zero(1, r); } +/** + * refcount_inc - increment a refcount + * @r: the refcount to increment + * + * Similar to atomic_inc(), but will saturate at REFCOUNT_SATURATED and WARN. + * + * Provides no memory ordering, it is assumed the caller already has a + * reference on the object. + * + * Will WARN if the refcount is 0, as this represents a possible use-after-free + * condition. + */ static inline void refcount_inc(refcount_t *r) { - atomic_inc(&r->refs); + refcount_add(1, r); } -static inline __must_check bool refcount_sub_and_test(unsigned int i, refcount_t *r) +/** + * refcount_sub_and_test - subtract from a refcount and test if it is 0 + * @i: amount to subtract from the refcount + * @r: the refcount + * + * Similar to atomic_dec_and_test(), but it will WARN, return false and + * ultimately leak on underflow and will fail to decrement when saturated + * at REFCOUNT_SATURATED. + * + * Provides release memory ordering, such that prior loads and stores are done + * before, and provides an acquire ordering on success such that free() + * must come after. + * + * Use of this function is not recommended for the normal reference counting + * use case in which references are taken and released one at a time. In these + * cases, refcount_dec(), or one of its variants, should instead be used to + * decrement a reference count. + * + * Return: true if the resulting refcount is 0, false otherwise + */ +static inline __must_check bool refcount_sub_and_test(int i, refcount_t *r) { - return atomic_sub_and_test(i, &r->refs); + int old = atomic_fetch_sub_release(i, &r->refs); + + if (old == i) { + smp_acquire__after_ctrl_dep(); + return true; + } + + if (unlikely(old < 0 || old - i < 0)) + refcount_warn_saturate(r, REFCOUNT_SUB_UAF); + + return false; } +/** + * refcount_dec_and_test - decrement a refcount and test if it is 0 + * @r: the refcount + * + * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to + * decrement when saturated at REFCOUNT_SATURATED. + * + * Provides release memory ordering, such that prior loads and stores are done + * before, and provides an acquire ordering on success such that free() + * must come after. + * + * Return: true if the resulting refcount is 0, false otherwise + */ static inline __must_check bool refcount_dec_and_test(refcount_t *r) { - return atomic_dec_and_test(&r->refs); + return refcount_sub_and_test(1, r); } +/** + * refcount_dec - decrement a refcount + * @r: the refcount + * + * Similar to atomic_dec(), it will WARN on underflow and fail to decrement + * when saturated at REFCOUNT_SATURATED. + * + * Provides release memory ordering, such that prior loads and stores are done + * before. + */ static inline void refcount_dec(refcount_t *r) { - atomic_dec(&r->refs); + if (unlikely(atomic_fetch_sub_release(1, &r->refs) <= 1)) + refcount_warn_saturate(r, REFCOUNT_DEC_LEAK); } -# endif /* !CONFIG_ARCH_HAS_REFCOUNT */ -#endif /* CONFIG_REFCOUNT_FULL */ extern __must_check bool refcount_dec_if_one(refcount_t *r); extern __must_check bool refcount_dec_not_one(refcount_t *r); diff --git a/include/linux/rwlock_api_smp.h b/include/linux/rwlock_api_smp.h index 86ebb4bf9c6e..abfb53ab11be 100644 --- a/include/linux/rwlock_api_smp.h +++ b/include/linux/rwlock_api_smp.h @@ -215,14 +215,14 @@ static inline void __raw_write_lock(rwlock_t *lock) static inline void __raw_write_unlock(rwlock_t *lock) { - rwlock_release(&lock->dep_map, 1, _RET_IP_); + rwlock_release(&lock->dep_map, _RET_IP_); do_raw_write_unlock(lock); preempt_enable(); } static inline void __raw_read_unlock(rwlock_t *lock) { - rwlock_release(&lock->dep_map, 1, _RET_IP_); + rwlock_release(&lock->dep_map, _RET_IP_); do_raw_read_unlock(lock); preempt_enable(); } @@ -230,7 +230,7 @@ static inline void __raw_read_unlock(rwlock_t *lock) static inline void __raw_read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) { - rwlock_release(&lock->dep_map, 1, _RET_IP_); + rwlock_release(&lock->dep_map, _RET_IP_); do_raw_read_unlock(lock); local_irq_restore(flags); preempt_enable(); @@ -238,7 +238,7 @@ __raw_read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) static inline void __raw_read_unlock_irq(rwlock_t *lock) { - rwlock_release(&lock->dep_map, 1, _RET_IP_); + rwlock_release(&lock->dep_map, _RET_IP_); do_raw_read_unlock(lock); local_irq_enable(); preempt_enable(); @@ -246,7 +246,7 @@ static inline void __raw_read_unlock_irq(rwlock_t *lock) static inline void __raw_read_unlock_bh(rwlock_t *lock) { - rwlock_release(&lock->dep_map, 1, _RET_IP_); + rwlock_release(&lock->dep_map, _RET_IP_); do_raw_read_unlock(lock); __local_bh_enable_ip(_RET_IP_, SOFTIRQ_LOCK_OFFSET); } @@ -254,7 +254,7 @@ static inline void __raw_read_unlock_bh(rwlock_t *lock) static inline void __raw_write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) { - rwlock_release(&lock->dep_map, 1, _RET_IP_); + rwlock_release(&lock->dep_map, _RET_IP_); do_raw_write_unlock(lock); local_irq_restore(flags); preempt_enable(); @@ -262,7 +262,7 @@ static inline void __raw_write_unlock_irqrestore(rwlock_t *lock, static inline void __raw_write_unlock_irq(rwlock_t *lock) { - rwlock_release(&lock->dep_map, 1, _RET_IP_); + rwlock_release(&lock->dep_map, _RET_IP_); do_raw_write_unlock(lock); local_irq_enable(); preempt_enable(); @@ -270,7 +270,7 @@ static inline void __raw_write_unlock_irq(rwlock_t *lock) static inline void __raw_write_unlock_bh(rwlock_t *lock) { - rwlock_release(&lock->dep_map, 1, _RET_IP_); + rwlock_release(&lock->dep_map, _RET_IP_); do_raw_write_unlock(lock); __local_bh_enable_ip(_RET_IP_, SOFTIRQ_LOCK_OFFSET); } diff --git a/include/linux/sched.h b/include/linux/sched.h index f72984f94a5c..07e68d9f5dc4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1059,6 +1059,8 @@ struct task_struct { #endif struct list_head pi_state_list; struct futex_pi_state *pi_state_cache; + struct mutex futex_exit_mutex; + unsigned int futex_state; #endif #ifdef CONFIG_PERF_EVENTS struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts]; @@ -1447,7 +1449,6 @@ extern struct pid *cad_pid; */ #define PF_IDLE 0x00000002 /* I am an IDLE thread */ #define PF_EXITING 0x00000004 /* Getting shut down */ -#define PF_EXITPIDONE 0x00000008 /* PI exit done on shut down */ #define PF_VCPU 0x00000010 /* I'm a virtual CPU */ #define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */ #define PF_FORKNOEXEC 0x00000040 /* Forked but didn't exec */ diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index e6770012db18..c49257a3b510 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -117,8 +117,10 @@ extern struct mm_struct *get_task_mm(struct task_struct *task); * succeeds. */ extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); -/* Remove the current tasks stale references to the old mm_struct */ -extern void mm_release(struct task_struct *, struct mm_struct *); +/* Remove the current tasks stale references to the old mm_struct on exit() */ +extern void exit_mm_release(struct task_struct *, struct mm_struct *); +/* Remove the current tasks stale references to the old mm_struct on exec() */ +extern void exec_mm_release(struct task_struct *, struct mm_struct *); #ifdef CONFIG_MEMCG extern void mm_update_next_owner(struct mm_struct *mm); diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index bcf4cf26b8c8..0491d963d47e 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -79,7 +79,7 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) local_irq_save(flags); seqcount_acquire_read(&l->dep_map, 0, 0, _RET_IP_); - seqcount_release(&l->dep_map, 1, _RET_IP_); + seqcount_release(&l->dep_map, _RET_IP_); local_irq_restore(flags); } @@ -384,7 +384,7 @@ static inline void write_seqcount_begin(seqcount_t *s) static inline void write_seqcount_end(seqcount_t *s) { - seqcount_release(&s->dep_map, 1, _RET_IP_); + seqcount_release(&s->dep_map, _RET_IP_); raw_write_seqcount_end(s); } diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h index b762eaba4cdf..19a9be9d97ee 100644 --- a/include/linux/spinlock_api_smp.h +++ b/include/linux/spinlock_api_smp.h @@ -147,7 +147,7 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock) static inline void __raw_spin_unlock(raw_spinlock_t *lock) { - spin_release(&lock->dep_map, 1, _RET_IP_); + spin_release(&lock->dep_map, _RET_IP_); do_raw_spin_unlock(lock); preempt_enable(); } @@ -155,7 +155,7 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock) static inline void __raw_spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long flags) { - spin_release(&lock->dep_map, 1, _RET_IP_); + spin_release(&lock->dep_map, _RET_IP_); do_raw_spin_unlock(lock); local_irq_restore(flags); preempt_enable(); @@ -163,7 +163,7 @@ static inline void __raw_spin_unlock_irqrestore(raw_spinlock_t *lock, static inline void __raw_spin_unlock_irq(raw_spinlock_t *lock) { - spin_release(&lock->dep_map, 1, _RET_IP_); + spin_release(&lock->dep_map, _RET_IP_); do_raw_spin_unlock(lock); local_irq_enable(); preempt_enable(); @@ -171,7 +171,7 @@ static inline void __raw_spin_unlock_irq(raw_spinlock_t *lock) static inline void __raw_spin_unlock_bh(raw_spinlock_t *lock) { - spin_release(&lock->dep_map, 1, _RET_IP_); + spin_release(&lock->dep_map, _RET_IP_); do_raw_spin_unlock(lock); __local_bh_enable_ip(_RET_IP_, SOFTIRQ_LOCK_OFFSET); } diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index 3af7c0e03be5..d7554252404c 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -182,7 +182,7 @@ static inline void ww_acquire_done(struct ww_acquire_ctx *ctx) static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx) { #ifdef CONFIG_DEBUG_MUTEXES - mutex_release(&ctx->dep_map, 0, _THIS_IP_); + mutex_release(&ctx->dep_map, _THIS_IP_); DEBUG_LOCKS_WARN_ON(ctx->acquired); if (!IS_ENABLED(CONFIG_PROVE_LOCKING)) |