summary | refs | log | tree | commit | diff
path: root/include
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2019-11-26 16:02:40 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2019-11-26 16:02:40 -0800
commit: 168829ad09ca9cdfdc664b2110d0e3569932c12d (patch)
tree: 1b6351ab5766a272dec1fc08f77272a199bba978 /include
parent: 1ae78780eda54023a0fb49ee743dbba39da148e0 (diff)
parent: 500543c53a54134ced386aed85cd93cf1363f981 (diff)
Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull locking updates from Ingo Molnar:
 "The main changes in this cycle were:

   - A comprehensive rewrite of the robust/PI futex code's exit handling
     to fix various exit races. (Thomas Gleixner et al)

   - Rework the generic REFCOUNT_FULL implementation using
     atomic_fetch_* operations so that the performance impact of the
     cmpxchg() loops is mitigated for common refcount operations.

     With these performance improvements the generic implementation of
     refcount_t should be good enough for everybody - and this got
     confirmed by performance testing, so remove ARCH_HAS_REFCOUNT and
     REFCOUNT_FULL entirely, leaving the generic implementation enabled
     unconditionally. (Will Deacon)

   - Other misc changes, fixes, cleanups"

* 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (27 commits)
  lkdtm: Remove references to CONFIG_REFCOUNT_FULL
  locking/refcount: Remove unused 'refcount_error_report()' function
  locking/refcount: Consolidate implementations of refcount_t
  locking/refcount: Consolidate REFCOUNT_{MAX,SATURATED} definitions
  locking/refcount: Move saturation warnings out of line
  locking/refcount: Improve performance of generic REFCOUNT_FULL code
  locking/refcount: Move the bulk of the REFCOUNT_FULL implementation into the <linux/refcount.h> header
  locking/refcount: Remove unused refcount_*_checked() variants
  locking/refcount: Ensure integer operands are treated as signed
  locking/refcount: Define constants for saturation and max refcount values
  futex: Prevent exit livelock
  futex: Provide distinct return value when owner is exiting
  futex: Add mutex around futex exit
  futex: Provide state handling for exec() as well
  futex: Sanitize exit state handling
  futex: Mark the begin of futex exit explicitly
  futex: Set task::futex_state to DEAD right after handling futex exit
  futex: Split futex_mm_release() for exit/exec
  exit/exec: Seperate mm_release()
  futex: Replace PF_EXITPIDONE with a state
  ...
Diffstat (limited to 'include')
-rw-r--r-- include/linux/compat.h | 2
-rw-r--r-- include/linux/futex.h | 40
-rw-r--r-- include/linux/jbd2.h | 2
-rw-r--r-- include/linux/kernel.h | 7
-rw-r--r-- include/linux/lockdep.h | 21
-rw-r--r-- include/linux/percpu-rwsem.h | 4
-rw-r--r-- include/linux/rcupdate.h | 2
-rw-r--r-- include/linux/refcount.h | 269
-rw-r--r-- include/linux/rwlock_api_smp.h | 16
-rw-r--r-- include/linux/sched.h | 3
-rw-r--r-- include/linux/sched/mm.h | 6
-rw-r--r-- include/linux/seqlock.h | 4
-rw-r--r-- include/linux/spinlock_api_smp.h | 8
-rw-r--r-- include/linux/ww_mutex.h | 2
-rw-r--r-- include/net/sock.h | 2
15 files changed, 292 insertions, 96 deletions
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 16dafd9f4b86..c4c389c7e1b4 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -410,8 +410,6 @@ struct compat_kexec_segment;
struct compat_mq_attr;
struct compat_msgbuf;
-extern void compat_exit_robust_list(struct task_struct *curr);
-
#define BITS_PER_COMPAT_LONG (8*sizeof(compat_long_t))
#define BITS_TO_COMPAT_LONGS(bits) DIV_ROUND_UP(bits, BITS_PER_COMPAT_LONG)
diff --git a/include/linux/futex.h b/include/linux/futex.h
index ccaef0097785..5cc3fed27d4c 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -2,7 +2,9 @@
#ifndef _LINUX_FUTEX_H
#define _LINUX_FUTEX_H
+#include <linux/sched.h>
#include <linux/ktime.h>
+
#include <uapi/linux/futex.h>
struct inode;
@@ -48,15 +50,35 @@ union futex_key {
#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } }
#ifdef CONFIG_FUTEX
-extern void exit_robust_list(struct task_struct *curr);
+enum {
+ FUTEX_STATE_OK,
+ FUTEX_STATE_EXITING,
+ FUTEX_STATE_DEAD,
+};
-long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
- u32 __user *uaddr2, u32 val2, u32 val3);
-#else
-static inline void exit_robust_list(struct task_struct *curr)
+static inline void futex_init_task(struct task_struct *tsk)
{
+ tsk->robust_list = NULL;
+#ifdef CONFIG_COMPAT
+ tsk->compat_robust_list = NULL;
+#endif
+ INIT_LIST_HEAD(&tsk->pi_state_list);
+ tsk->pi_state_cache = NULL;
+ tsk->futex_state = FUTEX_STATE_OK;
+ mutex_init(&tsk->futex_exit_mutex);
}
+void futex_exit_recursive(struct task_struct *tsk);
+void futex_exit_release(struct task_struct *tsk);
+void futex_exec_release(struct task_struct *tsk);
+
+long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
+ u32 __user *uaddr2, u32 val2, u32 val3);
+#else
+static inline void futex_init_task(struct task_struct *tsk) { }
+static inline void futex_exit_recursive(struct task_struct *tsk) { }
+static inline void futex_exit_release(struct task_struct *tsk) { }
+static inline void futex_exec_release(struct task_struct *tsk) { }
static inline long do_futex(u32 __user *uaddr, int op, u32 val,
ktime_t *timeout, u32 __user *uaddr2,
u32 val2, u32 val3)
@@ -65,12 +87,4 @@ static inline long do_futex(u32 __user *uaddr, int op, u32 val,
}
#endif
-#ifdef CONFIG_FUTEX_PI
-extern void exit_pi_state_list(struct task_struct *curr);
-#else
-static inline void exit_pi_state_list(struct task_struct *curr)
-{
-}
-#endif
-
#endif
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 603fbc4e2f70..564793c24d12 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1170,7 +1170,7 @@ struct journal_s
#define jbd2_might_wait_for_commit(j) \
do { \
rwsem_acquire(&j->j_trans_commit_map, 0, 0, _THIS_IP_); \
- rwsem_release(&j->j_trans_commit_map, 1, _THIS_IP_); \
+ rwsem_release(&j->j_trans_commit_map, _THIS_IP_); \
} while (0)
/* journal feature predicate functions */
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d83d403dac2e..09f759228e3f 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -328,13 +328,6 @@ extern int oops_may_print(void);
void do_exit(long error_code) __noreturn;
void complete_and_exit(struct completion *, long) __noreturn;
-#ifdef CONFIG_ARCH_HAS_REFCOUNT
-void refcount_error_report(struct pt_regs *regs, const char *err);
-#else
-static inline void refcount_error_report(struct pt_regs *regs, const char *err)
-{ }
-#endif
-
/* Internal, do not use. */
int __must_check _kstrtoul(const char *s, unsigned int base, unsigned long *res);
int __must_check _kstrtol(const char *s, unsigned int base, long *res);
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index b8a835fd611b..c50d01ef1414 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -349,8 +349,7 @@ extern void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
int trylock, int read, int check,
struct lockdep_map *nest_lock, unsigned long ip);
-extern void lock_release(struct lockdep_map *lock, int nested,
- unsigned long ip);
+extern void lock_release(struct lockdep_map *lock, unsigned long ip);
/*
* Same "read" as for lock_acquire(), except -1 means any.
@@ -428,7 +427,7 @@ static inline void lockdep_set_selftest_task(struct task_struct *task)
}
# define lock_acquire(l, s, t, r, c, n, i) do { } while (0)
-# define lock_release(l, n, i) do { } while (0)
+# define lock_release(l, i) do { } while (0)
# define lock_downgrade(l, i) do { } while (0)
# define lock_set_class(l, n, k, s, i) do { } while (0)
# define lock_set_subclass(l, s, i) do { } while (0)
@@ -591,42 +590,42 @@ static inline void print_irqtrace_events(struct task_struct *curr)
#define spin_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i)
#define spin_acquire_nest(l, s, t, n, i) lock_acquire_exclusive(l, s, t, n, i)
-#define spin_release(l, n, i) lock_release(l, n, i)
+#define spin_release(l, i) lock_release(l, i)
#define rwlock_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i)
#define rwlock_acquire_read(l, s, t, i) lock_acquire_shared_recursive(l, s, t, NULL, i)
-#define rwlock_release(l, n, i) lock_release(l, n, i)
+#define rwlock_release(l, i) lock_release(l, i)
#define seqcount_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i)
#define seqcount_acquire_read(l, s, t, i) lock_acquire_shared_recursive(l, s, t, NULL, i)
-#define seqcount_release(l, n, i) lock_release(l, n, i)
+#define seqcount_release(l, i) lock_release(l, i)
#define mutex_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i)
#define mutex_acquire_nest(l, s, t, n, i) lock_acquire_exclusive(l, s, t, n, i)
-#define mutex_release(l, n, i) lock_release(l, n, i)
+#define mutex_release(l, i) lock_release(l, i)
#define rwsem_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i)
#define rwsem_acquire_nest(l, s, t, n, i) lock_acquire_exclusive(l, s, t, n, i)
#define rwsem_acquire_read(l, s, t, i) lock_acquire_shared(l, s, t, NULL, i)
-#define rwsem_release(l, n, i) lock_release(l, n, i)
+#define rwsem_release(l, i) lock_release(l, i)
#define lock_map_acquire(l) lock_acquire_exclusive(l, 0, 0, NULL, _THIS_IP_)
#define lock_map_acquire_read(l) lock_acquire_shared_recursive(l, 0, 0, NULL, _THIS_IP_)
#define lock_map_acquire_tryread(l) lock_acquire_shared_recursive(l, 0, 1, NULL, _THIS_IP_)
-#define lock_map_release(l) lock_release(l, 1, _THIS_IP_)
+#define lock_map_release(l) lock_release(l, _THIS_IP_)
#ifdef CONFIG_PROVE_LOCKING
# define might_lock(lock) \
do { \
typecheck(struct lockdep_map *, &(lock)->dep_map); \
lock_acquire(&(lock)->dep_map, 0, 0, 0, 1, NULL, _THIS_IP_); \
- lock_release(&(lock)->dep_map, 0, _THIS_IP_); \
+ lock_release(&(lock)->dep_map, _THIS_IP_); \
} while (0)
# define might_lock_read(lock) \
do { \
typecheck(struct lockdep_map *, &(lock)->dep_map); \
lock_acquire(&(lock)->dep_map, 0, 0, 1, 1, NULL, _THIS_IP_); \
- lock_release(&(lock)->dep_map, 0, _THIS_IP_); \
+ lock_release(&(lock)->dep_map, _THIS_IP_); \
} while (0)
#define lockdep_assert_irqs_enabled() do { \
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index 3998cdf9cd14..ad2ca2a89d5b 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -93,7 +93,7 @@ static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
__percpu_up_read(sem); /* Unconditional memory barrier */
preempt_enable();
- rwsem_release(&sem->rw_sem.dep_map, 1, _RET_IP_);
+ rwsem_release(&sem->rw_sem.dep_map, _RET_IP_);
}
extern void percpu_down_write(struct percpu_rw_semaphore *);
@@ -118,7 +118,7 @@ extern void percpu_free_rwsem(struct percpu_rw_semaphore *);
static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem,
bool read, unsigned long ip)
{
- lock_release(&sem->rw_sem.dep_map, 1, ip);
+ lock_release(&sem->rw_sem.dep_map, ip);
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
if (!read)
atomic_long_set(&sem->rw_sem.owner, RWSEM_OWNER_UNKNOWN);
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 185dd9736863..0b7506330c87 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -210,7 +210,7 @@ static inline void rcu_lock_acquire(struct lockdep_map *map)
static inline void rcu_lock_release(struct lockdep_map *map)
{
- lock_release(map, 1, _THIS_IP_);
+ lock_release(map, _THIS_IP_);
}
extern struct lockdep_map rcu_lock_map;
diff --git a/include/linux/refcount.h b/include/linux/refcount.h
index e28cce21bad6..0ac50cf62d06 100644
--- a/include/linux/refcount.h
+++ b/include/linux/refcount.h
@@ -1,9 +1,88 @@
/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Variant of atomic_t specialized for reference counts.
+ *
+ * The interface matches the atomic_t interface (to aid in porting) but only
+ * provides the few functions one should use for reference counting.
+ *
+ * Saturation semantics
+ * ====================
+ *
+ * refcount_t differs from atomic_t in that the counter saturates at
+ * REFCOUNT_SATURATED and will not move once there. This avoids wrapping the
+ * counter and causing 'spurious' use-after-free issues. In order to avoid the
+ * cost associated with introducing cmpxchg() loops into all of the saturating
+ * operations, we temporarily allow the counter to take on an unchecked value
+ * and then explicitly set it to REFCOUNT_SATURATED on detecting that underflow
+ * or overflow has occurred. Although this is racy when multiple threads
+ * access the refcount concurrently, by placing REFCOUNT_SATURATED roughly
+ * equidistant from 0 and INT_MAX we minimise the scope for error:
+ *
+ * INT_MAX REFCOUNT_SATURATED UINT_MAX
+ * 0 (0x7fff_ffff) (0xc000_0000) (0xffff_ffff)
+ * +--------------------------------+----------------+----------------+
+ * <---------- bad value! ---------->
+ *
+ * (in a signed view of the world, the "bad value" range corresponds to
+ * a negative counter value).
+ *
+ * As an example, consider a refcount_inc() operation that causes the counter
+ * to overflow:
+ *
+ * int old = atomic_fetch_add_relaxed(r);
+ * // old is INT_MAX, refcount now INT_MIN (0x8000_0000)
+ * if (old < 0)
+ * atomic_set(r, REFCOUNT_SATURATED);
+ *
+ * If another thread also performs a refcount_inc() operation between the two
+ * atomic operations, then the count will continue to edge closer to 0. If it
+ * reaches a value of 1 before /any/ of the threads reset it to the saturated
+ * value, then a concurrent refcount_dec_and_test() may erroneously free the
+ * underlying object. Given the precise timing details involved with the
+ * round-robin scheduling of each thread manipulating the refcount and the need
+ * to hit the race multiple times in succession, there doesn't appear to be a
+ * practical avenue of attack even if using refcount_add() operations with
+ * larger increments.
+ *
+ * Memory ordering
+ * ===============
+ *
+ * Memory ordering rules are slightly relaxed wrt regular atomic_t functions
+ * and provide only what is strictly required for refcounts.
+ *
+ * The increments are fully relaxed; these will not provide ordering. The
+ * rationale is that whatever is used to obtain the object we're increasing the
+ * reference count on will provide the ordering. For locked data structures,
+ * its the lock acquire, for RCU/lockless data structures its the dependent
+ * load.
+ *
+ * Do note that inc_not_zero() provides a control dependency which will order
+ * future stores against the inc, this ensures we'll never modify the object
+ * if we did not in fact acquire a reference.
+ *
+ * The decrements will provide release order, such that all the prior loads and
+ * stores will be issued before, it also provides a control dependency, which
+ * will order us against the subsequent free().
+ *
+ * The control dependency is against the load of the cmpxchg (ll/sc) that
+ * succeeded. This means the stores aren't fully ordered, but this is fine
+ * because the 1->0 transition indicates no concurrency.
+ *
+ * Note that the allocator is responsible for ordering things between free()
+ * and alloc().
+ *
+ * The decrements dec_and_test() and sub_and_test() also provide acquire
+ * ordering on success.
+ *
+ */
+
#ifndef _LINUX_REFCOUNT_H
#define _LINUX_REFCOUNT_H
#include <linux/atomic.h>
+#include <linux/bug.h>
#include <linux/compiler.h>
+#include <linux/limits.h>
#include <linux/spinlock_types.h>
struct mutex;
@@ -12,7 +91,7 @@ struct mutex;
* struct refcount_t - variant of atomic_t specialized for reference counts
* @refs: atomic_t counter field
*
- * The counter saturates at UINT_MAX and will not move once
+ * The counter saturates at REFCOUNT_SATURATED and will not move once
* there. This avoids wrapping the counter and causing 'spurious'
* use-after-free bugs.
*/
@@ -21,13 +100,25 @@ typedef struct refcount_struct {
} refcount_t;
#define REFCOUNT_INIT(n) { .refs = ATOMIC_INIT(n), }
+#define REFCOUNT_MAX INT_MAX
+#define REFCOUNT_SATURATED (INT_MIN / 2)
+
+enum refcount_saturation_type {
+ REFCOUNT_ADD_NOT_ZERO_OVF,
+ REFCOUNT_ADD_OVF,
+ REFCOUNT_ADD_UAF,
+ REFCOUNT_SUB_UAF,
+ REFCOUNT_DEC_LEAK,
+};
+
+void refcount_warn_saturate(refcount_t *r, enum refcount_saturation_type t);
/**
* refcount_set - set a refcount's value
* @r: the refcount
* @n: value to which the refcount will be set
*/
-static inline void refcount_set(refcount_t *r, unsigned int n)
+static inline void refcount_set(refcount_t *r, int n)
{
atomic_set(&r->refs, n);
}
@@ -43,70 +134,168 @@ static inline unsigned int refcount_read(const refcount_t *r)
return atomic_read(&r->refs);
}
-extern __must_check bool refcount_add_not_zero_checked(unsigned int i, refcount_t *r);
-extern void refcount_add_checked(unsigned int i, refcount_t *r);
-
-extern __must_check bool refcount_inc_not_zero_checked(refcount_t *r);
-extern void refcount_inc_checked(refcount_t *r);
-
-extern __must_check bool refcount_sub_and_test_checked(unsigned int i, refcount_t *r);
-
-extern __must_check bool refcount_dec_and_test_checked(refcount_t *r);
-extern void refcount_dec_checked(refcount_t *r);
-
-#ifdef CONFIG_REFCOUNT_FULL
-
-#define refcount_add_not_zero refcount_add_not_zero_checked
-#define refcount_add refcount_add_checked
-
-#define refcount_inc_not_zero refcount_inc_not_zero_checked
-#define refcount_inc refcount_inc_checked
+/**
+ * refcount_add_not_zero - add a value to a refcount unless it is 0
+ * @i: the value to add to the refcount
+ * @r: the refcount
+ *
+ * Will saturate at REFCOUNT_SATURATED and WARN.
+ *
+ * Provides no memory ordering, it is assumed the caller has guaranteed the
+ * object memory to be stable (RCU, etc.). It does provide a control dependency
+ * and thereby orders future stores. See the comment on top.
+ *
+ * Use of this function is not recommended for the normal reference counting
+ * use case in which references are taken and released one at a time. In these
+ * cases, refcount_inc(), or one of its variants, should instead be used to
+ * increment a reference count.
+ *
+ * Return: false if the passed refcount is 0, true otherwise
+ */
+static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r)
+{
+ int old = refcount_read(r);
-#define refcount_sub_and_test refcount_sub_and_test_checked
+ do {
+ if (!old)
+ break;
+ } while (!atomic_try_cmpxchg_relaxed(&r->refs, &old, old + i));
-#define refcount_dec_and_test refcount_dec_and_test_checked
-#define refcount_dec refcount_dec_checked
+ if (unlikely(old < 0 || old + i < 0))
+ refcount_warn_saturate(r, REFCOUNT_ADD_NOT_ZERO_OVF);
-#else
-# ifdef CONFIG_ARCH_HAS_REFCOUNT
-# include <asm/refcount.h>
-# else
-static inline __must_check bool refcount_add_not_zero(unsigned int i, refcount_t *r)
-{
- return atomic_add_unless(&r->refs, i, 0);
+ return old;
}
-static inline void refcount_add(unsigned int i, refcount_t *r)
+/**
+ * refcount_add - add a value to a refcount
+ * @i: the value to add to the refcount
+ * @r: the refcount
+ *
+ * Similar to atomic_add(), but will saturate at REFCOUNT_SATURATED and WARN.
+ *
+ * Provides no memory ordering, it is assumed the caller has guaranteed the
+ * object memory to be stable (RCU, etc.). It does provide a control dependency
+ * and thereby orders future stores. See the comment on top.
+ *
+ * Use of this function is not recommended for the normal reference counting
+ * use case in which references are taken and released one at a time. In these
+ * cases, refcount_inc(), or one of its variants, should instead be used to
+ * increment a reference count.
+ */
+static inline void refcount_add(int i, refcount_t *r)
{
- atomic_add(i, &r->refs);
+ int old = atomic_fetch_add_relaxed(i, &r->refs);
+
+ if (unlikely(!old))
+ refcount_warn_saturate(r, REFCOUNT_ADD_UAF);
+ else if (unlikely(old < 0 || old + i < 0))
+ refcount_warn_saturate(r, REFCOUNT_ADD_OVF);
}
+/**
+ * refcount_inc_not_zero - increment a refcount unless it is 0
+ * @r: the refcount to increment
+ *
+ * Similar to atomic_inc_not_zero(), but will saturate at REFCOUNT_SATURATED
+ * and WARN.
+ *
+ * Provides no memory ordering, it is assumed the caller has guaranteed the
+ * object memory to be stable (RCU, etc.). It does provide a control dependency
+ * and thereby orders future stores. See the comment on top.
+ *
+ * Return: true if the increment was successful, false otherwise
+ */
static inline __must_check bool refcount_inc_not_zero(refcount_t *r)
{
- return atomic_add_unless(&r->refs, 1, 0);
+ return refcount_add_not_zero(1, r);
}
+/**
+ * refcount_inc - increment a refcount
+ * @r: the refcount to increment
+ *
+ * Similar to atomic_inc(), but will saturate at REFCOUNT_SATURATED and WARN.
+ *
+ * Provides no memory ordering, it is assumed the caller already has a
+ * reference on the object.
+ *
+ * Will WARN if the refcount is 0, as this represents a possible use-after-free
+ * condition.
+ */
static inline void refcount_inc(refcount_t *r)
{
- atomic_inc(&r->refs);
+ refcount_add(1, r);
}
-static inline __must_check bool refcount_sub_and_test(unsigned int i, refcount_t *r)
+/**
+ * refcount_sub_and_test - subtract from a refcount and test if it is 0
+ * @i: amount to subtract from the refcount
+ * @r: the refcount
+ *
+ * Similar to atomic_dec_and_test(), but it will WARN, return false and
+ * ultimately leak on underflow and will fail to decrement when saturated
+ * at REFCOUNT_SATURATED.
+ *
+ * Provides release memory ordering, such that prior loads and stores are done
+ * before, and provides an acquire ordering on success such that free()
+ * must come after.
+ *
+ * Use of this function is not recommended for the normal reference counting
+ * use case in which references are taken and released one at a time. In these
+ * cases, refcount_dec(), or one of its variants, should instead be used to
+ * decrement a reference count.
+ *
+ * Return: true if the resulting refcount is 0, false otherwise
+ */
+static inline __must_check bool refcount_sub_and_test(int i, refcount_t *r)
{
- return atomic_sub_and_test(i, &r->refs);
+ int old = atomic_fetch_sub_release(i, &r->refs);
+
+ if (old == i) {
+ smp_acquire__after_ctrl_dep();
+ return true;
+ }
+
+ if (unlikely(old < 0 || old - i < 0))
+ refcount_warn_saturate(r, REFCOUNT_SUB_UAF);
+
+ return false;
}
+/**
+ * refcount_dec_and_test - decrement a refcount and test if it is 0
+ * @r: the refcount
+ *
+ * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to
+ * decrement when saturated at REFCOUNT_SATURATED.
+ *
+ * Provides release memory ordering, such that prior loads and stores are done
+ * before, and provides an acquire ordering on success such that free()
+ * must come after.
+ *
+ * Return: true if the resulting refcount is 0, false otherwise
+ */
static inline __must_check bool refcount_dec_and_test(refcount_t *r)
{
- return atomic_dec_and_test(&r->refs);
+ return refcount_sub_and_test(1, r);
}
+/**
+ * refcount_dec - decrement a refcount
+ * @r: the refcount
+ *
+ * Similar to atomic_dec(), it will WARN on underflow and fail to decrement
+ * when saturated at REFCOUNT_SATURATED.
+ *
+ * Provides release memory ordering, such that prior loads and stores are done
+ * before.
+ */
static inline void refcount_dec(refcount_t *r)
{
- atomic_dec(&r->refs);
+ if (unlikely(atomic_fetch_sub_release(1, &r->refs) <= 1))
+ refcount_warn_saturate(r, REFCOUNT_DEC_LEAK);
}
-# endif /* !CONFIG_ARCH_HAS_REFCOUNT */
-#endif /* CONFIG_REFCOUNT_FULL */
extern __must_check bool refcount_dec_if_one(refcount_t *r);
extern __must_check bool refcount_dec_not_one(refcount_t *r);
diff --git a/include/linux/rwlock_api_smp.h b/include/linux/rwlock_api_smp.h
index 86ebb4bf9c6e..abfb53ab11be 100644
--- a/include/linux/rwlock_api_smp.h
+++ b/include/linux/rwlock_api_smp.h
@@ -215,14 +215,14 @@ static inline void __raw_write_lock(rwlock_t *lock)
static inline void __raw_write_unlock(rwlock_t *lock)
{
- rwlock_release(&lock->dep_map, 1, _RET_IP_);
+ rwlock_release(&lock->dep_map, _RET_IP_);
do_raw_write_unlock(lock);
preempt_enable();
}
static inline void __raw_read_unlock(rwlock_t *lock)
{
- rwlock_release(&lock->dep_map, 1, _RET_IP_);
+ rwlock_release(&lock->dep_map, _RET_IP_);
do_raw_read_unlock(lock);
preempt_enable();
}
@@ -230,7 +230,7 @@ static inline void __raw_read_unlock(rwlock_t *lock)
static inline void
__raw_read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
{
- rwlock_release(&lock->dep_map, 1, _RET_IP_);
+ rwlock_release(&lock->dep_map, _RET_IP_);
do_raw_read_unlock(lock);
local_irq_restore(flags);
preempt_enable();
@@ -238,7 +238,7 @@ __raw_read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
static inline void __raw_read_unlock_irq(rwlock_t *lock)
{
- rwlock_release(&lock->dep_map, 1, _RET_IP_);
+ rwlock_release(&lock->dep_map, _RET_IP_);
do_raw_read_unlock(lock);
local_irq_enable();
preempt_enable();
@@ -246,7 +246,7 @@ static inline void __raw_read_unlock_irq(rwlock_t *lock)
static inline void __raw_read_unlock_bh(rwlock_t *lock)
{
- rwlock_release(&lock->dep_map, 1, _RET_IP_);
+ rwlock_release(&lock->dep_map, _RET_IP_);
do_raw_read_unlock(lock);
__local_bh_enable_ip(_RET_IP_, SOFTIRQ_LOCK_OFFSET);
}
@@ -254,7 +254,7 @@ static inline void __raw_read_unlock_bh(rwlock_t *lock)
static inline void __raw_write_unlock_irqrestore(rwlock_t *lock,
unsigned long flags)
{
- rwlock_release(&lock->dep_map, 1, _RET_IP_);
+ rwlock_release(&lock->dep_map, _RET_IP_);
do_raw_write_unlock(lock);
local_irq_restore(flags);
preempt_enable();
@@ -262,7 +262,7 @@ static inline void __raw_write_unlock_irqrestore(rwlock_t *lock,
static inline void __raw_write_unlock_irq(rwlock_t *lock)
{
- rwlock_release(&lock->dep_map, 1, _RET_IP_);
+ rwlock_release(&lock->dep_map, _RET_IP_);
do_raw_write_unlock(lock);
local_irq_enable();
preempt_enable();
@@ -270,7 +270,7 @@ static inline void __raw_write_unlock_irq(rwlock_t *lock)
static inline void __raw_write_unlock_bh(rwlock_t *lock)
{
- rwlock_release(&lock->dep_map, 1, _RET_IP_);
+ rwlock_release(&lock->dep_map, _RET_IP_);
do_raw_write_unlock(lock);
__local_bh_enable_ip(_RET_IP_, SOFTIRQ_LOCK_OFFSET);
}
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f72984f94a5c..07e68d9f5dc4 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1059,6 +1059,8 @@ struct task_struct {
#endif
struct list_head pi_state_list;
struct futex_pi_state *pi_state_cache;
+ struct mutex futex_exit_mutex;
+ unsigned int futex_state;
#endif
#ifdef CONFIG_PERF_EVENTS
struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
@@ -1447,7 +1449,6 @@ extern struct pid *cad_pid;
*/
#define PF_IDLE 0x00000002 /* I am an IDLE thread */
#define PF_EXITING 0x00000004 /* Getting shut down */
-#define PF_EXITPIDONE 0x00000008 /* PI exit done on shut down */
#define PF_VCPU 0x00000010 /* I'm a virtual CPU */
#define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */
#define PF_FORKNOEXEC 0x00000040 /* Forked but didn't exec */
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index e6770012db18..c49257a3b510 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -117,8 +117,10 @@ extern struct mm_struct *get_task_mm(struct task_struct *task);
* succeeds.
*/
extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode);
-/* Remove the current tasks stale references to the old mm_struct */
-extern void mm_release(struct task_struct *, struct mm_struct *);
+/* Remove the current tasks stale references to the old mm_struct on exit() */
+extern void exit_mm_release(struct task_struct *, struct mm_struct *);
+/* Remove the current tasks stale references to the old mm_struct on exec() */
+extern void exec_mm_release(struct task_struct *, struct mm_struct *);
#ifdef CONFIG_MEMCG
extern void mm_update_next_owner(struct mm_struct *mm);
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index bcf4cf26b8c8..0491d963d47e 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -79,7 +79,7 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
local_irq_save(flags);
seqcount_acquire_read(&l->dep_map, 0, 0, _RET_IP_);
- seqcount_release(&l->dep_map, 1, _RET_IP_);
+ seqcount_release(&l->dep_map, _RET_IP_);
local_irq_restore(flags);
}
@@ -384,7 +384,7 @@ static inline void write_seqcount_begin(seqcount_t *s)
static inline void write_seqcount_end(seqcount_t *s)
{
- seqcount_release(&s->dep_map, 1, _RET_IP_);
+ seqcount_release(&s->dep_map, _RET_IP_);
raw_write_seqcount_end(s);
}
diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h
index b762eaba4cdf..19a9be9d97ee 100644
--- a/include/linux/spinlock_api_smp.h
+++ b/include/linux/spinlock_api_smp.h
@@ -147,7 +147,7 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock)
static inline void __raw_spin_unlock(raw_spinlock_t *lock)
{
- spin_release(&lock->dep_map, 1, _RET_IP_);
+ spin_release(&lock->dep_map, _RET_IP_);
do_raw_spin_unlock(lock);
preempt_enable();
}
@@ -155,7 +155,7 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock)
static inline void __raw_spin_unlock_irqrestore(raw_spinlock_t *lock,
unsigned long flags)
{
- spin_release(&lock->dep_map, 1, _RET_IP_);
+ spin_release(&lock->dep_map, _RET_IP_);
do_raw_spin_unlock(lock);
local_irq_restore(flags);
preempt_enable();
@@ -163,7 +163,7 @@ static inline void __raw_spin_unlock_irqrestore(raw_spinlock_t *lock,
static inline void __raw_spin_unlock_irq(raw_spinlock_t *lock)
{
- spin_release(&lock->dep_map, 1, _RET_IP_);
+ spin_release(&lock->dep_map, _RET_IP_);
do_raw_spin_unlock(lock);
local_irq_enable();
preempt_enable();
@@ -171,7 +171,7 @@ static inline void __raw_spin_unlock_irq(raw_spinlock_t *lock)
static inline void __raw_spin_unlock_bh(raw_spinlock_t *lock)
{
- spin_release(&lock->dep_map, 1, _RET_IP_);
+ spin_release(&lock->dep_map, _RET_IP_);
do_raw_spin_unlock(lock);
__local_bh_enable_ip(_RET_IP_, SOFTIRQ_LOCK_OFFSET);
}
diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h
index 3af7c0e03be5..d7554252404c 100644
--- a/include/linux/ww_mutex.h
+++ b/include/linux/ww_mutex.h
@@ -182,7 +182,7 @@ static inline void ww_acquire_done(struct ww_acquire_ctx *ctx)
static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx)
{
#ifdef CONFIG_DEBUG_MUTEXES
- mutex_release(&ctx->dep_map, 0, _THIS_IP_);
+ mutex_release(&ctx->dep_map, _THIS_IP_);
DEBUG_LOCKS_WARN_ON(ctx->acquired);
if (!IS_ENABLED(CONFIG_PROVE_LOCKING))
diff --git a/include/net/sock.h b/include/net/sock.h
index e7f697174f84..87d54ef57f00 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1488,7 +1488,7 @@ static inline void sock_release_ownership(struct sock *sk)
sk->sk_lock.owned = 0;
/* The sk_lock has mutex_unlock() semantics: */
- mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
+ mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
}
}