From b5c089880723b2c18531c40e445235bd646a51d1 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 29 May 2024 07:46:48 -0700 Subject: af_unix: Remove dead code in unix_stream_read_generic(). When splice() support was added in commit 2b514574f7e8 ("net: af_unix: implement splice for stream af_unix sockets"), we had to release unix_sk(sk)->readlock (current iolock) before calling splice_to_pipe(). Due to the unlock, commit 73ed5d25dce0 ("af-unix: fix use-after-free with concurrent readers while splicing") added a safeguard in unix_stream_read_generic(); we had to bump the skb refcount before calling ->recv_actor() and then check if the skb was consumed by a concurrent reader. However, the pipe side locking was refactored, and since commit 25869262ef7a ("skb_splice_bits(): get rid of callback"), we can call splice_to_pipe() without releasing unix_sk(sk)->iolock. Now, the skb is always alive after the ->recv_actor() callback, so let's remove the unnecessary drop_skb logic. This is mostly the revert of commit 73ed5d25dce0 ("af-unix: fix use-after-free with concurrent readers while splicing"). Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240529144648.68591-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/unix/af_unix.c | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 25b49efc0926..861793b489f6 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -654,8 +654,8 @@ static void unix_release_sock(struct sock *sk, int embrion) while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { if (state == TCP_LISTEN) unix_release_sock(skb->sk, 1); + /* passed fds are erased in the kfree_skb hook */ - UNIXCB(skb).consumed = skb->len; kfree_skb(skb); } @@ -2704,9 +2704,8 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, skip = max(sk_peek_offset(sk, flags), 0); do { - int chunk; - bool drop_skb; struct sk_buff *skb, *last; + int chunk; redo: unix_state_lock(sk); @@ -2802,11 +2801,7 @@ unlock: } chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size); - skb_get(skb); chunk = state->recv_actor(skb, skip, chunk, state); - drop_skb = !unix_skb_len(skb); - /* skb is only safe to use if !drop_skb */ - consume_skb(skb); if (chunk < 0) { if (copied == 0) copied = -EFAULT; @@ -2815,18 +2810,6 @@ unlock: copied += chunk; size -= chunk; - if (drop_skb) { - /* the skb was touched by a concurrent reader; - * we should not expect anything from this skb - * anymore and assume it invalid - we can be - * sure it was dropped from the socket queue - * - * let's report a short read - */ - err = 0; - break; - } - /* Mark read part of skb as used */ if (!(flags & MSG_PEEK)) { UNIXCB(skb).consumed += chunk; -- cgit v1.2.3-70-g09d2 From 3955802f160b5c61ac00d7e54da8d746f2e4a2d5 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 20 Jun 2024 13:56:13 -0700 Subject: af_unix: Define locking order for unix_table_double_lock(). When created, AF_UNIX socket is put into net->unx.table.buckets[], and the hash is stored in sk->sk_hash. * unbound socket : 0 <= sk_hash <= UNIX_HASH_MOD When bind() is called, the socket could be moved to another bucket. * pathname socket : 0 <= sk_hash <= UNIX_HASH_MOD * abstract socket : UNIX_HASH_MOD + 1 <= sk_hash <= UNIX_HASH_MOD * 2 + 1 Then, we call unix_table_double_lock() which locks a single bucket or two. Let's define the order as unix_table_lock_cmp_fn() instead of using spin_lock_nested(). The locking is always done in ascending order of sk->sk_hash, which is the index of buckets/locks array allocated by kvmalloc_array(). sk_hash_A < sk_hash_B <=> &locks[sk_hash_A].dep_map < &locks[sk_hash_B].dep_map So, the relation of two sk->sk_hash can be derived from the addresses of dep_map in the array of locks. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Kent Overstreet Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e9c941e6a464..7889d4723959 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -126,6 +126,15 @@ static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2]; * hash table is protected with spinlock. * each socket state is protected by separate spinlock. */ +#ifdef CONFIG_PROVE_LOCKING +#define cmp_ptr(l, r) (((l) > (r)) - ((l) < (r))) + +static int unix_table_lock_cmp_fn(const struct lockdep_map *a, + const struct lockdep_map *b) +{ + return cmp_ptr(a, b); +} +#endif static unsigned int unix_unbound_hash(struct sock *sk) { @@ -168,7 +177,7 @@ static void unix_table_double_lock(struct net *net, swap(hash1, hash2); spin_lock(&net->unx.table.locks[hash1]); - spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING); + spin_lock(&net->unx.table.locks[hash2]); } static void unix_table_double_unlock(struct net *net, @@ -3578,6 +3587,7 @@ static int __net_init unix_net_init(struct net *net) for (i = 0; i < UNIX_HASH_SIZE; i++) { spin_lock_init(&net->unx.table.locks[i]); + lock_set_cmp_fn(&net->unx.table.locks[i], unix_table_lock_cmp_fn, NULL); INIT_HLIST_HEAD(&net->unx.table.buckets[i]); } -- cgit v1.2.3-70-g09d2 From ed99822817cb728eee8786c1c921c69c6be206fe Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 20 Jun 2024 13:56:14 -0700 Subject: af_unix: Define locking order for U_LOCK_SECOND in unix_state_double_lock(). unix_dgram_connect() and unix_dgram_{send,recv}msg() lock the socket and peer in ascending order of the socket address. Let's define the order as unix_state_lock_cmp_fn() instead of using unix_state_lock_nested(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Kent Overstreet Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 7889d4723959..0657f599bbef 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -134,6 +134,18 @@ static int unix_table_lock_cmp_fn(const struct lockdep_map *a, { return cmp_ptr(a, b); } + +static int unix_state_lock_cmp_fn(const struct lockdep_map *_a, + const struct lockdep_map *_b) +{ + const struct unix_sock *a, *b; + + a = container_of(_a, struct unix_sock, lock.dep_map); + b = container_of(_b, struct unix_sock, lock.dep_map); + + /* unix_state_double_lock(): ascending address order. */ + return cmp_ptr(a, b); +} #endif static unsigned int unix_unbound_hash(struct sock *sk) @@ -987,6 +999,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, u->path.dentry = NULL; u->path.mnt = NULL; spin_lock_init(&u->lock); + lock_set_cmp_fn(&u->lock, unix_state_lock_cmp_fn, NULL); mutex_init(&u->iolock); /* single task reading lock */ mutex_init(&u->bindlock); /* single task binding lock */ init_waitqueue_head(&u->peer_wait); @@ -1335,11 +1348,12 @@ static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) unix_state_lock(sk1); return; } + if (sk1 > sk2) swap(sk1, sk2); unix_state_lock(sk1); - unix_state_lock_nested(sk2, U_LOCK_SECOND); + unix_state_lock(sk2); } static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2) -- cgit v1.2.3-70-g09d2 From 1ca27e0c8c13ac50a4acf9cdf77069e2d94a547d Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 20 Jun 2024 13:56:15 -0700 Subject: af_unix: Don't retry after unix_state_lock_nested() in unix_stream_connect(). When a SOCK_(STREAM|SEQPACKET) socket connect()s to another one, we need to lock the two sockets to check their states in unix_stream_connect(). We use unix_state_lock() for the server and unix_state_lock_nested() for client with tricky sk->sk_state check to avoid deadlock. The possible deadlock scenario are the following: 1) Self connect() 2) Simultaneous connect() The former is simple, attempt to grab the same lock, and the latter is AB-BA deadlock. After the server's unix_state_lock(), we check the server socket's state, and if it's not TCP_LISTEN, connect() fails with -EINVAL. Then, we avoid the former deadlock by checking the client's state before unix_state_lock_nested(). If its state is not TCP_LISTEN, we can make sure that the client and the server are not identical based on the state. Also, the latter deadlock can be avoided in the same way. Due to the server sk->sk_state requirement, AB-BA deadlock could happen only with TCP_LISTEN sockets. So, if the client's state is TCP_LISTEN, we can give up the second lock to avoid the deadlock. CPU 1 CPU 2 CPU 3 connect(A -> B) connect(B -> A) listen(A) --- --- --- unix_state_lock(B) B->sk_state == TCP_LISTEN READ_ONCE(A->sk_state) == TCP_CLOSE ^^^^^^^^^ ok, will lock A unix_state_lock(A) .--------------' WRITE_ONCE(A->sk_state, TCP_LISTEN) | unix_state_unlock(A) | | unix_state_lock(A) | A->sk_sk_state == TCP_LISTEN | READ_ONCE(B->sk_state) == TCP_LISTEN v ^^^^^^^^^^ unix_state_lock_nested(A) Don't lock B !! Currently, while checking the client's state, we also check if it's TCP_ESTABLISHED, but this is unlikely and can be checked after we know the state is not TCP_CLOSE. Moreover, if it happens after the second lock, we now jump to the restart label, but it's unlikely that the server is not found during the retry, so the jump is mostly to revist the client state check. Let's remove the retry logic and check the state against TCP_CLOSE first. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 34 +++++++++------------------------- 1 file changed, 9 insertions(+), 25 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 0657f599bbef..88f2c5d039c4 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1496,6 +1496,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, struct unix_sock *u = unix_sk(sk), *newu, *otheru; struct net *net = sock_net(sk); struct sk_buff *skb = NULL; + unsigned char state; long timeo; int err; @@ -1546,7 +1547,6 @@ restart: goto out; } - /* Latch state of peer */ unix_state_lock(other); /* Apparently VFS overslept socket death. Retry. */ @@ -1576,37 +1576,21 @@ restart: goto restart; } - /* Latch our state. - - It is tricky place. We need to grab our state lock and cannot - drop lock on peer. It is dangerous because deadlock is - possible. Connect to self case and simultaneous - attempt to connect are eliminated by checking socket - state. other is TCP_LISTEN, if sk is TCP_LISTEN we - check this before attempt to grab lock. - - Well, and we have to recheck the state after socket locked. + /* self connect and simultaneous connect are eliminated + * by rejecting TCP_LISTEN socket to avoid deadlock. */ - switch (READ_ONCE(sk->sk_state)) { - case TCP_CLOSE: - /* This is ok... continue with connect */ - break; - case TCP_ESTABLISHED: - /* Socket is already connected */ - err = -EISCONN; - goto out_unlock; - default: - err = -EINVAL; + state = READ_ONCE(sk->sk_state); + if (unlikely(state != TCP_CLOSE)) { + err = state == TCP_ESTABLISHED ? -EISCONN : -EINVAL; goto out_unlock; } unix_state_lock_nested(sk, U_LOCK_SECOND); - if (sk->sk_state != TCP_CLOSE) { + if (unlikely(sk->sk_state != TCP_CLOSE)) { + err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EINVAL; unix_state_unlock(sk); - unix_state_unlock(other); - sock_put(other); - goto restart; + goto out_unlock; } err = security_unix_stream_connect(sk, other, newsk); -- cgit v1.2.3-70-g09d2 From 98f706de445b464f25220360210a4bcb9cc6c41a Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 20 Jun 2024 13:56:16 -0700 Subject: af_unix: Define locking order for U_LOCK_SECOND in unix_stream_connect(). While a SOCK_(STREAM|SEQPACKET) socket connect()s to another, we hold two locks of them by unix_state_lock() and unix_state_lock_nested() in unix_stream_connect(). Before unix_state_lock_nested(), the following is guaranteed by checking sk->sk_state: 1. The first socket is TCP_LISTEN 2. The second socket is not the first one 3. Simultaneous connect() must fail So, the client state can be TCP_CLOSE or TCP_LISTEN or TCP_ESTABLISHED. Let's define the expected states as unix_state_lock_cmp_fn() instead of using unix_state_lock_nested(). Note that 2. is detected by debug_spin_lock_before() and 3. cannot be expressed as lock_cmp_fn. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- include/net/af_unix.h | 1 - net/unix/af_unix.c | 37 ++++++++++++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 2 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index b6eedf7650da..fd813ad73ab8 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -98,7 +98,6 @@ struct unix_sock { #define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock) enum unix_socket_lock_class { U_LOCK_NORMAL, - U_LOCK_SECOND, /* for double locking, see unix_state_double_lock(). */ U_LOCK_DIAG, /* used while dumping icons, see sk_diag_dump_icons(). */ U_LOCK_GC_LISTENER, /* used for listening socket while determining gc * candidates to close a small race window. diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 88f2c5d039c4..a092d6999ae0 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -143,6 +143,41 @@ static int unix_state_lock_cmp_fn(const struct lockdep_map *_a, a = container_of(_a, struct unix_sock, lock.dep_map); b = container_of(_b, struct unix_sock, lock.dep_map); + if (a->sk.sk_state == TCP_LISTEN) { + /* unix_stream_connect(): Before the 2nd unix_state_lock(), + * + * 1. a is TCP_LISTEN. + * 2. b is not a. + * 3. concurrent connect(b -> a) must fail. + * + * Except for 2. & 3., the b's state can be any possible + * value due to concurrent connect() or listen(). + * + * 2. is detected in debug_spin_lock_before(), and 3. cannot + * be expressed as lock_cmp_fn. + */ + switch (b->sk.sk_state) { + case TCP_CLOSE: + case TCP_ESTABLISHED: + case TCP_LISTEN: + return -1; + default: + /* Invalid case. */ + return 0; + } + } + + /* Should never happen. Just to be symmetric. */ + if (b->sk.sk_state == TCP_LISTEN) { + switch (b->sk.sk_state) { + case TCP_CLOSE: + case TCP_ESTABLISHED: + return 1; + default: + return 0; + } + } + /* unix_state_double_lock(): ascending address order. */ return cmp_ptr(a, b); } @@ -1585,7 +1620,7 @@ restart: goto out_unlock; } - unix_state_lock_nested(sk, U_LOCK_SECOND); + unix_state_lock(sk); if (unlikely(sk->sk_state != TCP_CLOSE)) { err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EINVAL; -- cgit v1.2.3-70-g09d2 From 8647ece4814f3bfdb5f7a8e19f882c9b89299a07 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 20 Jun 2024 13:56:20 -0700 Subject: af_unix: Define locking order for U_RECVQ_LOCK_EMBRYO in unix_collect_skb(). While GC is cleaning up cyclic references by SCM_RIGHTS, unix_collect_skb() collects skb in the socket's recvq. If the socket is TCP_LISTEN, we need to collect skb in the embryo's queue. Then, both the listener's recvq lock and the embroy's one are held. The locking is always done in the listener -> embryo order. Let's define it as unix_recvq_lock_cmp_fn() instead of using spin_lock_nested(). Note that the reverse order is defined for consistency. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 21 +++++++++++++++++++++ net/unix/garbage.c | 8 +------- 2 files changed, 22 insertions(+), 7 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index a092d6999ae0..89675879038d 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -181,6 +181,25 @@ static int unix_state_lock_cmp_fn(const struct lockdep_map *_a, /* unix_state_double_lock(): ascending address order. */ return cmp_ptr(a, b); } + +static int unix_recvq_lock_cmp_fn(const struct lockdep_map *_a, + const struct lockdep_map *_b) +{ + const struct sock *a, *b; + + a = container_of(_a, struct sock, sk_receive_queue.lock.dep_map); + b = container_of(_b, struct sock, sk_receive_queue.lock.dep_map); + + /* unix_collect_skb(): listener -> embryo order. */ + if (a->sk_state == TCP_LISTEN && unix_sk(b)->listener == a) + return -1; + + /* Should never happen. Just to be symmetric. */ + if (b->sk_state == TCP_LISTEN && unix_sk(a)->listener == b) + return 1; + + return 0; +} #endif static unsigned int unix_unbound_hash(struct sock *sk) @@ -1028,6 +1047,8 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, sk->sk_write_space = unix_write_space; sk->sk_max_ack_backlog = READ_ONCE(net->unx.sysctl_max_dgram_qlen); sk->sk_destruct = unix_sock_destructor; + lock_set_cmp_fn(&sk->sk_receive_queue.lock, unix_recvq_lock_cmp_fn, NULL); + u = unix_sk(sk); u->listener = NULL; u->vertex = NULL; diff --git a/net/unix/garbage.c b/net/unix/garbage.c index dfe94a90ece4..eb8aa5171a68 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -337,11 +337,6 @@ static bool unix_vertex_dead(struct unix_vertex *vertex) return true; } -enum unix_recv_queue_lock_class { - U_RECVQ_LOCK_NORMAL, - U_RECVQ_LOCK_EMBRYO, -}; - static void unix_collect_queue(struct unix_sock *u, struct sk_buff_head *hitlist) { skb_queue_splice_init(&u->sk.sk_receive_queue, hitlist); @@ -375,8 +370,7 @@ static void unix_collect_skb(struct list_head *scc, struct sk_buff_head *hitlist skb_queue_walk(queue, skb) { struct sk_buff_head *embryo_queue = &skb->sk->sk_receive_queue; - /* listener -> embryo order, the inversion never happens. */ - spin_lock_nested(&embryo_queue->lock, U_RECVQ_LOCK_EMBRYO); + spin_lock(&embryo_queue->lock); unix_collect_queue(unix_sk(skb->sk), hitlist); spin_unlock(&embryo_queue->lock); } -- cgit v1.2.3-70-g09d2 From faf489e6896d645a679d3d90a2d1d5d12c6b3e13 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 20 Jun 2024 13:56:21 -0700 Subject: af_unix: Set sk_peer_pid/sk_peer_cred locklessly for new socket. init_peercred() is called in 3 places: 1. socketpair() : both sockets 2. connect() : child socket 3. listen() : listening socket The first two need not hold sk_peer_lock because no one can touch the socket. Let's set cred/pid without holding lock for the two cases and rename the old init_peercred() to update_peercred() to properly reflect the use case. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 89675879038d..d11664c2faad 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -750,6 +750,12 @@ static void unix_release_sock(struct sock *sk, int embrion) } static void init_peercred(struct sock *sk) +{ + sk->sk_peer_pid = get_pid(task_tgid(current)); + sk->sk_peer_cred = get_current_cred(); +} + +static void update_peercred(struct sock *sk) { const struct cred *old_cred; struct pid *old_pid; @@ -757,8 +763,7 @@ static void init_peercred(struct sock *sk) spin_lock(&sk->sk_peer_lock); old_pid = sk->sk_peer_pid; old_cred = sk->sk_peer_cred; - sk->sk_peer_pid = get_pid(task_tgid(current)); - sk->sk_peer_cred = get_current_cred(); + init_peercred(sk); spin_unlock(&sk->sk_peer_lock); put_pid(old_pid); @@ -810,7 +815,7 @@ static int unix_listen(struct socket *sock, int backlog) WRITE_ONCE(sk->sk_state, TCP_LISTEN); /* set credentials so connect can copy them */ - init_peercred(sk); + update_peercred(sk); err = 0; out_unlock: -- cgit v1.2.3-70-g09d2 From e4bd881d987121dbf1a288641491955a53d9f8f7 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 20 Jun 2024 13:56:22 -0700 Subject: af_unix: Remove put_pid()/put_cred() in copy_peercred(). When (AF_UNIX, SOCK_STREAM) socket connect()s to a listening socket, the listener's sk_peer_pid/sk_peer_cred are copied to the client in copy_peercred(). Then, the client's sk_peer_pid and sk_peer_cred are always NULL, so we need not call put_pid() and put_cred() there. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index d11664c2faad..3d0ace7ca017 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -772,9 +772,6 @@ static void update_peercred(struct sock *sk) static void copy_peercred(struct sock *sk, struct sock *peersk) { - const struct cred *old_cred; - struct pid *old_pid; - if (sk < peersk) { spin_lock(&sk->sk_peer_lock); spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING); @@ -782,16 +779,12 @@ static void copy_peercred(struct sock *sk, struct sock *peersk) spin_lock(&peersk->sk_peer_lock); spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING); } - old_pid = sk->sk_peer_pid; - old_cred = sk->sk_peer_cred; + sk->sk_peer_pid = get_pid(peersk->sk_peer_pid); sk->sk_peer_cred = get_cred(peersk->sk_peer_cred); spin_unlock(&sk->sk_peer_lock); spin_unlock(&peersk->sk_peer_lock); - - put_pid(old_pid); - put_cred(old_cred); } static int unix_listen(struct socket *sock, int backlog) -- cgit v1.2.3-70-g09d2 From 22e5751b0524fedd4f345412d8d3394387471ab7 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 20 Jun 2024 13:56:23 -0700 Subject: af_unix: Don't use spin_lock_nested() in copy_peercred(). When (AF_UNIX, SOCK_STREAM) socket connect()s to a listening socket, the listener's sk_peer_pid/sk_peer_cred are copied to the client in copy_peercred(). Then, two sk_peer_locks are held there; one is client's and another is listener's. However, the latter is not needed because we hold the listner's unix_state_lock() there and unix_listen() cannot update the cred concurrently. Let's drop the unnecessary spin_lock() and use the bare spin_lock() for the client to protect concurrent read by getsockopt(SO_PEERCRED). Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 3d0ace7ca017..103a7909cb1a 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -772,19 +772,12 @@ static void update_peercred(struct sock *sk) static void copy_peercred(struct sock *sk, struct sock *peersk) { - if (sk < peersk) { - spin_lock(&sk->sk_peer_lock); - spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING); - } else { - spin_lock(&peersk->sk_peer_lock); - spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING); - } + lockdep_assert_held(&unix_sk(peersk)->lock); - sk->sk_peer_pid = get_pid(peersk->sk_peer_pid); + spin_lock(&sk->sk_peer_lock); + sk->sk_peer_pid = get_pid(peersk->sk_peer_pid); sk->sk_peer_cred = get_cred(peersk->sk_peer_cred); - spin_unlock(&sk->sk_peer_lock); - spin_unlock(&peersk->sk_peer_lock); } static int unix_listen(struct socket *sock, int backlog) -- cgit v1.2.3-70-g09d2