diff options
| author | Thomas Zimmermann <tzimmermann@suse.de> | 2022-08-17 08:40:32 +0200 | 
|---|---|---|
| committer | Thomas Zimmermann <tzimmermann@suse.de> | 2022-08-17 08:40:32 +0200 | 
| commit | cf36ae3e5802716617b9e4d902a31048240396b0 (patch) | |
| tree | 2729abc9dae00a6ae10608fbd80bef8335442ed2 /net/unix/af_unix.c | |
| parent | 82a1356a933d8443139f8886f11b63c974a09a67 (diff) | |
| parent | 568035b01cfb107af8d2e4bd2fb9aea22cf5b868 (diff) | |
Merge drm/drm-fixes into drm-misc-fixes
Backmerging for v6.0-rc1.
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Diffstat (limited to 'net/unix/af_unix.c')
| -rw-r--r-- | net/unix/af_unix.c | 296 | 
1 files changed, 168 insertions, 128 deletions
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 654dcef7cfb3..bf338b782fc4 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -118,15 +118,13 @@  #include "scm.h" -spinlock_t unix_table_locks[2 * UNIX_HASH_SIZE]; -EXPORT_SYMBOL_GPL(unix_table_locks); -struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; -EXPORT_SYMBOL_GPL(unix_socket_table);  static atomic_long_t unix_nr_socks; +static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2]; +static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];  /* SMP locking strategy: - *    hash table is protected with spinlock unix_table_locks - *    each socket state is protected by separate spin lock. + *    hash table is protected with spinlock. + *    each socket state is protected by separate spinlock.   */  static unsigned int unix_unbound_hash(struct sock *sk) @@ -137,12 +135,12 @@ static unsigned int unix_unbound_hash(struct sock *sk)  	hash ^= hash >> 8;  	hash ^= sk->sk_type; -	return UNIX_HASH_SIZE + (hash & (UNIX_HASH_SIZE - 1)); +	return hash & UNIX_HASH_MOD;  }  static unsigned int unix_bsd_hash(struct inode *i)  { -	return i->i_ino & (UNIX_HASH_SIZE - 1); +	return i->i_ino & UNIX_HASH_MOD;  }  static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr, @@ -155,26 +153,34 @@ static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr,  	hash ^= hash >> 8;  	hash ^= type; -	return hash & (UNIX_HASH_SIZE - 1); +	return UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD);  } -static void unix_table_double_lock(unsigned int hash1, unsigned int hash2) +static void unix_table_double_lock(struct net *net, +				   unsigned int hash1, unsigned int hash2)  { -	/* hash1 and hash2 is never the same because -	 * one is between 0 and UNIX_HASH_SIZE - 1, and -	 * another is between UNIX_HASH_SIZE and UNIX_HASH_SIZE * 2. -	 */ +	if (hash1 == hash2) { +		spin_lock(&net->unx.table.locks[hash1]); +		return; +	} +  	if (hash1 > hash2)  		swap(hash1, hash2); -	spin_lock(&unix_table_locks[hash1]); -	spin_lock_nested(&unix_table_locks[hash2], SINGLE_DEPTH_NESTING); +	spin_lock(&net->unx.table.locks[hash1]); +	spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING);  } -static void unix_table_double_unlock(unsigned int hash1, unsigned int hash2) +static void unix_table_double_unlock(struct net *net, +				     unsigned int hash1, unsigned int hash2)  { -	spin_unlock(&unix_table_locks[hash1]); -	spin_unlock(&unix_table_locks[hash2]); +	if (hash1 == hash2) { +		spin_unlock(&net->unx.table.locks[hash1]); +		return; +	} + +	spin_unlock(&net->unx.table.locks[hash1]); +	spin_unlock(&net->unx.table.locks[hash2]);  }  #ifdef CONFIG_SECURITY_NETWORK @@ -300,34 +306,52 @@ static void __unix_remove_socket(struct sock *sk)  	sk_del_node_init(sk);  } -static void __unix_insert_socket(struct sock *sk) +static void __unix_insert_socket(struct net *net, struct sock *sk)  { -	WARN_ON(!sk_unhashed(sk)); -	sk_add_node(sk, &unix_socket_table[sk->sk_hash]); +	DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk)); +	sk_add_node(sk, &net->unx.table.buckets[sk->sk_hash]);  } -static void __unix_set_addr_hash(struct sock *sk, struct unix_address *addr, -				 unsigned int hash) +static void __unix_set_addr_hash(struct net *net, struct sock *sk, +				 struct unix_address *addr, unsigned int hash)  {  	__unix_remove_socket(sk);  	smp_store_release(&unix_sk(sk)->addr, addr);  	sk->sk_hash = hash; -	__unix_insert_socket(sk); +	__unix_insert_socket(net, sk);  } -static void unix_remove_socket(struct sock *sk) +static void unix_remove_socket(struct net *net, struct sock *sk)  { -	spin_lock(&unix_table_locks[sk->sk_hash]); +	spin_lock(&net->unx.table.locks[sk->sk_hash]);  	__unix_remove_socket(sk); -	spin_unlock(&unix_table_locks[sk->sk_hash]); +	spin_unlock(&net->unx.table.locks[sk->sk_hash]); +} + +static void unix_insert_unbound_socket(struct net *net, struct sock *sk) +{ +	spin_lock(&net->unx.table.locks[sk->sk_hash]); +	__unix_insert_socket(net, sk); +	spin_unlock(&net->unx.table.locks[sk->sk_hash]); +} + +static void unix_insert_bsd_socket(struct sock *sk) +{ +	spin_lock(&bsd_socket_locks[sk->sk_hash]); +	sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]); +	spin_unlock(&bsd_socket_locks[sk->sk_hash]);  } -static void unix_insert_unbound_socket(struct sock *sk) +static void unix_remove_bsd_socket(struct sock *sk)  { -	spin_lock(&unix_table_locks[sk->sk_hash]); -	__unix_insert_socket(sk); -	spin_unlock(&unix_table_locks[sk->sk_hash]); +	if (!hlist_unhashed(&sk->sk_bind_node)) { +		spin_lock(&bsd_socket_locks[sk->sk_hash]); +		__sk_del_bind_node(sk); +		spin_unlock(&bsd_socket_locks[sk->sk_hash]); + +		sk_node_init(&sk->sk_bind_node); +	}  }  static struct sock *__unix_find_socket_byname(struct net *net, @@ -336,12 +360,9 @@ static struct sock *__unix_find_socket_byname(struct net *net,  {  	struct sock *s; -	sk_for_each(s, &unix_socket_table[hash]) { +	sk_for_each(s, &net->unx.table.buckets[hash]) {  		struct unix_sock *u = unix_sk(s); -		if (!net_eq(sock_net(s), net)) -			continue; -  		if (u->addr->len == len &&  		    !memcmp(u->addr->name, sunname, len))  			return s; @@ -355,11 +376,11 @@ static inline struct sock *unix_find_socket_byname(struct net *net,  {  	struct sock *s; -	spin_lock(&unix_table_locks[hash]); +	spin_lock(&net->unx.table.locks[hash]);  	s = __unix_find_socket_byname(net, sunname, len, hash);  	if (s)  		sock_hold(s); -	spin_unlock(&unix_table_locks[hash]); +	spin_unlock(&net->unx.table.locks[hash]);  	return s;  } @@ -368,17 +389,17 @@ static struct sock *unix_find_socket_byinode(struct inode *i)  	unsigned int hash = unix_bsd_hash(i);  	struct sock *s; -	spin_lock(&unix_table_locks[hash]); -	sk_for_each(s, &unix_socket_table[hash]) { +	spin_lock(&bsd_socket_locks[hash]); +	sk_for_each_bound(s, &bsd_socket_buckets[hash]) {  		struct dentry *dentry = unix_sk(s)->path.dentry;  		if (dentry && d_backing_inode(dentry) == i) {  			sock_hold(s); -			spin_unlock(&unix_table_locks[hash]); +			spin_unlock(&bsd_socket_locks[hash]);  			return s;  		}  	} -	spin_unlock(&unix_table_locks[hash]); +	spin_unlock(&bsd_socket_locks[hash]);  	return NULL;  } @@ -490,7 +511,7 @@ static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)  	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs  	 * to other and its full, we will hang waiting for POLLOUT.  	 */ -	if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD)) +	if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))  		return 1;  	if (connected) @@ -554,9 +575,9 @@ static void unix_sock_destructor(struct sock *sk)  		u->oob_skb = NULL;  	}  #endif -	WARN_ON(refcount_read(&sk->sk_wmem_alloc)); -	WARN_ON(!sk_unhashed(sk)); -	WARN_ON(sk->sk_socket); +	DEBUG_NET_WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc)); +	DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk)); +	DEBUG_NET_WARN_ON_ONCE(sk->sk_socket);  	if (!sock_flag(sk, SOCK_DEAD)) {  		pr_info("Attempt to release alive unix socket: %p\n", sk);  		return; @@ -576,12 +597,13 @@ static void unix_sock_destructor(struct sock *sk)  static void unix_release_sock(struct sock *sk, int embrion)  {  	struct unix_sock *u = unix_sk(sk); -	struct path path;  	struct sock *skpair;  	struct sk_buff *skb; +	struct path path;  	int state; -	unix_remove_socket(sk); +	unix_remove_socket(sock_net(sk), sk); +	unix_remove_bsd_socket(sk);  	/* Clear state */  	unix_state_lock(sk); @@ -741,10 +763,8 @@ static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,  				       unsigned int flags);  static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);  static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int); -static int unix_read_sock(struct sock *sk, read_descriptor_t *desc, -			  sk_read_actor_t recv_actor); -static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc, -				 sk_read_actor_t recv_actor); +static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor); +static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor);  static int unix_dgram_connect(struct socket *, struct sockaddr *,  			      int, int);  static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t); @@ -798,7 +818,7 @@ static const struct proto_ops unix_stream_ops = {  	.shutdown =	unix_shutdown,  	.sendmsg =	unix_stream_sendmsg,  	.recvmsg =	unix_stream_recvmsg, -	.read_sock =	unix_stream_read_sock, +	.read_skb =	unix_stream_read_skb,  	.mmap =		sock_no_mmap,  	.sendpage =	unix_stream_sendpage,  	.splice_read =	unix_stream_splice_read, @@ -823,7 +843,7 @@ static const struct proto_ops unix_dgram_ops = {  	.listen =	sock_no_listen,  	.shutdown =	unix_shutdown,  	.sendmsg =	unix_dgram_sendmsg, -	.read_sock =	unix_read_sock, +	.read_skb =	unix_read_skb,  	.recvmsg =	unix_dgram_recvmsg,  	.mmap =		sock_no_mmap,  	.sendpage =	sock_no_sendpage, @@ -930,9 +950,9 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern,  	init_waitqueue_head(&u->peer_wait);  	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);  	memset(&u->scm_stat, 0, sizeof(struct scm_stat)); -	unix_insert_unbound_socket(sk); +	unix_insert_unbound_socket(net, sk); -	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); +	sock_prot_inuse_add(net, sk->sk_prot, 1);  	return sk; @@ -993,8 +1013,8 @@ static int unix_release(struct socket *sock)  	return 0;  } -static struct sock *unix_find_bsd(struct net *net, struct sockaddr_un *sunaddr, -				  int addr_len, int type) +static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len, +				  int type)  {  	struct inode *inode;  	struct path path; @@ -1063,7 +1083,7 @@ static struct sock *unix_find_other(struct net *net,  	struct sock *sk;  	if (sunaddr->sun_path[0]) -		sk = unix_find_bsd(net, sunaddr, addr_len, type); +		sk = unix_find_bsd(sunaddr, addr_len, type);  	else  		sk = unix_find_abstract(net, sunaddr, addr_len, type); @@ -1074,6 +1094,7 @@ static int unix_autobind(struct sock *sk)  {  	unsigned int new_hash, old_hash = sk->sk_hash;  	struct unix_sock *u = unix_sk(sk); +	struct net *net = sock_net(sk);  	struct unix_address *addr;  	u32 lastnum, ordernum;  	int err; @@ -1102,11 +1123,10 @@ retry:  	sprintf(addr->name->sun_path + 1, "%05x", ordernum);  	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type); -	unix_table_double_lock(old_hash, new_hash); +	unix_table_double_lock(net, old_hash, new_hash); -	if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len, -				      new_hash)) { -		unix_table_double_unlock(old_hash, new_hash); +	if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) { +		unix_table_double_unlock(net, old_hash, new_hash);  		/* __unix_find_socket_byname() may take long time if many names  		 * are already in use. @@ -1123,8 +1143,8 @@ retry:  		goto retry;  	} -	__unix_set_addr_hash(sk, addr, new_hash); -	unix_table_double_unlock(old_hash, new_hash); +	__unix_set_addr_hash(net, sk, addr, new_hash); +	unix_table_double_unlock(net, old_hash, new_hash);  	err = 0;  out:	mutex_unlock(&u->bindlock); @@ -1138,6 +1158,7 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,  	       (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());  	unsigned int new_hash, old_hash = sk->sk_hash;  	struct unix_sock *u = unix_sk(sk); +	struct net *net = sock_net(sk);  	struct user_namespace *ns; // barf...  	struct unix_address *addr;  	struct dentry *dentry; @@ -1178,11 +1199,12 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,  		goto out_unlock;  	new_hash = unix_bsd_hash(d_backing_inode(dentry)); -	unix_table_double_lock(old_hash, new_hash); +	unix_table_double_lock(net, old_hash, new_hash);  	u->path.mnt = mntget(parent.mnt);  	u->path.dentry = dget(dentry); -	__unix_set_addr_hash(sk, addr, new_hash); -	unix_table_double_unlock(old_hash, new_hash); +	__unix_set_addr_hash(net, sk, addr, new_hash); +	unix_table_double_unlock(net, old_hash, new_hash); +	unix_insert_bsd_socket(sk);  	mutex_unlock(&u->bindlock);  	done_path_create(&parent, dentry);  	return 0; @@ -1205,6 +1227,7 @@ static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,  {  	unsigned int new_hash, old_hash = sk->sk_hash;  	struct unix_sock *u = unix_sk(sk); +	struct net *net = sock_net(sk);  	struct unix_address *addr;  	int err; @@ -1222,19 +1245,18 @@ static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,  	}  	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type); -	unix_table_double_lock(old_hash, new_hash); +	unix_table_double_lock(net, old_hash, new_hash); -	if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len, -				      new_hash)) +	if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash))  		goto out_spin; -	__unix_set_addr_hash(sk, addr, new_hash); -	unix_table_double_unlock(old_hash, new_hash); +	__unix_set_addr_hash(net, sk, addr, new_hash); +	unix_table_double_unlock(net, old_hash, new_hash);  	mutex_unlock(&u->bindlock);  	return 0;  out_spin: -	unix_table_double_unlock(old_hash, new_hash); +	unix_table_double_unlock(net, old_hash, new_hash);  	err = -EADDRINUSE;  out_mutex:  	mutex_unlock(&u->bindlock); @@ -1293,9 +1315,8 @@ static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)  static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,  			      int alen, int flags)  { -	struct sock *sk = sock->sk; -	struct net *net = sock_net(sk);  	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr; +	struct sock *sk = sock->sk;  	struct sock *other;  	int err; @@ -1316,7 +1337,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,  		}  restart: -		other = unix_find_other(net, sunaddr, alen, sock->type); +		other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type);  		if (IS_ERR(other)) {  			err = PTR_ERR(other);  			goto out; @@ -1404,15 +1425,13 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,  			       int addr_len, int flags)  {  	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; -	struct sock *sk = sock->sk; -	struct net *net = sock_net(sk); +	struct sock *sk = sock->sk, *newsk = NULL, *other = NULL;  	struct unix_sock *u = unix_sk(sk), *newu, *otheru; -	struct sock *newsk = NULL; -	struct sock *other = NULL; +	struct net *net = sock_net(sk);  	struct sk_buff *skb = NULL; -	int st; -	int err;  	long timeo; +	int err; +	int st;  	err = unix_validate_addr(sunaddr, addr_len);  	if (err) @@ -1432,7 +1451,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,  	 */  	/* create new sock for complete connection */ -	newsk = unix_create1(sock_net(sk), NULL, 0, sock->type); +	newsk = unix_create1(net, NULL, 0, sock->type);  	if (IS_ERR(newsk)) {  		err = PTR_ERR(newsk);  		newsk = NULL; @@ -1541,9 +1560,9 @@ restart:  	 *  	 * The contents of *(otheru->addr) and otheru->path  	 * are seen fully set up here, since we have found -	 * otheru in hash under unix_table_locks.  Insertion -	 * into the hash chain we'd found it in had been done -	 * in an earlier critical area protected by unix_table_locks, +	 * otheru in hash under its lock.  Insertion into the +	 * hash chain we'd found it in had been done in an +	 * earlier critical area protected by the chain's lock,  	 * the same one where we'd set *(otheru->addr) contents,  	 * as well as otheru->path and otheru->addr itself.  	 * @@ -1840,17 +1859,15 @@ static void scm_stat_del(struct sock *sk, struct sk_buff *skb)  static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,  			      size_t len)  { -	struct sock *sk = sock->sk; -	struct net *net = sock_net(sk); -	struct unix_sock *u = unix_sk(sk);  	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name); -	struct sock *other = NULL; -	int err; -	struct sk_buff *skb; -	long timeo; +	struct sock *sk = sock->sk, *other = NULL; +	struct unix_sock *u = unix_sk(sk);  	struct scm_cookie scm; +	struct sk_buff *skb;  	int data_len = 0;  	int sk_locked; +	long timeo; +	int err;  	wait_for_unix_gc();  	err = scm_send(sock, msg, &scm, false); @@ -1917,7 +1934,7 @@ restart:  		if (sunaddr == NULL)  			goto out_free; -		other = unix_find_other(net, sunaddr, msg->msg_namelen, +		other = unix_find_other(sock_net(sk), sunaddr, msg->msg_namelen,  					sk->sk_type);  		if (IS_ERR(other)) {  			err = PTR_ERR(other); @@ -2487,8 +2504,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t si  	return __unix_dgram_recvmsg(sk, msg, size, flags);  } -static int unix_read_sock(struct sock *sk, read_descriptor_t *desc, -			  sk_read_actor_t recv_actor) +static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)  {  	int copied = 0; @@ -2503,7 +2519,7 @@ static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,  		if (!skb)  			return err; -		used = recv_actor(desc, skb, 0, skb->len); +		used = recv_actor(sk, skb);  		if (used <= 0) {  			if (!copied)  				copied = used; @@ -2514,8 +2530,7 @@ static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,  		}  		kfree_skb(skb); -		if (!desc->count) -			break; +		break;  	}  	return copied; @@ -2650,13 +2665,12 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,  }  #endif -static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc, -				 sk_read_actor_t recv_actor) +static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)  {  	if (unlikely(sk->sk_state != TCP_ESTABLISHED))  		return -ENOTCONN; -	return unix_read_sock(sk, desc, recv_actor); +	return unix_read_skb(sk, recv_actor);  }  static int unix_stream_read_generic(struct unix_stream_read_state *state, @@ -3226,12 +3240,11 @@ static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)  {  	unsigned long offset = get_offset(*pos);  	unsigned long bucket = get_bucket(*pos); -	struct sock *sk;  	unsigned long count = 0; +	struct sock *sk; -	for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) { -		if (sock_net(sk) != seq_file_net(seq)) -			continue; +	for (sk = sk_head(&seq_file_net(seq)->unx.table.buckets[bucket]); +	     sk; sk = sk_next(sk)) {  		if (++count == offset)  			break;  	} @@ -3242,16 +3255,17 @@ static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)  static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos)  {  	unsigned long bucket = get_bucket(*pos); +	struct net *net = seq_file_net(seq);  	struct sock *sk; -	while (bucket < ARRAY_SIZE(unix_socket_table)) { -		spin_lock(&unix_table_locks[bucket]); +	while (bucket < UNIX_HASH_SIZE) { +		spin_lock(&net->unx.table.locks[bucket]);  		sk = unix_from_bucket(seq, pos);  		if (sk)  			return sk; -		spin_unlock(&unix_table_locks[bucket]); +		spin_unlock(&net->unx.table.locks[bucket]);  		*pos = set_bucket_offset(++bucket, 1);  	} @@ -3264,11 +3278,12 @@ static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk,  {  	unsigned long bucket = get_bucket(*pos); -	for (sk = sk_next(sk); sk; sk = sk_next(sk)) -		if (sock_net(sk) == seq_file_net(seq)) -			return sk; +	sk = sk_next(sk); +	if (sk) +		return sk; -	spin_unlock(&unix_table_locks[bucket]); + +	spin_unlock(&seq_file_net(seq)->unx.table.locks[bucket]);  	*pos = set_bucket_offset(++bucket, 1); @@ -3298,7 +3313,7 @@ static void unix_seq_stop(struct seq_file *seq, void *v)  	struct sock *sk = v;  	if (sk) -		spin_unlock(&unix_table_locks[sk->sk_hash]); +		spin_unlock(&seq_file_net(seq)->unx.table.locks[sk->sk_hash]);  }  static int unix_seq_show(struct seq_file *seq, void *v) @@ -3323,7 +3338,7 @@ static int unix_seq_show(struct seq_file *seq, void *v)  			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),  			sock_i_ino(s)); -		if (u->addr) {	// under unix_table_locks here +		if (u->addr) {	// under a hash table lock here  			int i, len;  			seq_putc(seq, ' '); @@ -3393,9 +3408,6 @@ static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk)  	iter->batch[iter->end_sk++] = start_sk;  	for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) { -		if (sock_net(sk) != seq_file_net(seq)) -			continue; -  		if (iter->end_sk < iter->max_sk) {  			sock_hold(sk);  			iter->batch[iter->end_sk++] = sk; @@ -3404,7 +3416,7 @@ static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk)  		expected++;  	} -	spin_unlock(&unix_table_locks[start_sk->sk_hash]); +	spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]);  	return expected;  } @@ -3564,7 +3576,7 @@ static const struct net_proto_family unix_family_ops = {  static int __net_init unix_net_init(struct net *net)  { -	int error = -ENOMEM; +	int i;  	net->unx.sysctl_max_dgram_qlen = 10;  	if (unix_sysctl_register(net)) @@ -3572,18 +3584,44 @@ static int __net_init unix_net_init(struct net *net)  #ifdef CONFIG_PROC_FS  	if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops, -			sizeof(struct seq_net_private))) { -		unix_sysctl_unregister(net); -		goto out; +			     sizeof(struct seq_net_private))) +		goto err_sysctl; +#endif + +	net->unx.table.locks = kvmalloc_array(UNIX_HASH_SIZE, +					      sizeof(spinlock_t), GFP_KERNEL); +	if (!net->unx.table.locks) +		goto err_proc; + +	net->unx.table.buckets = kvmalloc_array(UNIX_HASH_SIZE, +						sizeof(struct hlist_head), +						GFP_KERNEL); +	if (!net->unx.table.buckets) +		goto free_locks; + +	for (i = 0; i < UNIX_HASH_SIZE; i++) { +		spin_lock_init(&net->unx.table.locks[i]); +		INIT_HLIST_HEAD(&net->unx.table.buckets[i]);  	} + +	return 0; + +free_locks: +	kvfree(net->unx.table.locks); +err_proc: +#ifdef CONFIG_PROC_FS +	remove_proc_entry("unix", net->proc_net); +err_sysctl:  #endif -	error = 0; +	unix_sysctl_unregister(net);  out: -	return error; +	return -ENOMEM;  }  static void __net_exit unix_net_exit(struct net *net)  { +	kvfree(net->unx.table.buckets); +	kvfree(net->unx.table.locks);  	unix_sysctl_unregister(net);  	remove_proc_entry("unix", net->proc_net);  } @@ -3671,8 +3709,10 @@ static int __init af_unix_init(void)  	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb)); -	for (i = 0; i < 2 * UNIX_HASH_SIZE; i++) -		spin_lock_init(&unix_table_locks[i]); +	for (i = 0; i < UNIX_HASH_SIZE / 2; i++) { +		spin_lock_init(&bsd_socket_locks[i]); +		INIT_HLIST_HEAD(&bsd_socket_buckets[i]); +	}  	rc = proto_register(&unix_dgram_proto, 1);  	if (rc != 0) {  | 
