Diffstat (limited to 'net/mptcp/protocol.c')
-rw-r--r--	net/mptcp/protocol.c	140
1 file changed, 78 insertions, 62 deletions
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 08dc53f56bc2..67311e7d5b21 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -90,8 +90,8 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
 	if (err)
 		return err;
 
-	msk->first = ssock->sk;
-	msk->subflow = ssock;
+	WRITE_ONCE(msk->first, ssock->sk);
+	WRITE_ONCE(msk->subflow, ssock);
 	subflow = mptcp_subflow_ctx(ssock->sk);
 	list_add(&subflow->node, &msk->conn_list);
 	sock_hold(ssock->sk);
@@ -603,7 +603,7 @@ static bool mptcp_check_data_fin(struct sock *sk)
 		WRITE_ONCE(msk->ack_seq, msk->ack_seq + 1);
 		WRITE_ONCE(msk->rcv_data_fin, 0);
 
-		sk->sk_shutdown |= RCV_SHUTDOWN;
+		WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | RCV_SHUTDOWN);
 		smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
 
 		switch (sk->sk_state) {
@@ -825,6 +825,13 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
 	mptcp_data_unlock(sk);
 }
 
+static void mptcp_subflow_joined(struct mptcp_sock *msk, struct sock *ssk)
+{
+	mptcp_subflow_ctx(ssk)->map_seq = READ_ONCE(msk->ack_seq);
+	WRITE_ONCE(msk->allow_infinite_fallback, false);
+	mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC);
+}
+
 static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
 {
 	struct sock *sk = (struct sock *)msk;
@@ -839,6 +846,7 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
 		mptcp_sock_graft(ssk, sk->sk_socket);
 
 	mptcp_sockopt_sync_locked(msk, ssk);
+	mptcp_subflow_joined(msk, ssk);
 	return true;
 }
@@ -910,7 +918,7 @@ static void mptcp_check_for_eof(struct mptcp_sock *msk)
 		/* hopefully temporary hack: propagate shutdown status
 		 * to msk, when all subflows agree on it
 		 */
-		sk->sk_shutdown |= RCV_SHUTDOWN;
+		WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | RCV_SHUTDOWN);
 		smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
 
 		sk->sk_data_ready(sk);
@@ -1702,7 +1710,6 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
 	lock_sock(ssk);
 	msg->msg_flags |= MSG_DONTWAIT;
-	msk->connect_flags = O_NONBLOCK;
 	msk->fastopening = 1;
 	ret = tcp_sendmsg_fastopen(ssk, msg, copied_syn, len, NULL);
 	msk->fastopening = 0;
@@ -2283,7 +2290,7 @@ static void mptcp_dispose_initial_subflow(struct mptcp_sock *msk)
 {
 	if (msk->subflow) {
 		iput(SOCK_INODE(msk->subflow));
-		msk->subflow = NULL;
+		WRITE_ONCE(msk->subflow, NULL);
 	}
 }
@@ -2420,7 +2427,7 @@ out_release:
 	sock_put(ssk);
 
 	if (ssk == msk->first)
-		msk->first = NULL;
+		WRITE_ONCE(msk->first, NULL);
 
 out:
 	if (ssk == msk->last_snd)
@@ -2527,7 +2534,7 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
 	}
 
 	inet_sk_state_store(sk, TCP_CLOSE);
-	sk->sk_shutdown = SHUTDOWN_MASK;
+	WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
 	smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
 	set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags);
@@ -2721,7 +2728,7 @@ static int __mptcp_init_sock(struct sock *sk)
 	WRITE_ONCE(msk->rmem_released, 0);
 	msk->timer_ival = TCP_RTO_MIN;
-	msk->first = NULL;
+	WRITE_ONCE(msk->first, NULL);
 	inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
 	WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
 	WRITE_ONCE(msk->allow_infinite_fallback, true);
@@ -2959,7 +2966,7 @@ bool __mptcp_close(struct sock *sk, long timeout)
 	bool do_cancel_work = false;
 	int subflows_alive = 0;
 
-	sk->sk_shutdown = SHUTDOWN_MASK;
+	WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
 
 	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) {
 		mptcp_listen_inuse_dec(sk);
@@ -3039,7 +3046,7 @@ static void mptcp_close(struct sock *sk, long timeout)
 	sock_put(sk);
 }
 
-void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk)
+static void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk)
 {
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
 	const struct ipv6_pinfo *ssk6 = inet6_sk(ssk);
@@ -3102,7 +3109,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
 	mptcp_pm_data_reset(msk);
 	mptcp_ca_reset(sk);
 
-	sk->sk_shutdown = 0;
+	WRITE_ONCE(sk->sk_shutdown, 0);
 	sk_error_report(sk);
 	return 0;
 }
@@ -3116,9 +3123,10 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk)
 }
 #endif
 
-struct sock *mptcp_sk_clone(const struct sock *sk,
-			    const struct mptcp_options_received *mp_opt,
-			    struct request_sock *req)
+struct sock *mptcp_sk_clone_init(const struct sock *sk,
+				 const struct mptcp_options_received *mp_opt,
+				 struct sock *ssk,
+				 struct request_sock *req)
 {
 	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
 	struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC);
@@ -3137,7 +3145,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
 	msk = mptcp_sk(nsk);
 	msk->local_key = subflow_req->local_key;
 	msk->token = subflow_req->token;
-	msk->subflow = NULL;
+	WRITE_ONCE(msk->subflow, NULL);
 	msk->in_accept_queue = 1;
 	WRITE_ONCE(msk->fully_established, false);
 	if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD)
@@ -3150,10 +3158,30 @@
 	msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
 	sock_reset_flag(nsk, SOCK_RCU_FREE);
-	/* will be fully established after successful MPC subflow creation */
-	inet_sk_state_store(nsk, TCP_SYN_RECV);
-
 	security_inet_csk_clone(nsk, req);
+
+	/* this can't race with mptcp_close(), as the msk is
+	 * not yet exposted to user-space
+	 */
+	inet_sk_state_store(nsk, TCP_ESTABLISHED);
+
+	/* The msk maintain a ref to each subflow in the connections list */
+	WRITE_ONCE(msk->first, ssk);
+	list_add(&mptcp_subflow_ctx(ssk)->node, &msk->conn_list);
+	sock_hold(ssk);
+
+	/* new mpc subflow takes ownership of the newly
+	 * created mptcp socket
+	 */
+	mptcp_token_accept(subflow_req, msk);
+
+	/* set msk addresses early to ensure mptcp_pm_get_local_id()
+	 * uses the correct data
+	 */
+	mptcp_copy_inaddrs(nsk, ssk);
+	mptcp_propagate_sndbuf(nsk, ssk);
+
+	mptcp_rcv_space_init(msk, ssk);
 
 	bh_unlock_sock(nsk);
 
 	/* note: the newly allocated socket refcount is 2 now */
@@ -3185,7 +3213,7 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
 	struct socket *listener;
 	struct sock *newsk;
 
-	listener = msk->subflow;
+	listener = READ_ONCE(msk->subflow);
 	if (WARN_ON_ONCE(!listener)) {
 		*err = -EINVAL;
 		return NULL;
@@ -3465,14 +3493,16 @@ bool mptcp_finish_join(struct sock *ssk)
 		return false;
 	}
 
-	if (!list_empty(&subflow->node))
-		goto out;
+	/* active subflow, already present inside the conn_list */
+	if (!list_empty(&subflow->node)) {
+		mptcp_subflow_joined(msk, ssk);
+		return true;
+	}
 
 	if (!mptcp_pm_allow_new_subflow(msk))
 		goto err_prohibited;
 
-	/* active connections are already on conn_list.
-	 * If we can't acquire msk socket lock here, let the release callback
+	/* If we can't acquire msk socket lock here, let the release callback
 	 * handle it
 	 */
 	mptcp_data_lock(parent);
@@ -3495,11 +3525,6 @@ err_prohibited:
 		return false;
 	}
 
-	subflow->map_seq = READ_ONCE(msk->ack_seq);
-	WRITE_ONCE(msk->allow_infinite_fallback, false);
-
-out:
-	mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC);
 	return true;
 }
@@ -3617,9 +3642,9 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	 * acquired the subflow socket lock, too.
 	 */
 	if (msk->fastopening)
-		err = __inet_stream_connect(ssock, uaddr, addr_len, msk->connect_flags, 1);
+		err = __inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK, 1);
 	else
-		err = inet_stream_connect(ssock, uaddr, addr_len, msk->connect_flags);
+		err = inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK);
 	inet_sk(sk)->defer_connect = inet_sk(ssock->sk)->defer_connect;
 
 	/* on successful connect, the msk state will be moved to established by
@@ -3632,12 +3657,10 @@
 	mptcp_copy_inaddrs(sk, ssock->sk);
 
-	/* unblocking connect, mptcp-level inet_stream_connect will error out
-	 * without changing the socket state, update it here.
+	/* silence EINPROGRESS and let the caller inet_stream_connect
+	 * handle the connection in progress
 	 */
-	if (err == -EINPROGRESS)
-		sk->sk_socket->state = ssock->state;
-	return err;
+	return 0;
 }
 
 static struct proto mptcp_prot = {
@@ -3696,18 +3719,6 @@ unlock:
 	return err;
 }
 
-static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
-				int addr_len, int flags)
-{
-	int ret;
-
-	lock_sock(sock->sk);
-	mptcp_sk(sock->sk)->connect_flags = flags;
-	ret = __inet_stream_connect(sock, uaddr, addr_len, flags, 0);
-	release_sock(sock->sk);
-	return ret;
-}
-
 static int mptcp_listen(struct socket *sock, int backlog)
 {
 	struct mptcp_sock *msk = mptcp_sk(sock->sk);
@@ -3751,10 +3762,10 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
 	pr_debug("msk=%p", msk);
 
-	/* buggy applications can call accept on socket states other then LISTEN
+	/* Buggy applications can call accept on socket states other then LISTEN
 	 * but no need to allocate the first subflow just to error out.
 	 */
-	ssock = msk->subflow;
+	ssock = READ_ONCE(msk->subflow);
 	if (!ssock)
 		return -EINVAL;
@@ -3800,9 +3811,6 @@ static __poll_t mptcp_check_writeable(struct mptcp_sock *msk)
 {
 	struct sock *sk = (struct sock *)msk;
 
-	if (unlikely(sk->sk_shutdown & SEND_SHUTDOWN))
-		return EPOLLOUT | EPOLLWRNORM;
-
 	if (sk_stream_is_writeable(sk))
 		return EPOLLOUT | EPOLLWRNORM;
@@ -3820,6 +3828,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
 	struct sock *sk = sock->sk;
 	struct mptcp_sock *msk;
 	__poll_t mask = 0;
+	u8 shutdown;
 	int state;
 
 	msk = mptcp_sk(sk);
@@ -3828,23 +3837,30 @@
 	state = inet_sk_state_load(sk);
 	pr_debug("msk=%p state=%d flags=%lx", msk, state, msk->flags);
 	if (state == TCP_LISTEN) {
-		if (WARN_ON_ONCE(!msk->subflow || !msk->subflow->sk))
+		struct socket *ssock = READ_ONCE(msk->subflow);
+
+		if (WARN_ON_ONCE(!ssock || !ssock->sk))
 			return 0;
 
-		return inet_csk_listen_poll(msk->subflow->sk);
+		return inet_csk_listen_poll(ssock->sk);
 	}
 
+	shutdown = READ_ONCE(sk->sk_shutdown);
+	if (shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
+		mask |= EPOLLHUP;
+	if (shutdown & RCV_SHUTDOWN)
+		mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
+
 	if (state != TCP_SYN_SENT && state != TCP_SYN_RECV) {
 		mask |= mptcp_check_readable(msk);
-		mask |= mptcp_check_writeable(msk);
+		if (shutdown & SEND_SHUTDOWN)
+			mask |= EPOLLOUT | EPOLLWRNORM;
+		else
+			mask |= mptcp_check_writeable(msk);
 	} else if (state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) {
 		/* cf tcp_poll() note about TFO */
 		mask |= EPOLLOUT | EPOLLWRNORM;
 	}
-	if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
-		mask |= EPOLLHUP;
-	if (sk->sk_shutdown & RCV_SHUTDOWN)
-		mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
 
 	/* This barrier is coupled with smp_wmb() in __mptcp_error_report() */
 	smp_rmb();
@@ -3859,7 +3875,7 @@ static const struct proto_ops mptcp_stream_ops = {
 	.owner		   = THIS_MODULE,
 	.release	   = inet_release,
 	.bind		   = mptcp_bind,
-	.connect	   = mptcp_stream_connect,
+	.connect	   = inet_stream_connect,
 	.socketpair	   = sock_no_socketpair,
 	.accept		   = mptcp_stream_accept,
 	.getname	   = inet_getname,
@@ -3954,7 +3970,7 @@ static const struct proto_ops mptcp_v6_stream_ops = {
 	.owner		   = THIS_MODULE,
 	.release	   = inet6_release,
 	.bind		   = mptcp_bind,
-	.connect	   = mptcp_stream_connect,
+	.connect	   = inet_stream_connect,
 	.socketpair	   = sock_no_socketpair,
 	.accept		   = mptcp_stream_accept,
 	.getname	   = inet6_getname,
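
Most of the mechanical churn in this diff converts plain loads and stores of msk->first, msk->subflow and sk->sk_shutdown into WRITE_ONCE()/READ_ONCE() annotations, because mptcp_poll(), mptcp_accept() and mptcp_stream_accept() now read those fields without holding the msk socket lock. The following is a minimal user-space sketch of that pattern, not the kernel implementation: the WRITE_ONCE/READ_ONCE macros are simplified stand-ins, and the sk_shutdown variable, thread functions and printed "mask" lines are invented for the demo. The reader takes a single snapshot of the shutdown flags, as the reworked mptcp_poll() does, and derives every poll bit from that one value.

/* Sketch only: simplified stand-ins for the kernel's WRITE_ONCE()/READ_ONCE() */
#include <pthread.h>
#include <stdio.h>

#define WRITE_ONCE(x, val)	(*(volatile __typeof__(x) *)&(x) = (val))
#define READ_ONCE(x)		(*(volatile __typeof__(x) *)&(x))

#define RCV_SHUTDOWN	1
#define SEND_SHUTDOWN	2
#define SHUTDOWN_MASK	3

static unsigned char sk_shutdown;	/* stand-in for sk->sk_shutdown */

/* Writer: mimics the annotated stores in mptcp_check_data_fin()/__mptcp_close() */
static void *shutdown_writer(void *arg)
{
	(void)arg;
	WRITE_ONCE(sk_shutdown, sk_shutdown | RCV_SHUTDOWN);
	WRITE_ONCE(sk_shutdown, SHUTDOWN_MASK);
	return NULL;
}

/* Lockless reader: mimics the single READ_ONCE() snapshot in mptcp_poll() */
static void *poll_reader(void *arg)
{
	unsigned char shutdown = READ_ONCE(sk_shutdown);

	(void)arg;
	if (shutdown == SHUTDOWN_MASK)
		printf("mask |= EPOLLHUP\n");
	if (shutdown & RCV_SHUTDOWN)
		printf("mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP\n");
	if (shutdown & SEND_SHUTDOWN)
		printf("mask |= EPOLLOUT | EPOLLWRNORM\n");
	if (!shutdown)
		printf("no shutdown bits in this snapshot\n");
	return NULL;
}

int main(void)
{
	pthread_t writer, reader;

	pthread_create(&writer, NULL, shutdown_writer, NULL);
	pthread_create(&reader, NULL, poll_reader, NULL);
	pthread_join(writer, NULL);
	pthread_join(reader, NULL);
	return 0;
}

Deriving the whole mask from one snapshot is what keeps the reported EPOLLHUP, EPOLLRDHUP and EPOLLOUT bits mutually consistent even when shutdown() or close() updates the flags concurrently.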
