Diffstat (limited to 'net/ipv4/tcp_timer.c')
-rw-r--r--    net/ipv4/tcp_timer.c    93
1 file changed, 62 insertions(+), 31 deletions(-)
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 470f581eedd4..984ab4a0421e 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -26,14 +26,15 @@
 static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
-	u32 elapsed, start_ts;
+	u32 elapsed, start_ts, user_timeout;
 	s32 remaining;
 
 	start_ts = tcp_sk(sk)->retrans_stamp;
-	if (!icsk->icsk_user_timeout)
+	user_timeout = READ_ONCE(icsk->icsk_user_timeout);
+	if (!user_timeout)
 		return icsk->icsk_rto;
 
 	elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts;
-	remaining = icsk->icsk_user_timeout - elapsed;
+	remaining = user_timeout - elapsed;
 	if (remaining <= 0)
 		return 1; /* user timeout has passed; fire ASAP */
@@ -43,16 +44,17 @@ static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
 u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
-	u32 remaining;
+	u32 remaining, user_timeout;
 	s32 elapsed;
 
-	if (!icsk->icsk_user_timeout || !icsk->icsk_probes_tstamp)
+	user_timeout = READ_ONCE(icsk->icsk_user_timeout);
+	if (!user_timeout || !icsk->icsk_probes_tstamp)
 		return when;
 
 	elapsed = tcp_jiffies32 - icsk->icsk_probes_tstamp;
 	if (unlikely(elapsed < 0))
 		elapsed = 0;
-	remaining = msecs_to_jiffies(icsk->icsk_user_timeout) - elapsed;
+	remaining = msecs_to_jiffies(user_timeout) - elapsed;
 	remaining = max_t(u32, remaining, TCP_TIMEOUT_MIN);
 
 	return min_t(u32, remaining, when);
@@ -239,7 +241,8 @@ static int tcp_write_timeout(struct sock *sk)
 	if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
 		if (icsk->icsk_retransmits)
 			__dst_negative_advice(sk);
-		retry_until = icsk->icsk_syn_retries ? :
+		/* Paired with WRITE_ONCE() in tcp_sock_set_syncnt() */
+		retry_until = READ_ONCE(icsk->icsk_syn_retries) ? :
 			READ_ONCE(net->ipv4.sysctl_tcp_syn_retries);
 
 		max_retransmits = retry_until;
@@ -269,7 +272,7 @@ static int tcp_write_timeout(struct sock *sk)
 	}
 	if (!expired)
 		expired = retransmits_timed_out(sk, retry_until,
-						icsk->icsk_user_timeout);
+						READ_ONCE(icsk->icsk_user_timeout));
 	tcp_fastopen_active_detect_blackhole(sk, expired);
 
 	if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTO_CB_FLAG))
@@ -383,13 +386,16 @@ static void tcp_probe_timer(struct sock *sk)
 	 * corresponding system limit. We also implement similar policy when
	 * we use RTO to probe window in tcp_retransmit_timer().
 	 */
-	if (!icsk->icsk_probes_tstamp)
+	if (!icsk->icsk_probes_tstamp) {
 		icsk->icsk_probes_tstamp = tcp_jiffies32;
-	else if (icsk->icsk_user_timeout &&
-		 (s32)(tcp_jiffies32 - icsk->icsk_probes_tstamp) >=
-		 msecs_to_jiffies(icsk->icsk_user_timeout))
-		goto abort;
+	} else {
+		u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout);
+
+		if (user_timeout &&
+		    (s32)(tcp_jiffies32 - icsk->icsk_probes_tstamp) >=
+		     msecs_to_jiffies(user_timeout))
+			goto abort;
+	}
 
 	max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2);
 	if (sock_flag(sk, SOCK_DEAD)) {
 		const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;
@@ -421,8 +427,10 @@ static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req)
 
 	req->rsk_ops->syn_ack_timeout(req);
 
-	/* add one more retry for fastopen */
-	max_retries = icsk->icsk_syn_retries ? :
+	/* Add one more retry for fastopen.
+	 * Paired with WRITE_ONCE() in tcp_sock_set_syncnt()
+	 */
+	max_retries = READ_ONCE(icsk->icsk_syn_retries) ? :
 		READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_synack_retries) + 1;
 
 	if (req->num_timeout >= max_retries) {
@@ -446,6 +454,22 @@ static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req)
 			  req->timeout << req->num_timeout, TCP_RTO_MAX);
 }
 
+static bool tcp_rtx_probe0_timed_out(const struct sock *sk,
+				     const struct sk_buff *skb)
+{
+	const struct tcp_sock *tp = tcp_sk(sk);
+	const int timeout = TCP_RTO_MAX * 2;
+	u32 rcv_delta, rtx_delta;
+
+	rcv_delta = inet_csk(sk)->icsk_timeout - tp->rcv_tstamp;
+	if (rcv_delta <= timeout)
+		return false;
+
+	rtx_delta = (u32)msecs_to_jiffies(tcp_time_stamp(tp) -
+			(tp->retrans_stamp ?: tcp_skb_timestamp(skb)));
+
+	return rtx_delta > timeout;
+}
 /**
  *  tcp_retransmit_timer() - The TCP retransmit timeout handler
@@ -495,23 +519,26 @@ void tcp_retransmit_timer(struct sock *sk)
 		 * we cannot allow such beasts to hang infinitely.
 		 */
 		struct inet_sock *inet = inet_sk(sk);
+		u32 rtx_delta;
+
+		rtx_delta = tcp_time_stamp(tp) - (tp->retrans_stamp ?: tcp_skb_timestamp(skb));
 		if (sk->sk_family == AF_INET) {
-			net_dbg_ratelimited("Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
-					    &inet->inet_daddr,
-					    ntohs(inet->inet_dport),
-					    inet->inet_num,
-					    tp->snd_una, tp->snd_nxt);
+			net_dbg_ratelimited("Probing zero-window on %pI4:%u/%u, seq=%u:%u, recv %ums ago, lasting %ums\n",
+				&inet->inet_daddr, ntohs(inet->inet_dport),
+				inet->inet_num, tp->snd_una, tp->snd_nxt,
+				jiffies_to_msecs(jiffies - tp->rcv_tstamp),
+				rtx_delta);
 		}
 #if IS_ENABLED(CONFIG_IPV6)
 		else if (sk->sk_family == AF_INET6) {
-			net_dbg_ratelimited("Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
-					    &sk->sk_v6_daddr,
-					    ntohs(inet->inet_dport),
-					    inet->inet_num,
-					    tp->snd_una, tp->snd_nxt);
+			net_dbg_ratelimited("Probing zero-window on %pI6:%u/%u, seq=%u:%u, recv %ums ago, lasting %ums\n",
+				&sk->sk_v6_daddr, ntohs(inet->inet_dport),
+				inet->inet_num, tp->snd_una, tp->snd_nxt,
+				jiffies_to_msecs(jiffies - tp->rcv_tstamp),
+				rtx_delta);
 		}
 #endif
-		if (tcp_jiffies32 - tp->rcv_tstamp > TCP_RTO_MAX) {
+		if (tcp_rtx_probe0_timed_out(sk, skb)) {
 			tcp_write_err(sk);
 			goto out;
 		}
@@ -591,7 +618,9 @@ out_reset_timer:
 	    tcp_stream_is_thin(tp) &&
 	    icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
 		icsk->icsk_backoff = 0;
-		icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX);
+		icsk->icsk_rto = clamp(__tcp_set_rto(tp),
+				       tcp_rto_min(sk),
+				       TCP_RTO_MAX);
 	} else if (sk->sk_state != TCP_SYN_SENT ||
 		   icsk->icsk_backoff >
 		   READ_ONCE(net->ipv4.sysctl_tcp_syn_linear_timeouts)) {
@@ -706,7 +735,7 @@ static void tcp_keepalive_timer (struct timer_list *t)
 
 	tcp_mstamp_refresh(tp);
 	if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
-		if (tp->linger2 >= 0) {
+		if (READ_ONCE(tp->linger2) >= 0) {
 			const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN;
 
 			if (tmo > 0) {
@@ -731,13 +760,15 @@ static void tcp_keepalive_timer (struct timer_list *t)
 	elapsed = keepalive_time_elapsed(tp);
 
 	if (elapsed >= keepalive_time_when(tp)) {
+		u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout);
+
 		/* If the TCP_USER_TIMEOUT option is enabled, use that
 		 * to determine when to timeout instead.
 		 */
-		if ((icsk->icsk_user_timeout != 0 &&
-		    elapsed >= msecs_to_jiffies(icsk->icsk_user_timeout) &&
+		if ((user_timeout != 0 &&
+		    elapsed >= msecs_to_jiffies(user_timeout) &&
 		    icsk->icsk_probes_out > 0) ||
-		    (icsk->icsk_user_timeout == 0 &&
+		    (user_timeout == 0 &&
 		    icsk->icsk_probes_out >= keepalive_probes(tp))) {
 			tcp_send_active_reset(sk, GFP_ATOMIC);
 			tcp_write_err(sk);
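Most of the hunks above apply one pattern: icsk_user_timeout, icsk_syn_retries and tp->linger2 are written from setsockopt() paths and read locklessly from timer callbacks, so each reader is annotated with READ_ONCE() (paired with WRITE_ONCE() in the setters, e.g. tcp_sock_set_syncnt()), and the value is loaded exactly once into a local so every later test sees the same snapshot rather than two different values from a racing writer. A minimal userspace analogue of that snapshot pattern, using C11 atomics and illustrative names (this is not kernel code):

    /* Snapshot a racy field once, then test only the snapshot.
     * Build: cc -std=c11 -o snapshot snapshot.c
     */
    #include <stdatomic.h>
    #include <stdio.h>

    static _Atomic unsigned int user_timeout_ms; /* written by another thread */

    static unsigned int clamp_rto_to_user_timeout(unsigned int rto_ms,
                                                  unsigned int elapsed_ms)
    {
            /* Single relaxed load, analogous to READ_ONCE(). */
            unsigned int user_timeout =
                    atomic_load_explicit(&user_timeout_ms, memory_order_relaxed);

            if (!user_timeout)              /* option not set: keep normal RTO */
                    return rto_ms;
            if (elapsed_ms >= user_timeout) /* timeout already passed: fire ASAP */
                    return 1;
            /* Otherwise fire no later than the remaining user-timeout budget. */
            return user_timeout - elapsed_ms < rto_ms ?
                   user_timeout - elapsed_ms : rto_ms;
    }

    int main(void)
    {
            atomic_store_explicit(&user_timeout_ms, 3000, memory_order_relaxed);
            printf("%u\n", clamp_rto_to_user_timeout(200, 2900)); /* prints 100 */
            return 0;
    }

Re-reading the field at each test would reintroduce the race the patch closes: the zero check and the arithmetic could observe different values.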
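For context, the fields whose readers are annotated here are set from userspace: TCP_USER_TIMEOUT carries a value in milliseconds into icsk_user_timeout, and TCP_SYNCNT bounds SYN retransmits via icsk_syn_retries (both documented in tcp(7)). A short usage sketch, error handling omitted:

    #include <netinet/in.h>
    #include <netinet/tcp.h>
    #include <sys/socket.h>

    int main(void)
    {
            int fd = socket(AF_INET, SOCK_STREAM, 0);
            unsigned int tmo_ms = 30000; /* give up after 30 s without progress */
            int syncnt = 3;              /* at most 3 SYN retransmits */

            setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT, &tmo_ms, sizeof(tmo_ms));
            setsockopt(fd, IPPROTO_TCP, TCP_SYNCNT, &syncnt, sizeof(syncnt));
            return 0;
    }

Because these options can change at any moment relative to a running timer, the timer-side reads need the READ_ONCE() annotations added above.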
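The new tcp_rtx_probe0_timed_out() helper also changes behaviour for sockets stuck probing a zero receive window. The old test aborted once tcp_jiffies32 - tp->rcv_tstamp exceeded TCP_RTO_MAX (120 s); the new one requires two conditions, each against a 2 * TCP_RTO_MAX budget: nothing received for that long relative to the timer's scheduled expiry (rcv_delta), and a retransmission phase, measured from retrans_stamp or the head skb's timestamp, that has itself lasted that long (rtx_delta). A peer that keeps acknowledging window probes therefore keeps the connection alive. A standalone sketch of the predicate with concrete numbers (seconds instead of jiffies/ms for readability; not the kernel helper itself):

    #include <stdbool.h>
    #include <stdio.h>

    #define TCP_RTO_MAX_SEC 120 /* TCP_RTO_MAX is 120 s in the kernel */

    static bool rtx_probe0_timed_out(unsigned int secs_since_last_rcv,
                                     unsigned int secs_in_rtx_phase)
    {
            const unsigned int timeout = TCP_RTO_MAX_SEC * 2;

            if (secs_since_last_rcv <= timeout) /* peer still ACKing probes */
                    return false;
            return secs_in_rtx_phase > timeout; /* and we were stuck that long */
    }

    int main(void)
    {
            printf("%d\n", rtx_probe0_timed_out(30, 500));  /* 0: keep probing */
            printf("%d\n", rtx_probe0_timed_out(300, 300)); /* 1: give up */
            return 0;
    }

The reworked net_dbg_ratelimited() messages expose the same two quantities ("recv %ums ago, lasting %ums"), which makes the give-up decision observable in the logs.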
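Finally, the out_reset_timer hunk is a bounds fix for thin streams: min(__tcp_set_rto(tp), TCP_RTO_MAX) capped the recomputed RTO only from above, so a connection with a very small smoothed RTT could end up with an RTO below the configured minimum; clamp() bounds it from both sides. Semantics of the kernel macro (simplified, untyped form of what include/linux/minmax.h provides):

    /* clamp(val, lo, hi) == min(max(val, lo), hi) */
    #define clamp(val, lo, hi) \
            ((val) < (lo) ? (lo) : ((val) > (hi) ? (hi) : (val)))

With the default tcp_rto_min() of 200 ms and TCP_RTO_MAX of 120 s, a computed RTO of 40 ms used to be kept as-is and is now raised to 200 ms, while an oversized value is still cut down to 120 s as before.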
