diff options
Diffstat (limited to 'net/ipv4/tcp_input.c')
| -rw-r--r-- | net/ipv4/tcp_input.c | 107 | 
1 files changed, 50 insertions, 57 deletions
| diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 52b5c2d0ecd0..976034f82320 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -105,7 +105,6 @@ int sysctl_tcp_abc __read_mostly;  #define FLAG_SYN_ACKED		0x10 /* This ACK acknowledged SYN.		*/  #define FLAG_DATA_SACKED	0x20 /* New SACK.				*/  #define FLAG_ECE		0x40 /* ECE in this ACK				*/ -#define FLAG_DATA_LOST		0x80 /* SACK detected data lossage.		*/  #define FLAG_SLOWPATH		0x100 /* Do not skip RFC checks for window update.*/  #define FLAG_ONLY_ORIG_SACKED	0x200 /* SACKs only non-rexmit sent before RTO */  #define FLAG_SND_UNA_ADVANCED	0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ @@ -322,7 +321,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)  	/* Check #1 */  	if (tp->rcv_ssthresh < tp->window_clamp &&  	    (int)tp->rcv_ssthresh < tcp_space(sk) && -	    !tcp_memory_pressure) { +	    !sk_under_memory_pressure(sk)) {  		int incr;  		/* Check #2. Increase window, if skb with such overhead @@ -411,8 +410,8 @@ static void tcp_clamp_window(struct sock *sk)  	if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&  	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && -	    !tcp_memory_pressure && -	    atomic_long_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) { +	    !sk_under_memory_pressure(sk) && +	    sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {  		sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),  				    sysctl_tcp_rmem[2]);  	} @@ -865,13 +864,13 @@ static void tcp_disable_fack(struct tcp_sock *tp)  	/* RFC3517 uses different metric in lost marker => reset on change */  	if (tcp_is_fack(tp))  		tp->lost_skb_hint = NULL; -	tp->rx_opt.sack_ok &= ~2; +	tp->rx_opt.sack_ok &= ~TCP_FACK_ENABLED;  }  /* Take a notice that peer is sending D-SACKs */  static void tcp_dsack_seen(struct tcp_sock *tp)  { -	tp->rx_opt.sack_ok |= 4; +	tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;  }  /* Initialize metrics on socket. 
*/ @@ -1040,13 +1039,11 @@ static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp,   * These 6 states form finite state machine, controlled by the following events:   * 1. New ACK (+SACK) arrives. (tcp_sacktag_write_queue())   * 2. Retransmission. (tcp_retransmit_skb(), tcp_xmit_retransmit_queue()) - * 3. Loss detection event of one of three flavors: + * 3. Loss detection event of two flavors:   *	A. Scoreboard estimator decided the packet is lost.   *	   A'. Reno "three dupacks" marks head of queue lost. - *	   A''. Its FACK modfication, head until snd.fack is lost. - *	B. SACK arrives sacking data transmitted after never retransmitted - *	   hole was sent out. - *	C. SACK arrives sacking SND.NXT at the moment, when the + *	   A''. Its FACK modification, head until snd.fack is lost. + *	B. SACK arrives sacking SND.NXT at the moment, when the   *	   segment was retransmitted.   * 4. D-SACK added new rule: D-SACK changes any tag to S.   * @@ -1153,7 +1150,7 @@ static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack,  }  /* Check for lost retransmit. This superb idea is borrowed from "ratehalving". - * Event "C". Later note: FACK people cheated me again 8), we have to account + * Event "B". Later note: FACK people cheated me again 8), we have to account   * for reordering! Ugly, but should help.   *   * Search retransmitted skbs from write_queue that were sent when snd_nxt was @@ -1844,10 +1841,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,  		if (found_dup_sack && ((i + 1) == first_sack_index))  			next_dup = &sp[i + 1]; -		/* Event "B" in the comment above. */ -		if (after(end_seq, tp->high_seq)) -			state.flag |= FLAG_DATA_LOST; -  		/* Skip too early cached blocks */  		while (tcp_sack_cache_ok(tp, cache) &&  		       !before(start_seq, cache->end_seq)) @@ -2515,8 +2508,11 @@ static void tcp_timeout_skbs(struct sock *sk)  	tcp_verify_left_out(tp);  } -/* Mark head of queue up as lost. 
With RFC3517 SACK, the packets is - * is against sacked "cnt", otherwise it's against facked "cnt" +/* Detect loss in event "A" above by marking head of queue up as lost. + * For FACK or non-SACK(Reno) senders, the first "packets" number of segments + * are considered lost. For RFC3517 SACK, a segment is considered lost if it + * has at least tp->reordering SACKed segments above it; "packets" refers to + * the maximum SACKed segments to pass before reaching this limit.   */  static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)  { @@ -2525,6 +2521,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)  	int cnt, oldcnt;  	int err;  	unsigned int mss; +	/* Use SACK to deduce losses of new sequences sent during recovery */ +	const u32 loss_high = tcp_is_sack(tp) ?  tp->snd_nxt : tp->high_seq;  	WARN_ON(packets > tp->packets_out);  	if (tp->lost_skb_hint) { @@ -2546,7 +2544,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)  		tp->lost_skb_hint = skb;  		tp->lost_cnt_hint = cnt; -		if (after(TCP_SKB_CB(skb)->end_seq, tp->high_seq)) +		if (after(TCP_SKB_CB(skb)->end_seq, loss_high))  			break;  		oldcnt = cnt; @@ -2663,7 +2661,7 @@ static void DBGUNDO(struct sock *sk, const char *msg)  		       tp->snd_ssthresh, tp->prior_ssthresh,  		       tp->packets_out);  	} -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6)  	else if (sk->sk_family == AF_INET6) {  		struct ipv6_pinfo *np = inet6_sk(sk);  		printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", @@ -2858,7 +2856,7 @@ static void tcp_try_keep_open(struct sock *sk)  	struct tcp_sock *tp = tcp_sk(sk);  	int state = TCP_CA_Open; -	if (tcp_left_out(tp) || tcp_any_retrans_done(sk) || tp->undo_marker) +	if (tcp_left_out(tp) || tcp_any_retrans_done(sk))  		state = TCP_CA_Disorder;  	if (inet_csk(sk)->icsk_ca_state != state) { @@ -2881,7 +2879,8 @@ static void tcp_try_to_open(struct sock *sk, int flag)  	if 
(inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {  		tcp_try_keep_open(sk); -		tcp_moderate_cwnd(tp); +		if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open) +			tcp_moderate_cwnd(tp);  	} else {  		tcp_cwnd_down(sk, flag);  	} @@ -3009,11 +3008,11 @@ static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked,   * tcp_xmit_retransmit_queue().   */  static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, -				  int newly_acked_sacked, int flag) +				  int newly_acked_sacked, bool is_dupack, +				  int flag)  {  	struct inet_connection_sock *icsk = inet_csk(sk);  	struct tcp_sock *tp = tcp_sk(sk); -	int is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));  	int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&  				    (tcp_fackets_out(tp) > tp->reordering));  	int fast_rexmit = 0, mib_idx; @@ -3032,19 +3031,10 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,  	if (tcp_check_sack_reneging(sk, flag))  		return; -	/* C. Process data loss notification, provided it is valid. */ -	if (tcp_is_fack(tp) && (flag & FLAG_DATA_LOST) && -	    before(tp->snd_una, tp->high_seq) && -	    icsk->icsk_ca_state != TCP_CA_Open && -	    tp->fackets_out > tp->reordering) { -		tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering, 0); -		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSS); -	} - -	/* D. Check consistency of the current state. */ +	/* C. Check consistency of the current state. */  	tcp_verify_left_out(tp); -	/* E. Check state exit conditions. State can be terminated +	/* D. Check state exit conditions. State can be terminated  	 *    when high_seq is ACKed. */  	if (icsk->icsk_ca_state == TCP_CA_Open) {  		WARN_ON(tp->retrans_out != 0); @@ -3066,17 +3056,6 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,  			}  			break; -		case TCP_CA_Disorder: -			tcp_try_undo_dsack(sk); -			if (!tp->undo_marker || -			    /* For SACK case do not Open to allow to undo -			     * catching for all duplicate ACKs. 
*/ -			    tcp_is_reno(tp) || tp->snd_una != tp->high_seq) { -				tp->undo_marker = 0; -				tcp_set_ca_state(sk, TCP_CA_Open); -			} -			break; -  		case TCP_CA_Recovery:  			if (tcp_is_reno(tp))  				tcp_reset_reno_sack(tp); @@ -3087,7 +3066,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,  		}  	} -	/* F. Process state. */ +	/* E. Process state. */  	switch (icsk->icsk_ca_state) {  	case TCP_CA_Recovery:  		if (!(flag & FLAG_SND_UNA_ADVANCED)) { @@ -3117,7 +3096,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,  				tcp_add_reno_sack(sk);  		} -		if (icsk->icsk_ca_state == TCP_CA_Disorder) +		if (icsk->icsk_ca_state <= TCP_CA_Disorder)  			tcp_try_undo_dsack(sk);  		if (!tcp_time_to_recover(sk)) { @@ -3681,10 +3660,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)  	u32 prior_snd_una = tp->snd_una;  	u32 ack_seq = TCP_SKB_CB(skb)->seq;  	u32 ack = TCP_SKB_CB(skb)->ack_seq; +	bool is_dupack = false;  	u32 prior_in_flight;  	u32 prior_fackets;  	int prior_packets;  	int prior_sacked = tp->sacked_out; +	int pkts_acked = 0;  	int newly_acked_sacked = 0;  	int frto_cwnd = 0; @@ -3757,6 +3738,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)  	/* See if we can take anything off of the retransmit queue. 
*/  	flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una); +	pkts_acked = prior_packets - tp->packets_out;  	newly_acked_sacked = (prior_packets - prior_sacked) -  			     (tp->packets_out - tp->sacked_out); @@ -3771,8 +3753,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)  		if ((flag & FLAG_DATA_ACKED) && !frto_cwnd &&  		    tcp_may_raise_cwnd(sk, flag))  			tcp_cong_avoid(sk, ack, prior_in_flight); -		tcp_fastretrans_alert(sk, prior_packets - tp->packets_out, -				      newly_acked_sacked, flag); +		is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); +		tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked, +				      is_dupack, flag);  	} else {  		if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)  			tcp_cong_avoid(sk, ack, prior_in_flight); @@ -3784,6 +3767,10 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)  	return 1;  no_queue: +	/* If data was DSACKed, see if we can undo a cwnd reduction. */ +	if (flag & FLAG_DSACKING_ACK) +		tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked, +				      is_dupack, flag);  	/* If this ack opens up a zero window, clear backoff.  It was  	 * being used to time the probes, and is probably far higher than  	 * it needs to be for normal retransmission. @@ -3797,10 +3784,14 @@ invalid_ack:  	return -1;  old_ack: +	/* If data was SACKed, tag it and see if we should send more data. +	 * If data was DSACKed, see if we can undo a cwnd reduction. 
+	 */  	if (TCP_SKB_CB(skb)->sacked) { -		tcp_sacktag_write_queue(sk, skb, prior_snd_una); -		if (icsk->icsk_ca_state == TCP_CA_Open) -			tcp_try_keep_open(sk); +		flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una); +		newly_acked_sacked = tp->sacked_out - prior_sacked; +		tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked, +				      is_dupack, flag);  	}  	SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt); @@ -3876,7 +3867,7 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o  			case TCPOPT_SACK_PERM:  				if (opsize == TCPOLEN_SACK_PERM && th->syn &&  				    !estab && sysctl_tcp_sack) { -					opt_rx->sack_ok = 1; +					opt_rx->sack_ok = TCP_SACK_SEEN;  					tcp_sack_reset(opt_rx);  				}  				break; @@ -4864,7 +4855,7 @@ static int tcp_prune_queue(struct sock *sk)  	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)  		tcp_clamp_window(sk); -	else if (tcp_memory_pressure) +	else if (sk_under_memory_pressure(sk))  		tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);  	tcp_collapse_ofo_queue(sk); @@ -4930,11 +4921,11 @@ static int tcp_should_expand_sndbuf(const struct sock *sk)  		return 0;  	/* If we are under global TCP memory pressure, do not expand.  */ -	if (tcp_memory_pressure) +	if (sk_under_memory_pressure(sk))  		return 0;  	/* If we are under soft global TCP memory pressure, do not expand.  */ -	if (atomic_long_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0]) +	if (sk_memory_allocated(sk) >= sk_prot_mem_limits(sk, 0))  		return 0;  	/* If we filled the congestion window, do not expand.  */ @@ -5809,6 +5800,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,  			goto discard;  		if (th->syn) { +			if (th->fin) +				goto discard;  			if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)  				return 1; | 
