Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--  net/ipv4/tcp_output.c  73
1 file changed, 52 insertions, 21 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 17a11e65e57f..025e25093984 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -86,6 +86,9 @@ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
 	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
 		tcp_rearm_rto(sk);
 	}
+
+	NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT,
+		      tcp_skb_pcount(skb));
 }
 
 /* SND.NXT, if window was not shrunk.
@@ -269,6 +272,7 @@ EXPORT_SYMBOL(tcp_select_initial_window);
 static u16 tcp_select_window(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	u32 old_win = tp->rcv_wnd;
 	u32 cur_win = tcp_receive_window(tp);
 	u32 new_win = __tcp_select_window(sk);
 
@@ -281,6 +285,9 @@ static u16 tcp_select_window(struct sock *sk)
 		 *
 		 * Relax Will Robinson.
 		 */
+		if (new_win == 0)
+			NET_INC_STATS(sock_net(sk),
+				      LINUX_MIB_TCPWANTZEROWINDOWADV);
 		new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale);
 	}
 	tp->rcv_wnd = new_win;
@@ -298,8 +305,14 @@ static u16 tcp_select_window(struct sock *sk)
 	new_win >>= tp->rx_opt.rcv_wscale;
 
 	/* If we advertise zero window, disable fast path. */
-	if (new_win == 0)
+	if (new_win == 0) {
 		tp->pred_flags = 0;
+		if (old_win)
+			NET_INC_STATS(sock_net(sk),
+				      LINUX_MIB_TCPTOZEROWINDOWADV);
+	} else if (old_win == 0) {
+		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFROMZEROWINDOWADV);
+	}
 
 	return new_win;
 }
@@ -867,11 +880,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	if (clone_it) {
 		const struct sk_buff *fclone = skb + 1;
 
-		/* If congestion control is doing timestamping, we must
-		 * take such a timestamp before we potentially clone/copy.
-		 */
-		if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP)
-			__net_timestamp(skb);
+		skb_mstamp_get(&skb->skb_mstamp);
 
 		if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
 			     fclone->fclone == SKB_FCLONE_CLONE))
@@ -884,6 +893,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 			skb = skb_clone(skb, gfp_mask);
 		if (unlikely(!skb))
 			return -ENOBUFS;
+		/* Our usage of tstamp should remain private */
+		skb->tstamp.tv64 = 0;
 	}
 
 	inet = inet_sk(sk);
@@ -970,7 +981,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 		TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
 			      tcp_skb_pcount(skb));
 
-	err = icsk->icsk_af_ops->queue_xmit(skb, &inet->cork.fl);
+	err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
 	if (likely(err <= 0))
 		return err;
 
@@ -1426,7 +1437,7 @@ static void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now,
  *    With Minshall's modification: all sent small packets are ACKed.
  */
 static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp,
-			    unsigned int mss_now, int nonagle)
+			    int nonagle)
 {
 	return partial &&
 		((nonagle & TCP_NAGLE_CORK) ||
@@ -1458,7 +1469,7 @@ static unsigned int tcp_mss_split_point(const struct sock *sk,
 	 * to include this last segment in this skb.
 	 * Otherwise, we'll split the skb at last MSS boundary
 	 */
-	if (tcp_nagle_check(partial != 0, tp, mss_now, nonagle))
+	if (tcp_nagle_check(partial != 0, tp, nonagle))
 		return needed - partial;
 
 	return needed;
@@ -1521,7 +1532,7 @@ static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buf
 	if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
 		return true;
 
-	if (!tcp_nagle_check(skb->len < cur_mss, tp, cur_mss, nonagle))
+	if (!tcp_nagle_check(skb->len < cur_mss, tp, nonagle))
 		return true;
 
 	return false;
@@ -1975,7 +1986,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	u32 timeout, tlp_time_stamp, rto_time_stamp;
-	u32 rtt = tp->srtt >> 3;
+	u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3);
 
 	if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS))
 		return false;
@@ -1997,7 +2008,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 	/* Schedule a loss probe in 2*RTT for SACK capable connections
 	 * in Open state, that are either limited by cwnd or application.
 	 */
-	if (sysctl_tcp_early_retrans < 3 || !tp->srtt || !tp->packets_out ||
+	if (sysctl_tcp_early_retrans < 3 || !tp->srtt_us || !tp->packets_out ||
 	    !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
 		return false;
 
@@ -2082,7 +2093,6 @@ rearm_timer:
 	if (likely(!err))
 		NET_INC_STATS_BH(sock_net(sk),
 				 LINUX_MIB_TCPLOSSPROBES);
-	return;
 }
 
 /* Push out any pending frames which were held back due to
@@ -2180,7 +2190,8 @@ u32 __tcp_select_window(struct sock *sk)
 	 */
 	int mss = icsk->icsk_ack.rcv_mss;
 	int free_space = tcp_space(sk);
-	int full_space = min_t(int, tp->window_clamp, tcp_full_space(sk));
+	int allowed_space = tcp_full_space(sk);
+	int full_space = min_t(int, tp->window_clamp, allowed_space);
 	int window;
 
 	if (mss > full_space)
@@ -2193,7 +2204,19 @@ u32 __tcp_select_window(struct sock *sk)
 			tp->rcv_ssthresh = min(tp->rcv_ssthresh,
 					       4U * tp->advmss);
 
-		if (free_space < mss)
+		/* free_space might become our new window, make sure we don't
+		 * increase it due to wscale.
+		 */
+		free_space = round_down(free_space, 1 << tp->rx_opt.rcv_wscale);
+
+		/* if free space is less than mss estimate, or is below 1/16th
+		 * of the maximum allowed, try to move to zero-window, else
+		 * tcp_clamp_window() will grow rcv buf up to tcp_rmem[2], and
+		 * new incoming data is dropped due to memory limits.
+		 * With large window, mss test triggers way too late in order
+		 * to announce zero window in time before rmem limit kicks in.
+		 */
+		if (free_space < (allowed_space >> 4) || free_space < mss)
 			return 0;
 	}
 
@@ -2431,7 +2454,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	if (err == 0) {
 		/* Update global TCP statistics. */
 		TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
-
+		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
+			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
 		tp->total_retrans++;
 
 #if FASTRETRANS_DEBUG > 0
@@ -2717,7 +2741,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	int tcp_header_size;
 	int mss;
 
-	skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC);
+	skb = sock_wmalloc(sk, MAX_TCP_HEADER, 1, GFP_ATOMIC);
 	if (unlikely(!skb)) {
 		dst_release(dst);
 		return NULL;
@@ -2787,7 +2811,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	th->window = htons(min(req->rcv_wnd, 65535U));
 	tcp_options_write((__be32 *)(th + 1), tp, &opts);
 	th->doff = (tcp_header_size >> 2);
-	TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, tcp_skb_pcount(skb));
+	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_OUTSEGS);
 
 #ifdef CONFIG_TCP_MD5SIG
 	/* Okay, we have all we need - do the md5 hash if needed */
@@ -2959,9 +2983,15 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 	tcp_connect_queue_skb(sk, data);
 	fo->copied = data->len;
 
+	/* syn_data is about to be sent, we need to take current time stamps
+	 * for the packets that are in write queue : SYN packet and DATA
+	 */
+	skb_mstamp_get(&syn->skb_mstamp);
+	data->skb_mstamp = syn->skb_mstamp;
+
 	if (tcp_transmit_skb(sk, syn_data, 0, sk->sk_allocation) == 0) {
 		tp->syn_data = (fo->copied > 0);
-		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
+		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT);
 		goto done;
 	}
 	syn_data = NULL;
@@ -3049,8 +3079,9 @@ void tcp_send_delayed_ack(struct sock *sk)
 		 * Do not use inet_csk(sk)->icsk_rto here, use results of rtt measurements
 		 * directly.
 		 */
-		if (tp->srtt) {
-			int rtt = max(tp->srtt >> 3, TCP_DELACK_MIN);
+		if (tp->srtt_us) {
+			int rtt = max_t(int, usecs_to_jiffies(tp->srtt_us >> 3),
+					TCP_DELACK_MIN);
 			if (rtt < max_ato)
 				max_ato = rtt;
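
The window-transition counters added to tcp_select_window() above are easiest to read as a tiny state machine on (old_win, new_win). Below is a minimal userspace sketch of that accounting; the helper and stub counters are illustrative, not kernel code. TCPWantZeroWindowAdv, incremented earlier in the same function, covers the remaining case: memory pressure computed a zero window, but the previously advertised window could not legally be shrunk.

/* Minimal userspace model of the zero-window transition accounting
 * added to tcp_select_window() in the diff. The counter names mirror
 * the LINUX_MIB_* entries; everything else here is illustrative.
 */
#include <stdint.h>
#include <stdio.h>

static unsigned long tcp_to_zero_window_adv;	/* open -> zero */
static unsigned long tcp_from_zero_window_adv;	/* zero -> open */

static void count_window_transition(uint32_t old_win, uint32_t new_win)
{
	if (new_win == 0) {
		if (old_win != 0)
			tcp_to_zero_window_adv++;
	} else if (old_win == 0) {
		tcp_from_zero_window_adv++;
	}
}

int main(void)
{
	count_window_transition(65535, 0);	/* receiver just filled up */
	count_window_transition(0, 4096);	/* window re-opened */
	printf("to_zero=%lu from_zero=%lu\n",
	       tcp_to_zero_window_adv, tcp_from_zero_window_adv);
	return 0;
}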
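
The new zero-window test in __tcp_select_window() is worth a worked example. Under assumed numbers (16 MB receive buffer, window scale 7, 1448-byte MSS), the old "free_space < mss" test would almost never fire on a large-window connection, while the added "allowed_space >> 4" threshold announces a zero window once less than 1/16th of the buffer remains. This is a sketch of the arithmetic only; round_down() is reimplemented here, since in the kernel it is a macro from linux/kernel.h.

/* Userspace sketch of the zero-window decision above, with made-up
 * numbers. Only the arithmetic is modeled; names are illustrative.
 */
#include <stdio.h>

static int round_down_pow2(int v, int align)	/* align: power of 2 */
{
	return v & ~(align - 1);
}

int main(void)
{
	int mss = 1448;			/* icsk_ack.rcv_mss estimate */
	int allowed_space = 16 << 20;	/* tcp_full_space(): 16 MB */
	int rcv_wscale = 7;		/* window scale in use */
	int free_space = 900 << 10;	/* 900 KB left before rmem limit */

	/* don't let wscale rounding silently grow the advertised window */
	free_space = round_down_pow2(free_space, 1 << rcv_wscale);

	/* old test: 921600 < 1448 is false, so ~900 KB would still be
	 * advertised; new test fires below 1 MB (16 MB >> 4), moving to
	 * zero window before tcp_clamp_window() starts dropping data.
	 */
	if (free_space < (allowed_space >> 4) || free_space < mss)
		printf("advertise zero window\n");
	else
		printf("advertise %d bytes\n", free_space);
	return 0;
}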
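
The srtt changes follow from tp->srtt (jiffies, kept left-shifted by 3 for 1/8-gain fixed point) becoming tp->srtt_us (microseconds, same fixed point), so timer paths such as the loss probe and delayed ACK above now convert with usecs_to_jiffies(). A rough userspace illustration, assuming HZ=1000 (the real value is config-dependent, and the kernel helper handles more corner cases than this stand-in):

#include <stdio.h>

#define HZ 1000	/* assumption for this sketch only */

/* crude stand-in for the kernel's usecs_to_jiffies(): round up */
static unsigned int usecs_to_jiffies_approx(unsigned int us)
{
	unsigned int us_per_jiffy = 1000000 / HZ;

	return (us + us_per_jiffy - 1) / us_per_jiffy;
}

int main(void)
{
	unsigned int srtt_us = 20000 << 3;	/* 20 ms RTT, <<3 fixed point */

	printf("srtt = %u us = %u jiffies at HZ=%d\n",
	       srtt_us >> 3, usecs_to_jiffies_approx(srtt_us >> 3), HZ);
	return 0;
}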
