diff options
Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
| -rw-r--r-- | net/ipv4/tcp_ipv4.c | 80 | 
1 files changed, 47 insertions, 33 deletions
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 2259114c7242..9a89b8deafae 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -84,7 +84,6 @@  #include <crypto/hash.h>  #include <linux/scatterlist.h> -int sysctl_tcp_tw_reuse __read_mostly;  int sysctl_tcp_low_latency __read_mostly;  #ifdef CONFIG_TCP_MD5SIG @@ -95,12 +94,12 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,  struct inet_hashinfo tcp_hashinfo;  EXPORT_SYMBOL(tcp_hashinfo); -static  __u32 tcp_v4_init_sequence(const struct sk_buff *skb) +static u32 tcp_v4_init_sequence(const struct sk_buff *skb, u32 *tsoff)  {  	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,  					  ip_hdr(skb)->saddr,  					  tcp_hdr(skb)->dest, -					  tcp_hdr(skb)->source); +					  tcp_hdr(skb)->source, tsoff);  }  int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) @@ -120,7 +119,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)  	   and use initial timestamp retrieved from peer table.  	 */  	if (tcptw->tw_ts_recent_stamp && -	    (!twp || (sysctl_tcp_tw_reuse && +	    (!twp || (sock_net(sk)->ipv4.sysctl_tcp_tw_reuse &&  			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {  		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;  		if (tp->write_seq == 0) @@ -146,7 +145,9 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)  	struct flowi4 *fl4;  	struct rtable *rt;  	int err; +	u32 seq;  	struct ip_options_rcu *inet_opt; +	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;  	if (addr_len < sizeof(struct sockaddr_in))  		return -EINVAL; @@ -197,7 +198,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)  			tp->write_seq	   = 0;  	} -	if (tcp_death_row.sysctl_tw_recycle && +	if (tcp_death_row->sysctl_tw_recycle &&  	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)  		tcp_fetch_timewait_stamp(sk, &rt->dst); @@ -216,7 +217,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)  	 * complete initialization after this.  	 */  	tcp_set_state(sk, TCP_SYN_SENT); -	err = inet_hash_connect(&tcp_death_row, sk); +	err = inet_hash_connect(tcp_death_row, sk);  	if (err)  		goto failure; @@ -232,18 +233,27 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)  	/* OK, now commit destination to socket.  */  	sk->sk_gso_type = SKB_GSO_TCPV4;  	sk_setup_caps(sk, &rt->dst); +	rt = NULL; -	if (!tp->write_seq && likely(!tp->repair)) -		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, -							   inet->inet_daddr, -							   inet->inet_sport, -							   usin->sin_port); +	if (likely(!tp->repair)) { +		seq = secure_tcp_sequence_number(inet->inet_saddr, +						 inet->inet_daddr, +						 inet->inet_sport, +						 usin->sin_port, +						 &tp->tsoffset); +		if (!tp->write_seq) +			tp->write_seq = seq; +	}  	inet->inet_id = tp->write_seq ^ jiffies; +	if (tcp_fastopen_defer_connect(sk, &err)) +		return err; +	if (err) +		goto failure; +  	err = tcp_connect(sk); -	rt = NULL;  	if (err)  		goto failure; @@ -442,7 +452,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)  			if (!sock_owned_by_user(sk)) {  				tcp_v4_mtu_reduced(sk);  			} else { -				if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags)) +				if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &sk->sk_tsq_flags))  					sock_hold(sk);  			}  			goto out; @@ -691,6 +701,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)  		     offsetof(struct inet_timewait_sock, tw_bound_dev_if));  	arg.tos = ip_hdr(skb)->tos; +	arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);  	local_bh_disable();  	ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),  			      skb, &TCP_SKB_CB(skb)->header.h4.opt, @@ -711,7 +722,7 @@ out:     outside socket context is ugly, certainly. What can I do?   */ -static void tcp_v4_send_ack(struct net *net, +static void tcp_v4_send_ack(const struct sock *sk,  			    struct sk_buff *skb, u32 seq, u32 ack,  			    u32 win, u32 tsval, u32 tsecr, int oif,  			    struct tcp_md5sig_key *key, @@ -726,6 +737,7 @@ static void tcp_v4_send_ack(struct net *net,  #endif  			];  	} rep; +	struct net *net = sock_net(sk);  	struct ip_reply_arg arg;  	memset(&rep.th, 0, sizeof(struct tcphdr)); @@ -775,6 +787,7 @@ static void tcp_v4_send_ack(struct net *net,  	if (oif)  		arg.bound_dev_if = oif;  	arg.tos = tos; +	arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);  	local_bh_disable();  	ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),  			      skb, &TCP_SKB_CB(skb)->header.h4.opt, @@ -790,7 +803,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)  	struct inet_timewait_sock *tw = inet_twsk(sk);  	struct tcp_timewait_sock *tcptw = tcp_twsk(sk); -	tcp_v4_send_ack(sock_net(sk), skb, +	tcp_v4_send_ack(sk, skb,  			tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,  			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,  			tcp_time_stamp + tcptw->tw_ts_offset, @@ -818,10 +831,10 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,  	 * exception of <SYN> segments, MUST be right-shifted by  	 * Rcv.Wind.Shift bits:  	 */ -	tcp_v4_send_ack(sock_net(sk), skb, seq, +	tcp_v4_send_ack(sk, skb, seq,  			tcp_rsk(req)->rcv_nxt,  			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, -			tcp_time_stamp, +			tcp_time_stamp + tcp_rsk(req)->ts_off,  			req->ts_recent,  			0,  			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr, @@ -1315,10 +1328,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,  	tcp_ca_openreq_child(newsk, dst);  	tcp_sync_mss(newsk, dst_mtu(dst)); -	newtp->advmss = dst_metric_advmss(dst); -	if (tcp_sk(sk)->rx_opt.user_mss && -	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss) -		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; +	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));  	tcp_initialize_rcv_mss(newsk); @@ -1552,8 +1562,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)  	 * It has been noticed pure SACK packets were sometimes dropped  	 * (if cooked by drivers without copybreak feature).  	 */ -	if (!skb->data_len) -		skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); +	skb_condense(skb);  	if (unlikely(sk_add_backlog(sk, skb, limit))) {  		bh_unlock_sock(sk); @@ -1813,7 +1822,6 @@ const struct inet_connection_sock_af_ops ipv4_specific = {  	.getsockopt	   = ip_getsockopt,  	.addr2sockaddr	   = inet_csk_addr2sockaddr,  	.sockaddr_len	   = sizeof(struct sockaddr_in), -	.bind_conflict	   = inet_csk_bind_conflict,  #ifdef CONFIG_COMPAT  	.compat_setsockopt = compat_ip_setsockopt,  	.compat_getsockopt = compat_ip_getsockopt, @@ -1884,9 +1892,7 @@ void tcp_v4_destroy_sock(struct sock *sk)  	tcp_free_fastopen_req(tp);  	tcp_saved_syn_free(tp); -	local_bh_disable();  	sk_sockets_allocated_dec(sk); -	local_bh_enable();  }  EXPORT_SYMBOL(tcp_v4_destroy_sock); @@ -1908,7 +1914,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur)  	if (!sk) {  get_head:  		ilb = &tcp_hashinfo.listening_hash[st->bucket]; -		spin_lock_bh(&ilb->lock); +		spin_lock(&ilb->lock);  		sk = sk_head(&ilb->head);  		st->offset = 0;  		goto get_sk; @@ -1925,7 +1931,7 @@ get_sk:  		if (sk->sk_family == st->family)  			return sk;  	} -	spin_unlock_bh(&ilb->lock); +	spin_unlock(&ilb->lock);  	st->offset = 0;  	if (++st->bucket < INET_LHTABLE_SIZE)  		goto get_head; @@ -2133,7 +2139,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)  	switch (st->state) {  	case TCP_SEQ_STATE_LISTENING:  		if (v != SEQ_START_TOKEN) -			spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock); +			spin_unlock(&tcp_hashinfo.listening_hash[st->bucket].lock);  		break;  	case TCP_SEQ_STATE_ESTABLISHED:  		if (v) @@ -2225,7 +2231,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)  	int state;  	if (icsk->icsk_pending == ICSK_TIME_RETRANS || -	    icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || +	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||  	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {  		timer_active	= 1;  		timer_expires	= icsk->icsk_timeout; @@ -2372,6 +2378,7 @@ struct proto tcp_prot = {  	.shutdown		= tcp_shutdown,  	.setsockopt		= tcp_setsockopt,  	.getsockopt		= tcp_getsockopt, +	.keepalive		= tcp_set_keepalive,  	.recvmsg		= tcp_recvmsg,  	.sendmsg		= tcp_sendmsg,  	.sendpage		= tcp_sendpage, @@ -2415,7 +2422,7 @@ static void __net_exit tcp_sk_exit(struct net *net)  static int __net_init tcp_sk_init(struct net *net)  { -	int res, cpu; +	int res, cpu, cnt;  	net->ipv4.tcp_sk = alloc_percpu(struct sock *);  	if (!net->ipv4.tcp_sk) @@ -2452,6 +2459,14 @@ static int __net_init tcp_sk_init(struct net *net)  	net->ipv4.sysctl_tcp_orphan_retries = 0;  	net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;  	net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX; +	net->ipv4.sysctl_tcp_tw_reuse = 0; + +	cnt = tcp_hashinfo.ehash_mask + 1; +	net->ipv4.tcp_death_row.sysctl_tw_recycle = 0; +	net->ipv4.tcp_death_row.sysctl_max_tw_buckets = (cnt + 1) / 2; +	net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo; + +	net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 256);  	return 0;  fail: @@ -2462,7 +2477,7 @@ fail:  static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)  { -	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET); +	inet_twsk_purge(&tcp_hashinfo, AF_INET);  }  static struct pernet_operations __net_initdata tcp_sk_ops = { @@ -2473,7 +2488,6 @@ static struct pernet_operations __net_initdata tcp_sk_ops = {  void __init tcp_v4_init(void)  { -	inet_hashinfo_init(&tcp_hashinfo);  	if (register_pernet_subsys(&tcp_sk_ops))  		panic("Failed to create the TCP control socket.\n");  }  | 
