diff options
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r-- | net/ipv4/tcp_input.c | 170 |
1 files changed, 65 insertions, 105 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bab7f0493098..c5d7656beeee 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1952,6 +1952,7 @@ void tcp_enter_loss(struct sock *sk) !after(tp->high_seq, tp->snd_una) || (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) { tp->prior_ssthresh = tcp_current_ssthresh(sk); + tp->prior_cwnd = tp->snd_cwnd; tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); tcp_ca_event(sk, CA_EVENT_LOSS); tcp_init_undo(tp); @@ -4267,9 +4268,15 @@ static void tcp_sack_remove(struct tcp_sock *tp) tp->rx_opt.num_sacks = num_sacks; } +enum tcp_queue { + OOO_QUEUE, + RCV_QUEUE, +}; + /** * tcp_try_coalesce - try to merge skb to prior one * @sk: socket + * @dest: destination queue * @to: prior buffer * @from: buffer to add in queue * @fragstolen: pointer to boolean @@ -4281,6 +4288,7 @@ static void tcp_sack_remove(struct tcp_sock *tp) * Returns true if caller should free @from instead of queueing it */ static bool tcp_try_coalesce(struct sock *sk, + enum tcp_queue dest, struct sk_buff *to, struct sk_buff *from, bool *fragstolen) @@ -4302,6 +4310,15 @@ static bool tcp_try_coalesce(struct sock *sk, TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq; TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq; TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags; + + if (TCP_SKB_CB(from)->has_rxtstamp) { + TCP_SKB_CB(to)->has_rxtstamp = true; + if (dest == OOO_QUEUE) + TCP_SKB_CB(to)->swtstamp = TCP_SKB_CB(from)->swtstamp; + else + to->tstamp = from->tstamp; + } + return true; } @@ -4336,6 +4353,9 @@ static void tcp_ofo_queue(struct sock *sk) } p = rb_next(p); rb_erase(&skb->rbnode, &tp->out_of_order_queue); + /* Replace tstamp which was stomped by rbnode */ + if (TCP_SKB_CB(skb)->has_rxtstamp) + skb->tstamp = TCP_SKB_CB(skb)->swtstamp; if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) { SOCK_DEBUG(sk, "ofo packet was already received\n"); @@ -4347,7 +4367,8 @@ static void tcp_ofo_queue(struct sock *sk) TCP_SKB_CB(skb)->end_seq); tail = skb_peek_tail(&sk->sk_receive_queue); - eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen); + eaten = tail && tcp_try_coalesce(sk, RCV_QUEUE, + tail, skb, &fragstolen); tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN; if (!eaten) @@ -4401,6 +4422,10 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) return; } + /* Stash tstamp to avoid being stomped on by rbnode */ + if (TCP_SKB_CB(skb)->has_rxtstamp) + TCP_SKB_CB(skb)->swtstamp = skb->tstamp; + /* Disable header prediction. */ tp->pred_flags = 0; inet_csk_schedule_ack(sk); @@ -4428,7 +4453,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) /* In the typical case, we are adding an skb to the end of the list. * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup. */ - if (tcp_try_coalesce(sk, tp->ooo_last_skb, skb, &fragstolen)) { + if (tcp_try_coalesce(sk, OOO_QUEUE, tp->ooo_last_skb, + skb, &fragstolen)) { coalesce_done: tcp_grow_window(sk, skb); kfree_skb_partial(skb, fragstolen); @@ -4478,7 +4504,8 @@ coalesce_done: __kfree_skb(skb1); goto merge_right; } - } else if (tcp_try_coalesce(sk, skb1, skb, &fragstolen)) { + } else if (tcp_try_coalesce(sk, OOO_QUEUE, skb1, + skb, &fragstolen)) { goto coalesce_done; } p = &parent->rb_right; @@ -4529,7 +4556,8 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int __skb_pull(skb, hdrlen); eaten = (tail && - tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0; + tcp_try_coalesce(sk, RCV_QUEUE, tail, + skb, fragstolen)) ? 1 : 0; tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq); if (!eaten) { __skb_queue_tail(&sk->sk_receive_queue, skb); @@ -4591,8 +4619,8 @@ err: static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); - bool fragstolen = false; - int eaten = -1; + bool fragstolen; + int eaten; if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) { __kfree_skb(skb); @@ -4614,32 +4642,13 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) goto out_of_window; /* Ok. In sequence. In window. */ - if (tp->ucopy.task == current && - tp->copied_seq == tp->rcv_nxt && tp->ucopy.len && - sock_owned_by_user(sk) && !tp->urg_data) { - int chunk = min_t(unsigned int, skb->len, - tp->ucopy.len); - - __set_current_state(TASK_RUNNING); - - if (!skb_copy_datagram_msg(skb, 0, tp->ucopy.msg, chunk)) { - tp->ucopy.len -= chunk; - tp->copied_seq += chunk; - eaten = (chunk == skb->len); - tcp_rcv_space_adjust(sk); - } - } - - if (eaten <= 0) { queue_and_out: - if (eaten < 0) { - if (skb_queue_len(&sk->sk_receive_queue) == 0) - sk_forced_mem_schedule(sk, skb->truesize); - else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) - goto drop; - } - eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen); - } + if (skb_queue_len(&sk->sk_receive_queue) == 0) + sk_forced_mem_schedule(sk, skb->truesize); + else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) + goto drop; + + eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen); tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); if (skb->len) tcp_event_data_recv(sk, skb); @@ -5189,26 +5198,6 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *t } } -static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen) -{ - struct tcp_sock *tp = tcp_sk(sk); - int chunk = skb->len - hlen; - int err; - - if (skb_csum_unnecessary(skb)) - err = skb_copy_datagram_msg(skb, hlen, tp->ucopy.msg, chunk); - else - err = skb_copy_and_csum_datagram_msg(skb, hlen, tp->ucopy.msg); - - if (!err) { - tp->ucopy.len -= chunk; - tp->copied_seq += chunk; - tcp_rcv_space_adjust(sk); - } - - return err; -} - /* Accept RST for rcv_nxt - 1 after a FIN. * When tcp connections are abruptly terminated from Mac OSX (via ^C), a * FIN is sent followed by a RST packet. The RST is sent with the same @@ -5361,8 +5350,9 @@ discard: * tcp_data_queue when everything is OK. */ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, - const struct tcphdr *th, unsigned int len) + const struct tcphdr *th) { + unsigned int len = skb->len; struct tcp_sock *tp = tcp_sk(sk); tcp_mstamp_refresh(tp); @@ -5448,56 +5438,28 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, int eaten = 0; bool fragstolen = false; - if (tp->ucopy.task == current && - tp->copied_seq == tp->rcv_nxt && - len - tcp_header_len <= tp->ucopy.len && - sock_owned_by_user(sk)) { - __set_current_state(TASK_RUNNING); - - if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) { - /* Predicted packet is in window by definition. - * seq == rcv_nxt and rcv_wup <= rcv_nxt. - * Hence, check seq<=rcv_wup reduces to: - */ - if (tcp_header_len == - (sizeof(struct tcphdr) + - TCPOLEN_TSTAMP_ALIGNED) && - tp->rcv_nxt == tp->rcv_wup) - tcp_store_ts_recent(tp); - - tcp_rcv_rtt_measure_ts(sk, skb); - - __skb_pull(skb, tcp_header_len); - tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); - NET_INC_STATS(sock_net(sk), - LINUX_MIB_TCPHPHITSTOUSER); - eaten = 1; - } - } - if (!eaten) { - if (tcp_checksum_complete(skb)) - goto csum_error; + if (tcp_checksum_complete(skb)) + goto csum_error; - if ((int)skb->truesize > sk->sk_forward_alloc) - goto step5; + if ((int)skb->truesize > sk->sk_forward_alloc) + goto step5; - /* Predicted packet is in window by definition. - * seq == rcv_nxt and rcv_wup <= rcv_nxt. - * Hence, check seq<=rcv_wup reduces to: - */ - if (tcp_header_len == - (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) && - tp->rcv_nxt == tp->rcv_wup) - tcp_store_ts_recent(tp); + /* Predicted packet is in window by definition. + * seq == rcv_nxt and rcv_wup <= rcv_nxt. + * Hence, check seq<=rcv_wup reduces to: + */ + if (tcp_header_len == + (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) && + tp->rcv_nxt == tp->rcv_wup) + tcp_store_ts_recent(tp); - tcp_rcv_rtt_measure_ts(sk, skb); + tcp_rcv_rtt_measure_ts(sk, skb); - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS); - /* Bulk data transfer: receiver */ - eaten = tcp_queue_rcv(sk, skb, tcp_header_len, - &fragstolen); - } + /* Bulk data transfer: receiver */ + eaten = tcp_queue_rcv(sk, skb, tcp_header_len, + &fragstolen); tcp_event_data_recv(sk, skb); @@ -5591,7 +5553,6 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) __tcp_fast_path_on(tp, tp->snd_wnd); else tp->pred_flags = 0; - } static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, @@ -6306,9 +6267,9 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); struct sock *fastopen_sk = NULL; - struct dst_entry *dst = NULL; struct request_sock *req; bool want_cookie = false; + struct dst_entry *dst; struct flowi fl; /* TW buckets are converted to open requests without @@ -6358,6 +6319,10 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (tmp_opt.tstamp_ok) tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb); + dst = af_ops->route_req(sk, &fl, req); + if (!dst) + goto drop_and_free; + if (!want_cookie && !isn) { /* Kill the following clause, if you dislike this way. */ if (!net->ipv4.sysctl_tcp_syncookies && @@ -6378,11 +6343,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, isn = af_ops->init_seq(skb); } - if (!dst) { - dst = af_ops->route_req(sk, &fl, req); - if (!dst) - goto drop_and_free; - } tcp_ecn_create_request(req, skb, sk, dst); @@ -6398,7 +6358,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tcp_openreq_init_rwin(req, sk, dst); if (!want_cookie) { tcp_reqsk_record_syn(sk, req, skb); - fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst); + fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc); } if (fastopen_sk) { af_ops->send_synack(fastopen_sk, dst, &fl, req, |