diff options
author | Eric Dumazet <edumazet@google.com> | 2018-10-15 09:37:53 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-10-15 22:56:42 -0700 |
commit | 76a9ebe811fb3d0605cb084f1ae6be5610541865 (patch) | |
tree | 1088c87b2590940390c0ee46df04ef6c951b7eae /net/ipv4/tcp_output.c | |
parent | 5f6188a8003d080e3753b8f14f4a5a2325ae1ff6 (diff) |
net: extend sk_pacing_rate to unsigned long
sk_pacing_rate has beed introduced as a u32 field in 2013,
effectively limiting per flow pacing to 34Gbit.
We believe it is time to allow TCP to pace high speed flows
on 64bit hosts, as we now can reach 100Gbit on one TCP flow.
This patch adds no cost for 32bit kernels.
The tcpi_pacing_rate and tcpi_max_pacing_rate were already
exported as 64bit, so iproute2/ss command require no changes.
Unfortunately the SO_MAX_PACING_RATE socket option will stay
32bit and we will need to add a new option to let applications
control high pacing rates.
State Recv-Q Send-Q Local Address:Port Peer Address:Port
ESTAB 0 1787144 10.246.9.76:49992 10.246.9.77:36741
timer:(on,003ms,0) ino:91863 sk:2 <->
skmem:(r0,rb540000,t66440,tb2363904,f605944,w1822984,o0,bl0,d0)
ts sack bbr wscale:8,8 rto:201 rtt:0.057/0.006 mss:1448
rcvmss:536 advmss:1448
cwnd:138 ssthresh:178 bytes_acked:256699822585 segs_out:177279177
segs_in:3916318 data_segs_out:177279175
bbr:(bw:31276.8Mbps,mrtt:0,pacing_gain:1.25,cwnd_gain:2)
send 28045.5Mbps lastrcv:73333
pacing_rate 38705.0Mbps delivery_rate 22997.6Mbps
busy:73333ms unacked:135 retrans:0/157 rcv_space:14480
notsent:2085120 minrtt:0.013
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r-- | net/ipv4/tcp_output.c | 19 |
1 files changed, 11 insertions, 8 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f14df66a0c85..f4aa4109334a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -991,14 +991,14 @@ static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb) skb->skb_mstamp_ns = tp->tcp_wstamp_ns; if (sk->sk_pacing_status != SK_PACING_NONE) { - u32 rate = sk->sk_pacing_rate; + unsigned long rate = sk->sk_pacing_rate; /* Original sch_fq does not pace first 10 MSS * Note that tp->data_segs_out overflows after 2^32 packets, * this is a minor annoyance. */ - if (rate != ~0U && rate && tp->data_segs_out >= 10) { - tp->tcp_wstamp_ns += div_u64((u64)skb->len * NSEC_PER_SEC, rate); + if (rate != ~0UL && rate && tp->data_segs_out >= 10) { + tp->tcp_wstamp_ns += div64_ul((u64)skb->len * NSEC_PER_SEC, rate); tcp_internal_pacing(sk); } @@ -1704,8 +1704,9 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, { u32 bytes, segs; - bytes = min(sk->sk_pacing_rate >> sk->sk_pacing_shift, - sk->sk_gso_max_size - 1 - MAX_TCP_HEADER); + bytes = min_t(unsigned long, + sk->sk_pacing_rate >> sk->sk_pacing_shift, + sk->sk_gso_max_size - 1 - MAX_TCP_HEADER); /* Goal is to send at least one packet per ms, * not one big TSO packet every 100 ms. @@ -2198,10 +2199,12 @@ static bool tcp_pacing_check(const struct sock *sk) static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, unsigned int factor) { - unsigned int limit; + unsigned long limit; - limit = max(2 * skb->truesize, sk->sk_pacing_rate >> sk->sk_pacing_shift); - limit = min_t(u32, limit, + limit = max_t(unsigned long, + 2 * skb->truesize, + sk->sk_pacing_rate >> sk->sk_pacing_shift); + limit = min_t(unsigned long, limit, sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes); limit <<= factor; |