summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
author: Subash Abhinov Kasiviswanathan <quic_subashab@quicinc.com> 2024-07-26 13:41:05 -0700
committer: David S. Miller <davem@davemloft.net> 2024-07-29 11:31:50 +0100
commit: 05f76b2d634e65ab34472802d9b142ea9e03f74e (patch)
tree: dff5d5403fa979a2049f3cb7cf5bb67190d47eb5 /net
parent: e96a79b19a8499ce2f22ccf0e6b0192e9dcff001 (diff)
tcp: Adjust clamping window for applications specifying SO_RCVBUF
tp->scaling_ratio is not updated based on skb->len/skb->truesize once SO_RCVBUF is set, which leads to the maximum window scaling being 25% of rcvbuf after commit dfa2f0483360 ("tcp: get rid of sysctl_tcp_adv_win_scale") and 50% of rcvbuf after commit 697a6c8cec03 ("tcp: increase the default TCP scaling ratio"). The 50% value tries to emulate the behavior of older kernels using sysctl_tcp_adv_win_scale with its default value.

Systems which were using different values of sysctl_tcp_adv_win_scale in older kernels ended up seeing reduced download speeds in certain cases, as covered in https://lists.openwall.net/netdev/2024/05/15/13. While the sysctl scheme is no longer acceptable, the value of 50% is a bit conservative when the skb->len/skb->truesize ratio is later determined to be ~0.66.

Applications not specifying SO_RCVBUF update the window scaling and the receive buffer every time data is copied to userspace. This computation is now also used for applications setting SO_RCVBUF, to update the maximum window scaling while ensuring that the receive buffer stays within the application-specified limit.

Fixes: dfa2f0483360 ("tcp: get rid of sysctl_tcp_adv_win_scale")
Signed-off-by: Sean Tranchetti <quic_stranche@quicinc.com>
Signed-off-by: Subash Abhinov Kasiviswanathan <quic_subashab@quicinc.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- net/ipv4/tcp_input.c 23
1 files changed, 16 insertions, 7 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 454362e359da..e2b9583ed96a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -754,8 +754,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
* <prev RTT . ><current RTT .. ><next RTT .... >
*/
- if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
- !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf)) {
u64 rcvwin, grow;
int rcvbuf;
@@ -771,12 +770,22 @@ void tcp_rcv_space_adjust(struct sock *sk)
rcvbuf = min_t(u64, tcp_space_from_win(sk, rcvwin),
READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
- if (rcvbuf > sk->sk_rcvbuf) {
- WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
+ if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
+ if (rcvbuf > sk->sk_rcvbuf) {
+ WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
- /* Make the window clamp follow along. */
- WRITE_ONCE(tp->window_clamp,
- tcp_win_from_space(sk, rcvbuf));
+ /* Make the window clamp follow along. */
+ WRITE_ONCE(tp->window_clamp,
+ tcp_win_from_space(sk, rcvbuf));
+ }
+ } else {
+ /* Make the window clamp follow along while being bounded
+ * by SO_RCVBUF.
+ */
+ int clamp = tcp_win_from_space(sk, min(rcvbuf, sk->sk_rcvbuf));
+
+ if (clamp > tp->window_clamp)
+ WRITE_ONCE(tp->window_clamp, clamp);
}
}
tp->rcvq_space.space = copied;