summaryrefslogtreecommitdiff
path: root/net/mptcp
diff options
context:
space:
mode:
authorFlorian Westphal <fw@strlen.de>2020-05-16 10:46:19 +0200
committerDavid S. Miller <davem@davemloft.net>2020-05-17 12:35:34 -0700
commit72511aab95c94d7c0f03d0b7db5df47fdca059f6 (patch)
tree8c928725d7e634da7ad08971dd50d7915dd1d374 /net/mptcp
parentfb529e62d3f3e85001108213dc323c35f2765575 (diff)
mptcp: avoid blocking in tcp_sendpages
The transmit loop continues to xmit new data until an error is returned or all data was transmitted. For the blocking i/o case, this means that tcp_sendpages() may block on the subflow until more space becomes available, i.e. we end up sleeping with the mptcp socket lock held. Instead we should check if a different subflow is ready to be used. This restarts the subflow sk lookup when the tx operation succeeded and the tcp subflow can't accept more data or if tcp_sendpages indicates -EAGAIN on a blocking mptcp socket. In that case we also need to set the NOSPACE bit to make sure we get notified once memory becomes available. In case all subflows are busy, the existing logic will wait until a subflow is ready, releasing the mptcp socket lock while doing so. The mptcp worker already sets DONTWAIT, so no need to make changes there. v2: * set NOSPACE bit * add a comment to clarify that mptcp-sk sndbuf limits need to be checked as well. Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/mptcp')
-rw-r--r--net/mptcp/protocol.c35
1 files changed, 32 insertions, 3 deletions
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 75be8d662ac5..e97357066b21 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -590,7 +590,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
* access the skb after the sendpages call
*/
ret = do_tcp_sendpages(ssk, page, offset, psize,
- msg->msg_flags | MSG_SENDPAGE_NOTLAST);
+ msg->msg_flags | MSG_SENDPAGE_NOTLAST | MSG_DONTWAIT);
if (ret <= 0)
return ret;
@@ -713,6 +713,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
struct socket *ssock;
size_t copied = 0;
struct sock *ssk;
+ bool tx_ok;
long timeo;
if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL))
@@ -737,6 +738,7 @@ fallback:
return ret >= 0 ? ret + copied : (copied ? copied : ret);
}
+restart:
mptcp_clean_una(sk);
wait_for_sndbuf:
@@ -772,11 +774,18 @@ wait_for_sndbuf:
pr_debug("conn_list->subflow=%p", ssk);
lock_sock(ssk);
- while (msg_data_left(msg)) {
+ tx_ok = msg_data_left(msg);
+ while (tx_ok) {
ret = mptcp_sendmsg_frag(sk, ssk, msg, NULL, &timeo, &mss_now,
&size_goal);
- if (ret < 0)
+ if (ret < 0) {
+ if (ret == -EAGAIN && timeo > 0) {
+ mptcp_set_timeout(sk, ssk);
+ release_sock(ssk);
+ goto restart;
+ }
break;
+ }
if (ret == 0 && unlikely(__mptcp_needs_tcp_fallback(msk))) {
/* Can happen for passive sockets:
* 3WHS negotiated MPTCP, but first packet after is
@@ -791,11 +800,31 @@ wait_for_sndbuf:
copied += ret;
+ tx_ok = msg_data_left(msg);
+ if (!tx_ok)
+ break;
+
+ if (!sk_stream_memory_free(ssk)) {
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ tcp_push(ssk, msg->msg_flags, mss_now,
+ tcp_sk(ssk)->nonagle, size_goal);
+ mptcp_set_timeout(sk, ssk);
+ release_sock(ssk);
+ goto restart;
+ }
+
/* memory is charged to mptcp level socket as well, i.e.
* if msg is very large, mptcp socket may run out of buffer
* space. mptcp_clean_una() will release data that has
* been acked at mptcp level in the mean time, so there is
* a good chance we can continue sending data right away.
+ *
+ * Normally, when the tcp subflow can accept more data, then
+ * so can the MPTCP socket. However, we need to cope with
+ * peers that might lag behind in their MPTCP-level
+ * acknowledgements, i.e. data might have been acked at
+ * tcp level only. So, we must also check the MPTCP socket
+ * limits before we send more data.
*/
if (unlikely(!sk_stream_memory_free(sk))) {
tcp_push(ssk, msg->msg_flags, mss_now,