summaryrefslogtreecommitdiff
path: root/net/mptcp/protocol.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-03-17 13:31:16 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-03-17 13:31:16 -0700
commit478a351ce0d69cef2d2bf2a686a09b356b63a66c (patch)
tree42a6b550a0ae321799fc81f68e6548c532283ccc /net/mptcp/protocol.c
parent8d3c682a5e3d9dfc2448ecbb22f4cd48359b9e21 (diff)
parentf5e305e63b035a1782a666a6535765f80bb2dca3 (diff)
Merge tag 'net-6.3-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Jakub Kicinski: "Including fixes from netfilter, wifi and ipsec. A little more changes than usual, but it's pretty normal for us that the rc3/rc4 PRs are oversized as people start testing in earnest. Possibly an extra boost from people deploying the 6.1 LTS but that's more of an unscientific hunch. Current release - regressions: - phy: mscc: fix deadlock in phy_ethtool_{get,set}_wol() - virtio: vsock: don't use skbuff state to account credit - virtio: vsock: don't drop skbuff on copy failure - virtio_net: fix page_to_skb() miscalculating the memory size Current release - new code bugs: - eth: correct xdp_features after device reconfig - wifi: nl80211: fix the puncturing bitmap policy - net/mlx5e: flower: - fix raw counter initialization - fix missing error code - fix cloned flow attribute - ipa: - fix some register validity checks - fix a surprising number of bad offsets - kill FILT_ROUT_CACHE_CFG IPA register Previous releases - regressions: - tcp: fix bind() conflict check for dual-stack wildcard address - veth: fix use after free in XDP_REDIRECT when skb headroom is small - ipv4: fix incorrect table ID in IOCTL path - ipvlan: make skb->skb_iif track skb->dev for l3s mode - mptcp: - fix possible deadlock in subflow_error_report - fix UaFs when destroying unaccepted and listening sockets - dsa: mv88e6xxx: fix max_mtu of 1492 on 6165, 6191, 6220, 6250, 6290 Previous releases - always broken: - tcp: tcp_make_synack() can be called from process context, don't assume preemption is disabled when updating stats - netfilter: correct length for loading protocol registers - virtio_net: add checking sq is full inside xdp xmit - bonding: restore IFF_MASTER/SLAVE flags on bond enslave Ethertype change - phy: nxp-c45-tja11xx: fix MII_BASIC_CONFIG_REV bit number - eth: i40e: fix crash during reboot when adapter is in recovery mode - eth: ice: avoid deadlock on rtnl lock when auxiliary device plug/unplug meets bonding - dsa: mt7530: - remove now incorrect comment regarding port 5 - set PLL frequency and trgmii only when trgmii is used - eth: mtk_eth_soc: reset PCS state when changing interface types Misc: - ynl: another license adjustment - move the TCA_EXT_WARN_MSG attribute for tc action" * tag 'net-6.3-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (108 commits) selftests: bonding: add tests for ether type changes bonding: restore bond's IFF_SLAVE flag if a non-eth dev enslave fails bonding: restore IFF_MASTER/SLAVE flags on bond enslave ether type change net: renesas: rswitch: Fix GWTSDIE register handling net: renesas: rswitch: Fix the output value of quote from rswitch_rx() ethernet: sun: add check for the mdesc_grab() net: ipa: fix some register validity checks net: ipa: kill FILT_ROUT_CACHE_CFG IPA register net: ipa: add two missing declarations net: ipa: reg: include <linux/bug.h> net: xdp: don't call notifiers during driver init net/sched: act_api: add specific EXT_WARN_MSG for tc action Revert "net/sched: act_api: move TCA_EXT_WARN_MSG to the correct hierarchy" net: dsa: microchip: fix RGMII delay configuration on KSZ8765/KSZ8794/KSZ8795 ynl: make the tooling check the license ynl: broaden the license even more tools: ynl: make definitions optional again hsr: ratelimit only when errors are printed qed/qed_mng_tlv: correctly zero out ->min instead of ->hour selftests: net: devlink_port_split.py: skip test if no suitable device available ...
Diffstat (limited to 'net/mptcp/protocol.c')
-rw-r--r--net/mptcp/protocol.c64
1 files changed, 32 insertions, 32 deletions
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 3ad9c46202fc..60b23b2716c4 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -825,7 +825,6 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
if (sk->sk_socket && !ssk->sk_socket)
mptcp_sock_graft(ssk, sk->sk_socket);
- mptcp_propagate_sndbuf((struct sock *)msk, ssk);
mptcp_sockopt_sync_locked(msk, ssk);
return true;
}
@@ -2343,7 +2342,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
goto out;
}
- sock_orphan(ssk);
subflow->disposable = 1;
/* if ssk hit tcp_done(), tcp_cleanup_ulp() cleared the related ops
@@ -2351,15 +2349,25 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
* reference owned by msk;
*/
if (!inet_csk(ssk)->icsk_ulp_ops) {
+ WARN_ON_ONCE(!sock_flag(ssk, SOCK_DEAD));
kfree_rcu(subflow, rcu);
+ } else if (msk->in_accept_queue && msk->first == ssk) {
+ /* if the first subflow moved to a close state, e.g. due to
+ * incoming reset and we reach here before inet_child_forget()
+ * the TCP stack could later try to close it via
+ * inet_csk_listen_stop(), or deliver it to the user space via
+ * accept().
+ * We can't delete the subflow - or risk a double free - nor let
+ * the msk survive - or will be leaked in the non accept scenario:
+ * fallback and let TCP cope with the subflow cleanup.
+ */
+ WARN_ON_ONCE(sock_flag(ssk, SOCK_DEAD));
+ mptcp_subflow_drop_ctx(ssk);
} else {
/* otherwise tcp will dispose of the ssk and subflow ctx */
- if (ssk->sk_state == TCP_LISTEN) {
- tcp_set_state(ssk, TCP_CLOSE);
- mptcp_subflow_queue_clean(sk, ssk);
- inet_csk_listen_stop(ssk);
+ if (ssk->sk_state == TCP_LISTEN)
mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED);
- }
+
__tcp_close(ssk, 0);
/* close acquired an extra ref */
@@ -2399,9 +2407,10 @@ static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu)
return 0;
}
-static void __mptcp_close_subflow(struct mptcp_sock *msk)
+static void __mptcp_close_subflow(struct sock *sk)
{
struct mptcp_subflow_context *subflow, *tmp;
+ struct mptcp_sock *msk = mptcp_sk(sk);
might_sleep();
@@ -2415,7 +2424,15 @@ static void __mptcp_close_subflow(struct mptcp_sock *msk)
if (!skb_queue_empty_lockless(&ssk->sk_receive_queue))
continue;
- mptcp_close_ssk((struct sock *)msk, ssk, subflow);
+ mptcp_close_ssk(sk, ssk, subflow);
+ }
+
+ /* if the MPC subflow has been closed before the msk is accepted,
+ * msk will never be accept-ed, close it now
+ */
+ if (!msk->first && msk->in_accept_queue) {
+ sock_set_flag(sk, SOCK_DEAD);
+ inet_sk_state_store(sk, TCP_CLOSE);
}
}
@@ -2624,6 +2641,9 @@ static void mptcp_worker(struct work_struct *work)
__mptcp_check_send_data_fin(sk);
mptcp_check_data_fin(sk);
+ if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
+ __mptcp_close_subflow(sk);
+
/* There is no point in keeping around an orphaned sk timedout or
* closed, but we need the msk around to reply to incoming DATA_FIN,
* even if it is orphaned and in FIN_WAIT2 state
@@ -2639,9 +2659,6 @@ static void mptcp_worker(struct work_struct *work)
}
}
- if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
- __mptcp_close_subflow(msk);
-
if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
__mptcp_retrans(sk);
@@ -3079,6 +3096,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
msk->local_key = subflow_req->local_key;
msk->token = subflow_req->token;
msk->subflow = NULL;
+ msk->in_accept_queue = 1;
WRITE_ONCE(msk->fully_established, false);
if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD)
WRITE_ONCE(msk->csum_enabled, true);
@@ -3096,8 +3114,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
security_inet_csk_clone(nsk, req);
bh_unlock_sock(nsk);
- /* keep a single reference */
- __sock_put(nsk);
+ /* note: the newly allocated socket refcount is 2 now */
return nsk;
}
@@ -3153,8 +3170,6 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
goto out;
}
- /* acquire the 2nd reference for the owning socket */
- sock_hold(new_mptcp_sock);
newsk = new_mptcp_sock;
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
} else {
@@ -3705,25 +3720,10 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
struct sock *newsk = newsock->sk;
set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags);
+ msk->in_accept_queue = 0;
lock_sock(newsk);
- /* PM/worker can now acquire the first subflow socket
- * lock without racing with listener queue cleanup,
- * we can notify it, if needed.
- *
- * Even if remote has reset the initial subflow by now
- * the refcnt is still at least one.
- */
- subflow = mptcp_subflow_ctx(msk->first);
- list_add(&subflow->node, &msk->conn_list);
- sock_hold(msk->first);
- if (mptcp_is_fully_established(newsk))
- mptcp_pm_fully_established(msk, msk->first, GFP_KERNEL);
-
- mptcp_rcv_space_init(msk, msk->first);
- mptcp_propagate_sndbuf(newsk, msk->first);
-
/* set ssk->sk_socket of accept()ed flows to mptcp socket.
* This is needed so NOSPACE flag can be set from tcp stack.
*/