summaryrefslogtreecommitdiff
path: root/net/mptcp/subflow.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-06-04 18:25:39 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-06-04 18:25:39 -0700
commit9d32fa5d74b148b1cba262c0c24b9a27a910909b (patch)
treef45f2db51b738797cb99fd1f3ae38fd74305a566 /net/mptcp/subflow.c
parent2cb26c15a247a2b2bc9de653773cf21d969bf570 (diff)
parent3822d0670c9d4342794d73e0d0e615322b40438e (diff)
Merge tag 'net-5.13-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Jakub Kicinski: "Networking fixes, including fixes from bpf, wireless, netfilter and wireguard trees. The bpf vs lockdown+audit fix is the most notable. Things haven't slowed down just yet, both in terms of regressions in current release and largish fixes for older code, but we usually see a slowdown only after -rc5. Current release - regressions: - virtio-net: fix page faults and crashes when XDP is enabled - mlx5e: fix HW timestamping with CQE compression, and make sure they are only allowed to coexist with capable devices - stmmac: - fix kernel panic due to NULL pointer dereference of mdio_bus_data - fix double clk unprepare when no PHY device is connected Current release - new code bugs: - mt76: a few fixes for the recent MT7921 devices and runtime power management Previous releases - regressions: - ice: - track AF_XDP ZC enabled queues in bitmap to fix copy mode Tx - fix allowing VF to request more/less queues via virtchnl - correct supported and advertised autoneg by using PHY capabilities - allow all LLDP packets from PF to Tx - kbuild: quote OBJCOPY var to avoid a pahole call break the build Previous releases - always broken: - bpf, lockdown, audit: fix buggy SELinux lockdown permission checks - mt76: address the recent FragAttack vulnerabilities not covered by generic fixes - ipv6: fix KASAN: slab-out-of-bounds Read in fib6_nh_flush_exceptions - Bluetooth: - fix the erroneous flush_work() order, to avoid double free - use correct lock to prevent UAF of hdev object - nfc: fix NULL ptr dereference in llcp_sock_getname() after failed connect - ieee802154: multiple fixes to error checking and return values - igb: fix XDP with PTP enabled - intel: add correct exception tracing for XDP - tls: fix use-after-free when TLS offload device goes down and back up - ipvs: ignore IP_VS_SVC_F_HASHED flag when adding service - netfilter: nft_ct: skip expectations for confirmed conntrack - mptcp: fix falling back to TCP in presence of out of order packets early in connection lifetime - wireguard: switch from O(n) to a O(1) algorithm for maintaining peers, fixing stalls and a large memory leak in the process Misc: - devlink: correct VIRTUAL port to not have phys_port attributes - Bluetooth: fix VIRTIO_ID_BT assigned number - net: return the correct errno code ENOBUF -> ENOMEM - wireguard: - peer: allocate in kmem_cache saving 25% on peer memory - do not use -O3" * tag 'net-5.13-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (91 commits) cxgb4: avoid link re-train during TC-MQPRIO configuration sch_htb: fix refcount leak in htb_parent_to_leaf_offload wireguard: allowedips: free empty intermediate nodes when removing single node wireguard: allowedips: allocate nodes in kmem_cache wireguard: allowedips: remove nodes in O(1) wireguard: allowedips: initialize list head in selftest wireguard: peer: allocate in kmem_cache wireguard: use synchronize_net rather than synchronize_rcu wireguard: do not use -O3 wireguard: selftests: make sure rp_filter is disabled on vethc wireguard: selftests: remove old conntrack kconfig value virtchnl: Add missing padding to virtchnl_proto_hdrs ice: Allow all LLDP packets from PF to Tx ice: report supported and advertised autoneg using PHY capabilities ice: handle the VF VSI rebuild failure ice: Fix VFR issues for AVF drivers that expect ATQLEN cleared ice: Fix allowing VF to request more/less queues via virtchnl virtio-net: fix for skb_over_panic inside big mode ipv6: Fix KASAN: slab-out-of-bounds Read in fib6_nh_flush_exceptions fib: Return the correct errno code ...
Diffstat (limited to 'net/mptcp/subflow.c')
-rw-r--r--net/mptcp/subflow.c79
1 files changed, 40 insertions, 39 deletions
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index bde6be77ea73..ef3d037f984a 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -630,21 +630,20 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
/* if the sk is MP_CAPABLE, we try to fetch the client key */
if (subflow_req->mp_capable) {
- if (TCP_SKB_CB(skb)->seq != subflow_req->ssn_offset + 1) {
- /* here we can receive and accept an in-window,
- * out-of-order pkt, which will not carry the MP_CAPABLE
- * opt even on mptcp enabled paths
- */
- goto create_msk;
- }
-
+ /* we can receive and accept an in-window, out-of-order pkt,
+ * which may not carry the MP_CAPABLE opt even on mptcp enabled
+ * paths: always try to extract the peer key, and fallback
+ * for packets missing it.
+ * Even OoO DSS packets coming legitly after dropped or
+ * reordered MPC will cause fallback, but we don't have other
+ * options.
+ */
mptcp_get_options(skb, &mp_opt);
if (!mp_opt.mp_capable) {
fallback = true;
goto create_child;
}
-create_msk:
new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req);
if (!new_msk)
fallback = true;
@@ -1012,21 +1011,11 @@ static bool subflow_check_data_avail(struct sock *ssk)
status = get_mapping_status(ssk, msk);
trace_subflow_check_data_avail(status, skb_peek(&ssk->sk_receive_queue));
- if (status == MAPPING_INVALID) {
- ssk->sk_err = EBADMSG;
- goto fatal;
- }
- if (status == MAPPING_DUMMY) {
- __mptcp_do_fallback(msk);
- skb = skb_peek(&ssk->sk_receive_queue);
- subflow->map_valid = 1;
- subflow->map_seq = READ_ONCE(msk->ack_seq);
- subflow->map_data_len = skb->len;
- subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq -
- subflow->ssn_offset;
- subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
- return true;
- }
+ if (unlikely(status == MAPPING_INVALID))
+ goto fallback;
+
+ if (unlikely(status == MAPPING_DUMMY))
+ goto fallback;
if (status != MAPPING_OK)
goto no_data;
@@ -1039,10 +1028,8 @@ static bool subflow_check_data_avail(struct sock *ssk)
* MP_CAPABLE-based mapping
*/
if (unlikely(!READ_ONCE(msk->can_ack))) {
- if (!subflow->mpc_map) {
- ssk->sk_err = EBADMSG;
- goto fatal;
- }
+ if (!subflow->mpc_map)
+ goto fallback;
WRITE_ONCE(msk->remote_key, subflow->remote_key);
WRITE_ONCE(msk->ack_seq, subflow->map_seq);
WRITE_ONCE(msk->can_ack, true);
@@ -1070,17 +1057,31 @@ static bool subflow_check_data_avail(struct sock *ssk)
no_data:
subflow_sched_work_if_closed(msk, ssk);
return false;
-fatal:
- /* fatal protocol error, close the socket */
- /* This barrier is coupled with smp_rmb() in tcp_poll() */
- smp_wmb();
- ssk->sk_error_report(ssk);
- tcp_set_state(ssk, TCP_CLOSE);
- subflow->reset_transient = 0;
- subflow->reset_reason = MPTCP_RST_EMPTCP;
- tcp_send_active_reset(ssk, GFP_ATOMIC);
- subflow->data_avail = 0;
- return false;
+
+fallback:
+ /* RFC 8684 section 3.7. */
+ if (subflow->mp_join || subflow->fully_established) {
+ /* fatal protocol error, close the socket.
+ * subflow_error_report() will introduce the appropriate barriers
+ */
+ ssk->sk_err = EBADMSG;
+ ssk->sk_error_report(ssk);
+ tcp_set_state(ssk, TCP_CLOSE);
+ subflow->reset_transient = 0;
+ subflow->reset_reason = MPTCP_RST_EMPTCP;
+ tcp_send_active_reset(ssk, GFP_ATOMIC);
+ subflow->data_avail = 0;
+ return false;
+ }
+
+ __mptcp_do_fallback(msk);
+ skb = skb_peek(&ssk->sk_receive_queue);
+ subflow->map_valid = 1;
+ subflow->map_seq = READ_ONCE(msk->ack_seq);
+ subflow->map_data_len = skb->len;
+ subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - subflow->ssn_offset;
+ subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
+ return true;
}
bool mptcp_subflow_data_available(struct sock *sk)