summaryrefslogtreecommitdiff
path: root/net/mptcp
diff options
context:
space:
mode:
Diffstat (limited to 'net/mptcp')
-rw-r--r--net/mptcp/Makefile2
-rw-r--r--net/mptcp/fastopen.c73
-rw-r--r--net/mptcp/options.c25
-rw-r--r--net/mptcp/pm_netlink.c126
-rw-r--r--net/mptcp/pm_userspace.c8
-rw-r--r--net/mptcp/protocol.c87
-rw-r--r--net/mptcp/protocol.h30
-rw-r--r--net/mptcp/sockopt.c45
-rw-r--r--net/mptcp/subflow.c168
-rw-r--r--net/mptcp/token.c4
10 files changed, 394 insertions, 174 deletions
diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile
index 6e7df47c9584..a3829ce548f9 100644
--- a/net/mptcp/Makefile
+++ b/net/mptcp/Makefile
@@ -2,7 +2,7 @@
obj-$(CONFIG_MPTCP) += mptcp.o
mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \
- mib.o pm_netlink.o sockopt.o pm_userspace.o
+ mib.o pm_netlink.o sockopt.o pm_userspace.o fastopen.o
obj-$(CONFIG_SYN_COOKIES) += syncookies.o
obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o
diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c
new file mode 100644
index 000000000000..d237d142171c
--- /dev/null
+++ b/net/mptcp/fastopen.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/* MPTCP Fast Open Mechanism
+ *
+ * Copyright (c) 2021-2022, Dmytro SHYTYI
+ */
+
+#include "protocol.h"
+
+void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow,
+ struct request_sock *req)
+{
+ struct sock *ssk = subflow->tcp_sock;
+ struct sock *sk = subflow->conn;
+ struct sk_buff *skb;
+ struct tcp_sock *tp;
+
+ tp = tcp_sk(ssk);
+
+ subflow->is_mptfo = 1;
+
+ skb = skb_peek(&ssk->sk_receive_queue);
+ if (WARN_ON_ONCE(!skb))
+ return;
+
+ /* dequeue the skb from sk receive queue */
+ __skb_unlink(skb, &ssk->sk_receive_queue);
+ skb_ext_reset(skb);
+ skb_orphan(skb);
+
+ /* We copy the fastopen data, but that don't belong to the mptcp sequence
+ * space, need to offset it in the subflow sequence, see mptcp_subflow_get_map_offset()
+ */
+ tp->copied_seq += skb->len;
+ subflow->ssn_offset += skb->len;
+
+ /* initialize a dummy sequence number, we will update it at MPC
+ * completion, if needed
+ */
+ MPTCP_SKB_CB(skb)->map_seq = -skb->len;
+ MPTCP_SKB_CB(skb)->end_seq = 0;
+ MPTCP_SKB_CB(skb)->offset = 0;
+ MPTCP_SKB_CB(skb)->has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp;
+
+ mptcp_data_lock(sk);
+
+ mptcp_set_owner_r(skb, sk);
+ __skb_queue_tail(&sk->sk_receive_queue, skb);
+
+ sk->sk_data_ready(sk);
+
+ mptcp_data_unlock(sk);
+}
+
+void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt)
+{
+ struct sock *sk = (struct sock *)msk;
+ struct sk_buff *skb;
+
+ mptcp_data_lock(sk);
+ skb = skb_peek_tail(&sk->sk_receive_queue);
+ if (skb) {
+ WARN_ON_ONCE(MPTCP_SKB_CB(skb)->end_seq);
+ pr_debug("msk %p moving seq %llx -> %llx end_seq %llx -> %llx", sk,
+ MPTCP_SKB_CB(skb)->map_seq, MPTCP_SKB_CB(skb)->map_seq + msk->ack_seq,
+ MPTCP_SKB_CB(skb)->end_seq, MPTCP_SKB_CB(skb)->end_seq + msk->ack_seq);
+ MPTCP_SKB_CB(skb)->map_seq += msk->ack_seq;
+ MPTCP_SKB_CB(skb)->end_seq += msk->ack_seq;
+ }
+
+ pr_debug("msk=%p ack_seq=%llx", msk, msk->ack_seq);
+ mptcp_data_unlock(sk);
+}
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 30d289044e71..5ded85e2c374 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -26,6 +26,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
{
u8 subtype = *ptr >> 4;
int expected_opsize;
+ u16 subopt;
u8 version;
u8 flags;
u8 i;
@@ -38,11 +39,15 @@ static void mptcp_parse_option(const struct sk_buff *skb,
expected_opsize = TCPOLEN_MPTCP_MPC_ACK_DATA;
else
expected_opsize = TCPOLEN_MPTCP_MPC_ACK;
+ subopt = OPTION_MPTCP_MPC_ACK;
} else {
- if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)
+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK) {
expected_opsize = TCPOLEN_MPTCP_MPC_SYNACK;
- else
+ subopt = OPTION_MPTCP_MPC_SYNACK;
+ } else {
expected_opsize = TCPOLEN_MPTCP_MPC_SYN;
+ subopt = OPTION_MPTCP_MPC_SYN;
+ }
}
/* Cfr RFC 8684 Section 3.3.0:
@@ -85,7 +90,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->deny_join_id0 = !!(flags & MPTCP_CAP_DENY_JOIN_ID0);
- mp_opt->suboptions |= OPTIONS_MPTCP_MPC;
+ mp_opt->suboptions |= subopt;
if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) {
mp_opt->sndr_key = get_unaligned_be64(ptr);
ptr += 8;
@@ -934,7 +939,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
subflow->mp_join && (mp_opt->suboptions & OPTIONS_MPTCP_MPJ) &&
!subflow->request_join)
tcp_send_ack(ssk);
- goto fully_established;
+ goto check_notify;
}
/* we must process OoO packets before the first subflow is fully
@@ -945,17 +950,20 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
if (TCP_SKB_CB(skb)->seq != subflow->ssn_offset + 1) {
if (subflow->mp_join)
goto reset;
+ if (subflow->is_mptfo && mp_opt->suboptions & OPTION_MPTCP_MPC_ACK)
+ goto set_fully_established;
return subflow->mp_capable;
}
- if (((mp_opt->suboptions & OPTION_MPTCP_DSS) && mp_opt->use_ack) ||
- ((mp_opt->suboptions & OPTION_MPTCP_ADD_ADDR) && !mp_opt->echo)) {
+ if (subflow->remote_key_valid &&
+ (((mp_opt->suboptions & OPTION_MPTCP_DSS) && mp_opt->use_ack) ||
+ ((mp_opt->suboptions & OPTION_MPTCP_ADD_ADDR) && !mp_opt->echo))) {
/* subflows are fully established as soon as we get any
* additional ack, including ADD_ADDR.
*/
subflow->fully_established = 1;
WRITE_ONCE(msk->fully_established, true);
- goto fully_established;
+ goto check_notify;
}
/* If the first established packet does not contain MP_CAPABLE + data
@@ -974,11 +982,12 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
if (mp_opt->deny_join_id0)
WRITE_ONCE(msk->pm.remote_deny_join_id0, true);
+set_fully_established:
if (unlikely(!READ_ONCE(msk->pm.server_side)))
pr_warn_once("bogus mpc option on established client sk");
mptcp_subflow_fully_established(subflow, mp_opt);
-fully_established:
+check_notify:
/* if the subflow is not already linked into the conn_list, we can't
* notify the PM: this subflow is still on the listener queue
* and the PM possibly acquiring the subflow lock could race with
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 9813ed0fde9b..2ea7eae43bdb 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -912,10 +912,14 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
*/
if (pernet->next_id == MPTCP_PM_MAX_ADDR_ID)
pernet->next_id = 1;
- if (pernet->addrs >= MPTCP_PM_ADDR_MAX)
+ if (pernet->addrs >= MPTCP_PM_ADDR_MAX) {
+ ret = -ERANGE;
goto out;
- if (test_bit(entry->addr.id, pernet->id_bitmap))
+ }
+ if (test_bit(entry->addr.id, pernet->id_bitmap)) {
+ ret = -EBUSY;
goto out;
+ }
/* do not insert duplicate address, differentiate on port only
* singled addresses
@@ -929,8 +933,10 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
* endpoint is an implicit one and the user-space
* did not provide an endpoint id
*/
- if (!(cur->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT))
+ if (!(cur->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)) {
+ ret = -EEXIST;
goto out;
+ }
if (entry->addr.id)
goto out;
@@ -1003,16 +1009,12 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
return err;
msk = mptcp_sk(entry->lsk->sk);
- if (!msk) {
- err = -EINVAL;
- goto out;
- }
+ if (!msk)
+ return -EINVAL;
ssock = __mptcp_nmpc_socket(msk);
- if (!ssock) {
- err = -EINVAL;
- goto out;
- }
+ if (!ssock)
+ return -EINVAL;
mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
@@ -1020,22 +1022,16 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
addrlen = sizeof(struct sockaddr_in6);
#endif
err = kernel_bind(ssock, (struct sockaddr *)&addr, addrlen);
- if (err) {
- pr_warn("kernel_bind error, err=%d", err);
- goto out;
- }
+ if (err)
+ return err;
err = kernel_listen(ssock, backlog);
- if (err) {
- pr_warn("kernel_listen error, err=%d", err);
- goto out;
- }
+ if (err)
+ return err;
- return 0;
+ mptcp_event_pm_listener(ssock->sk, MPTCP_EVENT_LISTENER_CREATED);
-out:
- sock_release(entry->lsk);
- return err;
+ return 0;
}
int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
@@ -1194,7 +1190,7 @@ static int mptcp_pm_parse_pm_addr_attr(struct nlattr *tb[],
if (!tb[MPTCP_PM_ADDR_ATTR_FAMILY]) {
if (!require_family)
- return err;
+ return 0;
NL_SET_ERR_MSG_ATTR(info->extack, attr,
"missing family");
@@ -1228,7 +1224,7 @@ static int mptcp_pm_parse_pm_addr_attr(struct nlattr *tb[],
if (tb[MPTCP_PM_ADDR_ATTR_PORT])
addr->port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT]));
- return err;
+ return 0;
}
int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info,
@@ -1327,7 +1323,7 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
}
- entry = kmalloc(sizeof(*entry), GFP_KERNEL_ACCOUNT);
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL_ACCOUNT);
if (!entry) {
GENL_SET_ERR_MSG(info, "can't allocate addr");
return -ENOMEM;
@@ -1337,23 +1333,22 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info)
if (entry->addr.port) {
ret = mptcp_pm_nl_create_listen_socket(skb->sk, entry);
if (ret) {
- GENL_SET_ERR_MSG(info, "create listen socket error");
- kfree(entry);
- return ret;
+ GENL_SET_ERR_MSG_FMT(info, "create listen socket error: %d", ret);
+ goto out_free;
}
}
ret = mptcp_pm_nl_append_new_local_addr(pernet, entry);
if (ret < 0) {
- GENL_SET_ERR_MSG(info, "too many addresses or duplicate one");
- if (entry->lsk)
- sock_release(entry->lsk);
- kfree(entry);
- return ret;
+ GENL_SET_ERR_MSG_FMT(info, "too many addresses or duplicate one: %d", ret);
+ goto out_free;
}
mptcp_nl_add_subflow_or_signal_addr(sock_net(skb->sk));
-
return 0;
+
+out_free:
+ __mptcp_pm_release_addr_entry(entry);
+ return ret;
}
int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id,
@@ -2099,7 +2094,7 @@ void mptcp_event_addr_removed(const struct mptcp_sock *msk, uint8_t id)
return;
nla_put_failure:
- kfree_skb(skb);
+ nlmsg_free(skb);
}
void mptcp_event_addr_announced(const struct sock *ssk,
@@ -2156,7 +2151,59 @@ void mptcp_event_addr_announced(const struct sock *ssk,
return;
nla_put_failure:
- kfree_skb(skb);
+ nlmsg_free(skb);
+}
+
+void mptcp_event_pm_listener(const struct sock *ssk,
+ enum mptcp_event_type event)
+{
+ const struct inet_sock *issk = inet_sk(ssk);
+ struct net *net = sock_net(ssk);
+ struct nlmsghdr *nlh;
+ struct sk_buff *skb;
+
+ if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET))
+ return;
+
+ skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb)
+ return;
+
+ nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0, event);
+ if (!nlh)
+ goto nla_put_failure;
+
+ if (nla_put_u16(skb, MPTCP_ATTR_FAMILY, ssk->sk_family))
+ goto nla_put_failure;
+
+ if (nla_put_be16(skb, MPTCP_ATTR_SPORT, issk->inet_sport))
+ goto nla_put_failure;
+
+ switch (ssk->sk_family) {
+ case AF_INET:
+ if (nla_put_in_addr(skb, MPTCP_ATTR_SADDR4, issk->inet_saddr))
+ goto nla_put_failure;
+ break;
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ case AF_INET6: {
+ const struct ipv6_pinfo *np = inet6_sk(ssk);
+
+ if (nla_put_in6_addr(skb, MPTCP_ATTR_SADDR6, &np->saddr))
+ goto nla_put_failure;
+ break;
+ }
+#endif
+ default:
+ WARN_ON_ONCE(1);
+ goto nla_put_failure;
+ }
+
+ genlmsg_end(skb, nlh);
+ mptcp_nl_mcast_send(net, skb, GFP_KERNEL);
+ return;
+
+nla_put_failure:
+ nlmsg_free(skb);
}
void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk,
@@ -2204,6 +2251,9 @@ void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk,
if (mptcp_event_sub_closed(skb, msk, ssk) < 0)
goto nla_put_failure;
break;
+ case MPTCP_EVENT_LISTENER_CREATED:
+ case MPTCP_EVENT_LISTENER_CLOSED:
+ break;
}
genlmsg_end(skb, nlh);
@@ -2211,7 +2261,7 @@ void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk,
return;
nla_put_failure:
- kfree_skb(skb);
+ nlmsg_free(skb);
}
static const struct genl_small_ops mptcp_pm_ops[] = {
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index 9e82250cbb70..65dcc55a8ad8 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -156,6 +156,7 @@ int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info)
if (addr_val.addr.id == 0 || !(addr_val.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) {
GENL_SET_ERR_MSG(info, "invalid addr id or flags");
+ err = -EINVAL;
goto announce_err;
}
@@ -282,6 +283,7 @@ int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info)
if (addr_l.id == 0) {
NL_SET_ERR_MSG_ATTR(info->extack, laddr, "missing local addr id");
+ err = -EINVAL;
goto create_err;
}
@@ -291,7 +293,7 @@ int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info)
goto create_err;
}
- sk = &msk->sk.icsk_inet.sk;
+ sk = (struct sock *)msk;
lock_sock(sk);
err = __mptcp_subflow_connect(sk, &addr_l, &addr_r);
@@ -395,15 +397,17 @@ int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info)
if (addr_l.family != addr_r.family) {
GENL_SET_ERR_MSG(info, "address families do not match");
+ err = -EINVAL;
goto destroy_err;
}
if (!addr_l.port || !addr_r.port) {
GENL_SET_ERR_MSG(info, "missing local or remote port");
+ err = -EINVAL;
goto destroy_err;
}
- sk = &msk->sk.icsk_inet.sk;
+ sk = (struct sock *)msk;
lock_sock(sk);
ssk = mptcp_nl_find_ssk(msk, &addr_l, &addr_r);
if (ssk) {
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 1dbc62537259..f6f93957275b 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -36,15 +36,6 @@ struct mptcp6_sock {
};
#endif
-struct mptcp_skb_cb {
- u64 map_seq;
- u64 end_seq;
- u32 offset;
- u8 has_rxtstamp:1;
-};
-
-#define MPTCP_SKB_CB(__skb) ((struct mptcp_skb_cb *)&((__skb)->cb[0]))
-
enum {
MPTCP_CMSG_TS = BIT(0),
MPTCP_CMSG_INQ = BIT(1),
@@ -200,7 +191,7 @@ static void mptcp_rfree(struct sk_buff *skb)
mptcp_rmem_uncharge(sk, len);
}
-static void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk)
+void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk)
{
skb_orphan(skb);
skb->sk = sk;
@@ -1602,7 +1593,7 @@ out:
__mptcp_check_send_data_fin(sk);
}
-static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
+static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool first)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct mptcp_sendmsg_info info = {
@@ -1611,7 +1602,6 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
struct mptcp_data_frag *dfrag;
struct sock *xmit_ssk;
int len, copied = 0;
- bool first = true;
info.flags = 0;
while ((dfrag = mptcp_send_head(sk))) {
@@ -1621,11 +1611,10 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
while (len > 0) {
int ret = 0;
- /* the caller already invoked the packet scheduler,
- * check for a different subflow usage only after
+ /* check for a different subflow usage only after
* spooling the first chunk of data
*/
- xmit_ssk = first ? ssk : mptcp_subflow_get_send(mptcp_sk(sk));
+ xmit_ssk = first ? ssk : mptcp_subflow_get_send(msk);
if (!xmit_ssk)
goto out;
if (xmit_ssk != ssk) {
@@ -1713,17 +1702,14 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int ret = 0;
long timeo;
- /* we don't support FASTOPEN yet */
- if (msg->msg_flags & MSG_FASTOPEN)
- return -EOPNOTSUPP;
-
/* silently ignore everything else */
- msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL;
+ msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_FASTOPEN;
lock_sock(sk);
ssock = __mptcp_nmpc_socket(msk);
- if (unlikely(ssock && inet_sk(ssock->sk)->defer_connect)) {
+ if (unlikely(ssock && (inet_sk(ssock->sk)->defer_connect ||
+ msg->msg_flags & MSG_FASTOPEN))) {
int copied_syn = 0;
ret = mptcp_sendmsg_fastopen(sk, ssock->sk, msg, len, &copied_syn);
@@ -2275,7 +2261,7 @@ bool __mptcp_retransmit_pending_data(struct sock *sk)
struct mptcp_data_frag *cur, *rtx_head;
struct mptcp_sock *msk = mptcp_sk(sk);
- if (__mptcp_check_fallback(mptcp_sk(sk)))
+ if (__mptcp_check_fallback(msk))
return false;
if (tcp_rtx_and_write_queues_empty(sk))
@@ -2369,6 +2355,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
tcp_set_state(ssk, TCP_CLOSE);
mptcp_subflow_queue_clean(ssk);
inet_csk_listen_stop(ssk);
+ mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED);
}
__tcp_close(ssk, 0);
@@ -2451,7 +2438,7 @@ static bool mptcp_check_close_timeout(const struct sock *sk)
static void mptcp_check_fastclose(struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow, *tmp;
- struct sock *sk = &msk->sk.icsk_inet.sk;
+ struct sock *sk = (struct sock *)msk;
if (likely(!READ_ONCE(msk->rcv_fastclose)))
return;
@@ -2613,7 +2600,7 @@ static void mptcp_do_fastclose(struct sock *sk)
static void mptcp_worker(struct work_struct *work)
{
struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work);
- struct sock *sk = &msk->sk.icsk_inet.sk;
+ struct sock *sk = (struct sock *)msk;
unsigned long fail_tout;
int state;
@@ -2725,6 +2712,8 @@ static int mptcp_init_sock(struct sock *sk)
if (ret)
return ret;
+ set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags);
+
/* fetch the ca name; do it outside __mptcp_init_sock(), so that clone will
* propagate the correct value
*/
@@ -2946,7 +2935,7 @@ cleanup:
sock_hold(sk);
pr_debug("msk=%p state=%d", sk, sk->sk_state);
- if (mptcp_sk(sk)->token)
+ if (msk->token)
mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL);
if (sk->sk_state == TCP_CLOSE) {
@@ -3005,8 +2994,8 @@ static int mptcp_disconnect(struct sock *sk, int flags)
mptcp_stop_timer(sk);
sk_stop_timer(sk, &sk->sk_timer);
- if (mptcp_sk(sk)->token)
- mptcp_event(MPTCP_EVENT_CLOSED, mptcp_sk(sk), NULL, GFP_KERNEL);
+ if (msk->token)
+ mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL);
/* msk->subflow is still intact, the following will not free the first
* subflow
@@ -3048,7 +3037,6 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC);
struct mptcp_sock *msk;
- u64 ack_seq;
if (!nsk)
return NULL;
@@ -3074,15 +3062,6 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
- if (mp_opt->suboptions & OPTIONS_MPTCP_MPC) {
- msk->can_ack = true;
- msk->remote_key = mp_opt->sndr_key;
- mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
- ack_seq++;
- WRITE_ONCE(msk->ack_seq, ack_seq);
- atomic64_set(&msk->rcv_wnd_sent, ack_seq);
- }
-
sock_reset_flag(nsk, SOCK_RCU_FREE);
/* will be fully established after successful MPC subflow creation */
inet_sk_state_store(nsk, TCP_SYN_RECV);
@@ -3217,16 +3196,10 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk)
if (!mptcp_send_head(sk))
return;
- if (!sock_owned_by_user(sk)) {
- struct sock *xmit_ssk = mptcp_subflow_get_send(mptcp_sk(sk));
-
- if (xmit_ssk == ssk)
- __mptcp_subflow_push_pending(sk, ssk);
- else if (xmit_ssk)
- mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk), MPTCP_DELEGATE_SEND);
- } else {
+ if (!sock_owned_by_user(sk))
+ __mptcp_subflow_push_pending(sk, ssk, false);
+ else
__set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->cb_flags);
- }
}
#define MPTCP_FLAGS_PROCESS_CTX_NEED (BIT(MPTCP_PUSH_PENDING) | \
@@ -3317,7 +3290,7 @@ void mptcp_subflow_process_delegated(struct sock *ssk)
if (test_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status)) {
mptcp_data_lock(sk);
if (!sock_owned_by_user(sk))
- __mptcp_subflow_push_pending(sk, ssk);
+ __mptcp_subflow_push_pending(sk, ssk, true);
else
__set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->cb_flags);
mptcp_data_unlock(sk);
@@ -3361,7 +3334,6 @@ void mptcp_finish_connect(struct sock *ssk)
struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk;
struct sock *sk;
- u64 ack_seq;
subflow = mptcp_subflow_ctx(ssk);
sk = subflow->conn;
@@ -3369,22 +3341,16 @@ void mptcp_finish_connect(struct sock *ssk)
pr_debug("msk=%p, token=%u", sk, subflow->token);
- mptcp_crypto_key_sha(subflow->remote_key, NULL, &ack_seq);
- ack_seq++;
- subflow->map_seq = ack_seq;
+ subflow->map_seq = subflow->iasn;
subflow->map_subflow_seq = 1;
/* the socket is not connected yet, no msk/subflow ops can access/race
* accessing the field below
*/
- WRITE_ONCE(msk->remote_key, subflow->remote_key);
WRITE_ONCE(msk->local_key, subflow->local_key);
WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
WRITE_ONCE(msk->snd_nxt, msk->write_seq);
- WRITE_ONCE(msk->ack_seq, ack_seq);
- WRITE_ONCE(msk->can_ack, 1);
WRITE_ONCE(msk->snd_una, msk->write_seq);
- atomic64_set(&msk->rcv_wnd_sent, ack_seq);
mptcp_pm_new_connection(msk, ssk, 0);
@@ -3682,6 +3648,8 @@ static int mptcp_listen(struct socket *sock, int backlog)
if (!err)
mptcp_copy_inaddrs(sock->sk, ssock->sk);
+ mptcp_event_pm_listener(ssock->sk, MPTCP_EVENT_LISTENER_CREATED);
+
unlock:
release_sock(sock->sk);
return err;
@@ -3706,6 +3674,8 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
struct mptcp_subflow_context *subflow;
struct sock *newsk = newsock->sk;
+ set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags);
+
lock_sock(newsk);
/* PM/worker can now acquire the first subflow socket
@@ -3919,12 +3889,6 @@ static const struct proto_ops mptcp_v6_stream_ops = {
static struct proto mptcp_v6_prot;
-static void mptcp_v6_destroy(struct sock *sk)
-{
- mptcp_destroy(sk);
- inet6_destroy_sock(sk);
-}
-
static struct inet_protosw mptcp_v6_protosw = {
.type = SOCK_STREAM,
.protocol = IPPROTO_MPTCP,
@@ -3940,7 +3904,6 @@ int __init mptcp_proto_v6_init(void)
mptcp_v6_prot = mptcp_prot;
strcpy(mptcp_v6_prot.name, "MPTCPv6");
mptcp_v6_prot.slab = NULL;
- mptcp_v6_prot.destroy = mptcp_v6_destroy;
mptcp_v6_prot.obj_size = sizeof(struct mptcp6_sock);
err = proto_register(&mptcp_v6_prot, 1);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 6a09ab99a12d..955fb3d88eb3 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -126,6 +126,15 @@
#define MPTCP_CONNECTED 6
#define MPTCP_RESET_SCHEDULER 7
+struct mptcp_skb_cb {
+ u64 map_seq;
+ u64 end_seq;
+ u32 offset;
+ u8 has_rxtstamp:1;
+};
+
+#define MPTCP_SKB_CB(__skb) ((struct mptcp_skb_cb *)&((__skb)->cb[0]))
+
static inline bool before64(__u64 seq1, __u64 seq2)
{
return (__s64)(seq1 - seq2) < 0;
@@ -467,17 +476,22 @@ struct mptcp_subflow_context {
send_fastclose : 1,
send_infinite_map : 1,
rx_eof : 1,
- can_ack : 1, /* only after processing the remote a key */
+ remote_key_valid : 1, /* received the peer key from */
disposable : 1, /* ctx can be free at ulp release time */
stale : 1, /* unable to snd/rcv data, do not use for xmit */
local_id_valid : 1, /* local_id is correctly initialized */
- valid_csum_seen : 1; /* at least one csum validated */
+ valid_csum_seen : 1, /* at least one csum validated */
+ is_mptfo : 1, /* subflow is doing TFO */
+ __unused : 8;
enum mptcp_data_avail data_avail;
u32 remote_nonce;
u64 thmac;
u32 local_nonce;
u32 remote_token;
- u8 hmac[MPTCPOPT_HMAC_LEN];
+ union {
+ u8 hmac[MPTCPOPT_HMAC_LEN]; /* MPJ subflow only */
+ u64 iasn; /* initial ack sequence number, MPC subflows only */
+ };
u8 local_id;
u8 remote_id;
u8 reset_seen:1;
@@ -603,7 +617,7 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net);
int mptcp_get_pm_type(const struct net *net);
void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk);
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
- struct mptcp_options_received *mp_opt);
+ const struct mptcp_options_received *mp_opt);
bool __mptcp_retransmit_pending_data(struct sock *sk);
void mptcp_check_and_set_pending(struct sock *sk);
void __mptcp_push_pending(struct sock *sk, unsigned int flags);
@@ -619,6 +633,7 @@ void mptcp_sock_graft(struct sock *sk, struct socket *parent);
struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
bool __mptcp_close(struct sock *sk, long timeout);
void mptcp_cancel_work(struct sock *sk);
+void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk);
bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
const struct mptcp_addr_info *b, bool use_port);
@@ -824,8 +839,15 @@ void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk,
const struct sock *ssk, gfp_t gfp);
void mptcp_event_addr_announced(const struct sock *ssk, const struct mptcp_addr_info *info);
void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id);
+void mptcp_event_pm_listener(const struct sock *ssk,
+ enum mptcp_event_type event);
bool mptcp_userspace_pm_active(const struct mptcp_sock *msk);
+void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt);
+void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow,
+ struct request_sock *req);
+
static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk)
{
return READ_ONCE(msk->pm.addr_signal) &
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index c7cb68c725b2..d4b1e6ec1b36 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -559,7 +559,10 @@ static bool mptcp_supported_sockopt(int level, int optname)
case TCP_NOTSENT_LOWAT:
case TCP_TX_DELAY:
case TCP_INQ:
+ case TCP_FASTOPEN:
case TCP_FASTOPEN_CONNECT:
+ case TCP_FASTOPEN_KEY:
+ case TCP_FASTOPEN_NO_COOKIE:
return true;
}
@@ -568,9 +571,6 @@ static bool mptcp_supported_sockopt(int level, int optname)
/* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_REPAIR_OPTIONS,
* TCP_REPAIR_WINDOW are not supported, better avoid this mess
*/
- /* TCP_FASTOPEN_KEY, TCP_FASTOPEN, TCP_FASTOPEN_NO_COOKIE,
- * are not supported fastopen is currently unsupported
- */
}
return false;
}
@@ -740,7 +740,7 @@ static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname,
}
release_sock(sk);
- return err;
+ return 0;
}
static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname,
@@ -757,29 +757,17 @@ static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname,
return -EOPNOTSUPP;
}
-static int mptcp_setsockopt_sol_tcp_defer(struct mptcp_sock *msk, sockptr_t optval,
- unsigned int optlen)
-{
- struct socket *listener;
-
- listener = __mptcp_nmpc_socket(msk);
- if (!listener)
- return 0; /* TCP_DEFER_ACCEPT does not fail */
-
- return tcp_setsockopt(listener->sk, SOL_TCP, TCP_DEFER_ACCEPT, optval, optlen);
-}
-
-static int mptcp_setsockopt_sol_tcp_fastopen_connect(struct mptcp_sock *msk, sockptr_t optval,
- unsigned int optlen)
+static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname,
+ sockptr_t optval, unsigned int optlen)
{
struct socket *sock;
- /* Limit to first subflow */
+ /* Limit to first subflow, before the connection establishment */
sock = __mptcp_nmpc_socket(msk);
if (!sock)
return -EINVAL;
- return tcp_setsockopt(sock->sk, SOL_TCP, TCP_FASTOPEN_CONNECT, optval, optlen);
+ return tcp_setsockopt(sock->sk, level, optname, optval, optlen);
}
static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
@@ -809,9 +797,15 @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
case TCP_NODELAY:
return mptcp_setsockopt_sol_tcp_nodelay(msk, optval, optlen);
case TCP_DEFER_ACCEPT:
- return mptcp_setsockopt_sol_tcp_defer(msk, optval, optlen);
+ /* See tcp.c: TCP_DEFER_ACCEPT does not fail */
+ mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen);
+ return 0;
+ case TCP_FASTOPEN:
case TCP_FASTOPEN_CONNECT:
- return mptcp_setsockopt_sol_tcp_fastopen_connect(msk, optval, optlen);
+ case TCP_FASTOPEN_KEY:
+ case TCP_FASTOPEN_NO_COOKIE:
+ return mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname,
+ optval, optlen);
}
return -EOPNOTSUPP;
@@ -994,7 +988,7 @@ static int mptcp_getsockopt_tcpinfo(struct mptcp_sock *msk, char __user *optval,
int __user *optlen)
{
struct mptcp_subflow_context *subflow;
- struct sock *sk = &msk->sk.icsk_inet.sk;
+ struct sock *sk = (struct sock *)msk;
unsigned int sfcount = 0, copied = 0;
struct mptcp_subflow_data sfd;
char __user *infoptr;
@@ -1085,8 +1079,8 @@ static void mptcp_get_sub_addrs(const struct sock *sk, struct mptcp_subflow_addr
static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *optval,
int __user *optlen)
{
- struct sock *sk = &msk->sk.icsk_inet.sk;
struct mptcp_subflow_context *subflow;
+ struct sock *sk = (struct sock *)msk;
unsigned int sfcount = 0, copied = 0;
struct mptcp_subflow_data sfd;
char __user *addrptr;
@@ -1173,7 +1167,10 @@ static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
case TCP_INFO:
case TCP_CC_INFO:
case TCP_DEFER_ACCEPT:
+ case TCP_FASTOPEN:
case TCP_FASTOPEN_CONNECT:
+ case TCP_FASTOPEN_KEY:
+ case TCP_FASTOPEN_NO_COOKIE:
return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname,
optval, optlen);
case TCP_INQ:
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 2159b5f9988f..d1d32a66ae3f 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -45,7 +45,6 @@ static void subflow_req_destructor(struct request_sock *req)
sock_put((struct sock *)subflow_req->msk);
mptcp_token_destroy_request(req);
- tcp_request_sock_ops.destructor(req);
}
static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2,
@@ -307,7 +306,48 @@ static struct dst_entry *subflow_v4_route_req(const struct sock *sk,
return NULL;
}
+static void subflow_prep_synack(const struct sock *sk, struct request_sock *req,
+ struct tcp_fastopen_cookie *foc,
+ enum tcp_synack_type synack_type)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct inet_request_sock *ireq = inet_rsk(req);
+
+ /* clear tstamp_ok, as needed depending on cookie */
+ if (foc && foc->len > -1)
+ ireq->tstamp_ok = 0;
+
+ if (synack_type == TCP_SYNACK_FASTOPEN)
+ mptcp_fastopen_subflow_synack_set_params(subflow, req);
+}
+
+static int subflow_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
+ struct flowi *fl,
+ struct request_sock *req,
+ struct tcp_fastopen_cookie *foc,
+ enum tcp_synack_type synack_type,
+ struct sk_buff *syn_skb)
+{
+ subflow_prep_synack(sk, req, foc, synack_type);
+
+ return tcp_request_sock_ipv4_ops.send_synack(sk, dst, fl, req, foc,
+ synack_type, syn_skb);
+}
+
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+static int subflow_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
+ struct flowi *fl,
+ struct request_sock *req,
+ struct tcp_fastopen_cookie *foc,
+ enum tcp_synack_type synack_type,
+ struct sk_buff *syn_skb)
+{
+ subflow_prep_synack(sk, req, foc, synack_type);
+
+ return tcp_request_sock_ipv6_ops.send_synack(sk, dst, fl, req, foc,
+ synack_type, syn_skb);
+}
+
static struct dst_entry *subflow_v6_route_req(const struct sock *sk,
struct sk_buff *skb,
struct flowi *fl,
@@ -392,11 +432,33 @@ static void mptcp_set_connected(struct sock *sk)
mptcp_data_unlock(sk);
}
+static void subflow_set_remote_key(struct mptcp_sock *msk,
+ struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt)
+{
+ /* active MPC subflow will reach here multiple times:
+ * at subflow_finish_connect() time and at 4th ack time
+ */
+ if (subflow->remote_key_valid)
+ return;
+
+ subflow->remote_key_valid = 1;
+ subflow->remote_key = mp_opt->sndr_key;
+ mptcp_crypto_key_sha(subflow->remote_key, NULL, &subflow->iasn);
+ subflow->iasn++;
+
+ WRITE_ONCE(msk->remote_key, subflow->remote_key);
+ WRITE_ONCE(msk->ack_seq, subflow->iasn);
+ WRITE_ONCE(msk->can_ack, true);
+ atomic64_set(&msk->rcv_wnd_sent, subflow->iasn);
+}
+
static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_options_received mp_opt;
struct sock *parent = subflow->conn;
+ struct mptcp_sock *msk;
subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);
@@ -404,6 +466,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
if (subflow->conn_finished)
return;
+ msk = mptcp_sk(parent);
mptcp_propagate_sndbuf(parent, sk);
subflow->rel_write_seq = 1;
subflow->conn_finished = 1;
@@ -416,19 +479,16 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
MPTCP_INC_STATS(sock_net(sk),
MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
mptcp_do_fallback(sk);
- pr_fallback(mptcp_sk(subflow->conn));
+ pr_fallback(msk);
goto fallback;
}
if (mp_opt.suboptions & OPTION_MPTCP_CSUMREQD)
- WRITE_ONCE(mptcp_sk(parent)->csum_enabled, true);
+ WRITE_ONCE(msk->csum_enabled, true);
if (mp_opt.deny_join_id0)
- WRITE_ONCE(mptcp_sk(parent)->pm.remote_deny_join_id0, true);
+ WRITE_ONCE(msk->pm.remote_deny_join_id0, true);
subflow->mp_capable = 1;
- subflow->can_ack = 1;
- subflow->remote_key = mp_opt.sndr_key;
- pr_debug("subflow=%p, remote_key=%llu", subflow,
- subflow->remote_key);
+ subflow_set_remote_key(msk, subflow, &mp_opt);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK);
mptcp_finish_connect(sk);
mptcp_set_connected(parent);
@@ -466,7 +526,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->mp_join = 1;
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
- if (subflow_use_different_dport(mptcp_sk(parent), sk)) {
+ if (subflow_use_different_dport(msk, sk)) {
pr_debug("synack inet_dport=%d %d",
ntohs(inet_sk(sk)->inet_dport),
ntohs(inet_sk(parent)->inet_dport));
@@ -474,7 +534,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
}
} else if (mptcp_check_fallback(sk)) {
fallback:
- mptcp_rcv_space_init(mptcp_sk(parent), sk);
+ mptcp_rcv_space_init(msk, sk);
mptcp_set_connected(parent);
}
return;
@@ -529,7 +589,7 @@ static int subflow_v6_rebuild_header(struct sock *sk)
}
#endif
-struct request_sock_ops mptcp_subflow_request_sock_ops;
+static struct request_sock_ops mptcp_subflow_v4_request_sock_ops __ro_after_init;
static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops __ro_after_init;
static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb)
@@ -542,7 +602,7 @@ static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb)
if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
goto drop;
- return tcp_conn_request(&mptcp_subflow_request_sock_ops,
+ return tcp_conn_request(&mptcp_subflow_v4_request_sock_ops,
&subflow_request_sock_ipv4_ops,
sk, skb);
drop:
@@ -550,7 +610,14 @@ drop:
return 0;
}
+static void subflow_v4_req_destructor(struct request_sock *req)
+{
+ subflow_req_destructor(req);
+ tcp_request_sock_ops.destructor(req);
+}
+
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+static struct request_sock_ops mptcp_subflow_v6_request_sock_ops __ro_after_init;
static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops __ro_after_init;
static struct inet_connection_sock_af_ops subflow_v6_specific __ro_after_init;
static struct inet_connection_sock_af_ops subflow_v6m_specific __ro_after_init;
@@ -573,15 +640,36 @@ static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb)
return 0;
}
- return tcp_conn_request(&mptcp_subflow_request_sock_ops,
+ return tcp_conn_request(&mptcp_subflow_v6_request_sock_ops,
&subflow_request_sock_ipv6_ops, sk, skb);
drop:
tcp_listendrop(sk);
return 0; /* don't send reset */
}
+
+static void subflow_v6_req_destructor(struct request_sock *req)
+{
+ subflow_req_destructor(req);
+ tcp6_request_sock_ops.destructor(req);
+}
+#endif
+
+struct request_sock *mptcp_subflow_reqsk_alloc(const struct request_sock_ops *ops,
+ struct sock *sk_listener,
+ bool attach_listener)
+{
+ if (ops->family == AF_INET)
+ ops = &mptcp_subflow_v4_request_sock_ops;
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ else if (ops->family == AF_INET6)
+ ops = &mptcp_subflow_v6_request_sock_ops;
#endif
+ return inet_reqsk_alloc(ops, sk_listener, attach_listener);
+}
+EXPORT_SYMBOL(mptcp_subflow_reqsk_alloc);
+
/* validate hmac received in third ACK */
static bool subflow_hmac_valid(const struct request_sock *req,
const struct mptcp_options_received *mp_opt)
@@ -637,14 +725,16 @@ static void subflow_drop_ctx(struct sock *ssk)
}
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
- struct mptcp_options_received *mp_opt)
+ const struct mptcp_options_received *mp_opt)
{
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
- subflow->remote_key = mp_opt->sndr_key;
+ subflow_set_remote_key(msk, subflow, mp_opt);
subflow->fully_established = 1;
- subflow->can_ack = 1;
WRITE_ONCE(msk->fully_established, true);
+
+ if (subflow->is_mptfo)
+ mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt);
}
static struct sock *subflow_syn_recv_sock(const struct sock *sk,
@@ -760,7 +850,7 @@ create_child:
/* with OoO packets we can reach here without ingress
* mpc option
*/
- if (mp_opt.suboptions & OPTIONS_MPTCP_MPC)
+ if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK)
mptcp_subflow_fully_established(ctx, &mp_opt);
} else if (ctx->mp_join) {
struct mptcp_sock *owner;
@@ -1198,16 +1288,8 @@ static bool subflow_check_data_avail(struct sock *ssk)
if (WARN_ON_ONCE(!skb))
goto no_data;
- /* if msk lacks the remote key, this subflow must provide an
- * MP_CAPABLE-based mapping
- */
- if (unlikely(!READ_ONCE(msk->can_ack))) {
- if (!subflow->mpc_map)
- goto fallback;
- WRITE_ONCE(msk->remote_key, subflow->remote_key);
- WRITE_ONCE(msk->ack_seq, subflow->map_seq);
- WRITE_ONCE(msk->can_ack, true);
- }
+ if (unlikely(!READ_ONCE(msk->can_ack)))
+ goto fallback;
old_ack = READ_ONCE(msk->ack_seq);
ack_seq = mptcp_subflow_get_mapped_dsn(subflow);
@@ -1480,6 +1562,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
mptcp_pm_get_flags_and_ifindex_by_id(msk, local_id,
&flags, &ifindex);
+ subflow->remote_key_valid = 1;
subflow->remote_key = msk->remote_key;
subflow->local_key = msk->local_key;
subflow->token = msk->token;
@@ -1602,7 +1685,9 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
/* kernel sockets do not by default acquire net ref, but TCP timer
* needs it.
+ * Update ns_tracker to current stack trace and refcounted tracker.
*/
+ __netns_tracker_free(net, &sf->sk->ns_tracker, false);
sf->sk->sk_net_refcnt = 1;
get_net_track(net, &sf->sk->ns_tracker, GFP_KERNEL);
sock_inuse_add(net, 1);
@@ -1871,6 +1956,7 @@ static void subflow_ulp_clone(const struct request_sock *req,
new_ctx->ssn_offset = subflow_req->ssn_offset;
new_ctx->mp_join = 1;
new_ctx->fully_established = 1;
+ new_ctx->remote_key_valid = 1;
new_ctx->backup = subflow_req->backup;
new_ctx->remote_id = subflow_req->remote_id;
new_ctx->token = subflow_req->token;
@@ -1904,7 +1990,6 @@ static struct tcp_ulp_ops subflow_ulp_ops __read_mostly = {
static int subflow_ops_init(struct request_sock_ops *subflow_ops)
{
subflow_ops->obj_size = sizeof(struct mptcp_subflow_request_sock);
- subflow_ops->slab_name = "request_sock_subflow";
subflow_ops->slab = kmem_cache_create(subflow_ops->slab_name,
subflow_ops->obj_size, 0,
@@ -1914,19 +1999,21 @@ static int subflow_ops_init(struct request_sock_ops *subflow_ops)
if (!subflow_ops->slab)
return -ENOMEM;
- subflow_ops->destructor = subflow_req_destructor;
-
return 0;
}
void __init mptcp_subflow_init(void)
{
- mptcp_subflow_request_sock_ops = tcp_request_sock_ops;
- if (subflow_ops_init(&mptcp_subflow_request_sock_ops) != 0)
- panic("MPTCP: failed to init subflow request sock ops\n");
+ mptcp_subflow_v4_request_sock_ops = tcp_request_sock_ops;
+ mptcp_subflow_v4_request_sock_ops.slab_name = "request_sock_subflow_v4";
+ mptcp_subflow_v4_request_sock_ops.destructor = subflow_v4_req_destructor;
+
+ if (subflow_ops_init(&mptcp_subflow_v4_request_sock_ops) != 0)
+ panic("MPTCP: failed to init subflow v4 request sock ops\n");
subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops;
subflow_request_sock_ipv4_ops.route_req = subflow_v4_route_req;
+ subflow_request_sock_ipv4_ops.send_synack = subflow_v4_send_synack;
subflow_specific = ipv4_specific;
subflow_specific.conn_request = subflow_v4_conn_request;
@@ -1938,8 +2025,23 @@ void __init mptcp_subflow_init(void)
tcp_prot_override.release_cb = tcp_release_cb_override;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ /* In struct mptcp_subflow_request_sock, we assume the TCP request sock
+ * structures for v4 and v6 have the same size. It should not changed in
+ * the future but better to make sure to be warned if it is no longer
+ * the case.
+ */
+ BUILD_BUG_ON(sizeof(struct tcp_request_sock) != sizeof(struct tcp6_request_sock));
+
+ mptcp_subflow_v6_request_sock_ops = tcp6_request_sock_ops;
+ mptcp_subflow_v6_request_sock_ops.slab_name = "request_sock_subflow_v6";
+ mptcp_subflow_v6_request_sock_ops.destructor = subflow_v6_req_destructor;
+
+ if (subflow_ops_init(&mptcp_subflow_v6_request_sock_ops) != 0)
+ panic("MPTCP: failed to init subflow v6 request sock ops\n");
+
subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops;
subflow_request_sock_ipv6_ops.route_req = subflow_v6_route_req;
+ subflow_request_sock_ipv6_ops.send_synack = subflow_v6_send_synack;
subflow_v6_specific = ipv6_specific;
subflow_v6_specific.conn_request = subflow_v6_conn_request;
diff --git a/net/mptcp/token.c b/net/mptcp/token.c
index f52ee7b26aed..65430f314a68 100644
--- a/net/mptcp/token.c
+++ b/net/mptcp/token.c
@@ -287,8 +287,8 @@ EXPORT_SYMBOL_GPL(mptcp_token_get_sock);
* This function returns the first mptcp connection structure found inside the
* token container starting from the specified position, or NULL.
*
- * On successful iteration, the iterator is move to the next position and the
- * the acquires a reference to the returned socket.
+ * On successful iteration, the iterator is moved to the next position and
+ * a reference to the returned socket is acquired.
*/
struct mptcp_sock *mptcp_token_iter_next(const struct net *net, long *s_slot,
long *s_num)