diff options
author | David S. Miller <davem@davemloft.net> | 2022-07-06 12:50:27 +0100 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2022-07-06 12:50:27 +0100 |
commit | ae9fdf6cb4da4265bdc3a574d06eaad02a7f669a (patch) | |
tree | 4ebc6effcbc78352e322fc1ac7a78573c3d06ae7 | |
parent | 44d632d5dde2514b414bd6344918d68dacd8fe6f (diff) | |
parent | d2d21f175f1f9580eb5681f5b476c8d7a0a3c895 (diff) |
Merge branch 'mptcp-path-manager-fixes'
Mat Martineau says:
====================
mptcp: Path manager fixes for 5.19
The MPTCP userspace path manager is new in 5.19, and these patches fix
some issues in that new code.
Patches 1-3 fix path manager locking issues.
Patches 4 and 5 allow userspace path managers to change the priority of
established subflows using the existing MPTCP_PM_CMD_SET_FLAGS generic
netlink command, and include the corresponding selftest update.
Patches 6 and 7 fix accounting of available endpoint IDs and the
MPTCP_MIB_RMSUBFLOW counter.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | net/mptcp/options.c | 3 | ||||
-rw-r--r-- | net/mptcp/pm_netlink.c | 46 | ||||
-rw-r--r-- | net/mptcp/pm_userspace.c | 51 | ||||
-rw-r--r-- | net/mptcp/protocol.c | 9 | ||||
-rw-r--r-- | net/mptcp/protocol.h | 9 | ||||
-rw-r--r-- | tools/testing/selftests/net/mptcp/pm_nl_ctl.c | 73 | ||||
-rwxr-xr-x | tools/testing/selftests/net/mptcp/userspace_pm.sh | 32 |
7 files changed, 192 insertions, 31 deletions
diff --git a/net/mptcp/options.c b/net/mptcp/options.c index aead331866a0..bd8f0f425be4 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -1584,6 +1584,9 @@ mp_rst: *ptr++ = mptcp_option(MPTCPOPT_MP_PRIO, TCPOLEN_MPTCP_PRIO, opts->backup, TCPOPT_NOP); + + MPTCP_INC_STATS(sock_net((const struct sock *)tp), + MPTCP_MIB_MPPRIOTX); } mp_capable_done: diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index e099f2a12504..7c7395b58944 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -717,9 +717,10 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk) } } -static int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, - struct mptcp_addr_info *addr, - u8 bkup) +int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, + struct mptcp_addr_info *addr, + struct mptcp_addr_info *rem, + u8 bkup) { struct mptcp_subflow_context *subflow; @@ -727,24 +728,29 @@ static int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - struct sock *sk = (struct sock *)msk; - struct mptcp_addr_info local; + struct mptcp_addr_info local, remote; + bool slow; local_address((struct sock_common *)ssk, &local); if (!mptcp_addresses_equal(&local, addr, addr->port)) continue; + if (rem && rem->family != AF_UNSPEC) { + remote_address((struct sock_common *)ssk, &remote); + if (!mptcp_addresses_equal(&remote, rem, rem->port)) + continue; + } + + slow = lock_sock_fast(ssk); if (subflow->backup != bkup) msk->last_snd = NULL; subflow->backup = bkup; subflow->send_mp_prio = 1; subflow->request_bkup = bkup; - __MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPPRIOTX); - spin_unlock_bh(&msk->pm.lock); pr_debug("send ack for mp_prio"); - mptcp_subflow_send_ack(ssk); - spin_lock_bh(&msk->pm.lock); + __mptcp_subflow_send_ack(ssk); + unlock_sock_fast(ssk, slow); return 0; } @@ -801,7 +807,8 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, removed = true; 
__MPTCP_INC_STATS(sock_net(sk), rm_type); } - __set_bit(rm_list->ids[i], msk->pm.id_avail_bitmap); + if (rm_type == MPTCP_MIB_RMSUBFLOW) + __set_bit(rm_list->ids[i], msk->pm.id_avail_bitmap); if (!removed) continue; @@ -1816,8 +1823,10 @@ static void mptcp_pm_nl_fullmesh(struct mptcp_sock *msk, list.ids[list.nr++] = addr->id; + spin_lock_bh(&msk->pm.lock); mptcp_pm_nl_rm_subflow_received(msk, &list); mptcp_pm_create_subflow_or_signal_addr(msk); + spin_unlock_bh(&msk->pm.lock); } static int mptcp_nl_set_flags(struct net *net, @@ -1835,12 +1844,10 @@ static int mptcp_nl_set_flags(struct net *net, goto next; lock_sock(sk); - spin_lock_bh(&msk->pm.lock); if (changed & MPTCP_PM_ADDR_FLAG_BACKUP) - ret = mptcp_pm_nl_mp_prio_send_ack(msk, addr, bkup); + ret = mptcp_pm_nl_mp_prio_send_ack(msk, addr, NULL, bkup); if (changed & MPTCP_PM_ADDR_FLAG_FULLMESH) mptcp_pm_nl_fullmesh(msk, addr); - spin_unlock_bh(&msk->pm.lock); release_sock(sk); next: @@ -1854,6 +1861,9 @@ next: static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info) { struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, }, *entry; + struct mptcp_pm_addr_entry remote = { .addr = { .family = AF_UNSPEC }, }; + struct nlattr *attr_rem = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; + struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; struct pm_nl_pernet *pernet = genl_info_pm_nl(info); u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP | @@ -1866,6 +1876,12 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info) if (ret < 0) return ret; + if (attr_rem) { + ret = mptcp_pm_parse_entry(attr_rem, info, false, &remote); + if (ret < 0) + return ret; + } + if (addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP) bkup = 1; if (addr.addr.family == AF_UNSPEC) { @@ -1874,6 +1890,10 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info) return -EOPNOTSUPP; } + if (token) + return 
mptcp_userspace_pm_set_flags(sock_net(skb->sk), + token, &addr, &remote, bkup); + spin_lock_bh(&pernet->lock); entry = __lookup_addr(pernet, &addr.addr, lookup_by_id); if (!entry) { diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index f56378e4f597..9e82250cbb70 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -5,6 +5,7 @@ */ #include "protocol.h" +#include "mib.h" void mptcp_free_local_addr_list(struct mptcp_sock *msk) { @@ -306,15 +307,11 @@ static struct sock *mptcp_nl_find_ssk(struct mptcp_sock *msk, const struct mptcp_addr_info *local, const struct mptcp_addr_info *remote) { - struct sock *sk = &msk->sk.icsk_inet.sk; struct mptcp_subflow_context *subflow; - struct sock *found = NULL; if (local->family != remote->family) return NULL; - lock_sock(sk); - mptcp_for_each_subflow(msk, subflow) { const struct inet_sock *issk; struct sock *ssk; @@ -347,16 +344,11 @@ static struct sock *mptcp_nl_find_ssk(struct mptcp_sock *msk, } if (issk->inet_sport == local->port && - issk->inet_dport == remote->port) { - found = ssk; - goto found; - } + issk->inet_dport == remote->port) + return ssk; } -found: - release_sock(sk); - - return found; + return NULL; } int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info) @@ -412,18 +404,51 @@ int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info) } sk = &msk->sk.icsk_inet.sk; + lock_sock(sk); ssk = mptcp_nl_find_ssk(msk, &addr_l, &addr_r); if (ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); mptcp_subflow_shutdown(sk, ssk, RCV_SHUTDOWN | SEND_SHUTDOWN); mptcp_close_ssk(sk, ssk, subflow); + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RMSUBFLOW); err = 0; } else { err = -ESRCH; } + release_sock(sk); - destroy_err: +destroy_err: sock_put((struct sock *)msk); return err; } + +int mptcp_userspace_pm_set_flags(struct net *net, struct nlattr *token, + struct mptcp_pm_addr_entry *loc, + struct mptcp_pm_addr_entry *rem, u8 bkup) +{ + struct mptcp_sock 
*msk; + int ret = -EINVAL; + u32 token_val; + + token_val = nla_get_u32(token); + + msk = mptcp_token_get_sock(net, token_val); + if (!msk) + return ret; + + if (!mptcp_pm_is_userspace(msk)) + goto set_flags_err; + + if (loc->addr.family == AF_UNSPEC || + rem->addr.family == AF_UNSPEC) + goto set_flags_err; + + lock_sock((struct sock *)msk); + ret = mptcp_pm_nl_mp_prio_send_ack(msk, &loc->addr, &rem->addr, bkup); + release_sock((struct sock *)msk); + +set_flags_err: + sock_put((struct sock *)msk); + return ret; +} diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index e475212f2618..cc21fafd9726 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -506,13 +506,18 @@ static inline bool tcp_can_send_ack(const struct sock *ssk) (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_TIME_WAIT | TCPF_CLOSE | TCPF_LISTEN)); } +void __mptcp_subflow_send_ack(struct sock *ssk) +{ + if (tcp_can_send_ack(ssk)) + tcp_send_ack(ssk); +} + void mptcp_subflow_send_ack(struct sock *ssk) { bool slow; slow = lock_sock_fast(ssk); - if (tcp_can_send_ack(ssk)) - tcp_send_ack(ssk); + __mptcp_subflow_send_ack(ssk); unlock_sock_fast(ssk, slow); } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index c14d70c036d0..480c5320b86e 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -607,6 +607,7 @@ void __init mptcp_subflow_init(void); void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how); void mptcp_close_ssk(struct sock *sk, struct sock *ssk, struct mptcp_subflow_context *subflow); +void __mptcp_subflow_send_ack(struct sock *ssk); void mptcp_subflow_send_ack(struct sock *ssk); void mptcp_subflow_reset(struct sock *ssk); void mptcp_subflow_queue_clean(struct sock *ssk); @@ -771,6 +772,10 @@ void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup); void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq); +int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock 
*msk, + struct mptcp_addr_info *addr, + struct mptcp_addr_info *rem, + u8 bkup); bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, const struct mptcp_pm_addr_entry *entry); void mptcp_pm_free_anno_list(struct mptcp_sock *msk); @@ -787,7 +792,9 @@ int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, u8 *flags, int *ifindex); - +int mptcp_userspace_pm_set_flags(struct net *net, struct nlattr *token, + struct mptcp_pm_addr_entry *loc, + struct mptcp_pm_addr_entry *rem, u8 bkup); int mptcp_pm_announce_addr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr, bool echo); diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c index 6a2f4b981e1d..cb79f0719e3b 100644 --- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c +++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c @@ -39,7 +39,7 @@ static void syntax(char *argv[]) fprintf(stderr, "\tdsf lip <local-ip> lport <local-port> rip <remote-ip> rport <remote-port> token <token>\n"); fprintf(stderr, "\tdel <id> [<ip>]\n"); fprintf(stderr, "\tget <id>\n"); - fprintf(stderr, "\tset [<ip>] [id <nr>] flags [no]backup|[no]fullmesh [port <nr>]\n"); + fprintf(stderr, "\tset [<ip>] [id <nr>] flags [no]backup|[no]fullmesh [port <nr>] [token <token>] [rip <ip>] [rport <port>]\n"); fprintf(stderr, "\tflush\n"); fprintf(stderr, "\tdump\n"); fprintf(stderr, "\tlimits [<rcv addr max> <subflow max>]\n"); @@ -1279,7 +1279,10 @@ int set_flags(int fd, int pm_family, int argc, char *argv[]) struct rtattr *rta, *nest; struct nlmsghdr *nh; u_int32_t flags = 0; + u_int32_t token = 0; + u_int16_t rport = 0; u_int16_t family; + void *rip = NULL; int nest_start; int use_id = 0; u_int8_t id; @@ -1339,7 +1342,13 @@ int set_flags(int fd, int pm_family, int argc, char *argv[]) error(1, 0, " missing flags keyword"); for (; arg < argc; arg++) { - if (!strcmp(argv[arg], "flags")) { + if 
(!strcmp(argv[arg], "token")) { + if (++arg >= argc) + error(1, 0, " missing token value"); + + /* token */ + token = atoi(argv[arg]); + } else if (!strcmp(argv[arg], "flags")) { char *tok, *str; /* flags */ @@ -1378,12 +1387,72 @@ int set_flags(int fd, int pm_family, int argc, char *argv[]) rta->rta_len = RTA_LENGTH(2); memcpy(RTA_DATA(rta), &port, 2); off += NLMSG_ALIGN(rta->rta_len); + } else if (!strcmp(argv[arg], "rport")) { + if (++arg >= argc) + error(1, 0, " missing remote port"); + + rport = atoi(argv[arg]); + } else if (!strcmp(argv[arg], "rip")) { + if (++arg >= argc) + error(1, 0, " missing remote ip"); + + rip = argv[arg]; } else { error(1, 0, "unknown keyword %s", argv[arg]); } } nest->rta_len = off - nest_start; + /* token */ + if (token) { + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ATTR_TOKEN; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &token, 4); + off += NLMSG_ALIGN(rta->rta_len); + } + + /* remote addr/port */ + if (rip) { + nest_start = off; + nest = (void *)(data + off); + nest->rta_type = NLA_F_NESTED | MPTCP_PM_ATTR_ADDR_REMOTE; + nest->rta_len = RTA_LENGTH(0); + off += NLMSG_ALIGN(nest->rta_len); + + /* addr data */ + rta = (void *)(data + off); + if (inet_pton(AF_INET, rip, RTA_DATA(rta))) { + family = AF_INET; + rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4; + rta->rta_len = RTA_LENGTH(4); + } else if (inet_pton(AF_INET6, rip, RTA_DATA(rta))) { + family = AF_INET6; + rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6; + rta->rta_len = RTA_LENGTH(16); + } else { + error(1, errno, "can't parse ip %s", (char *)rip); + } + off += NLMSG_ALIGN(rta->rta_len); + + /* family */ + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY; + rta->rta_len = RTA_LENGTH(2); + memcpy(RTA_DATA(rta), &family, 2); + off += NLMSG_ALIGN(rta->rta_len); + + if (rport) { + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT; + rta->rta_len = RTA_LENGTH(2); + memcpy(RTA_DATA(rta), &rport, 2); + off += 
NLMSG_ALIGN(rta->rta_len); + } + + nest->rta_len = off - nest_start; + } + do_nl_req(fd, nh, off, 0); return 0; } diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 78d0bb640b11..abe3d4ebe554 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -770,10 +770,42 @@ test_subflows() rm -f "$evts" } +test_prio() +{ + local count + + # Send MP_PRIO signal from client to server machine + ip netns exec "$ns2" ./pm_nl_ctl set 10.0.1.2 port "$client4_port" flags backup token "$client4_token" rip 10.0.1.1 rport "$server4_port" + sleep 0.5 + + # Check TX + stdbuf -o0 -e0 printf "MP_PRIO TX \t" + count=$(ip netns exec "$ns2" nstat -as | grep MPTcpExtMPPrioTx | awk '{print $2}') + [ -z "$count" ] && count=0 + if [ $count != 1 ]; then + stdbuf -o0 -e0 printf "[FAIL]\n" + exit 1 + else + stdbuf -o0 -e0 printf "[OK]\n" + fi + + # Check RX + stdbuf -o0 -e0 printf "MP_PRIO RX \t" + count=$(ip netns exec "$ns1" nstat -as | grep MPTcpExtMPPrioRx | awk '{print $2}') + [ -z "$count" ] && count=0 + if [ $count != 1 ]; then + stdbuf -o0 -e0 printf "[FAIL]\n" + exit 1 + else + stdbuf -o0 -e0 printf "[OK]\n" + fi +} + make_connection make_connection "v6" test_announce test_remove test_subflows +test_prio exit 0 |