-rw-r--r--  Documentation/networking/mptcp-sysctl.rst   11
-rw-r--r--  net/mptcp/ctrl.c                            121
-rw-r--r--  net/mptcp/mib.c                               2
-rw-r--r--  net/mptcp/mib.h                               2
-rw-r--r--  net/mptcp/protocol.c                         11
-rw-r--r--  net/mptcp/protocol.h                          8
-rw-r--r--  net/mptcp/subflow.c                           4
7 files changed, 151 insertions(+), 8 deletions(-)
diff --git a/Documentation/networking/mptcp-sysctl.rst b/Documentation/networking/mptcp-sysctl.rst
index fd514bba8c43..95598c21fc8e 100644
--- a/Documentation/networking/mptcp-sysctl.rst
+++ b/Documentation/networking/mptcp-sysctl.rst
@@ -34,6 +34,17 @@ available_schedulers - STRING
Shows the available schedulers choices that are registered. More packet
schedulers may be available, but not loaded.
+blackhole_timeout - INTEGER (seconds)
+ Initial time period in seconds to disable MPTCP on active MPTCP sockets
+ when an MPTCP firewall blackhole issue happens. This time period grows
+ exponentially when more blackhole issues are detected right after MPTCP
+ is re-enabled, and resets to the initial value when the blackhole issue
+ goes away.
+
+ Set to 0 to disable the blackhole detection.
+
+ Default: 3600
+
checksum_enabled - BOOLEAN
Control whether DSS checksum can be enabled.
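The knob above is registered under "net/mptcp", so it surfaces as /proc/sys/net/mptcp/blackhole_timeout (net.mptcp.blackhole_timeout). Below is a minimal userspace sketch, not part of the patch, that reads the current value and doubles it; the write needs privileges in the netns, and any successful write also clears active_disable_times (see the proc handler further down):

#include <stdio.h>

int main(void)
{
	const char *path = "/proc/sys/net/mptcp/blackhole_timeout";
	unsigned int timeout;
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	if (fscanf(f, "%u", &timeout) != 1) {
		fprintf(stderr, "%s: unexpected format\n", path);
		fclose(f);
		return 1;
	}
	fclose(f);
	printf("blackhole_timeout: %u seconds\n", timeout);

	/* writing 0 here would disable the detection entirely */
	f = fopen(path, "w");
	if (!f) {
		perror(path);
		return 1;
	}
	fprintf(f, "%u\n", 2 * timeout);
	fclose(f);
	return 0;
}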
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index 0b23e3c5e8ff..38d8121331d4 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -28,8 +28,11 @@ struct mptcp_pernet {
#endif
unsigned int add_addr_timeout;
+ unsigned int blackhole_timeout;
unsigned int close_timeout;
unsigned int stale_loss_cnt;
+ atomic_t active_disable_times;
+ unsigned long active_disable_stamp;
u8 mptcp_enabled;
u8 checksum_enabled;
u8 allow_join_initial_addr_port;
@@ -88,6 +91,8 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
{
pernet->mptcp_enabled = 1;
pernet->add_addr_timeout = TCP_RTO_MAX;
+ pernet->blackhole_timeout = 3600;
+ atomic_set(&pernet->active_disable_times, 0);
pernet->close_timeout = TCP_TIMEWAIT_LEN;
pernet->checksum_enabled = 0;
pernet->allow_join_initial_addr_port = 1;
@@ -152,6 +157,20 @@ static int proc_available_schedulers(const struct ctl_table *ctl,
return ret;
}
+static int proc_blackhole_detect_timeout(const struct ctl_table *table,
+ int write, void *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ struct mptcp_pernet *pernet = mptcp_get_pernet(current->nsproxy->net_ns);
+ int ret;
+
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+ if (write && ret == 0)
+ atomic_set(&pernet->active_disable_times, 0);
+
+ return ret;
+}
+
static struct ctl_table mptcp_sysctl_table[] = {
{
.procname = "enabled",
@@ -218,6 +237,13 @@ static struct ctl_table mptcp_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
+ {
+ .procname = "blackhole_timeout",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_blackhole_detect_timeout,
+ .extra1 = SYSCTL_ZERO,
+ },
};
static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
@@ -241,6 +267,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
table[6].data = &pernet->scheduler;
/* table[7] is for available_schedulers which is read-only info */
table[8].data = &pernet->close_timeout;
+ table[9].data = &pernet->blackhole_timeout;
hdr = register_net_sysctl_sz(net, MPTCP_SYSCTL_PATH, table,
ARRAY_SIZE(mptcp_sysctl_table));
@@ -278,6 +305,90 @@ static void mptcp_pernet_del_table(struct mptcp_pernet *pernet) {}
#endif /* CONFIG_SYSCTL */
+/* The following code block is to deal with middlebox issues with MPTCP,
+ * similar to what is done with TFO.
+ * The proposed solution is to disable active MPTCP globally when SYNs with
+ * the MPC option are dropped while SYNs without MPC are not. In this case,
+ * active side MPTCP is disabled globally for 1h at first. Then, if it
+ * happens again, it is disabled for 2h, then 4h, 8h, ...
+ * The timeout is reset back to 1h when a successful active MPTCP connection
+ * is fully established.
+ */
+
+/* Disable active MPTCP: record the current jiffies and bump active_disable_times */
+void mptcp_active_disable(struct sock *sk)
+{
+ struct net *net = sock_net(sk);
+ struct mptcp_pernet *pernet;
+
+ pernet = mptcp_get_pernet(net);
+
+ if (!READ_ONCE(pernet->blackhole_timeout))
+ return;
+
+ /* Paired with READ_ONCE() in mptcp_active_should_disable() */
+ WRITE_ONCE(pernet->active_disable_stamp, jiffies);
+
+ /* Paired with smp_rmb() in mptcp_active_should_disable().
+ * We want pernet->active_disable_stamp to be updated first.
+ */
+ smp_mb__before_atomic();
+ atomic_inc(&pernet->active_disable_times);
+
+ MPTCP_INC_STATS(net, MPTCP_MIB_BLACKHOLE);
+}
+
+/* Calculate the timeout for MPTCP active disable.
+ * Return true if we are still in the active MPTCP disable period.
+ * Return false if the timeout has already expired and we should use active MPTCP.
+ */
+bool mptcp_active_should_disable(struct sock *ssk)
+{
+ struct net *net = sock_net(ssk);
+ unsigned int blackhole_timeout;
+ struct mptcp_pernet *pernet;
+ unsigned long timeout;
+ int disable_times;
+ int multiplier;
+
+ pernet = mptcp_get_pernet(net);
+ blackhole_timeout = READ_ONCE(pernet->blackhole_timeout);
+
+ if (!blackhole_timeout)
+ return false;
+
+ disable_times = atomic_read(&pernet->active_disable_times);
+ if (!disable_times)
+ return false;
+
+ /* Paired with smp_mb__before_atomic() in mptcp_active_disable() */
+ smp_rmb();
+
+ /* Limit timeout to max: 2^6 * initial timeout */
+ multiplier = 1 << min(disable_times - 1, 6);
+
+ /* Paired with the WRITE_ONCE() in mptcp_active_disable(). */
+ timeout = READ_ONCE(pernet->active_disable_stamp) +
+ multiplier * blackhole_timeout * HZ;
+
+ return time_before(jiffies, timeout);
+}
+
+/* Enable active MPTCP and reset active_disable_times if needed */
+void mptcp_active_enable(struct sock *sk)
+{
+ struct mptcp_pernet *pernet = mptcp_get_pernet(sock_net(sk));
+
+ if (atomic_read(&pernet->active_disable_times)) {
+ struct dst_entry *dst = sk_dst_get(sk);
+
+ if (dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK))
+ atomic_set(&pernet->active_disable_times, 0);
+
+ dst_release(dst); /* sk_dst_get() took a reference */
+ }
+}
+
/* Check the number of retransmissions, and fallback to TCP if needed */
void mptcp_active_detect_blackhole(struct sock *ssk, bool expired)
{
@@ -290,10 +399,14 @@ void mptcp_active_detect_blackhole(struct sock *ssk, bool expired)
timeouts = inet_csk(ssk)->icsk_retransmits;
subflow = mptcp_subflow_ctx(ssk);
- if (subflow->request_mptcp && ssk->sk_state == TCP_SYN_SENT &&
- (timeouts == 2 || (timeouts < 2 && expired))) {
- MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEACTIVEDROP);
- mptcp_subflow_early_fallback(mptcp_sk(subflow->conn), subflow);
+ if (subflow->request_mptcp && ssk->sk_state == TCP_SYN_SENT) {
+ if (timeouts == 2 || (timeouts < 2 && expired)) {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEACTIVEDROP);
+ subflow->mpc_drop = 1;
+ mptcp_subflow_early_fallback(mptcp_sk(subflow->conn), subflow);
+ } else {
+ subflow->mpc_drop = 0;
+ }
}
}
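The schedule implemented by mptcp_active_should_disable() above doubles the disable window on each consecutive detection and caps it at 2^6 times the base timeout. A standalone sketch, not from the patch, that prints the resulting schedule assuming the default base of 3600s:

#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
	const unsigned int base = 3600;	/* blackhole_timeout default */

	for (int disable_times = 1; disable_times <= 8; disable_times++) {
		/* mirrors: multiplier = 1 << min(disable_times - 1, 6) */
		unsigned int multiplier = 1u << MIN(disable_times - 1, 6);

		printf("detection %d: disabled for %uh\n",
		       disable_times, multiplier * base / 3600);
	}
	return 0;	/* prints 1h, 2h, 4h, ..., 64h, 64h */
}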
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index d70a3e2bfad6..38c2efc82b94 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -16,6 +16,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("MPCapableFallbackACK", MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK),
SNMP_MIB_ITEM("MPCapableFallbackSYNACK", MPTCP_MIB_MPCAPABLEACTIVEFALLBACK),
SNMP_MIB_ITEM("MPCapableSYNTXDrop", MPTCP_MIB_MPCAPABLEACTIVEDROP),
+ SNMP_MIB_ITEM("MPCapableSYNTXDisabled", MPTCP_MIB_MPCAPABLEACTIVEDISABLED),
SNMP_MIB_ITEM("MPFallbackTokenInit", MPTCP_MIB_TOKENFALLBACKINIT),
SNMP_MIB_ITEM("MPTCPRetrans", MPTCP_MIB_RETRANSSEGS),
SNMP_MIB_ITEM("MPJoinNoTokenFound", MPTCP_MIB_JOINNOTOKEN),
@@ -74,6 +75,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("RcvWndConflictUpdate", MPTCP_MIB_RCVWNDCONFLICTUPDATE),
SNMP_MIB_ITEM("RcvWndConflict", MPTCP_MIB_RCVWNDCONFLICT),
SNMP_MIB_ITEM("MPCurrEstab", MPTCP_MIB_CURRESTAB),
+ SNMP_MIB_ITEM("Blackhole", MPTCP_MIB_BLACKHOLE),
SNMP_MIB_SENTINEL
};
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index 062775700b63..c8ffe18a8722 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -11,6 +11,7 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK,/* Server-side fallback during 3-way handshake */
MPTCP_MIB_MPCAPABLEACTIVEFALLBACK, /* Client-side fallback during 3-way handshake */
MPTCP_MIB_MPCAPABLEACTIVEDROP, /* Client-side fallback due to a MPC drop */
+ MPTCP_MIB_MPCAPABLEACTIVEDISABLED, /* Client-side disabled due to past issues */
MPTCP_MIB_TOKENFALLBACKINIT, /* Could not init/allocate token */
MPTCP_MIB_RETRANSSEGS, /* Segments retransmitted at the MPTCP-level */
MPTCP_MIB_JOINNOTOKEN, /* Received MP_JOIN but the token was not found */
@@ -75,6 +76,7 @@ enum linux_mptcp_mib_field {
*/
MPTCP_MIB_RCVWNDCONFLICT, /* Conflict with while updating msk rcv wnd */
MPTCP_MIB_CURRESTAB, /* Current established MPTCP connections */
+ MPTCP_MIB_BLACKHOLE, /* A blackhole has been detected */
__MPTCP_MIB_MAX
};
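Both new counters join the MPTcpExt SNMP group, so once the patch is applied they show up in /proc/net/netstat (nstat parses the same file). A small reader sketch, not part of the patch:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[4096];
	FILE *f = fopen("/proc/net/netstat", "r");

	if (!f) {
		perror("/proc/net/netstat");
		return 1;
	}
	while (fgets(line, sizeof(line), f)) {
		/* header and value rows; the MPTcpExt group holds
		 * MPCapableSYNTXDisabled and Blackhole with this patch
		 */
		if (!strncmp(line, "MPTcpExt:", 9))
			fputs(line, stdout);
	}
	fclose(f);
	return 0;
}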
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index cbbcc46cfef0..c2317919fc14 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -3737,9 +3737,14 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (rcu_access_pointer(tcp_sk(ssk)->md5sig_info))
mptcp_subflow_early_fallback(msk, subflow);
#endif
- if (subflow->request_mptcp && mptcp_token_new_connect(ssk)) {
- MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT);
- mptcp_subflow_early_fallback(msk, subflow);
+ if (subflow->request_mptcp) {
+ if (mptcp_active_should_disable(sk)) {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEACTIVEDISABLED);
+ mptcp_subflow_early_fallback(msk, subflow);
+ } else if (mptcp_token_new_connect(ssk) < 0) {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT);
+ mptcp_subflow_early_fallback(msk, subflow);
+ }
}
WRITE_ONCE(msk->write_seq, subflow->idsn);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 302bd808b839..74417aae08d0 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -531,7 +531,8 @@ struct mptcp_subflow_context {
valid_csum_seen : 1, /* at least one csum validated */
is_mptfo : 1, /* subflow is doing TFO */
close_event_done : 1, /* has done the post-closed part */
- __unused : 9;
+ mpc_drop : 1, /* the MPC option has been dropped in a rtx */
+ __unused : 8;
bool data_avail;
bool scheduled;
u32 remote_nonce;
@@ -697,6 +698,11 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net);
unsigned int mptcp_close_timeout(const struct sock *sk);
int mptcp_get_pm_type(const struct net *net);
const char *mptcp_get_scheduler(const struct net *net);
+
+void mptcp_active_disable(struct sock *sk);
+bool mptcp_active_should_disable(struct sock *ssk);
+void mptcp_active_enable(struct sock *sk);
+
void mptcp_get_available_schedulers(char *buf, size_t maxlen);
void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
struct mptcp_subflow_context *subflow,
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index b9b14e75e8c2..1040b3b9696b 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -546,6 +546,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->mp_capable = 1;
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK);
mptcp_finish_connect(sk);
+ mptcp_active_enable(parent);
mptcp_propagate_state(parent, sk, subflow, &mp_opt);
} else if (subflow->request_join) {
u8 hmac[SHA256_DIGEST_SIZE];
@@ -591,6 +592,9 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINPORTSYNACKRX);
}
} else if (mptcp_check_fallback(sk)) {
+ /* It looks like MPTCP is blocked, while TCP is not */
+ if (subflow->mpc_drop)
+ mptcp_active_disable(parent);
fallback:
mptcp_propagate_state(parent, sk, subflow, NULL);
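Taken together, the hooks form one cycle: a dropped MPC SYN sets subflow->mpc_drop, the subsequent TCP-only fallback calls mptcp_active_disable(), new connects are gated by mptcp_active_should_disable() in mptcp_connect(), and a fully established MPTCP connection lifts the penalty via mptcp_active_enable(). A single-threaded toy model of that cycle, purely illustrative: atomics and jiffies are replaced by plain ints and time(), and the loopback dst check in mptcp_active_enable() is omitted.

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

static unsigned int blackhole_timeout = 3600;	/* seconds, 0 = off */
static int active_disable_times;
static time_t active_disable_stamp;

static void active_disable(void)		/* ~ mptcp_active_disable() */
{
	active_disable_stamp = time(NULL);
	active_disable_times++;
}

static bool active_should_disable(void)	/* ~ mptcp_active_should_disable() */
{
	unsigned int mult;

	if (!blackhole_timeout || !active_disable_times)
		return false;
	mult = 1u << MIN(active_disable_times - 1, 6);
	return time(NULL) < active_disable_stamp +
			    (time_t)mult * blackhole_timeout;
}

static void active_enable(void)		/* ~ mptcp_active_enable() */
{
	active_disable_times = 0;
}

int main(void)
{
	active_disable();	/* MPC SYN dropped, plain SYN went through */
	printf("gated: %d\n", active_should_disable());	/* 1 */
	active_enable();	/* a later MPC connection fully established */
	printf("gated: %d\n", active_should_disable());	/* 0 */
	return 0;
}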
}