diff options
author | Jakub Kicinski <kuba@kernel.org> | 2022-06-30 16:31:00 -0700 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2022-06-30 16:31:00 -0700 |
commit | 0d8730f07c822a351a624462918c7109cdc7f402 (patch) | |
tree | 3539d4c7c098894f3b2d5f49134c8cfccc06aaca /net/mptcp | |
parent | b7d78b46d5e8dc77c656c13885d31e931923b915 (diff) | |
parent | 5e8379351dbde61ea383e514f0f9ecb2c047cf4e (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c
9c5de246c1db ("net: sparx5: mdb add/del handle non-sparx5 devices")
fbb89d02e33a ("net: sparx5: Allow mdb entries to both CPU and ports")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net/mptcp')
-rw-r--r-- | net/mptcp/options.c | 7 | ||||
-rw-r--r-- | net/mptcp/pm.c | 10 | ||||
-rw-r--r-- | net/mptcp/protocol.c | 84 | ||||
-rw-r--r-- | net/mptcp/protocol.h | 24 | ||||
-rw-r--r-- | net/mptcp/subflow.c | 127 |
5 files changed, 179 insertions, 73 deletions
diff --git a/net/mptcp/options.c b/net/mptcp/options.c index be3b918a6d15..aead331866a0 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -765,6 +765,7 @@ static noinline bool mptcp_established_options_rst(struct sock *sk, struct sk_bu opts->suboptions |= OPTION_MPTCP_RST; opts->reset_transient = subflow->reset_transient; opts->reset_reason = subflow->reset_reason; + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPRSTTX); return true; } @@ -788,6 +789,7 @@ static bool mptcp_established_options_fastclose(struct sock *sk, opts->rcvr_key = msk->remote_key; pr_debug("FASTCLOSE key=%llu", opts->rcvr_key); + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFASTCLOSETX); return true; } @@ -809,6 +811,7 @@ static bool mptcp_established_options_mp_fail(struct sock *sk, opts->fail_seq = subflow->map_seq; pr_debug("MP_FAIL fail_seq=%llu", opts->fail_seq); + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILTX); return true; } @@ -833,13 +836,11 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) { *size += opt_size; remaining -= opt_size; - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFASTCLOSETX); } /* MP_RST can be used with MP_FASTCLOSE and MP_FAIL if there is room */ if (mptcp_established_options_rst(sk, skb, &opt_size, remaining, opts)) { *size += opt_size; remaining -= opt_size; - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPRSTTX); } return true; } @@ -966,7 +967,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, goto reset; subflow->mp_capable = 0; pr_fallback(msk); - __mptcp_do_fallback(msk); + mptcp_do_fallback(ssk); return false; } diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 59a85220edc9..45e2a48397b9 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -299,23 +299,21 @@ void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); - struct sock *s = (struct sock *)msk; pr_debug("fail_seq=%llu", fail_seq); if (!READ_ONCE(msk->allow_infinite_fallback)) return; - if (!READ_ONCE(subflow->mp_fail_response_expect)) { + if (!subflow->fail_tout) { pr_debug("send MP_FAIL response and infinite map"); subflow->send_mp_fail = 1; - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILTX); subflow->send_infinite_map = 1; - } else if (!sock_flag(sk, SOCK_DEAD)) { + tcp_send_ack(sk); + } else { pr_debug("MP_FAIL response received"); - - sk_stop_timer(s, &s->sk_timer); + WRITE_ONCE(subflow->fail_tout, 0); } } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index e0fb9f96c45c..883bea93c2ae 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -500,7 +500,7 @@ static void mptcp_set_timeout(struct sock *sk) __mptcp_set_timeout(sk, tout); } -static bool tcp_can_send_ack(const struct sock *ssk) +static inline bool tcp_can_send_ack(const struct sock *ssk) { return !((1 << inet_sk_state_load(ssk)) & (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_TIME_WAIT | TCPF_CLOSE | TCPF_LISTEN)); @@ -1245,7 +1245,7 @@ static void mptcp_update_infinite_map(struct mptcp_sock *msk, MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPTX); mptcp_subflow_ctx(ssk)->send_infinite_map = 0; pr_fallback(msk); - __mptcp_do_fallback(msk); + mptcp_do_fallback(ssk); } static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, @@ -2175,21 +2175,6 @@ static void mptcp_retransmit_timer(struct timer_list *t) sock_put(sk); } -static struct mptcp_subflow_context * -mp_fail_response_expect_subflow(struct mptcp_sock *msk) -{ - struct mptcp_subflow_context *subflow, *ret = NULL; - - mptcp_for_each_subflow(msk, subflow) { - if (READ_ONCE(subflow->mp_fail_response_expect)) { - ret = subflow; - break; - } - } - - return ret; -} - static void mptcp_timeout_timer(struct timer_list *t) { struct sock *sk = from_timer(sk, t, sk_timer); @@ -2346,6 +2331,11 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, kfree_rcu(subflow, rcu); } else { /* otherwise tcp will dispose of the ssk and subflow ctx */ + if (ssk->sk_state == TCP_LISTEN) { + tcp_set_state(ssk, TCP_CLOSE); + mptcp_subflow_queue_clean(ssk); + inet_csk_listen_stop(ssk); + } __tcp_close(ssk, 0); /* close acquired an extra ref */ @@ -2518,27 +2508,50 @@ reset_timer: mptcp_reset_timer(sk); } +/* schedule the timeout timer for the relevant event: either close timeout + * or mp_fail timeout. The close timeout takes precedence on the mp_fail one + */ +void mptcp_reset_timeout(struct mptcp_sock *msk, unsigned long fail_tout) +{ + struct sock *sk = (struct sock *)msk; + unsigned long timeout, close_timeout; + + if (!fail_tout && !sock_flag(sk, SOCK_DEAD)) + return; + + close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies + TCP_TIMEWAIT_LEN; + + /* the close timeout takes precedence on the fail one, and here at least one of + * them is active + */ + timeout = sock_flag(sk, SOCK_DEAD) ? close_timeout : fail_tout; + + sk_reset_timer(sk, &sk->sk_timer, timeout); +} + static void mptcp_mp_fail_no_response(struct mptcp_sock *msk) { - struct mptcp_subflow_context *subflow; - struct sock *ssk; + struct sock *ssk = msk->first; bool slow; - subflow = mp_fail_response_expect_subflow(msk); - if (subflow) { - pr_debug("MP_FAIL doesn't respond, reset the subflow"); + if (!ssk) + return; - ssk = mptcp_subflow_tcp_sock(subflow); - slow = lock_sock_fast(ssk); - mptcp_subflow_reset(ssk); - unlock_sock_fast(ssk, slow); - } + pr_debug("MP_FAIL doesn't respond, reset the subflow"); + + slow = lock_sock_fast(ssk); + mptcp_subflow_reset(ssk); + WRITE_ONCE(mptcp_subflow_ctx(ssk)->fail_tout, 0); + unlock_sock_fast(ssk, slow); + + mptcp_reset_timeout(msk, 0); } static void mptcp_worker(struct work_struct *work) { struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work); struct sock *sk = &msk->sk.icsk_inet.sk; + unsigned long fail_tout; int state; lock_sock(sk); @@ -2575,7 +2588,9 @@ static void mptcp_worker(struct work_struct *work) if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags)) __mptcp_retrans(sk); - mptcp_mp_fail_no_response(msk); + fail_tout = msk->first ? READ_ONCE(mptcp_subflow_ctx(msk->first)->fail_tout) : 0; + if (fail_tout && time_after(jiffies, fail_tout)) + mptcp_mp_fail_no_response(msk); unlock: release_sock(sk); @@ -2822,6 +2837,7 @@ static void __mptcp_destroy_sock(struct sock *sk) static void mptcp_close(struct sock *sk, long timeout) { struct mptcp_subflow_context *subflow; + struct mptcp_sock *msk = mptcp_sk(sk); bool do_cancel_work = false; lock_sock(sk); @@ -2840,10 +2856,16 @@ static void mptcp_close(struct sock *sk, long timeout) cleanup: /* orphan all the subflows */ inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32; - mptcp_for_each_subflow(mptcp_sk(sk), subflow) { + mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); bool slow = lock_sock_fast_nested(ssk); + /* since the close timeout takes precedence on the fail one, + * cancel the latter + */ + if (ssk == msk->first) + subflow->fail_tout = 0; + sock_orphan(ssk); unlock_sock_fast(ssk, slow); } @@ -2852,13 +2874,13 @@ cleanup: sock_hold(sk); pr_debug("msk=%p state=%d", sk, sk->sk_state); if (mptcp_sk(sk)->token) - mptcp_event(MPTCP_EVENT_CLOSED, mptcp_sk(sk), NULL, GFP_KERNEL); + mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL); if (sk->sk_state == TCP_CLOSE) { __mptcp_destroy_sock(sk); do_cancel_work = true; } else { - sk_reset_timer(sk, &sk->sk_timer, jiffies + TCP_TIMEWAIT_LEN); + mptcp_reset_timeout(msk, 0); } release_sock(sk); if (do_cancel_work) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 200f89f6d62f..c14d70c036d0 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -306,6 +306,7 @@ struct mptcp_sock { u32 setsockopt_seq; char ca_name[TCP_CA_NAME_MAX]; + struct mptcp_sock *dl_next; }; #define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock) @@ -468,7 +469,6 @@ struct mptcp_subflow_context { local_id_valid : 1, /* local_id is correctly initialized */ valid_csum_seen : 1; /* at least one csum validated */ enum mptcp_data_avail data_avail; - bool mp_fail_response_expect; u32 remote_nonce; u64 thmac; u32 local_nonce; @@ -482,6 +482,7 @@ struct mptcp_subflow_context { u8 stale_count; long delegated_status; + unsigned long fail_tout; ); @@ -608,6 +609,7 @@ void mptcp_close_ssk(struct sock *sk, struct sock *ssk, struct mptcp_subflow_context *subflow); void mptcp_subflow_send_ack(struct sock *ssk); void mptcp_subflow_reset(struct sock *ssk); +void mptcp_subflow_queue_clean(struct sock *ssk); void mptcp_sock_graft(struct sock *sk, struct socket *parent); struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk); @@ -662,6 +664,7 @@ void mptcp_get_options(const struct sk_buff *skb, void mptcp_finish_connect(struct sock *sk); void __mptcp_set_connected(struct sock *sk); +void mptcp_reset_timeout(struct mptcp_sock *msk, unsigned long fail_tout); static inline bool mptcp_is_fully_established(struct sock *sk) { return inet_sk_state_load(sk) == TCP_ESTABLISHED && @@ -926,12 +929,25 @@ static inline void __mptcp_do_fallback(struct mptcp_sock *msk) set_bit(MPTCP_FALLBACK_DONE, &msk->flags); } -static inline void mptcp_do_fallback(struct sock *sk) +static inline void mptcp_do_fallback(struct sock *ssk) { - struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); - struct mptcp_sock *msk = mptcp_sk(subflow->conn); + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + struct sock *sk = subflow->conn; + struct mptcp_sock *msk; + msk = mptcp_sk(sk); __mptcp_do_fallback(msk); + if (READ_ONCE(msk->snd_data_fin_enable) && !(ssk->sk_shutdown & SEND_SHUTDOWN)) { + gfp_t saved_allocation = ssk->sk_allocation; + + /* we are in a atomic (BH) scope, override ssk default for data + * fin allocation + */ + ssk->sk_allocation = GFP_ATOMIC; + ssk->sk_shutdown |= SEND_SHUTDOWN; + tcp_shutdown(ssk, SEND_SHUTDOWN); + ssk->sk_allocation = saved_allocation; + } } #define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)", __func__, a) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index ad565ff9ebc3..d4b16d033978 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -843,7 +843,8 @@ enum mapping_status { MAPPING_INVALID, MAPPING_EMPTY, MAPPING_DATA_FIN, - MAPPING_DUMMY + MAPPING_DUMMY, + MAPPING_BAD_CSUM }; static void dbg_bad_map(struct mptcp_subflow_context *subflow, u32 ssn) @@ -958,11 +959,7 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff * subflow->map_data_csum); if (unlikely(csum)) { MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR); - if (subflow->mp_join || subflow->valid_csum_seen) { - subflow->send_mp_fail = 1; - MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPFAILTX); - } - return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY; + return MAPPING_BAD_CSUM; } subflow->valid_csum_seen = 1; @@ -974,7 +971,6 @@ static enum mapping_status get_mapping_status(struct sock *ssk, { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); bool csum_reqd = READ_ONCE(msk->csum_enabled); - struct sock *sk = (struct sock *)msk; struct mptcp_ext *mpext; struct sk_buff *skb; u16 data_len; @@ -1016,9 +1012,6 @@ static enum mapping_status get_mapping_status(struct sock *ssk, pr_debug("infinite mapping received"); MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX); subflow->map_data_len = 0; - if (!sock_flag(ssk, SOCK_DEAD)) - sk_stop_timer(sk, &sk->sk_timer); - return MAPPING_INVALID; } @@ -1165,6 +1158,33 @@ static bool subflow_can_fallback(struct mptcp_subflow_context *subflow) return !subflow->fully_established; } +static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + unsigned long fail_tout; + + /* greceful failure can happen only on the MPC subflow */ + if (WARN_ON_ONCE(ssk != READ_ONCE(msk->first))) + return; + + /* since the close timeout take precedence on the fail one, + * no need to start the latter when the first is already set + */ + if (sock_flag((struct sock *)msk, SOCK_DEAD)) + return; + + /* we don't need extreme accuracy here, use a zero fail_tout as special + * value meaning no fail timeout at all; + */ + fail_tout = jiffies + TCP_RTO_MAX; + if (!fail_tout) + fail_tout = 1; + WRITE_ONCE(subflow->fail_tout, fail_tout); + tcp_send_ack(ssk); + + mptcp_reset_timeout(msk, subflow->fail_tout); +} + static bool subflow_check_data_avail(struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); @@ -1184,10 +1204,8 @@ static bool subflow_check_data_avail(struct sock *ssk) status = get_mapping_status(ssk, msk); trace_subflow_check_data_avail(status, skb_peek(&ssk->sk_receive_queue)); - if (unlikely(status == MAPPING_INVALID)) - goto fallback; - - if (unlikely(status == MAPPING_DUMMY)) + if (unlikely(status == MAPPING_INVALID || status == MAPPING_DUMMY || + status == MAPPING_BAD_CSUM)) goto fallback; if (status != MAPPING_OK) @@ -1229,22 +1247,17 @@ no_data: fallback: if (!__mptcp_check_fallback(msk)) { /* RFC 8684 section 3.7. */ - if (subflow->send_mp_fail) { + if (status == MAPPING_BAD_CSUM && + (subflow->mp_join || subflow->valid_csum_seen)) { + subflow->send_mp_fail = 1; + if (!READ_ONCE(msk->allow_infinite_fallback)) { - ssk->sk_err = EBADMSG; - tcp_set_state(ssk, TCP_CLOSE); subflow->reset_transient = 0; subflow->reset_reason = MPTCP_RST_EMIDDLEBOX; - tcp_send_active_reset(ssk, GFP_ATOMIC); - while ((skb = skb_peek(&ssk->sk_receive_queue))) - sk_eat_skb(ssk, skb); - } else if (!sock_flag(ssk, SOCK_DEAD)) { - WRITE_ONCE(subflow->mp_fail_response_expect, true); - sk_reset_timer((struct sock *)msk, - &((struct sock *)msk)->sk_timer, - jiffies + TCP_RTO_MAX); + goto reset; } - WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA); + mptcp_subflow_fail(msk, ssk); + WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL); return true; } @@ -1252,16 +1265,20 @@ fallback: /* fatal protocol error, close the socket. * subflow_error_report() will introduce the appropriate barriers */ - ssk->sk_err = EBADMSG; - tcp_set_state(ssk, TCP_CLOSE); subflow->reset_transient = 0; subflow->reset_reason = MPTCP_RST_EMPTCP; + +reset: + ssk->sk_err = EBADMSG; + tcp_set_state(ssk, TCP_CLOSE); + while ((skb = skb_peek(&ssk->sk_receive_queue))) + sk_eat_skb(ssk, skb); tcp_send_active_reset(ssk, GFP_ATOMIC); WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA); return false; } - __mptcp_do_fallback(msk); + mptcp_do_fallback(ssk); } skb = skb_peek(&ssk->sk_receive_queue); @@ -1706,6 +1723,58 @@ static void subflow_state_change(struct sock *sk) } } +void mptcp_subflow_queue_clean(struct sock *listener_ssk) +{ + struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue; + struct mptcp_sock *msk, *next, *head = NULL; + struct request_sock *req; + + /* build a list of all unaccepted mptcp sockets */ + spin_lock_bh(&queue->rskq_lock); + for (req = queue->rskq_accept_head; req; req = req->dl_next) { + struct mptcp_subflow_context *subflow; + struct sock *ssk = req->sk; + struct mptcp_sock *msk; + + if (!sk_is_mptcp(ssk)) + continue; + + subflow = mptcp_subflow_ctx(ssk); + if (!subflow || !subflow->conn) + continue; + + /* skip if already in list */ + msk = mptcp_sk(subflow->conn); + if (msk->dl_next || msk == head) + continue; + + msk->dl_next = head; + head = msk; + } + spin_unlock_bh(&queue->rskq_lock); + if (!head) + return; + + /* can't acquire the msk socket lock under the subflow one, + * or will cause ABBA deadlock + */ + release_sock(listener_ssk); + + for (msk = head; msk; msk = next) { + struct sock *sk = (struct sock *)msk; + bool slow; + + slow = lock_sock_fast_nested(sk); + next = msk->dl_next; + msk->first = NULL; + msk->dl_next = NULL; + unlock_sock_fast(sk, slow); + } + + /* we are still under the listener msk socket lock */ + lock_sock_nested(listener_ssk, SINGLE_DEPTH_NESTING); +} + static int subflow_ulp_init(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); |