author     Linus Torvalds <torvalds@linux-foundation.org>   2022-01-27 20:58:39 +0200
committer  Linus Torvalds <torvalds@linux-foundation.org>   2022-01-27 20:58:39 +0200
commit     23a46422c56144939c091c76cf389aa863ce9c18
tree       3f1e02f4eaa1e7e41abe6f1da815f00fe9e08241 /net
parent     a0f90c8815706981c483a652a6aefca51a5e191c
parent     fd20d9738395cf8e27d0a17eba34169699fccdff
Merge tag 'net-5.17-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Jakub Kicinski:
"Including fixes from netfilter and can.
Current release - new code bugs:
- tcp: add a missing sk_defer_free_flush() in tcp_splice_read()
- tcp: add a stub for sk_defer_free_flush(), fix CONFIG_INET=n
- nf_tables: set last expression in register tracking area
- nft_connlimit: fix memleak if nf_ct_netns_get() fails
- mptcp: fix removing ids bitmap setting
- bonding: use rcu_dereference_rtnl when getting active slave
- fix three cases of sleep in atomic context in drivers: lan966x, gve
- handful of build fixes for esoteric drivers after netdev->dev_addr
was made const
Previous releases - regressions:
- revert "ipv6: Honor all IPv6 PIO Valid Lifetime values", it broke
Linux compatibility with USGv6 tests
- procfs: show net device bound packet types
- ipv4: fix ip option filtering for locally generated fragments
- phy: broadcom: hook up soft_reset for BCM54616S
Previous releases - always broken:
- ipv4: raw: lock the socket in raw_bind()
- ipv4: reduce the use of the shared IPID generator to lower the
chance of attackers guessing the values
- procfs: fix cross-netns information leakage in /proc/net/ptype
- ethtool: fix link extended state for big endian
- bridge: vlan: fix single net device option dumping
- ping: fix the sk_bound_dev_if match in ping_lookup"
* tag 'net-5.17-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (86 commits)
net: bridge: vlan: fix memory leak in __allowed_ingress
net: socket: rename SKB_DROP_REASON_SOCKET_FILTER
ipv4: remove sparse error in ip_neigh_gw4()
ipv4: avoid using shared IP generator for connected sockets
ipv4: tcp: send zero IPID in SYNACK messages
ipv4: raw: lock the socket in raw_bind()
MAINTAINERS: add missing IPv4/IPv6 header paths
MAINTAINERS: add more files to eth PHY
net: stmmac: dwmac-sun8i: use return val of readl_poll_timeout()
net: bridge: vlan: fix single net device option dumping
net: stmmac: skip only stmmac_ptp_register when resume from suspend
net: stmmac: configure PTP clock source prior to PTP initialization
Revert "ipv6: Honor all IPv6 PIO Valid Lifetime values"
connector/cn_proc: Use task_is_in_init_pid_ns()
pid: Introduce helper task_is_in_init_pid_ns()
gve: Fix GFP flags when allocing pages
net: lan966x: Fix sleep in atomic context when updating MAC table
net: lan966x: Fix sleep in atomic context when injecting frames
ethernet: seeq/ether3: don't write directly to netdev->dev_addr
ethernet: 8390/etherh: don't write directly to netdev->dev_addr
...
Diffstat (limited to 'net')
-rw-r--r--   net/bridge/br_vlan.c                      |  9
-rw-r--r--   net/core/net-procfs.c                     | 38
-rw-r--r--   net/ipv4/ip_output.c                      | 26
-rw-r--r--   net/ipv4/ping.c                           |  3
-rw-r--r--   net/ipv4/raw.c                            |  5
-rw-r--r--   net/ipv4/tcp.c                            |  1
-rw-r--r--   net/ipv4/tcp_ipv4.c                       |  2
-rw-r--r--   net/ipv6/addrconf.c                       | 27
-rw-r--r--   net/ipv6/ip6_fib.c                        | 23
-rw-r--r--   net/ipv6/ip6_tunnel.c                     |  8
-rw-r--r--   net/ipv6/route.c                          |  2
-rw-r--r--   net/mptcp/pm_netlink.c                    | 39
-rw-r--r--   net/mptcp/protocol.h                      |  6
-rw-r--r--   net/netfilter/nf_conntrack_core.c         |  8
-rw-r--r--   net/netfilter/nf_conntrack_netbios_ns.c   |  5
-rw-r--r--   net/netfilter/nf_tables_api.c             |  4
-rw-r--r--   net/netfilter/nft_connlimit.c             | 11
-rw-r--r--   net/packet/af_packet.c                    |  2
-rw-r--r--   net/rxrpc/call_event.c                    |  8
-rw-r--r--   net/rxrpc/output.c                        |  2
-rw-r--r--   net/sched/sch_api.c                       |  2
-rw-r--r--   net/sched/sch_htb.c                       | 20
-rw-r--r--   net/smc/af_smc.c                          | 63
23 files changed, 234 insertions, 80 deletions
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 84ba456a78cc..1402d5ca242d 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -560,10 +560,10 @@ static bool __allowed_ingress(const struct net_bridge *br, !br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) { if (*state == BR_STATE_FORWARDING) { *state = br_vlan_get_pvid_state(vg); - return br_vlan_state_allowed(*state, true); - } else { - return true; + if (!br_vlan_state_allowed(*state, true)) + goto drop; } + return true; } } v = br_vlan_find(vg, *vid); @@ -2020,7 +2020,8 @@ static int br_vlan_rtm_dump(struct sk_buff *skb, struct netlink_callback *cb) goto out_err; } err = br_vlan_dump_dev(dev, skb, cb, dump_flags); - if (err && err != -EMSGSIZE) + /* if the dump completed without an error we return 0 here */ + if (err != -EMSGSIZE) goto out_err; } else { for_each_netdev_rcu(net, dev) { diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index d8b9dbabd4a4..88cc0ad7d386 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -190,12 +190,23 @@ static const struct seq_operations softnet_seq_ops = { .show = softnet_seq_show, }; -static void *ptype_get_idx(loff_t pos) +static void *ptype_get_idx(struct seq_file *seq, loff_t pos) { + struct list_head *ptype_list = NULL; struct packet_type *pt = NULL; + struct net_device *dev; loff_t i = 0; int t; + for_each_netdev_rcu(seq_file_net(seq), dev) { + ptype_list = &dev->ptype_all; + list_for_each_entry_rcu(pt, ptype_list, list) { + if (i == pos) + return pt; + ++i; + } + } + list_for_each_entry_rcu(pt, &ptype_all, list) { if (i == pos) return pt; @@ -216,22 +227,40 @@ static void *ptype_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { rcu_read_lock(); - return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN; + return *pos ? ptype_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + struct net_device *dev; struct packet_type *pt; struct list_head *nxt; int hash; ++*pos; if (v == SEQ_START_TOKEN) - return ptype_get_idx(0); + return ptype_get_idx(seq, 0); pt = v; nxt = pt->list.next; + if (pt->dev) { + if (nxt != &pt->dev->ptype_all) + goto found; + + dev = pt->dev; + for_each_netdev_continue_rcu(seq_file_net(seq), dev) { + if (!list_empty(&dev->ptype_all)) { + nxt = dev->ptype_all.next; + goto found; + } + } + + nxt = ptype_all.next; + goto ptype_all; + } + if (pt->type == htons(ETH_P_ALL)) { +ptype_all: if (nxt != &ptype_all) goto found; hash = 0; @@ -260,7 +289,8 @@ static int ptype_seq_show(struct seq_file *seq, void *v) if (v == SEQ_START_TOKEN) seq_puts(seq, "Type Device Function\n"); - else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) { + else if ((!pt->af_packet_net || net_eq(pt->af_packet_net, seq_file_net(seq))) && + (!pt->dev || net_eq(dev_net(pt->dev), seq_file_net(seq)))) { if (pt->type == htons(ETH_P_ALL)) seq_puts(seq, "ALL "); else diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 57c1d8431386..139cec29ed06 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -162,12 +162,19 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk, iph->daddr = (opt && opt->opt.srr ? 
opt->opt.faddr : daddr); iph->saddr = saddr; iph->protocol = sk->sk_protocol; - if (ip_dont_fragment(sk, &rt->dst)) { + /* Do not bother generating IPID for small packets (eg SYNACK) */ + if (skb->len <= IPV4_MIN_MTU || ip_dont_fragment(sk, &rt->dst)) { iph->frag_off = htons(IP_DF); iph->id = 0; } else { iph->frag_off = 0; - __ip_select_ident(net, iph, 1); + /* TCP packets here are SYNACK with fat IPv4/TCP options. + * Avoid using the hashed IP ident generator. + */ + if (sk->sk_protocol == IPPROTO_TCP) + iph->id = (__force __be16)prandom_u32(); + else + __ip_select_ident(net, iph, 1); } if (opt && opt->opt.optlen) { @@ -825,15 +832,24 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, /* Everything is OK. Generate! */ ip_fraglist_init(skb, iph, hlen, &iter); - if (iter.frag) - ip_options_fragment(iter.frag); - for (;;) { /* Prepare header of the next frame, * before previous one went down. */ if (iter.frag) { + bool first_frag = (iter.offset == 0); + IPCB(iter.frag)->flags = IPCB(skb)->flags; ip_fraglist_prepare(skb, &iter); + if (first_frag && IPCB(skb)->opt.optlen) { + /* ipcb->opt is not populated for frags + * coming from __ip_make_skb(), + * ip_options_fragment() needs optlen + */ + IPCB(iter.frag)->opt.optlen = + IPCB(skb)->opt.optlen; + ip_options_fragment(iter.frag); + ip_send_check(iter.iph); + } } skb->tstamp = tstamp; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 0e56df3a45e2..bcf7bc71cb56 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -220,7 +220,8 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) continue; } - if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) + if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif && + sk->sk_bound_dev_if != inet_sdif(skb)) continue; sock_hold(sk); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 9eb5fc247868..9f97b9cbf7b3 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -722,6 +722,7 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) int ret = -EINVAL; int chk_addr_ret; + lock_sock(sk); if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in)) goto out; @@ -741,7 +742,9 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_saddr = 0; /* Use device */ sk_dst_reset(sk); ret = 0; -out: return ret; +out: + release_sock(sk); + return ret; } /* diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3b75836db19b..78e81465f5f3 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -842,6 +842,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, } release_sock(sk); + sk_defer_free_flush(sk); if (spliced) return spliced; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b53476e78c84..fec656f5a39e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2095,7 +2095,7 @@ process: nf_reset_ct(skb); if (tcp_filter(sk, skb)) { - drop_reason = SKB_DROP_REASON_TCP_FILTER; + drop_reason = SKB_DROP_REASON_SOCKET_FILTER; goto discard_and_relse; } th = (const struct tcphdr *)skb->data; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3eee17790a82..f927c199a93c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2589,7 +2589,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, __u32 valid_lft, u32 prefered_lft) { struct inet6_ifaddr *ifp = ipv6_get_ifaddr(net, addr, dev, 1); - int create = 0; + int create = 0, update_lft = 0; if (!ifp && valid_lft) { int max_addresses = in6_dev->cnf.max_addresses; @@ -2633,19 +2633,32 @@ int 
addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, unsigned long now; u32 stored_lft; - /* Update lifetime (RFC4862 5.5.3 e) - * We deviate from RFC4862 by honoring all Valid Lifetimes to - * improve the reaction of SLAAC to renumbering events - * (draft-gont-6man-slaac-renum-06, Section 4.2) - */ + /* update lifetime (RFC2462 5.5.3 e) */ spin_lock_bh(&ifp->lock); now = jiffies; if (ifp->valid_lft > (now - ifp->tstamp) / HZ) stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ; else stored_lft = 0; - if (!create && stored_lft) { + const u32 minimum_lft = min_t(u32, + stored_lft, MIN_VALID_LIFETIME); + valid_lft = max(valid_lft, minimum_lft); + + /* RFC4862 Section 5.5.3e: + * "Note that the preferred lifetime of the + * corresponding address is always reset to + * the Preferred Lifetime in the received + * Prefix Information option, regardless of + * whether the valid lifetime is also reset or + * ignored." + * + * So we should always update prefered_lft here. + */ + update_lft = 1; + } + + if (update_lft) { ifp->valid_lft = valid_lft; ifp->prefered_lft = prefered_lft; ifp->tstamp = now; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 463c37dea449..413f66781e50 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -112,7 +112,7 @@ void fib6_update_sernum(struct net *net, struct fib6_info *f6i) fn = rcu_dereference_protected(f6i->fib6_node, lockdep_is_held(&f6i->fib6_table->tb6_lock)); if (fn) - fn->fn_sernum = fib6_new_sernum(net); + WRITE_ONCE(fn->fn_sernum, fib6_new_sernum(net)); } /* @@ -590,12 +590,13 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, spin_unlock_bh(&table->tb6_lock); if (res > 0) { cb->args[4] = 1; - cb->args[5] = w->root->fn_sernum; + cb->args[5] = READ_ONCE(w->root->fn_sernum); } } else { - if (cb->args[5] != w->root->fn_sernum) { + int sernum = READ_ONCE(w->root->fn_sernum); + if (cb->args[5] != sernum) { /* Begin at the root if the tree changed */ - cb->args[5] = w->root->fn_sernum; + cb->args[5] = sernum; w->state = FWS_INIT; w->node = w->root; w->skip = w->count; @@ -1345,7 +1346,7 @@ static void __fib6_update_sernum_upto_root(struct fib6_info *rt, /* paired with smp_rmb() in fib6_get_cookie_safe() */ smp_wmb(); while (fn) { - fn->fn_sernum = sernum; + WRITE_ONCE(fn->fn_sernum, sernum); fn = rcu_dereference_protected(fn->parent, lockdep_is_held(&rt->fib6_table->tb6_lock)); } @@ -2174,8 +2175,8 @@ static int fib6_clean_node(struct fib6_walker *w) }; if (c->sernum != FIB6_NO_SERNUM_CHANGE && - w->node->fn_sernum != c->sernum) - w->node->fn_sernum = c->sernum; + READ_ONCE(w->node->fn_sernum) != c->sernum) + WRITE_ONCE(w->node->fn_sernum, c->sernum); if (!c->func) { WARN_ON_ONCE(c->sernum == FIB6_NO_SERNUM_CHANGE); @@ -2543,7 +2544,7 @@ static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter, iter->w.state = FWS_INIT; iter->w.node = iter->w.root; iter->w.args = iter; - iter->sernum = iter->w.root->fn_sernum; + iter->sernum = READ_ONCE(iter->w.root->fn_sernum); INIT_LIST_HEAD(&iter->w.lh); fib6_walker_link(net, &iter->w); } @@ -2571,8 +2572,10 @@ static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl, static void ipv6_route_check_sernum(struct ipv6_route_iter *iter) { - if (iter->sernum != iter->w.root->fn_sernum) { - iter->sernum = iter->w.root->fn_sernum; + int sernum = READ_ONCE(iter->w.root->fn_sernum); + + if (iter->sernum != sernum) { + iter->sernum = sernum; iter->w.state = FWS_INIT; iter->w.node = iter->w.root; WARN_ON(iter->w.skip); diff --git 
a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index fe786df4f849..97ade833f58c 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1036,14 +1036,14 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t, if (unlikely(!ipv6_chk_addr_and_flags(net, laddr, ldev, false, 0, IFA_F_TENTATIVE))) - pr_warn("%s xmit: Local address not yet configured!\n", - p->name); + pr_warn_ratelimited("%s xmit: Local address not yet configured!\n", + p->name); else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) && !ipv6_addr_is_multicast(raddr) && unlikely(ipv6_chk_addr_and_flags(net, raddr, ldev, true, 0, IFA_F_TENTATIVE))) - pr_warn("%s xmit: Routing loop! Remote address found on this node!\n", - p->name); + pr_warn_ratelimited("%s xmit: Routing loop! Remote address found on this node!\n", + p->name); else ret = 1; rcu_read_unlock(); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e6de94203c13..f4884cda13b9 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2802,7 +2802,7 @@ static void ip6_link_failure(struct sk_buff *skb) if (from) { fn = rcu_dereference(from->fib6_node); if (fn && (rt->rt6i_flags & RTF_DEFAULT)) - fn->fn_sernum = -1; + WRITE_ONCE(fn->fn_sernum, -1); } } rcu_read_unlock(); diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 75af1f701e1d..782b1d452269 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -478,6 +478,20 @@ __lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id) return NULL; } +static struct mptcp_pm_addr_entry * +__lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info, + bool lookup_by_id) +{ + struct mptcp_pm_addr_entry *entry; + + list_for_each_entry(entry, &pernet->local_addr_list, list) { + if ((!lookup_by_id && addresses_equal(&entry->addr, info, true)) || + (lookup_by_id && entry->addr.id == info->id)) + return entry; + } + return NULL; +} + static int lookup_id_by_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *addr) { @@ -777,7 +791,7 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, removed = true; __MPTCP_INC_STATS(sock_net(sk), rm_type); } - __set_bit(rm_list->ids[1], msk->pm.id_avail_bitmap); + __set_bit(rm_list->ids[i], msk->pm.id_avail_bitmap); if (!removed) continue; @@ -1763,18 +1777,21 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info) return -EOPNOTSUPP; } - list_for_each_entry(entry, &pernet->local_addr_list, list) { - if ((!lookup_by_id && addresses_equal(&entry->addr, &addr.addr, true)) || - (lookup_by_id && entry->addr.id == addr.addr.id)) { - mptcp_nl_addr_backup(net, &entry->addr, bkup); - - if (bkup) - entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP; - else - entry->flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP; - } + spin_lock_bh(&pernet->lock); + entry = __lookup_addr(pernet, &addr.addr, lookup_by_id); + if (!entry) { + spin_unlock_bh(&pernet->lock); + return -EINVAL; } + if (bkup) + entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP; + else + entry->flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP; + addr = *entry; + spin_unlock_bh(&pernet->lock); + + mptcp_nl_addr_backup(net, &addr.addr, bkup); return 0; } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 0e6b42c76ea0..85317ce38e3f 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -408,7 +408,7 @@ DECLARE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions); struct mptcp_subflow_context { struct list_head node;/* conn_list of subflows */ - char reset_start[0]; + struct_group(reset, unsigned long avg_pacing_rate; /* protected by msk socket lock */ u64 
local_key; @@ -458,7 +458,7 @@ struct mptcp_subflow_context { long delegated_status; - char reset_end[0]; + ); struct list_head delegated_node; /* link into delegated_action, protected by local BH */ @@ -494,7 +494,7 @@ mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) static inline void mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow) { - memset(subflow->reset_start, 0, subflow->reset_end - subflow->reset_start); + memset(&subflow->reset, 0, sizeof(subflow->reset)); subflow->request_mptcp = 1; } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 894a325d39f2..d6aa5b47031e 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1924,15 +1924,17 @@ repeat: pr_debug("nf_conntrack_in: Can't track with proto module\n"); nf_ct_put(ct); skb->_nfct = 0; - NF_CT_STAT_INC_ATOMIC(state->net, invalid); - if (ret == -NF_DROP) - NF_CT_STAT_INC_ATOMIC(state->net, drop); /* Special case: TCP tracker reports an attempt to reopen a * closed/aborted connection. We have to go back and create a * fresh conntrack. */ if (ret == -NF_REPEAT) goto repeat; + + NF_CT_STAT_INC_ATOMIC(state->net, invalid); + if (ret == -NF_DROP) + NF_CT_STAT_INC_ATOMIC(state->net, drop); + ret = -ret; goto out; } diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c index 7f19ee259609..55415f011943 100644 --- a/net/netfilter/nf_conntrack_netbios_ns.c +++ b/net/netfilter/nf_conntrack_netbios_ns.c @@ -20,13 +20,14 @@ #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_expect.h> +#define HELPER_NAME "netbios-ns" #define NMBD_PORT 137 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_DESCRIPTION("NetBIOS name service broadcast connection tracking helper"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ip_conntrack_netbios_ns"); -MODULE_ALIAS_NFCT_HELPER("netbios_ns"); +MODULE_ALIAS_NFCT_HELPER(HELPER_NAME); static unsigned int timeout __read_mostly = 3; module_param(timeout, uint, 0400); @@ -44,7 +45,7 @@ static int netbios_ns_help(struct sk_buff *skb, unsigned int protoff, } static struct nf_conntrack_helper helper __read_mostly = { - .name = "netbios-ns", + .name = HELPER_NAME, .tuple.src.l3num = NFPROTO_IPV4, .tuple.src.u.udp.port = cpu_to_be16(NMBD_PORT), .tuple.dst.protonum = IPPROTO_UDP, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 77938b1042f3..cf454f8ca2b0 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8264,14 +8264,12 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha void *data, *data_boundary; struct nft_rule_dp *prule; struct nft_rule *rule; - int i; /* already handled or inactive chain? 
*/ if (chain->blob_next || !nft_is_active_next(net, chain)) return 0; rule = list_entry(&chain->rules, struct nft_rule, list); - i = 0; data_size = 0; list_for_each_entry_continue(rule, &chain->rules, list) { @@ -8301,7 +8299,7 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha return -ENOMEM; size = 0; - track.last = last; + track.last = nft_expr_last(rule); nft_rule_for_each_expr(expr, last, rule) { track.cur = expr; diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c index 7d00a1452b1d..3362417ebfdb 100644 --- a/net/netfilter/nft_connlimit.c +++ b/net/netfilter/nft_connlimit.c @@ -62,6 +62,7 @@ static int nft_connlimit_do_init(const struct nft_ctx *ctx, { bool invert = false; u32 flags, limit; + int err; if (!tb[NFTA_CONNLIMIT_COUNT]) return -EINVAL; @@ -84,7 +85,15 @@ static int nft_connlimit_do_init(const struct nft_ctx *ctx, priv->limit = limit; priv->invert = invert; - return nf_ct_netns_get(ctx->net, ctx->family); + err = nf_ct_netns_get(ctx->net, ctx->family); + if (err < 0) + goto err_netns; + + return 0; +err_netns: + kfree(priv->list); + + return err; } static void nft_connlimit_do_destroy(const struct nft_ctx *ctx, diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 5bd409ab4cc2..85ea7ddb48db 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1774,6 +1774,7 @@ static int fanout_add(struct sock *sk, struct fanout_args *args) match->prot_hook.dev = po->prot_hook.dev; match->prot_hook.func = packet_rcv_fanout; match->prot_hook.af_packet_priv = match; + match->prot_hook.af_packet_net = read_pnet(&match->net); match->prot_hook.id_match = match_fanout_group; match->max_num_members = args->max_num_members; list_add(&match->list, &fanout_list); @@ -3353,6 +3354,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, po->prot_hook.func = packet_rcv_spkt; po->prot_hook.af_packet_priv = sk; + po->prot_hook.af_packet_net = sock_net(sk); if (proto) { po->prot_hook.type = proto; diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 6be2672a65ea..df864e692267 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -157,7 +157,7 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call) static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) { struct sk_buff *skb; - unsigned long resend_at, rto_j; + unsigned long resend_at; rxrpc_seq_t cursor, seq, top; ktime_t now, max_age, oldest, ack_ts; int ix; @@ -165,10 +165,8 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) _enter("{%d,%d}", call->tx_hard_ack, call->tx_top); - rto_j = call->peer->rto_j; - now = ktime_get_real(); - max_age = ktime_sub(now, jiffies_to_usecs(rto_j)); + max_age = ktime_sub(now, jiffies_to_usecs(call->peer->rto_j)); spin_lock_bh(&call->lock); @@ -213,7 +211,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) } resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(now, oldest))); - resend_at += jiffies + rto_j; + resend_at += jiffies + rxrpc_get_rto_backoff(call->peer, retrans); WRITE_ONCE(call->resend_at, resend_at); if (unacked) diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 10f2bf2e9068..a45c83f22236 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -468,7 +468,7 @@ done: if (call->peer->rtt_count > 1) { unsigned long nowj = jiffies, ack_lost_at; - ack_lost_at = rxrpc_get_rto_backoff(call->peer, retrans); + ack_lost_at = rxrpc_get_rto_backoff(call->peer, false); ack_lost_at += nowj; 
WRITE_ONCE(call->ack_lost_at, ack_lost_at); rxrpc_reduce_call_timer(call, ack_lost_at, nowj, diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 2cb496c84878..179825a3b2fd 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1204,7 +1204,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev, err = -ENOENT; if (!ops) { - NL_SET_ERR_MSG(extack, "Specified qdisc not found"); + NL_SET_ERR_MSG(extack, "Specified qdisc kind is unknown"); goto err_out; } diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 9267922ea9c3..23a9d6242429 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1810,6 +1810,26 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!hopt->rate.rate || !hopt->ceil.rate) goto failure; + if (q->offload) { + /* Options not supported by the offload. */ + if (hopt->rate.overhead || hopt->ceil.overhead) { + NL_SET_ERR_MSG(extack, "HTB offload doesn't support the overhead parameter"); + goto failure; + } + if (hopt->rate.mpu || hopt->ceil.mpu) { + NL_SET_ERR_MSG(extack, "HTB offload doesn't support the mpu parameter"); + goto failure; + } + if (hopt->quantum) { + NL_SET_ERR_MSG(extack, "HTB offload doesn't support the quantum parameter"); + goto failure; + } + if (hopt->prio) { + NL_SET_ERR_MSG(extack, "HTB offload doesn't support the prio parameter"); + goto failure; + } + } + /* Keeping backward compatible with rate_table based iproute2 tc */ if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE) qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB], diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 961854e56736..d5ea62b82bb8 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -566,12 +566,17 @@ static void smc_stat_fallback(struct smc_sock *smc) mutex_unlock(&net->smc.mutex_fback_rsn); } -static void smc_switch_to_fallback(struct smc_sock *smc, int reason_code) +static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code) { wait_queue_head_t *smc_wait = sk_sleep(&smc->sk); - wait_queue_head_t *clc_wait = sk_sleep(smc->clcsock->sk); + wait_queue_head_t *clc_wait; unsigned long flags; + mutex_lock(&smc->clcsock_release_lock); + if (!smc->clcsock) { + mutex_unlock(&smc->clcsock_release_lock); + return -EBADF; + } smc->use_fallback = true; smc->fallback_rsn = reason_code; smc_stat_fallback(smc); @@ -586,18 +591,30 @@ static void smc_switch_to_fallback(struct smc_sock *smc, int reason_code) * smc socket->wq, which should be removed * to clcsocket->wq during the fallback. 
*/ + clc_wait = sk_sleep(smc->clcsock->sk); spin_lock_irqsave(&smc_wait->lock, flags); spin_lock_nested(&clc_wait->lock, SINGLE_DEPTH_NESTING); list_splice_init(&smc_wait->head, &clc_wait->head); spin_unlock(&clc_wait->lock); spin_unlock_irqrestore(&smc_wait->lock, flags); } + mutex_unlock(&smc->clcsock_release_lock); + return 0; } /* fall back during connect */ static int smc_connect_fallback(struct smc_sock *smc, int reason_code) { - smc_switch_to_fallback(smc, reason_code); + struct net *net = sock_net(&smc->sk); + int rc = 0; + + rc = smc_switch_to_fallback(smc, reason_code); + if (rc) { /* fallback fails */ + this_cpu_inc(net->smc.smc_stats->clnt_hshake_err_cnt); + if (smc->sk.sk_state == SMC_INIT) + sock_put(&smc->sk); /* passive closing */ + return rc; + } smc_copy_sock_settings_to_clc(smc); smc->connect_nonblock = 0; if (smc->sk.sk_state == SMC_INIT) @@ -1518,11 +1535,12 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code, { /* RDMA setup failed, switch back to TCP */ smc_conn_abort(new_smc, local_first); - if (reason_code < 0) { /* error, no fallback possible */ + if (reason_code < 0 || + smc_switch_to_fallback(new_smc, reason_code)) { + /* error, no fallback possible */ smc_listen_out_err(new_smc); return; } - smc_switch_to_fallback(new_smc, reason_code); if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) { if (smc_clc_send_decline(new_smc, reason_code, version) < 0) { smc_listen_out_err(new_smc); @@ -1964,8 +1982,11 @@ static void smc_listen_work(struct work_struct *work) /* check if peer is smc capable */ if (!tcp_sk(newclcsock->sk)->syn_smc) { - smc_switch_to_fallback(new_smc, SMC_CLC_DECL_PEERNOSMC); - smc_listen_out_connected(new_smc); + rc = smc_switch_to_fallback(new_smc, SMC_CLC_DECL_PEERNOSMC); + if (rc) + smc_listen_out_err(new_smc); + else + smc_listen_out_connected(new_smc); return; } @@ -2254,7 +2275,9 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) if (msg->msg_flags & MSG_FASTOPEN) { if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) { - smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP); + rc = smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP); + if (rc) + goto out; } else { rc = -EINVAL; goto out; @@ -2447,6 +2470,11 @@ static int smc_setsockopt(struct socket *sock, int level, int optname, /* generic setsockopts reaching us here always apply to the * CLC socket */ + mutex_lock(&smc->clcsock_release_lock); + if (!smc->clcsock) { + mutex_unlock(&smc->clcsock_release_lock); + return -EBADF; + } if (unlikely(!smc->clcsock->ops->setsockopt)) rc = -EOPNOTSUPP; else @@ -2456,6 +2484,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname, sk->sk_err = smc->clcsock->sk->sk_err; sk_error_report(sk); } + mutex_unlock(&smc->clcsock_release_lock); if (optlen < sizeof(int)) return -EINVAL; @@ -2472,7 +2501,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname, case TCP_FASTOPEN_NO_COOKIE: /* option not supported by SMC */ if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) { - smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP); + rc = smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP); } else { rc = -EINVAL; } @@ -2515,13 +2544,23 @@ static int smc_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { struct smc_sock *smc; + int rc; smc = smc_sk(sock->sk); + mutex_lock(&smc->clcsock_release_lock); + if (!smc->clcsock) { + mutex_unlock(&smc->clcsock_release_lock); + return -EBADF; + } /* socket options apply to the CLC 
socket */ - if (unlikely(!smc->clcsock->ops->getsockopt)) + if (unlikely(!smc->clcsock->ops->getsockopt)) { + mutex_unlock(&smc->clcsock_release_lock); return -EOPNOTSUPP; - return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname, - optval, optlen); + } + rc = smc->clcsock->ops->getsockopt(smc->clcsock, level, optname, + optval, optlen); + mutex_unlock(&smc->clcsock_release_lock); + return rc; } static int smc_ioctl(struct socket *sock, unsigned int cmd, |