summary | refs | log | tree | commit | diff
path: root/net
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2024-12-05 10:25:06 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2024-12-05 10:25:06 -0800
commit: 896d8946da97332d4dc80fa1937d8dd6b1c35ad4 (patch)
tree: c36709d93c420caebb7d1637b3bb1f777f52c8d7 /net
parent: 9d6a414ad31e8eb296cd6f2c1834b2c6994960a0 (diff)
parent: 31f1b55d5d7e531cd827419e5d71c19f24de161c (diff)
Merge tag 'net-6.13-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Paolo Abeni:
 "Including fixes from can and netfilter.

  Current release - regressions:

   - rtnetlink: fix double call of rtnl_link_get_net_ifla()

   - tcp: populate XPS related fields of timewait sockets

   - ethtool: fix access to uninitialized fields in set RXNFC command

   - selinux: use sk_to_full_sk() in selinux_ip_output()

  Current release - new code bugs:

   - net: make napi_hash_lock irq safe

   - eth:
      - bnxt_en: support header page pool in queue API
      - ice: fix NULL pointer dereference in switchdev

  Previous releases - regressions:

   - core: fix icmp host relookup triggering ip_rt_bug

   - ipv6:
      - avoid possible NULL deref in modify_prefix_route()
      - release expired exception dst cached in socket

   - smc: fix LGR and link use-after-free issue

   - hsr: avoid potential out-of-bound access in fill_frame_info()

   - can: hi311x: fix potential use-after-free

   - eth: ice: fix VLAN pruning in switchdev mode

  Previous releases - always broken:

   - netfilter:
      - ipset: hold module reference while requesting a module
      - nft_inner: incorrect percpu area handling under softirq

   - can: j1939: fix skb reference counting

   - eth:
      - mlxsw: use correct key block on Spectrum-4
      - mlx5: fix memory leak in mlx5hws_definer_calc_layout"

* tag 'net-6.13-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (76 commits)
  net :mana :Request a V2 response version for MANA_QUERY_GF_STAT
  net: avoid potential UAF in default_operstate()
  vsock/test: verify socket options after setting them
  vsock/test: fix parameter types in SO_VM_SOCKETS_* calls
  vsock/test: fix failures due to wrong SO_RCVLOWAT parameter
  net/mlx5e: Remove workaround to avoid syndrome for internal port
  net/mlx5e: SD, Use correct mdev to build channel param
  net/mlx5: E-Switch, Fix switching to switchdev mode in MPV
  net/mlx5: E-Switch, Fix switching to switchdev mode with IB device disabled
  net/mlx5: HWS: Properly set bwc queue locks lock classes
  net/mlx5: HWS: Fix memory leak in mlx5hws_definer_calc_layout
  bnxt_en: handle tpa_info in queue API implementation
  bnxt_en: refactor bnxt_alloc_rx_rings() to call bnxt_alloc_rx_agg_bmap()
  bnxt_en: refactor tpa_info alloc/free into helpers
  geneve: do not assume mac header is set in geneve_xmit_skb()
  mlxsw: spectrum_acl_flex_keys: Use correct key block on Spectrum-4
  ethtool: Fix wrong mod state in case of verbose and no_mask bitset
  ipmr: tune the ipmr_can_free_table() checks.
  netfilter: nft_set_hash: skip duplicated elements pending gc run
  netfilter: ipset: Hold module reference while requesting a module
  ...
Diffstat (limited to 'net')
-rw-r--r-- net/can/j1939/transport.c 2
-rw-r--r-- net/core/dev.c 18
-rw-r--r-- net/core/link_watch.c 7
-rw-r--r-- net/core/rtnetlink.c 44
-rw-r--r-- net/dccp/feat.c 6
-rw-r--r-- net/ethtool/bitset.c 48
-rw-r--r-- net/ethtool/ioctl.c 3
-rw-r--r-- net/hsr/hsr_device.c 19
-rw-r--r-- net/hsr/hsr_forward.c 2
-rw-r--r-- net/ipv4/icmp.c 3
-rw-r--r-- net/ipv4/ipmr.c 2
-rw-r--r-- net/ipv4/tcp_minisocks.c 4
-rw-r--r-- net/ipv4/udp.c 14
-rw-r--r-- net/ipv6/addrconf.c 13
-rw-r--r-- net/ipv6/ip6mr.c 2
-rw-r--r-- net/ipv6/route.c 6
-rw-r--r-- net/netfilter/ipset/ip_set_core.c 5
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto.c 4
-rw-r--r-- net/netfilter/nft_inner.c 57
-rw-r--r-- net/netfilter/nft_set_hash.c 16
-rw-r--r-- net/netfilter/nft_socket.c 2
-rw-r--r-- net/netfilter/xt_LED.c 4
-rw-r--r-- net/sched/cls_flower.c 5
-rw-r--r-- net/sched/sch_cake.c 2
-rw-r--r-- net/sched/sch_choke.c 2
-rw-r--r-- net/sched/sch_tbf.c 18
-rw-r--r-- net/smc/af_smc.c 6
-rw-r--r-- net/tipc/udp_media.c 2
28 files changed, 222 insertions, 94 deletions
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index 319f47df3330..95f7a7e65a73 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -1505,7 +1505,7 @@ static struct j1939_session *j1939_session_new(struct j1939_priv *priv,
session->state = J1939_SESSION_NEW;
skb_queue_head_init(&session->skb_queue);
- skb_queue_tail(&session->skb_queue, skb);
+ skb_queue_tail(&session->skb_queue, skb_get(skb));
skcb = j1939_skb_to_cb(skb);
memcpy(&session->skcb, skcb, sizeof(session->skcb));
diff --git a/net/core/dev.c b/net/core/dev.c
index 13d00fc10f55..45a8c3dd4a64 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6557,18 +6557,22 @@ static void __napi_hash_add_with_id(struct napi_struct *napi,
static void napi_hash_add_with_id(struct napi_struct *napi,
unsigned int napi_id)
{
- spin_lock(&napi_hash_lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&napi_hash_lock, flags);
WARN_ON_ONCE(napi_by_id(napi_id));
__napi_hash_add_with_id(napi, napi_id);
- spin_unlock(&napi_hash_lock);
+ spin_unlock_irqrestore(&napi_hash_lock, flags);
}
static void napi_hash_add(struct napi_struct *napi)
{
+ unsigned long flags;
+
if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state))
return;
- spin_lock(&napi_hash_lock);
+ spin_lock_irqsave(&napi_hash_lock, flags);
/* 0..NR_CPUS range is reserved for sender_cpu use */
do {
@@ -6578,7 +6582,7 @@ static void napi_hash_add(struct napi_struct *napi)
__napi_hash_add_with_id(napi, napi_gen_id);
- spin_unlock(&napi_hash_lock);
+ spin_unlock_irqrestore(&napi_hash_lock, flags);
}
/* Warning : caller is responsible to make sure rcu grace period
@@ -6586,11 +6590,13 @@ static void napi_hash_add(struct napi_struct *napi)
*/
static void napi_hash_del(struct napi_struct *napi)
{
- spin_lock(&napi_hash_lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&napi_hash_lock, flags);
hlist_del_init_rcu(&napi->napi_hash_node);
- spin_unlock(&napi_hash_lock);
+ spin_unlock_irqrestore(&napi_hash_lock, flags);
}
static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index ab150641142a..1b4d39e38084 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -45,9 +45,14 @@ static unsigned int default_operstate(const struct net_device *dev)
int iflink = dev_get_iflink(dev);
struct net_device *peer;
- if (iflink == dev->ifindex)
+ /* If called from netdev_run_todo()/linkwatch_sync_dev(),
+ * dev_net(dev) can be already freed, and RTNL is not held.
+ */
+ if (dev->reg_state == NETREG_UNREGISTERED ||
+ iflink == dev->ifindex)
return IF_OPER_DOWN;
+ ASSERT_RTNL();
peer = __dev_get_by_index(dev_net(dev), iflink);
if (!peer)
return IF_OPER_DOWN;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 58df76fe408a..ab5f201bf0ab 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3746,6 +3746,7 @@ static int rtnl_group_changelink(const struct sk_buff *skb,
static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm,
const struct rtnl_link_ops *ops,
struct net *tgt_net, struct net *link_net,
+ struct net *peer_net,
const struct nlmsghdr *nlh,
struct nlattr **tb, struct nlattr **data,
struct netlink_ext_ack *extack)
@@ -3776,8 +3777,13 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm,
dev->ifindex = ifm->ifi_index;
+ if (link_net)
+ net = link_net;
+ if (peer_net)
+ net = peer_net;
+
if (ops->newlink)
- err = ops->newlink(link_net ? : net, dev, tb, data, extack);
+ err = ops->newlink(net, dev, tb, data, extack);
else
err = register_netdevice(dev);
if (err < 0) {
@@ -3812,40 +3818,33 @@ out_unregister:
goto out;
}
-static int rtnl_add_peer_net(struct rtnl_nets *rtnl_nets,
- const struct rtnl_link_ops *ops,
- struct nlattr *data[],
- struct netlink_ext_ack *extack)
+static struct net *rtnl_get_peer_net(const struct rtnl_link_ops *ops,
+ struct nlattr *data[],
+ struct netlink_ext_ack *extack)
{
struct nlattr *tb[IFLA_MAX + 1];
- struct net *net;
int err;
if (!data || !data[ops->peer_type])
- return 0;
+ return NULL;
err = rtnl_nla_parse_ifinfomsg(tb, data[ops->peer_type], extack);
if (err < 0)
- return err;
+ return ERR_PTR(err);
if (ops->validate) {
err = ops->validate(tb, NULL, extack);
if (err < 0)
- return err;
+ return ERR_PTR(err);
}
- net = rtnl_link_get_net_ifla(tb);
- if (IS_ERR(net))
- return PTR_ERR(net);
- if (net)
- rtnl_nets_add(rtnl_nets, net);
-
- return 0;
+ return rtnl_link_get_net_ifla(tb);
}
static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
const struct rtnl_link_ops *ops,
struct net *tgt_net, struct net *link_net,
+ struct net *peer_net,
struct rtnl_newlink_tbs *tbs,
struct nlattr **data,
struct netlink_ext_ack *extack)
@@ -3894,14 +3893,15 @@ static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
return -EOPNOTSUPP;
}
- return rtnl_newlink_create(skb, ifm, ops, tgt_net, link_net, nlh, tb, data, extack);
+ return rtnl_newlink_create(skb, ifm, ops, tgt_net, link_net, peer_net, nlh,
+ tb, data, extack);
}
static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
+ struct net *tgt_net, *link_net = NULL, *peer_net = NULL;
struct nlattr **tb, **linkinfo, **data = NULL;
- struct net *tgt_net, *link_net = NULL;
struct rtnl_link_ops *ops = NULL;
struct rtnl_newlink_tbs *tbs;
struct rtnl_nets rtnl_nets;
@@ -3971,9 +3971,11 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
}
if (ops->peer_type) {
- ret = rtnl_add_peer_net(&rtnl_nets, ops, data, extack);
- if (ret < 0)
+ peer_net = rtnl_get_peer_net(ops, data, extack);
+ if (IS_ERR(peer_net))
goto put_ops;
+ if (peer_net)
+ rtnl_nets_add(&rtnl_nets, peer_net);
}
}
@@ -4004,7 +4006,7 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
}
rtnl_nets_lock(&rtnl_nets);
- ret = __rtnl_newlink(skb, nlh, ops, tgt_net, link_net, tbs, data, extack);
+ ret = __rtnl_newlink(skb, nlh, ops, tgt_net, link_net, peer_net, tbs, data, extack);
rtnl_nets_unlock(&rtnl_nets);
put_net:
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 54086bb05c42..f7554dcdaaba 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -1166,8 +1166,12 @@ static u8 dccp_feat_change_recv(struct list_head *fn, u8 is_mandatory, u8 opt,
goto not_valid_or_not_known;
}
- return dccp_feat_push_confirm(fn, feat, local, &fval);
+ if (dccp_feat_push_confirm(fn, feat, local, &fval)) {
+ kfree(fval.sp.vec);
+ return DCCP_RESET_CODE_TOO_BUSY;
+ }
+ return 0;
} else if (entry->state == FEAT_UNSTABLE) { /* 6.6.2 */
return 0;
}
diff --git a/net/ethtool/bitset.c b/net/ethtool/bitset.c
index 0515d6604b3b..f0883357d12e 100644
--- a/net/ethtool/bitset.c
+++ b/net/ethtool/bitset.c
@@ -425,12 +425,32 @@ static int ethnl_parse_bit(unsigned int *index, bool *val, unsigned int nbits,
return 0;
}
+/**
+ * ethnl_bitmap32_equal() - Compare two bitmaps
+ * @map1: first bitmap
+ * @map2: second bitmap
+ * @nbits: bit size to compare
+ *
+ * Return: true if first @nbits are equal, false if not
+ */
+static bool ethnl_bitmap32_equal(const u32 *map1, const u32 *map2,
+ unsigned int nbits)
+{
+ if (memcmp(map1, map2, nbits / 32 * sizeof(u32)))
+ return false;
+ if (nbits % 32 == 0)
+ return true;
+ return !((map1[nbits / 32] ^ map2[nbits / 32]) &
+ ethnl_lower_bits(nbits % 32));
+}
+
static int
ethnl_update_bitset32_verbose(u32 *bitmap, unsigned int nbits,
const struct nlattr *attr, struct nlattr **tb,
ethnl_string_array_t names,
struct netlink_ext_ack *extack, bool *mod)
{
+ u32 *saved_bitmap = NULL;
struct nlattr *bit_attr;
bool no_mask;
int rem;
@@ -448,8 +468,20 @@ ethnl_update_bitset32_verbose(u32 *bitmap, unsigned int nbits,
}
no_mask = tb[ETHTOOL_A_BITSET_NOMASK];
- if (no_mask)
- ethnl_bitmap32_clear(bitmap, 0, nbits, mod);
+ if (no_mask) {
+ unsigned int nwords = DIV_ROUND_UP(nbits, 32);
+ unsigned int nbytes = nwords * sizeof(u32);
+ bool dummy;
+
+ /* The bitmap size is only the size of the map part without
+ * its mask part.
+ */
+ saved_bitmap = kcalloc(nwords, sizeof(u32), GFP_KERNEL);
+ if (!saved_bitmap)
+ return -ENOMEM;
+ memcpy(saved_bitmap, bitmap, nbytes);
+ ethnl_bitmap32_clear(bitmap, 0, nbits, &dummy);
+ }
nla_for_each_nested(bit_attr, tb[ETHTOOL_A_BITSET_BITS], rem) {
bool old_val, new_val;
@@ -458,22 +490,30 @@ ethnl_update_bitset32_verbose(u32 *bitmap, unsigned int nbits,
if (nla_type(bit_attr) != ETHTOOL_A_BITSET_BITS_BIT) {
NL_SET_ERR_MSG_ATTR(extack, bit_attr,
"only ETHTOOL_A_BITSET_BITS_BIT allowed in ETHTOOL_A_BITSET_BITS");
+ kfree(saved_bitmap);
return -EINVAL;
}
ret = ethnl_parse_bit(&idx, &new_val, nbits, bit_attr, no_mask,
names, extack);
- if (ret < 0)
+ if (ret < 0) {
+ kfree(saved_bitmap);
return ret;
+ }
old_val = bitmap[idx / 32] & ((u32)1 << (idx % 32));
if (new_val != old_val) {
if (new_val)
bitmap[idx / 32] |= ((u32)1 << (idx % 32));
else
bitmap[idx / 32] &= ~((u32)1 << (idx % 32));
- *mod = true;
+ if (!no_mask)
+ *mod = true;
}
}
+ if (no_mask && !ethnl_bitmap32_equal(saved_bitmap, bitmap, nbits))
+ *mod = true;
+
+ kfree(saved_bitmap);
return 0;
}
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 61df8ce44379..7bb94875a7ec 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -993,7 +993,8 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
return rc;
/* Nonzero ring with RSS only makes sense if NIC adds them together */
- if (info.flow_type & FLOW_RSS && !ops->cap_rss_rxnfc_adds &&
+ if (cmd == ETHTOOL_SRXCLSRLINS && info.flow_type & FLOW_RSS &&
+ !ops->cap_rss_rxnfc_adds &&
ethtool_get_flow_spec_ring(info.fs.ring_cookie))
return -EINVAL;
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index 31a416ee21ad..03eadd6c51fd 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -246,20 +246,22 @@ static const struct header_ops hsr_header_ops = {
.parse = eth_header_parse,
};
-static struct sk_buff *hsr_init_skb(struct hsr_port *master)
+static struct sk_buff *hsr_init_skb(struct hsr_port *master, int extra)
{
struct hsr_priv *hsr = master->hsr;
struct sk_buff *skb;
int hlen, tlen;
+ int len;
hlen = LL_RESERVED_SPACE(master->dev);
tlen = master->dev->needed_tailroom;
+ len = sizeof(struct hsr_sup_tag) + sizeof(struct hsr_sup_payload);
/* skb size is same for PRP/HSR frames, only difference
* being, for PRP it is a trailer and for HSR it is a
- * header
+ * header.
+ * RedBox might use @extra more bytes.
*/
- skb = dev_alloc_skb(sizeof(struct hsr_sup_tag) +
- sizeof(struct hsr_sup_payload) + hlen + tlen);
+ skb = dev_alloc_skb(len + extra + hlen + tlen);
if (!skb)
return skb;
@@ -295,6 +297,7 @@ static void send_hsr_supervision_frame(struct hsr_port *port,
struct hsr_sup_tlv *hsr_stlv;
struct hsr_sup_tag *hsr_stag;
struct sk_buff *skb;
+ int extra = 0;
*interval = msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL);
if (hsr->announce_count < 3 && hsr->prot_version == 0) {
@@ -303,7 +306,11 @@ static void send_hsr_supervision_frame(struct hsr_port *port,
hsr->announce_count++;
}
- skb = hsr_init_skb(port);
+ if (hsr->redbox)
+ extra = sizeof(struct hsr_sup_tlv) +
+ sizeof(struct hsr_sup_payload);
+
+ skb = hsr_init_skb(port, extra);
if (!skb) {
netdev_warn_once(port->dev, "HSR: Could not send supervision frame\n");
return;
@@ -362,7 +369,7 @@ static void send_prp_supervision_frame(struct hsr_port *master,
struct hsr_sup_tag *hsr_stag;
struct sk_buff *skb;
- skb = hsr_init_skb(master);
+ skb = hsr_init_skb(master, 0);
if (!skb) {
netdev_warn_once(master->dev, "PRP: Could not send supervision frame\n");
return;
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index aa6acebc7c1e..87bb3a91598e 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -700,6 +700,8 @@ static int fill_frame_info(struct hsr_frame_info *frame,
frame->is_vlan = true;
if (frame->is_vlan) {
+ if (skb->mac_len < offsetofend(struct hsr_vlan_ethhdr, vlanhdr))
+ return -EINVAL;
vlan_hdr = (struct hsr_vlan_ethhdr *)ethhdr;
proto = vlan_hdr->vlanhdr.h_vlan_encapsulated_proto;
}
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 4f088fa1c2f2..963a89ae9c26 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -517,6 +517,9 @@ static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4,
if (!IS_ERR(dst)) {
if (rt != rt2)
return rt;
+ if (inet_addr_type_dev_table(net, route_lookup_dev,
+ fl4->daddr) == RTN_LOCAL)
+ return rt;
} else if (PTR_ERR(dst) == -EPERM) {
rt = NULL;
} else {
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index c5b8ec5c0a8c..99d8faa508e5 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -122,7 +122,7 @@ static void ipmr_expire_process(struct timer_list *t);
static bool ipmr_can_free_table(struct net *net)
{
- return !check_net(net) || !net->ipv4.mr_rules_ops;
+ return !check_net(net) || !net_initialized(net);
}
static struct mr_table *ipmr_mr_table_iter(struct net *net,
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index bb1fe1ba867a..7121d8573928 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -326,6 +326,10 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
tcptw->tw_last_oow_ack_time = 0;
tcptw->tw_tx_delay = tp->tcp_tx_delay;
tw->tw_txhash = sk->sk_txhash;
+ tw->tw_tx_queue_mapping = sk->sk_tx_queue_mapping;
+#ifdef CONFIG_SOCK_RX_QUEUE_MAPPING
+ tw->tw_rx_queue_mapping = sk->sk_rx_queue_mapping;
+#endif
#if IS_ENABLED(CONFIG_IPV6)
if (tw->tw_family == PF_INET6) {
struct ipv6_pinfo *np = inet6_sk(sk);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 6a01905d379f..e8953e88efef 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1674,7 +1674,6 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
struct sk_buff_head *list = &sk->sk_receive_queue;
int rmem, err = -ENOMEM;
spinlock_t *busy = NULL;
- bool becomes_readable;
int size, rcvbuf;
/* Immediately drop when the receive queue is full.
@@ -1715,19 +1714,12 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
*/
sock_skb_set_dropcount(sk, skb);
- becomes_readable = skb_queue_empty(list);
__skb_queue_tail(list, skb);
spin_unlock(&list->lock);
- if (!sock_flag(sk, SOCK_DEAD)) {
- if (becomes_readable ||
- sk->sk_data_ready != sock_def_readable ||
- READ_ONCE(sk->sk_peek_off) >= 0)
- INDIRECT_CALL_1(sk->sk_data_ready,
- sock_def_readable, sk);
- else
- sk_wake_async_rcu(sk, SOCK_WAKE_WAITD, POLL_IN);
- }
+ if (!sock_flag(sk, SOCK_DEAD))
+ INDIRECT_CALL_1(sk->sk_data_ready, sock_def_readable, sk);
+
busylock_release(busy);
return 0;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index c489a1e6aec9..0e765466d7f7 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4821,7 +4821,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
ifm->ifa_prefixlen, extack);
}
-static int modify_prefix_route(struct inet6_ifaddr *ifp,
+static int modify_prefix_route(struct net *net, struct inet6_ifaddr *ifp,
unsigned long expires, u32 flags,
bool modify_peer)
{
@@ -4845,7 +4845,9 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
ifp->prefix_len,
ifp->rt_priority, ifp->idev->dev,
expires, flags, GFP_KERNEL);
- } else {
+ return 0;
+ }
+ if (f6i != net->ipv6.fib6_null_entry) {
table = f6i->fib6_table;
spin_lock_bh(&table->tb6_lock);
@@ -4858,9 +4860,8 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
}
spin_unlock_bh(&table->tb6_lock);
-
- fib6_info_release(f6i);
}
+ fib6_info_release(f6i);
return 0;
}
@@ -4939,7 +4940,7 @@ static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp,
int rc = -ENOENT;
if (had_prefixroute)
- rc = modify_prefix_route(ifp, expires, flags, false);
+ rc = modify_prefix_route(net, ifp, expires, flags, false);
/* prefix route could have been deleted; if so restore it */
if (rc == -ENOENT) {
@@ -4949,7 +4950,7 @@ static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp,
}
if (had_prefixroute && !ipv6_addr_any(&ifp->peer_addr))
- rc = modify_prefix_route(ifp, expires, flags, true);
+ rc = modify_prefix_route(net, ifp, expires, flags, true);
if (rc == -ENOENT && !ipv6_addr_any(&ifp->peer_addr)) {
addrconf_prefix_route(&ifp->peer_addr, ifp->prefix_len,
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 7f1902ac3586..578ff1336afe 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -110,7 +110,7 @@ static void ipmr_expire_process(struct timer_list *t);
static bool ip6mr_can_free_table(struct net *net)
{
- return !check_net(net) || !net->ipv6.mr6_rules_ops;
+ return !check_net(net) || !net_initialized(net);
}
static struct mr_table *ip6mr_mr_table_iter(struct net *net,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 63d7681c929f..67ff16c04718 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2780,10 +2780,10 @@ static void ip6_negative_advice(struct sock *sk,
if (rt->rt6i_flags & RTF_CACHE) {
rcu_read_lock();
if (rt6_check_expired(rt)) {
- /* counteract the dst_release() in sk_dst_reset() */
- dst_hold(dst);
+ /* rt/dst can not be destroyed yet,
+ * because of rcu_read_lock()
+ */
sk_dst_reset(sk);
-
rt6_remove_exception_rt(rt);
}
rcu_read_unlock();
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 61431690cbd5..cc20e6d56807 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -104,14 +104,19 @@ find_set_type(const char *name, u8 family, u8 revision)
static bool
load_settype(const char *name)
{
+ if (!try_module_get(THIS_MODULE))
+ return false;
+
nfnl_unlock(NFNL_SUBSYS_IPSET);
pr_debug("try to load ip_set_%s\n", name);
if (request_module("ip_set_%s", name) < 0) {
pr_warn("Can't find ip_set type %s\n", name);
nfnl_lock(NFNL_SUBSYS_IPSET);
+ module_put(THIS_MODULE);
return false;
}
nfnl_lock(NFNL_SUBSYS_IPSET);
+ module_put(THIS_MODULE);
return true;
}
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index f100da4ba3bc..a9fd1d3fc2cb 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -340,7 +340,7 @@ void __net_exit ip_vs_protocol_net_cleanup(struct netns_ipvs *ipvs)
int __init ip_vs_protocol_init(void)
{
- char protocols[64];
+ char protocols[64] = { 0 };
#define REGISTER_PROTOCOL(p) \
do { \
register_ip_vs_protocol(p); \
@@ -348,8 +348,6 @@ int __init ip_vs_protocol_init(void)
strcat(protocols, (p)->name); \
} while (0)
- protocols[0] = '\0';
- protocols[2] = '\0';
#ifdef CONFIG_IP_VS_PROTO_TCP
REGISTER_PROTOCOL(&ip_vs_protocol_tcp);
#endif
diff --git a/net/netfilter/nft_inner.c b/net/netfilter/nft_inner.c
index 928312d01eb1..817ab978d24a 100644
--- a/net/netfilter/nft_inner.c
+++ b/net/netfilter/nft_inner.c
@@ -210,35 +210,66 @@ static int nft_inner_parse(const struct nft_inner *priv,
struct nft_pktinfo *pkt,
struct nft_inner_tun_ctx *tun_ctx)
{
- struct nft_inner_tun_ctx ctx = {};
u32 off = pkt->inneroff;
if (priv->flags & NFT_INNER_HDRSIZE &&
- nft_inner_parse_tunhdr(priv, pkt, &ctx, &off) < 0)
+ nft_inner_parse_tunhdr(priv, pkt, tun_ctx, &off) < 0)
return -1;
if (priv->flags & (NFT_INNER_LL | NFT_INNER_NH)) {
- if (nft_inner_parse_l2l3(priv, pkt, &ctx, off) < 0)
+ if (nft_inner_parse_l2l3(priv, pkt, tun_ctx, off) < 0)
return -1;
} else if (priv->flags & NFT_INNER_TH) {
- ctx.inner_thoff = off;
- ctx.flags |= NFT_PAYLOAD_CTX_INNER_TH;
+ tun_ctx->inner_thoff = off;
+ tun_ctx->flags |= NFT_PAYLOAD_CTX_INNER_TH;
}
- *tun_ctx = ctx;
tun_ctx->type = priv->type;
+ tun_ctx->cookie = (unsigned long)pkt->skb;
pkt->flags |= NFT_PKTINFO_INNER_FULL;
return 0;
}
+static bool nft_inner_restore_tun_ctx(const struct nft_pktinfo *pkt,
+ struct nft_inner_tun_ctx *tun_ctx)
+{
+ struct nft_inner_tun_ctx *this_cpu_tun_ctx;
+
+ local_bh_disable();
+ this_cpu_tun_ctx = this_cpu_ptr(&nft_pcpu_tun_ctx);
+ if (this_cpu_tun_ctx->cookie != (unsigned long)pkt->skb) {
+ local_bh_enable();
+ return false;
+ }
+ *tun_ctx = *this_cpu_tun_ctx;
+ local_bh_enable();
+
+ return true;
+}
+
+static void nft_inner_save_tun_ctx(const struct nft_pktinfo *pkt,
+ const struct nft_inner_tun_ctx *tun_ctx)
+{
+ struct nft_inner_tun_ctx *this_cpu_tun_ctx;
+
+ local_bh_disable();
+ this_cpu_tun_ctx = this_cpu_ptr(&nft_pcpu_tun_ctx);
+ if (this_cpu_tun_ctx->cookie != tun_ctx->cookie)
+ *this_cpu_tun_ctx = *tun_ctx;
+ local_bh_enable();
+}
+
static bool nft_inner_parse_needed(const struct nft_inner *priv,
const struct nft_pktinfo *pkt,
- const struct nft_inner_tun_ctx *tun_ctx)
+ struct nft_inner_tun_ctx *tun_ctx)
{
if (!(pkt->flags & NFT_PKTINFO_INNER_FULL))
return true;
+ if (!nft_inner_restore_tun_ctx(pkt, tun_ctx))
+ return true;
+
if (priv->type != tun_ctx->type)
return true;
@@ -248,27 +279,29 @@ static bool nft_inner_parse_needed(const struct nft_inner *priv,
static void nft_inner_eval(const struct nft_expr *expr, struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
- struct nft_inner_tun_ctx *tun_ctx = this_cpu_ptr(&nft_pcpu_tun_ctx);
const struct nft_inner *priv = nft_expr_priv(expr);
+ struct nft_inner_tun_ctx tun_ctx = {};
if (nft_payload_inner_offset(pkt) < 0)
goto err;
- if (nft_inner_parse_needed(priv, pkt, tun_ctx) &&
- nft_inner_parse(priv, (struct nft_pktinfo *)pkt, tun_ctx) < 0)
+ if (nft_inner_parse_needed(priv, pkt, &tun_ctx) &&
+ nft_inner_parse(priv, (struct nft_pktinfo *)pkt, &tun_ctx) < 0)
goto err;
switch (priv->expr_type) {
case NFT_INNER_EXPR_PAYLOAD:
- nft_payload_inner_eval((struct nft_expr *)&priv->expr, regs, pkt, tun_ctx);
+ nft_payload_inner_eval((struct nft_expr *)&priv->expr, regs, pkt, &tun_ctx);
break;
case NFT_INNER_EXPR_META:
- nft_meta_inner_eval((struct nft_expr *)&priv->expr, regs, pkt, tun_ctx);
+ nft_meta_inner_eval((struct nft_expr *)&priv->expr, regs, pkt, &tun_ctx);
break;
default:
WARN_ON_ONCE(1);
goto err;
}
+ nft_inner_save_tun_ctx(pkt, &tun_ctx);
+
return;
err:
regs->verdict.code = NFT_BREAK;
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 65bd291318f2..8bfac4185ac7 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -24,11 +24,13 @@
struct nft_rhash {
struct rhashtable ht;
struct delayed_work gc_work;
+ u32 wq_gc_seq;
};
struct nft_rhash_elem {
struct nft_elem_priv priv;
struct rhash_head node;
+ u32 wq_gc_seq;
struct nft_set_ext ext;
};
@@ -338,6 +340,10 @@ static void nft_rhash_gc(struct work_struct *work)
if (!gc)
goto done;
+ /* Elements never collected use a zero gc worker sequence number. */
+ if (unlikely(++priv->wq_gc_seq == 0))
+ priv->wq_gc_seq++;
+
rhashtable_walk_enter(&priv->ht, &hti);
rhashtable_walk_start(&hti);
@@ -355,6 +361,14 @@ static void nft_rhash_gc(struct work_struct *work)
goto try_later;
}
+ /* rhashtable walk is unstable, already seen in this gc run?
+ * Then, skip this element. In case of (unlikely) sequence
+ * wraparound and stale element wq_gc_seq, next gc run will
+ * just find this expired element.
+ */
+ if (he->wq_gc_seq == priv->wq_gc_seq)
+ continue;
+
if (nft_set_elem_is_dead(&he->ext))
goto dead_elem;
@@ -371,6 +385,8 @@ dead_elem:
if (!gc)
goto try_later;
+ /* annotate gc sequence for this attempt. */
+ he->wq_gc_seq = priv->wq_gc_seq;
nft_trans_gc_elem_add(gc, he);
}
diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
index f5da0c1775f2..35d0409b0095 100644
--- a/net/netfilter/nft_socket.c
+++ b/net/netfilter/nft_socket.c
@@ -68,7 +68,7 @@ static noinline int nft_socket_cgroup_subtree_level(void)
cgroup_put(cgrp);
- if (WARN_ON_ONCE(level > 255))
+ if (level > 255)
return -ERANGE;
if (WARN_ON_ONCE(level < 0))
diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c
index f7b0286d106a..8a80fd76fe45 100644
--- a/net/netfilter/xt_LED.c
+++ b/net/netfilter/xt_LED.c
@@ -96,7 +96,9 @@ static int led_tg_check(const struct xt_tgchk_param *par)
struct xt_led_info_internal *ledinternal;
int err;
- if (ledinfo->id[0] == '\0')
+ /* Bail out if empty string or not a string at all. */
+ if (ledinfo->id[0] == '\0' ||
+ !memchr(ledinfo->id, '\0', sizeof(ledinfo->id)))
return -EINVAL;
mutex_lock(&xt_led_mutex);
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index e280c27cb9f9..1008ec8a464c 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -1369,7 +1369,6 @@ static int fl_set_erspan_opt(const struct nlattr *nla, struct fl_flow_key *key,
int err;
md = (struct erspan_metadata *)&key->enc_opts.data[key->enc_opts.len];
- memset(md, 0xff, sizeof(*md));
md->version = 1;
if (!depth)
@@ -1398,9 +1397,9 @@ static int fl_set_erspan_opt(const struct nlattr *nla, struct fl_flow_key *key,
NL_SET_ERR_MSG(extack, "Missing tunnel key erspan option index");
return -EINVAL;
}
+ memset(&md->u.index, 0xff, sizeof(md->u.index));
if (tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX]) {
nla = tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX];
- memset(&md->u, 0x00, sizeof(md->u));
md->u.index = nla_get_be32(nla);
}
} else if (md->version == 2) {
@@ -1409,10 +1408,12 @@ static int fl_set_erspan_opt(const struct nlattr *nla, struct fl_flow_key *key,
NL_SET_ERR_MSG(extack, "Missing tunnel key erspan option dir or hwid");
return -EINVAL;
}
+ md->u.md2.dir = 1;
if (tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_DIR]) {
nla = tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_DIR];
md->u.md2.dir = nla_get_u8(nla);
}
+ set_hwid(&md->u.md2, 0xff);
if (tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_HWID]) {
nla = tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_HWID];
set_hwid(&md->u.md2, nla_get_u8(nla));
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index f2f9b75008bb..8d8b2db4653c 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -1525,7 +1525,6 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free)
b->backlogs[idx] -= len;
b->tin_backlog -= len;
sch->qstats.backlog -= len;
- qdisc_tree_reduce_backlog(sch, 1, len);
flow->dropped++;
b->tin_dropped++;
@@ -1536,6 +1535,7 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free)
__qdisc_drop(skb, to_free);
sch->q.qlen--;
+ qdisc_tree_reduce_backlog(sch, 1, len);
cake_heapify(q, 0);
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 1e940ad0d2fa..59e7bdf5063e 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -123,10 +123,10 @@ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx,
if (idx == q->tail)
choke_zap_tail_holes(q);
+ --sch->q.qlen;
qdisc_qstats_backlog_dec(sch, skb);
qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
qdisc_drop(skb, sch, to_free);
- --sch->q.qlen;
}
struct choke_skb_cb {
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index f1d09183ae63..dc26b22d53c7 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -208,7 +208,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch,
struct tbf_sched_data *q = qdisc_priv(sch);
struct sk_buff *segs, *nskb;
netdev_features_t features = netif_skb_features(skb);
- unsigned int len = 0, prev_len = qdisc_pkt_len(skb);
+ unsigned int len = 0, prev_len = qdisc_pkt_len(skb), seg_len;
int ret, nb;
segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
@@ -219,21 +219,27 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch,
nb = 0;
skb_list_walk_safe(segs, segs, nskb) {
skb_mark_not_on_list(segs);
- qdisc_skb_cb(segs)->pkt_len = segs->len;
- len += segs->len;
+ seg_len = segs->len;
+ qdisc_skb_cb(segs)->pkt_len = seg_len;
ret = qdisc_enqueue(segs, q->qdisc, to_free);
if (ret != NET_XMIT_SUCCESS) {
if (net_xmit_drop_count(ret))
qdisc_qstats_drop(sch);
} else {
nb++;
+ len += seg_len;
}
}
sch->q.qlen += nb;
- if (nb > 1)
+ sch->qstats.backlog += len;
+ if (nb > 0) {
qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
- consume_skb(skb);
- return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
+ consume_skb(skb);
+ return NET_XMIT_SUCCESS;
+ }
+
+ kfree_skb(skb);
+ return NET_XMIT_DROP;
}
static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch,
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 9d76e902fd77..9e6c69d18581 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -383,6 +383,7 @@ void smc_sk_init(struct net *net, struct sock *sk, int protocol)
smc->limit_smc_hs = net->smc.limit_smc_hs;
smc->use_fallback = false; /* assume rdma capability first */
smc->fallback_rsn = 0;
+ smc_close_init(smc);
}
static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
@@ -1299,7 +1300,6 @@ static int smc_connect_rdma(struct smc_sock *smc,
goto connect_abort;
}
- smc_close_init(smc);
smc_rx_init(smc);
if (ini->first_contact_local) {
@@ -1435,7 +1435,6 @@ static int smc_connect_ism(struct smc_sock *smc,
goto connect_abort;
}
}
- smc_close_init(smc);
smc_rx_init(smc);
smc_tx_init(smc);
@@ -1901,6 +1900,7 @@ static void smc_listen_out(struct smc_sock *new_smc)
if (tcp_sk(new_smc->clcsock->sk)->syn_smc)
atomic_dec(&lsmc->queued_smc_hs);
+ release_sock(newsmcsk); /* lock in smc_listen_work() */
if (lsmc->sk.sk_state == SMC_LISTEN) {
lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
smc_accept_enqueue(&lsmc->sk, newsmcsk);
@@ -2422,6 +2422,7 @@ static void smc_listen_work(struct work_struct *work)
u8 accept_version;
int rc = 0;
+ lock_sock(&new_smc->sk); /* release in smc_listen_out() */
if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN)
return smc_listen_out_err(new_smc);
@@ -2479,7 +2480,6 @@ static void smc_listen_work(struct work_struct *work)
goto out_decl;
mutex_lock(&smc_server_lgr_pending);
- smc_close_init(new_smc);
smc_rx_init(new_smc);
smc_tx_init(new_smc);
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index 439f75539977..b7e25e7e9933 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -814,10 +814,10 @@ static void cleanup_bearer(struct work_struct *work)
kfree_rcu(rcast, rcu);
}
- atomic_dec(&tipc_net(sock_net(ub->ubsock->sk))->wq_count);
dst_cache_destroy(&ub->rcast.dst_cache);
udp_tunnel_sock_release(ub->ubsock);
synchronize_net();
+ atomic_dec(&tipc_net(sock_net(ub->ubsock->sk))->wq_count);
kfree(ub);
}