summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2024-02-26 11:46:13 +0000
committer David S. Miller <davem@davemloft.net> 2024-02-26 11:46:13 +0000
commit 5fc3903c46a743781cd35fdecfdd889c522e2c3b (patch)
tree 41ed04a18e31b487f65bd4c80a0d7f502317a572
parent 5f6000aa24b939a8853dbc76642ba3cd12765bd8 (diff)
parent 0ec4e48c3a233820e0bce1f5ba9ed3e4520f90e9 (diff)
Merge branch 'rtnetlink-reduce-rtnl-pressure'
Eric Dumazet says:

====================
rtnetlink: reduce RTNL pressure for dumps

This series restarts the conversion of rtnl dump operations
to RCU protection, instead of requiring RTNL.

In this new attempt (prior one failed in 2011), I chose to
allow a gradual conversion of selected operations.

After this series, "ip -6 addr" and "ip -4 ro" no longer
need to acquire RTNL.

I refrained from changing inet_dump_ifaddr() and inet6_dump_addr()
to avoid merge conflicts because of two fixes in net tree.

I also started the work for "ip link" future conversion.

v2: rtnl_fill_link_ifmap() always emit IFLA_MAP (Jiri Pirko)
    Added "nexthop: allow nexthop_mpath_fill_node() to be called without RTNL"
    to avoid a lockdep splat (Ido Schimmel)
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_main.c 4
-rw-r--r-- drivers/net/can/vxcan.c 2
-rw-r--r-- drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c 2
-rw-r--r-- drivers/net/ipvlan/ipvlan_main.c 2
-rw-r--r-- drivers/net/macsec.c 2
-rw-r--r-- drivers/net/macvlan.c 2
-rw-r--r-- drivers/net/netkit.c 2
-rw-r--r-- drivers/net/veth.c 2
-rw-r--r-- drivers/net/wireless/virtual/virt_wifi.c 2
-rw-r--r-- include/linux/netdevice.h 6
-rw-r--r-- include/linux/netlink.h 2
-rw-r--r-- include/net/ip_fib.h 1
-rw-r--r-- include/net/nexthop.h 2
-rw-r--r-- include/net/rtnetlink.h 1
-rw-r--r-- net/8021q/vlan_dev.c 4
-rw-r--r-- net/core/dev.c 6
-rw-r--r-- net/core/rtnetlink.c 36
-rw-r--r-- net/dsa/user.c 2
-rw-r--r-- net/ieee802154/6lowpan/core.c 2
-rw-r--r-- net/ipv4/fib_frontend.c 50
-rw-r--r-- net/ipv4/fib_trie.c 4
-rw-r--r-- net/ipv4/ipmr.c 4
-rw-r--r-- net/ipv6/addrconf.c 222
-rw-r--r-- net/ipv6/ip6_fib.c 7
-rw-r--r-- net/ipv6/ip6_tunnel.c 2
-rw-r--r-- net/ipv6/ip6mr.c 4
-rw-r--r-- net/ipv6/ndisc.c 2
-rw-r--r-- net/mpls/af_mpls.c 4
-rw-r--r-- net/netlink/af_netlink.c 46
-rw-r--r-- net/netlink/af_netlink.h 5
-rw-r--r-- net/netlink/diag.c 2
-rw-r--r-- net/xfrm/xfrm_interface_core.c 2
32 files changed, 238 insertions, 198 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 7a5be705d718..6f2a688fccbf 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1272,10 +1272,10 @@ static int ipoib_get_iflink(const struct net_device *dev)
/* parent interface */
if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags))
- return dev->ifindex;
+ return READ_ONCE(dev->ifindex);
/* child/vlan interface */
- return priv->parent->ifindex;
+ return READ_ONCE(priv->parent->ifindex);
}
static u32 ipoib_addr_hash(struct ipoib_neigh_hash *htbl, u8 *daddr)
diff --git a/drivers/net/can/vxcan.c b/drivers/net/can/vxcan.c
index 98c669ad5141..f7fabba707ea 100644
--- a/drivers/net/can/vxcan.c
+++ b/drivers/net/can/vxcan.c
@@ -119,7 +119,7 @@ static int vxcan_get_iflink(const struct net_device *dev)
rcu_read_lock();
peer = rcu_dereference(priv->peer);
- iflink = peer ? peer->ifindex : 0;
+ iflink = peer ? READ_ONCE(peer->ifindex) : 0;
rcu_read_unlock();
return iflink;
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
index 046b5f7d8e7c..9d2a9562c96f 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
@@ -98,7 +98,7 @@ static int rmnet_vnd_get_iflink(const struct net_device *dev)
{
struct rmnet_priv *priv = netdev_priv(dev);
- return priv->real_dev->ifindex;
+ return READ_ONCE(priv->real_dev->ifindex);
}
static int rmnet_vnd_init(struct net_device *dev)
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index df7c43a109e1..5920f7e63352 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -349,7 +349,7 @@ static int ipvlan_get_iflink(const struct net_device *dev)
{
struct ipvl_dev *ipvlan = netdev_priv(dev);
- return ipvlan->phy_dev->ifindex;
+ return READ_ONCE(ipvlan->phy_dev->ifindex);
}
static const struct net_device_ops ipvlan_netdev_ops = {
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 7f5426285c61..4b5513c9c2be 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -3753,7 +3753,7 @@ static void macsec_get_stats64(struct net_device *dev,
static int macsec_get_iflink(const struct net_device *dev)
{
- return macsec_priv(dev)->real_dev->ifindex;
+ return READ_ONCE(macsec_priv(dev)->real_dev->ifindex);
}
static const struct net_device_ops macsec_netdev_ops = {
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index a3cc665757e8..0cec2783a3e7 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -1158,7 +1158,7 @@ static int macvlan_dev_get_iflink(const struct net_device *dev)
{
struct macvlan_dev *vlan = netdev_priv(dev);
- return vlan->lowerdev->ifindex;
+ return READ_ONCE(vlan->lowerdev->ifindex);
}
static const struct ethtool_ops macvlan_ethtool_ops = {
diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c
index 39171380ccf2..a4d2e76a8d58 100644
--- a/drivers/net/netkit.c
+++ b/drivers/net/netkit.c
@@ -145,7 +145,7 @@ static int netkit_get_iflink(const struct net_device *dev)
rcu_read_lock();
peer = rcu_dereference(nk->peer);
if (peer)
- iflink = peer->ifindex;
+ iflink = READ_ONCE(peer->ifindex);
rcu_read_unlock();
return iflink;
}
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 500b9dfccd08..dd5aa8ab65a8 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -1461,7 +1461,7 @@ static int veth_get_iflink(const struct net_device *dev)
rcu_read_lock();
peer = rcu_dereference(priv->peer);
- iflink = peer ? peer->ifindex : 0;
+ iflink = peer ? READ_ONCE(peer->ifindex) : 0;
rcu_read_unlock();
return iflink;
diff --git a/drivers/net/wireless/virtual/virt_wifi.c b/drivers/net/wireless/virtual/virt_wifi.c
index ba14d83353a4..6a84ec58d618 100644
--- a/drivers/net/wireless/virtual/virt_wifi.c
+++ b/drivers/net/wireless/virtual/virt_wifi.c
@@ -453,7 +453,7 @@ static int virt_wifi_net_device_get_iflink(const struct net_device *dev)
{
struct virt_wifi_netdev_priv *priv = netdev_priv(dev);
- return priv->lowerdev->ifindex;
+ return READ_ONCE(priv->lowerdev->ifindex);
}
static const struct net_device_ops virt_wifi_ops = {
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f07c8374f29c..09023e44db4e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4354,8 +4354,10 @@ static inline bool netif_testing(const struct net_device *dev)
*/
static inline bool netif_oper_up(const struct net_device *dev)
{
- return (dev->operstate == IF_OPER_UP ||
- dev->operstate == IF_OPER_UNKNOWN /* backward compat */);
+ unsigned int operstate = READ_ONCE(dev->operstate);
+
+ return operstate == IF_OPER_UP ||
+ operstate == IF_OPER_UNKNOWN /* backward compat */;
}
/**
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 1a4445bf2ab9..5df7340d4dab 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -291,6 +291,7 @@ struct netlink_callback {
u16 answer_flags;
u32 min_dump_alloc;
unsigned int prev_seq, seq;
+ int flags;
bool strict_check;
union {
u8 ctx[48];
@@ -323,6 +324,7 @@ struct netlink_dump_control {
void *data;
struct module *module;
u32 min_dump_alloc;
+ int flags;
};
int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index d4667b7797e3..9b2f69ba5e49 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -264,6 +264,7 @@ struct fib_dump_filter {
bool filter_set;
bool dump_routes;
bool dump_exceptions;
+ bool rtnl_held;
unsigned char protocol;
unsigned char rt_type;
unsigned int flags;
diff --git a/include/net/nexthop.h b/include/net/nexthop.h
index 6647ad509faa..77e99cba60ad 100644
--- a/include/net/nexthop.h
+++ b/include/net/nexthop.h
@@ -317,7 +317,7 @@ static inline
int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh,
u8 rt_family)
{
- struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
+ struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp);
int i;
for (i = 0; i < nhg->num_nh; i++) {
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index 6506221c5fe3..3bfb80bad173 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -12,6 +12,7 @@ typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *);
enum rtnl_link_flags {
RTNL_FLAG_DOIT_UNLOCKED = BIT(0),
RTNL_FLAG_BULK_DEL_SUPPORTED = BIT(1),
+ RTNL_FLAG_DUMP_UNLOCKED = BIT(2),
};
enum rtnl_kinds {
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index df5552518251..39876eff51d2 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -762,9 +762,9 @@ static void vlan_dev_netpoll_cleanup(struct net_device *dev)
static int vlan_dev_get_iflink(const struct net_device *dev)
{
- struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
+ const struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
- return real_dev->ifindex;
+ return READ_ONCE(real_dev->ifindex);
}
static int vlan_dev_fill_forward_path(struct net_device_path_ctx *ctx,
diff --git a/net/core/dev.c b/net/core/dev.c
index c588808be77f..275fd5259a4a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -641,7 +641,7 @@ int dev_get_iflink(const struct net_device *dev)
if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink)
return dev->netdev_ops->ndo_get_iflink(dev);
- return dev->ifindex;
+ return READ_ONCE(dev->ifindex);
}
EXPORT_SYMBOL(dev_get_iflink);
@@ -8632,12 +8632,12 @@ unsigned int dev_get_flags(const struct net_device *dev)
{
unsigned int flags;
- flags = (dev->flags & ~(IFF_PROMISC |
+ flags = (READ_ONCE(dev->flags) & ~(IFF_PROMISC |
IFF_ALLMULTI |
IFF_RUNNING |
IFF_LOWER_UP |
IFF_DORMANT)) |
- (dev->gflags & (IFF_PROMISC |
+ (READ_ONCE(dev->gflags) & (IFF_PROMISC |
IFF_ALLMULTI));
if (netif_running(dev)) {
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index c54dbe05c4c5..39f17d0b6cea 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1455,17 +1455,18 @@ static noinline_for_stack int rtnl_fill_vf(struct sk_buff *skb,
return 0;
}
-static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
+static int rtnl_fill_link_ifmap(struct sk_buff *skb,
+ const struct net_device *dev)
{
struct rtnl_link_ifmap map;
memset(&map, 0, sizeof(map));
- map.mem_start = dev->mem_start;
- map.mem_end = dev->mem_end;
- map.base_addr = dev->base_addr;
- map.irq = dev->irq;
- map.dma = dev->dma;
- map.port = dev->if_port;
+ map.mem_start = READ_ONCE(dev->mem_start);
+ map.mem_end = READ_ONCE(dev->mem_end);
+ map.base_addr = READ_ONCE(dev->base_addr);
+ map.irq = READ_ONCE(dev->irq);
+ map.dma = READ_ONCE(dev->dma);
+ map.port = READ_ONCE(dev->if_port);
if (nla_put_64bit(skb, IFLA_MAP, sizeof(map), &map, IFLA_PAD))
return -EMSGSIZE;
@@ -1611,10 +1612,10 @@ static int put_master_ifindex(struct sk_buff *skb, struct net_device *dev)
static int nla_put_iflink(struct sk_buff *skb, const struct net_device *dev,
bool force)
{
- int ifindex = dev_get_iflink(dev);
+ int iflink = dev_get_iflink(dev);
- if (force || dev->ifindex != ifindex)
- return nla_put_u32(skb, IFLA_LINK, ifindex);
+ if (force || READ_ONCE(dev->ifindex) != iflink)
+ return nla_put_u32(skb, IFLA_LINK, iflink);
return 0;
}
@@ -1698,7 +1699,7 @@ static int rtnl_fill_alt_ifnames(struct sk_buff *skb,
struct netdev_name_node *name_node;
int count = 0;
- list_for_each_entry(name_node, &dev->name_node->list, list) {
+ list_for_each_entry_rcu(name_node, &dev->name_node->list, list) {
if (nla_put_string(skb, IFLA_ALT_IFNAME, name_node->name))
return -EMSGSIZE;
count++;
@@ -1706,6 +1707,7 @@ static int rtnl_fill_alt_ifnames(struct sk_buff *skb,
return count;
}
+/* RCU protected. */
static int rtnl_fill_prop_list(struct sk_buff *skb,
const struct net_device *dev)
{
@@ -1875,9 +1877,6 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
goto nla_put_failure;
}
- if (rtnl_fill_link_ifmap(skb, dev))
- goto nla_put_failure;
-
if (dev->addr_len) {
if (nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr) ||
nla_put(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast))
@@ -1927,10 +1926,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
rcu_read_lock();
if (rtnl_fill_link_af(skb, dev, ext_filter_mask))
goto nla_put_failure_rcu;
- rcu_read_unlock();
-
+ if (rtnl_fill_link_ifmap(skb, dev))
+ goto nla_put_failure_rcu;
if (rtnl_fill_prop_list(skb, dev))
- goto nla_put_failure;
+ goto nla_put_failure_rcu;
+ rcu_read_unlock();
if (dev->dev.parent &&
nla_put_string(skb, IFLA_PARENT_DEV_NAME,
@@ -6532,6 +6532,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
}
owner = link->owner;
dumpit = link->dumpit;
+ flags = link->flags;
if (type == RTM_GETLINK - RTM_BASE)
min_dump_alloc = rtnl_calcit(skb, nlh);
@@ -6549,6 +6550,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
.dump = dumpit,
.min_dump_alloc = min_dump_alloc,
.module = owner,
+ .flags = flags,
};
err = netlink_dump_start(rtnl, skb, nlh, &c);
/* netlink_dump_start() will keep a reference on
diff --git a/net/dsa/user.c b/net/dsa/user.c
index 4d53c76a9840..9c42a6edcdc8 100644
--- a/net/dsa/user.c
+++ b/net/dsa/user.c
@@ -352,7 +352,7 @@ void dsa_user_mii_bus_init(struct dsa_switch *ds)
/* user device handling ****************************************************/
static int dsa_user_get_iflink(const struct net_device *dev)
{
- return dsa_user_to_conduit(dev)->ifindex;
+ return READ_ONCE(dsa_user_to_conduit(dev)->ifindex);
}
static int dsa_user_open(struct net_device *dev)
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index e643f52663f9..77b4e92027c5 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -93,7 +93,7 @@ static int lowpan_neigh_construct(struct net_device *dev, struct neighbour *n)
static int lowpan_get_iflink(const struct net_device *dev)
{
- return lowpan_802154_dev(dev)->wdev->ifindex;
+ return READ_ONCE(lowpan_802154_dev(dev)->wdev->ifindex);
}
static const struct net_device_ops lowpan_netdev_ops = {
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 390f4be7f7be..bf3a2214fe29 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -916,7 +916,8 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
struct rtmsg *rtm;
int err, i;
- ASSERT_RTNL();
+ if (filter->rtnl_held)
+ ASSERT_RTNL();
if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
NL_SET_ERR_MSG(extack, "Invalid header for FIB dump request");
@@ -961,7 +962,10 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
break;
case RTA_OIF:
ifindex = nla_get_u32(tb[i]);
- filter->dev = __dev_get_by_index(net, ifindex);
+ if (filter->rtnl_held)
+ filter->dev = __dev_get_by_index(net, ifindex);
+ else
+ filter->dev = dev_get_by_index_rcu(net, ifindex);
if (!filter->dev)
return -ENODEV;
break;
@@ -983,20 +987,24 @@ EXPORT_SYMBOL_GPL(ip_valid_fib_dump_req);
static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct fib_dump_filter filter = { .dump_routes = true,
- .dump_exceptions = true };
+ struct fib_dump_filter filter = {
+ .dump_routes = true,
+ .dump_exceptions = true,
+ .rtnl_held = false,
+ };
const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
unsigned int h, s_h;
unsigned int e = 0, s_e;
struct fib_table *tb;
struct hlist_head *head;
- int dumped = 0, err;
+ int dumped = 0, err = 0;
+ rcu_read_lock();
if (cb->strict_check) {
err = ip_valid_fib_dump_req(net, nlh, &filter, cb);
if (err < 0)
- return err;
+ goto unlock;
} else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) {
struct rtmsg *rtm = nlmsg_data(nlh);
@@ -1005,29 +1013,28 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
/* ipv4 does not use prefix flag */
if (filter.flags & RTM_F_PREFIX)
- return skb->len;
+ goto unlock;
if (filter.table_id) {
tb = fib_get_table(net, filter.table_id);
if (!tb) {
if (rtnl_msg_family(cb->nlh) != PF_INET)
- return skb->len;
+ goto unlock;
NL_SET_ERR_MSG(cb->extack, "ipv4: FIB table does not exist");
- return -ENOENT;
+ err = -ENOENT;
+ goto unlock;
}
-
- rcu_read_lock();
err = fib_table_dump(tb, skb, cb, &filter);
- rcu_read_unlock();
- return skb->len ? : err;
+ if (err < 0 && skb->len)
+ err = skb->len;
+ goto unlock;
}
s_h = cb->args[0];
s_e = cb->args[1];
- rcu_read_lock();
-
+ err = 0;
for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
e = 0;
head = &net->ipv4.fib_table_hash[h];
@@ -1040,9 +1047,8 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
err = fib_table_dump(tb, skb, cb, &filter);
if (err < 0) {
if (likely(skb->len))
- goto out;
-
- goto out_err;
+ err = skb->len;
+ goto out;
}
dumped = 1;
next:
@@ -1050,13 +1056,12 @@ next:
}
}
out:
- err = skb->len;
-out_err:
- rcu_read_unlock();
cb->args[1] = e;
cb->args[0] = h;
+unlock:
+ rcu_read_unlock();
return err;
}
@@ -1659,5 +1664,6 @@ void __init ip_fib_init(void)
rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, 0);
rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, 0);
- rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, 0);
+ rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib,
+ RTNL_FLAG_DUMP_UNLOCKED);
}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 0fc7ab5832d1..f474106464d2 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2368,7 +2368,7 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
* and key == 0 means the dump has wrapped around and we are done.
*/
if (count && !key)
- return skb->len;
+ return 0;
while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
int err;
@@ -2394,7 +2394,7 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
cb->args[3] = key;
cb->args[2] = count;
- return skb->len;
+ return 0;
}
void __init fib_trie_init(void)
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 5561bce3a37e..0708ac6f6c58 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -2587,7 +2587,9 @@ errout_free:
static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct fib_dump_filter filter = {};
+ struct fib_dump_filter filter = {
+ .rtnl_held = true,
+ };
int err;
if (cb->strict_check) {
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index c669ea266ab7..a280614b3765 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3492,7 +3492,8 @@ static void addrconf_dev_config(struct net_device *dev)
/* this device type has no EUI support */
if (dev->type == ARPHRD_NONE &&
idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64)
- idev->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_RANDOM;
+ WRITE_ONCE(idev->cnf.addr_gen_mode,
+ IN6_ADDR_GEN_MODE_RANDOM);
addrconf_addr_gen(idev, false);
}
@@ -3764,7 +3765,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
rt6_mtu_change(dev, dev->mtu);
idev->cnf.mtu6 = dev->mtu;
}
- idev->tstamp = jiffies;
+ WRITE_ONCE(idev->tstamp, jiffies);
inet6_ifinfo_notify(RTM_NEWLINK, idev);
/*
@@ -4006,7 +4007,7 @@ restart:
ipv6_mc_down(idev);
}
- idev->tstamp = jiffies;
+ WRITE_ONCE(idev->tstamp, jiffies);
idev->ra_mtu = 0;
/* Last: Shot the device (if unregistered) */
@@ -5634,87 +5635,97 @@ errout:
rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err);
}
-static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
- __s32 *array, int bytes)
+static void ipv6_store_devconf(const struct ipv6_devconf *cnf,
+ __s32 *array, int bytes)
{
BUG_ON(bytes < (DEVCONF_MAX * 4));
memset(array, 0, bytes);
- array[DEVCONF_FORWARDING] = cnf->forwarding;
- array[DEVCONF_HOPLIMIT] = cnf->hop_limit;
- array[DEVCONF_MTU6] = cnf->mtu6;
- array[DEVCONF_ACCEPT_RA] = cnf->accept_ra;
- array[DEVCONF_ACCEPT_REDIRECTS] = cnf->accept_redirects;
- array[DEVCONF_AUTOCONF] = cnf->autoconf;
- array[DEVCONF_DAD_TRANSMITS] = cnf->dad_transmits;
- array[DEVCONF_RTR_SOLICITS] = cnf->rtr_solicits;
+ array[DEVCONF_FORWARDING] = READ_ONCE(cnf->forwarding);
+ array[DEVCONF_HOPLIMIT] = READ_ONCE(cnf->hop_limit);
+ array[DEVCONF_MTU6] = READ_ONCE(cnf->mtu6);
+ array[DEVCONF_ACCEPT_RA] = READ_ONCE(cnf->accept_ra);
+ array[DEVCONF_ACCEPT_REDIRECTS] = READ_ONCE(cnf->accept_redirects);
+ array[DEVCONF_AUTOCONF] = READ_ONCE(cnf->autoconf);
+ array[DEVCONF_DAD_TRANSMITS] = READ_ONCE(cnf->dad_transmits);
+ array[DEVCONF_RTR_SOLICITS] = READ_ONCE(cnf->rtr_solicits);
array[DEVCONF_RTR_SOLICIT_INTERVAL] =
- jiffies_to_msecs(cnf->rtr_solicit_interval);
+ jiffies_to_msecs(READ_ONCE(cnf->rtr_solicit_interval));
array[DEVCONF_RTR_SOLICIT_MAX_INTERVAL] =
- jiffies_to_msecs(cnf->rtr_solicit_max_interval);
+ jiffies_to_msecs(READ_ONCE(cnf->rtr_solicit_max_interval));
array[DEVCONF_RTR_SOLICIT_DELAY] =
- jiffies_to_msecs(cnf->rtr_solicit_delay);
- array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version;
+ jiffies_to_msecs(READ_ONCE(cnf->rtr_solicit_delay));
+ array[DEVCONF_FORCE_MLD_VERSION] = READ_ONCE(cnf->force_mld_version);
array[DEVCONF_MLDV1_UNSOLICITED_REPORT_INTERVAL] =
- jiffies_to_msecs(cnf->mldv1_unsolicited_report_interval);
+ jiffies_to_msecs(READ_ONCE(cnf->mldv1_unsolicited_report_interval));
array[DEVCONF_MLDV2_UNSOLICITED_REPORT_INTERVAL] =
- jiffies_to_msecs(cnf->mldv2_unsolicited_report_interval);
- array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr;
- array[DEVCONF_TEMP_VALID_LFT] = cnf->temp_valid_lft;
- array[DEVCONF_TEMP_PREFERED_LFT] = cnf->temp_prefered_lft;
- array[DEVCONF_REGEN_MAX_RETRY] = cnf->regen_max_retry;
- array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor;
- array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses;
- array[DEVCONF_ACCEPT_RA_DEFRTR] = cnf->accept_ra_defrtr;
- array[DEVCONF_RA_DEFRTR_METRIC] = cnf->ra_defrtr_metric;
- array[DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT] = cnf->accept_ra_min_hop_limit;
- array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo;
+ jiffies_to_msecs(READ_ONCE(cnf->mldv2_unsolicited_report_interval));
+ array[DEVCONF_USE_TEMPADDR] = READ_ONCE(cnf->use_tempaddr);
+ array[DEVCONF_TEMP_VALID_LFT] = READ_ONCE(cnf->temp_valid_lft);
+ array[DEVCONF_TEMP_PREFERED_LFT] = READ_ONCE(cnf->temp_prefered_lft);
+ array[DEVCONF_REGEN_MAX_RETRY] = READ_ONCE(cnf->regen_max_retry);
+ array[DEVCONF_MAX_DESYNC_FACTOR] = READ_ONCE(cnf->max_desync_factor);
+ array[DEVCONF_MAX_ADDRESSES] = READ_ONCE(cnf->max_addresses);
+ array[DEVCONF_ACCEPT_RA_DEFRTR] = READ_ONCE(cnf->accept_ra_defrtr);
+ array[DEVCONF_RA_DEFRTR_METRIC] = READ_ONCE(cnf->ra_defrtr_metric);
+ array[DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT] =
+ READ_ONCE(cnf->accept_ra_min_hop_limit);
+ array[DEVCONF_ACCEPT_RA_PINFO] = READ_ONCE(cnf->accept_ra_pinfo);
#ifdef CONFIG_IPV6_ROUTER_PREF
- array[DEVCONF_ACCEPT_RA_RTR_PREF] = cnf->accept_ra_rtr_pref;
+ array[DEVCONF_ACCEPT_RA_RTR_PREF] = READ_ONCE(cnf->accept_ra_rtr_pref);
array[DEVCONF_RTR_PROBE_INTERVAL] =
- jiffies_to_msecs(cnf->rtr_probe_interval);
+ jiffies_to_msecs(READ_ONCE(cnf->rtr_probe_interval));
#ifdef CONFIG_IPV6_ROUTE_INFO
- array[DEVCONF_ACCEPT_RA_RT_INFO_MIN_PLEN] = cnf->accept_ra_rt_info_min_plen;
- array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen;
+ array[DEVCONF_ACCEPT_RA_RT_INFO_MIN_PLEN] =
+ READ_ONCE(cnf->accept_ra_rt_info_min_plen);
+ array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] =
+ READ_ONCE(cnf->accept_ra_rt_info_max_plen);
#endif
#endif
- array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp;
- array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route;
+ array[DEVCONF_PROXY_NDP] = READ_ONCE(cnf->proxy_ndp);
+ array[DEVCONF_ACCEPT_SOURCE_ROUTE] =
+ READ_ONCE(cnf->accept_source_route);
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
- array[DEVCONF_OPTIMISTIC_DAD] = cnf->optimistic_dad;
- array[DEVCONF_USE_OPTIMISTIC] = cnf->use_optimistic;
+ array[DEVCONF_OPTIMISTIC_DAD] = READ_ONCE(cnf->optimistic_dad);
+ array[DEVCONF_USE_OPTIMISTIC] = READ_ONCE(cnf->use_optimistic);
#endif
#ifdef CONFIG_IPV6_MROUTE
array[DEVCONF_MC_FORWARDING] = atomic_read(&cnf->mc_forwarding);
#endif
- array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6;
- array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad;
- array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao;
- array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify;
- array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc;
- array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = cnf->accept_ra_from_local;
- array[DEVCONF_ACCEPT_RA_MTU] = cnf->accept_ra_mtu;
- array[DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN] = cnf->ignore_routes_with_linkdown;
+ array[DEVCONF_DISABLE_IPV6] = READ_ONCE(cnf->disable_ipv6);
+ array[DEVCONF_ACCEPT_DAD] = READ_ONCE(cnf->accept_dad);
+ array[DEVCONF_FORCE_TLLAO] = READ_ONCE(cnf->force_tllao);
+ array[DEVCONF_NDISC_NOTIFY] = READ_ONCE(cnf->ndisc_notify);
+ array[DEVCONF_SUPPRESS_FRAG_NDISC] =
+ READ_ONCE(cnf->suppress_frag_ndisc);
+ array[DEVCONF_ACCEPT_RA_FROM_LOCAL] =
+ READ_ONCE(cnf->accept_ra_from_local);
+ array[DEVCONF_ACCEPT_RA_MTU] = READ_ONCE(cnf->accept_ra_mtu);
+ array[DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN] =
+ READ_ONCE(cnf->ignore_routes_with_linkdown);
/* we omit DEVCONF_STABLE_SECRET for now */
- array[DEVCONF_USE_OIF_ADDRS_ONLY] = cnf->use_oif_addrs_only;
- array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = cnf->drop_unicast_in_l2_multicast;
- array[DEVCONF_DROP_UNSOLICITED_NA] = cnf->drop_unsolicited_na;
- array[DEVCONF_KEEP_ADDR_ON_DOWN] = cnf->keep_addr_on_down;
- array[DEVCONF_SEG6_ENABLED] = cnf->seg6_enabled;
+ array[DEVCONF_USE_OIF_ADDRS_ONLY] = READ_ONCE(cnf->use_oif_addrs_only);
+ array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] =
+ READ_ONCE(cnf->drop_unicast_in_l2_multicast);
+ array[DEVCONF_DROP_UNSOLICITED_NA] = READ_ONCE(cnf->drop_unsolicited_na);
+ array[DEVCONF_KEEP_ADDR_ON_DOWN] = READ_ONCE(cnf->keep_addr_on_down);
+ array[DEVCONF_SEG6_ENABLED] = READ_ONCE(cnf->seg6_enabled);
#ifdef CONFIG_IPV6_SEG6_HMAC
- array[DEVCONF_SEG6_REQUIRE_HMAC] = cnf->seg6_require_hmac;
+ array[DEVCONF_SEG6_REQUIRE_HMAC] = READ_ONCE(cnf->seg6_require_hmac);
#endif
- array[DEVCONF_ENHANCED_DAD] = cnf->enhanced_dad;
- array[DEVCONF_ADDR_GEN_MODE] = cnf->addr_gen_mode;
- array[DEVCONF_DISABLE_POLICY] = cnf->disable_policy;
- array[DEVCONF_NDISC_TCLASS] = cnf->ndisc_tclass;
- array[DEVCONF_RPL_SEG_ENABLED] = cnf->rpl_seg_enabled;
- array[DEVCONF_IOAM6_ENABLED] = cnf->ioam6_enabled;
- array[DEVCONF_IOAM6_ID] = cnf->ioam6_id;
- array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide;
- array[DEVCONF_NDISC_EVICT_NOCARRIER] = cnf->ndisc_evict_nocarrier;
- array[DEVCONF_ACCEPT_UNTRACKED_NA] = cnf->accept_untracked_na;
- array[DEVCONF_ACCEPT_RA_MIN_LFT] = cnf->accept_ra_min_lft;
+ array[DEVCONF_ENHANCED_DAD] = READ_ONCE(cnf->enhanced_dad);
+ array[DEVCONF_ADDR_GEN_MODE] = READ_ONCE(cnf->addr_gen_mode);
+ array[DEVCONF_DISABLE_POLICY] = READ_ONCE(cnf->disable_policy);
+ array[DEVCONF_NDISC_TCLASS] = READ_ONCE(cnf->ndisc_tclass);
+ array[DEVCONF_RPL_SEG_ENABLED] = READ_ONCE(cnf->rpl_seg_enabled);
+ array[DEVCONF_IOAM6_ENABLED] = READ_ONCE(cnf->ioam6_enabled);
+ array[DEVCONF_IOAM6_ID] = READ_ONCE(cnf->ioam6_id);
+ array[DEVCONF_IOAM6_ID_WIDE] = READ_ONCE(cnf->ioam6_id_wide);
+ array[DEVCONF_NDISC_EVICT_NOCARRIER] =
+ READ_ONCE(cnf->ndisc_evict_nocarrier);
+ array[DEVCONF_ACCEPT_UNTRACKED_NA] =
+ READ_ONCE(cnf->accept_untracked_na);
+ array[DEVCONF_ACCEPT_RA_MIN_LFT] = READ_ONCE(cnf->accept_ra_min_lft);
}
static inline size_t inet6_ifla6_size(void)
@@ -5794,13 +5805,14 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev,
u32 ext_filter_mask)
{
- struct nlattr *nla;
struct ifla_cacheinfo ci;
+ struct nlattr *nla;
+ u32 ra_mtu;
- if (nla_put_u32(skb, IFLA_INET6_FLAGS, idev->if_flags))
+ if (nla_put_u32(skb, IFLA_INET6_FLAGS, READ_ONCE(idev->if_flags)))
goto nla_put_failure;
ci.max_reasm_len = IPV6_MAXPLEN;
- ci.tstamp = cstamp_delta(idev->tstamp);
+ ci.tstamp = cstamp_delta(READ_ONCE(idev->tstamp));
ci.reachable_time = jiffies_to_msecs(idev->nd_parms->reachable_time);
ci.retrans_time = jiffies_to_msecs(NEIGH_VAR(idev->nd_parms, RETRANS_TIME));
if (nla_put(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci))
@@ -5832,11 +5844,12 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev,
memcpy(nla_data(nla), idev->token.s6_addr, nla_len(nla));
read_unlock_bh(&idev->lock);
- if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->cnf.addr_gen_mode))
+ if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE,
+ READ_ONCE(idev->cnf.addr_gen_mode)))
goto nla_put_failure;
- if (idev->ra_mtu &&
- nla_put_u32(skb, IFLA_INET6_RA_MTU, idev->ra_mtu))
+ ra_mtu = READ_ONCE(idev->ra_mtu);
+ if (ra_mtu && nla_put_u32(skb, IFLA_INET6_RA_MTU, ra_mtu))
goto nla_put_failure;
return 0;
@@ -6037,7 +6050,7 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla,
if (tb[IFLA_INET6_ADDR_GEN_MODE]) {
u8 mode = nla_get_u8(tb[IFLA_INET6_ADDR_GEN_MODE]);
- idev->cnf.addr_gen_mode = mode;
+ WRITE_ONCE(idev->cnf.addr_gen_mode, mode);
}
return 0;
@@ -6049,6 +6062,7 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
struct net_device *dev = idev->dev;
struct ifinfomsg *hdr;
struct nlmsghdr *nlh;
+ int ifindex, iflink;
void *protoinfo;
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags);
@@ -6059,16 +6073,18 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
hdr->ifi_family = AF_INET6;
hdr->__ifi_pad = 0;
hdr->ifi_type = dev->type;
- hdr->ifi_index = dev->ifindex;
+ ifindex = READ_ONCE(dev->ifindex);
+ hdr->ifi_index = ifindex;
hdr->ifi_flags = dev_get_flags(dev);
hdr->ifi_change = 0;
+ iflink = dev_get_iflink(dev);
if (nla_put_string(skb, IFLA_IFNAME, dev->name) ||
(dev->addr_len &&
nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) ||
- nla_put_u32(skb, IFLA_MTU, dev->mtu) ||
- (dev->ifindex != dev_get_iflink(dev) &&
- nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))) ||
+ nla_put_u32(skb, IFLA_MTU, READ_ONCE(dev->mtu)) ||
+ (ifindex != iflink &&
+ nla_put_u32(skb, IFLA_LINK, iflink)) ||
nla_put_u8(skb, IFLA_OPERSTATE,
netif_running(dev) ? READ_ONCE(dev->operstate) : IF_OPER_DOWN))
goto nla_put_failure;
@@ -6116,50 +6132,42 @@ static int inet6_valid_dump_ifinfo(const struct nlmsghdr *nlh,
static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
- int h, s_h;
- int idx = 0, s_idx;
+ struct {
+ unsigned long ifindex;
+ } *ctx = (void *)cb->ctx;
struct net_device *dev;
struct inet6_dev *idev;
- struct hlist_head *head;
+ int err;
/* only requests using strict checking can pass data to
* influence the dump
*/
if (cb->strict_check) {
- int err = inet6_valid_dump_ifinfo(cb->nlh, cb->extack);
+ err = inet6_valid_dump_ifinfo(cb->nlh, cb->extack);
if (err < 0)
return err;
}
- s_h = cb->args[0];
- s_idx = cb->args[1];
-
+ err = 0;
rcu_read_lock();
- for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
- idx = 0;
- head = &net->dev_index_head[h];
- hlist_for_each_entry_rcu(dev, head, index_hlist) {
- if (idx < s_idx)
- goto cont;
- idev = __in6_dev_get(dev);
- if (!idev)
- goto cont;
- if (inet6_fill_ifinfo(skb, idev,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- RTM_NEWLINK, NLM_F_MULTI) < 0)
- goto out;
-cont:
- idx++;
+ for_each_netdev_dump(net, dev, ctx->ifindex) {
+ idev = __in6_dev_get(dev);
+ if (!idev)
+ continue;
+ err = inet6_fill_ifinfo(skb, idev,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWLINK, NLM_F_MULTI);
+ if (err < 0) {
+ if (likely(skb->len))
+ err = skb->len;
+ break;
}
}
-out:
rcu_read_unlock();
- cb->args[1] = idx;
- cb->args[0] = h;
- return skb->len;
+ return err;
}
void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
@@ -6516,7 +6524,7 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write,
}
if (idev->cnf.addr_gen_mode != new_val) {
- idev->cnf.addr_gen_mode = new_val;
+ WRITE_ONCE(idev->cnf.addr_gen_mode, new_val);
addrconf_init_auto_addrs(idev->dev);
}
} else if (&net->ipv6.devconf_all->addr_gen_mode == ctl->data) {
@@ -6527,7 +6535,8 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write,
idev = __in6_dev_get(dev);
if (idev &&
idev->cnf.addr_gen_mode != new_val) {
- idev->cnf.addr_gen_mode = new_val;
+ WRITE_ONCE(idev->cnf.addr_gen_mode,
+ new_val);
addrconf_init_auto_addrs(idev->dev);
}
}
@@ -6592,14 +6601,15 @@ static int addrconf_sysctl_stable_secret(struct ctl_table *ctl, int write,
struct inet6_dev *idev = __in6_dev_get(dev);
if (idev) {
- idev->cnf.addr_gen_mode =
- IN6_ADDR_GEN_MODE_STABLE_PRIVACY;
+ WRITE_ONCE(idev->cnf.addr_gen_mode,
+ IN6_ADDR_GEN_MODE_STABLE_PRIVACY);
}
}
} else {
struct inet6_dev *idev = ctl->extra1;
- idev->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY;
+ WRITE_ONCE(idev->cnf.addr_gen_mode,
+ IN6_ADDR_GEN_MODE_STABLE_PRIVACY);
}
out:
@@ -7452,7 +7462,7 @@ int __init addrconf_init(void)
rtnl_af_register(&inet6_ops);
err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETLINK,
- NULL, inet6_dump_ifinfo, 0);
+ NULL, inet6_dump_ifinfo, RTNL_FLAG_DUMP_UNLOCKED);
if (err < 0)
goto errout;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 6540d877d369..5c558dc1c683 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -620,8 +620,11 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct rt6_rtnl_dump_arg arg = { .filter.dump_exceptions = true,
- .filter.dump_routes = true };
+ struct rt6_rtnl_dump_arg arg = {
+ .filter.dump_exceptions = true,
+ .filter.dump_routes = true,
+ .filter.rtnl_held = true,
+ };
const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
unsigned int h, s_h;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 44406c28445d..5fd07581efaf 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1756,7 +1756,7 @@ int ip6_tnl_get_iflink(const struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- return t->parms.link;
+ return READ_ONCE(t->parms.link);
}
EXPORT_SYMBOL(ip6_tnl_get_iflink);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 1f19743f2540..cb0ee81a068a 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -2592,7 +2592,9 @@ static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
const struct nlmsghdr *nlh = cb->nlh;
- struct fib_dump_filter filter = {};
+ struct fib_dump_filter filter = {
+ .rtnl_held = true,
+ };
int err;
if (cb->strict_check) {
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 73cb31afe935..8523f0595b01 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1975,7 +1975,7 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void *buffer,
if (ctl->data == &NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME))
idev->nd_parms->reachable_time =
neigh_rand_reach_time(NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME));
- idev->tstamp = jiffies;
+ WRITE_ONCE(idev->tstamp, jiffies);
inet6_ifinfo_notify(RTM_NEWLINK, idev);
in6_dev_put(idev);
}
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 1af29af65388..6dab883a08dd 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -2179,7 +2179,9 @@ static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb)
const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
struct mpls_route __rcu **platform_label;
- struct fib_dump_filter filter = {};
+ struct fib_dump_filter filter = {
+ .rtnl_held = true,
+ };
unsigned int flags = NLM_F_MULTI;
size_t platform_labels;
unsigned int index;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 9c962347cf85..be5792b638aa 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -130,7 +130,7 @@ static const char *const nlk_cb_mutex_key_strings[MAX_LINKS + 1] = {
"nlk_cb_mutex-MAX_LINKS"
};
-static int netlink_dump(struct sock *sk);
+static int netlink_dump(struct sock *sk, bool lock_taken);
/* nl_table locking explained:
* Lookup and traversal are protected with an RCU read-side lock. Insertion
@@ -636,7 +636,7 @@ static struct proto netlink_proto = {
};
static int __netlink_create(struct net *net, struct socket *sock,
- struct mutex *cb_mutex, int protocol,
+ struct mutex *dump_cb_mutex, int protocol,
int kern)
{
struct sock *sk;
@@ -651,15 +651,11 @@ static int __netlink_create(struct net *net, struct socket *sock,
sock_init_data(sock, sk);
nlk = nlk_sk(sk);
- if (cb_mutex) {
- nlk->cb_mutex = cb_mutex;
- } else {
- nlk->cb_mutex = &nlk->cb_def_mutex;
- mutex_init(nlk->cb_mutex);
- lockdep_set_class_and_name(nlk->cb_mutex,
+ mutex_init(&nlk->nl_cb_mutex);
+ lockdep_set_class_and_name(&nlk->nl_cb_mutex,
nlk_cb_mutex_keys + protocol,
nlk_cb_mutex_key_strings[protocol]);
- }
+ nlk->dump_cb_mutex = dump_cb_mutex;
init_waitqueue_head(&nlk->wait);
sk->sk_destruct = netlink_sock_destruct;
@@ -1987,7 +1983,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (READ_ONCE(nlk->cb_running) &&
atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
- ret = netlink_dump(sk);
+ ret = netlink_dump(sk, false);
if (ret) {
WRITE_ONCE(sk->sk_err, -ret);
sk_error_report(sk);
@@ -2196,7 +2192,7 @@ static int netlink_dump_done(struct netlink_sock *nlk, struct sk_buff *skb,
return 0;
}
-static int netlink_dump(struct sock *sk)
+static int netlink_dump(struct sock *sk, bool lock_taken)
{
struct netlink_sock *nlk = nlk_sk(sk);
struct netlink_ext_ack extack = {};
@@ -2208,7 +2204,8 @@ static int netlink_dump(struct sock *sk)
int alloc_min_size;
int alloc_size;
- mutex_lock(nlk->cb_mutex);
+ if (!lock_taken)
+ mutex_lock(&nlk->nl_cb_mutex);
if (!nlk->cb_running) {
err = -EINVAL;
goto errout_skb;
@@ -2260,14 +2257,24 @@ static int netlink_dump(struct sock *sk)
netlink_skb_set_owner_r(skb, sk);
if (nlk->dump_done_errno > 0) {
+ struct mutex *extra_mutex = nlk->dump_cb_mutex;
+
cb->extack = &extack;
+
+ if (cb->flags & RTNL_FLAG_DUMP_UNLOCKED)
+ extra_mutex = NULL;
+ if (extra_mutex)
+ mutex_lock(extra_mutex);
nlk->dump_done_errno = cb->dump(skb, cb);
+ if (extra_mutex)
+ mutex_unlock(extra_mutex);
+
cb->extack = NULL;
}
if (nlk->dump_done_errno > 0 ||
skb_tailroom(skb) < nlmsg_total_size(sizeof(nlk->dump_done_errno))) {
- mutex_unlock(nlk->cb_mutex);
+ mutex_unlock(&nlk->nl_cb_mutex);
if (sk_filter(sk, skb))
kfree_skb(skb);
@@ -2301,13 +2308,13 @@ static int netlink_dump(struct sock *sk)
WRITE_ONCE(nlk->cb_running, false);
module = cb->module;
skb = cb->skb;
- mutex_unlock(nlk->cb_mutex);
+ mutex_unlock(&nlk->nl_cb_mutex);
module_put(module);
consume_skb(skb);
return 0;
errout_skb:
- mutex_unlock(nlk->cb_mutex);
+ mutex_unlock(&nlk->nl_cb_mutex);
kfree_skb(skb);
return err;
}
@@ -2330,7 +2337,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
}
nlk = nlk_sk(sk);
- mutex_lock(nlk->cb_mutex);
+ mutex_lock(&nlk->nl_cb_mutex);
/* A dump is in progress... */
if (nlk->cb_running) {
ret = -EBUSY;
@@ -2350,6 +2357,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
cb->data = control->data;
cb->module = control->module;
cb->min_dump_alloc = control->min_dump_alloc;
+ cb->flags = control->flags;
cb->skb = skb;
cb->strict_check = nlk_test_bit(STRICT_CHK, NETLINK_CB(skb).sk);
@@ -2365,9 +2373,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
WRITE_ONCE(nlk->cb_running, true);
nlk->dump_done_errno = INT_MAX;
- mutex_unlock(nlk->cb_mutex);
-
- ret = netlink_dump(sk);
+ ret = netlink_dump(sk, true);
sock_put(sk);
@@ -2383,7 +2389,7 @@ error_put:
module_put(control->module);
error_unlock:
sock_put(sk);
- mutex_unlock(nlk->cb_mutex);
+ mutex_unlock(&nlk->nl_cb_mutex);
error_free:
kfree_skb(skb);
return ret;
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index 2145979b9986..9751e29d4bbb 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -39,8 +39,9 @@ struct netlink_sock {
bool cb_running;
int dump_done_errno;
struct netlink_callback cb;
- struct mutex *cb_mutex;
- struct mutex cb_def_mutex;
+ struct mutex nl_cb_mutex;
+
+ struct mutex *dump_cb_mutex;
void (*netlink_rcv)(struct sk_buff *skb);
int (*netlink_bind)(struct net *net, int group);
void (*netlink_unbind)(struct net *net, int group);
diff --git a/net/netlink/diag.c b/net/netlink/diag.c
index e12c90d5f6ad..61981e01fd6f 100644
--- a/net/netlink/diag.c
+++ b/net/netlink/diag.c
@@ -207,7 +207,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
err = __netlink_diag_dump(skb, cb, req->sdiag_protocol, s_num);
}
- return err < 0 ? err : skb->len;
+ return err <= 0 ? err : skb->len;
}
static int netlink_diag_dump_done(struct netlink_callback *cb)
diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c
index dafefef3cf51..717855b9acf1 100644
--- a/net/xfrm/xfrm_interface_core.c
+++ b/net/xfrm/xfrm_interface_core.c
@@ -727,7 +727,7 @@ static int xfrmi_get_iflink(const struct net_device *dev)
{
struct xfrm_if *xi = netdev_priv(dev);
- return xi->p.link;
+ return READ_ONCE(xi->p.link);
}
static const struct net_device_ops xfrmi_netdev_ops = {