From 91b0383fef06f20b847fa9e4f0e3054ead0b1a1b Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 24 Feb 2022 18:01:54 +0200 Subject: net: dcb: flush lingering app table entries for unregistered devices If I'm not mistaken (and I don't think I am), the way in which the dcbnl_ops work is that drivers call dcb_ieee_setapp() and this populates the application table with dynamically allocated struct dcb_app_type entries that are kept in the module-global dcb_app_list. However, nobody keeps exact track of these entries, and although dcb_ieee_delapp() is supposed to remove them, nobody does so when the interface goes away (example: driver unbinds from device). So the dcb_app_list will contain lingering entries with an ifindex that no longer matches any device in dcb_app_lookup(). Reclaim the lost memory by listening for the NETDEV_UNREGISTER event and flushing the app table entries of interfaces that are now gone. In fact something like this used to be done as part of the initial commit (blamed below), but it was done in dcbnl_exit() -> dcb_flushapp(), essentially at module_exit time. That became dead code after commit 7a6b6f515f77 ("DCB: fix kconfig option") which essentially merged "tristate config DCB" and "bool config DCBNL" into a single "bool config DCB", so net/dcb/dcbnl.c could not be built as a module anymore. Commit 36b9ad8084bd ("net/dcb: make dcbnl.c explicitly non-modular") recognized this and deleted dcbnl_exit() and dcb_flushapp() altogether, leaving us with the version we have today. Since flushing application table entries can and should be done as soon as the netdevice disappears, fundamentally the commit that is to blame is the one that introduced the design of this API. Fixes: 9ab933ab2cc8 ("dcbnl: add appliction tlv handlers") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- net/dcb/dcbnl.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'net/dcb/dcbnl.c') diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index b441ab330fd3..36c91273daac 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -2073,8 +2073,52 @@ u8 dcb_ieee_getapp_default_prio_mask(const struct net_device *dev) } EXPORT_SYMBOL(dcb_ieee_getapp_default_prio_mask); +static void dcbnl_flush_dev(struct net_device *dev) +{ + struct dcb_app_type *itr, *tmp; + + spin_lock(&dcb_lock); + + list_for_each_entry_safe(itr, tmp, &dcb_app_list, list) { + if (itr->ifindex == dev->ifindex) { + list_del(&itr->list); + kfree(itr); + } + } + + spin_unlock(&dcb_lock); +} + +static int dcbnl_netdevice_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + + switch (event) { + case NETDEV_UNREGISTER: + if (!dev->dcbnl_ops) + return NOTIFY_DONE; + + dcbnl_flush_dev(dev); + + return NOTIFY_OK; + default: + return NOTIFY_DONE; + } +} + +static struct notifier_block dcbnl_nb __read_mostly = { + .notifier_call = dcbnl_netdevice_event, +}; + static int __init dcbnl_init(void) { + int err; + + err = register_netdevice_notifier(&dcbnl_nb); + if (err) + return err; + rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL, 0); rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL, 0); -- cgit v1.2.3-70-g09d2 From 10b6bb62ae1a49ee818fc479cf57b8900176773e Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 2 Mar 2022 21:39:39 +0200 Subject: net: dcb: disable softirqs in dcbnl_flush_dev() Ido Schimmel points out that since commit 52cff74eef5d ("dcbnl : Disable software interrupts before taking dcb_lock"), the DCB API can be called by drivers from softirq context. One such in-tree example is the chelsio cxgb4 driver: dcb_rpl -> cxgb4_dcb_handle_fw_update -> dcb_ieee_setapp If the firmware for this driver happened to send an event which resulted in a call to dcb_ieee_setapp() at the exact same time as another DCB-enabled interface was unregistering on the same CPU, the softirq would deadlock, because the interrupted process was already holding the dcb_lock in dcbnl_flush_dev(). Fix this unlikely event by using spin_lock_bh() in dcbnl_flush_dev() as in the rest of the dcbnl code. Fixes: 91b0383fef06 ("net: dcb: flush lingering app table entries for unregistered devices") Reported-by: Ido Schimmel Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20220302193939.1368823-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- net/dcb/dcbnl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/dcb/dcbnl.c') diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 36c91273daac..dc4fb699b56c 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -2077,7 +2077,7 @@ static void dcbnl_flush_dev(struct net_device *dev) { struct dcb_app_type *itr, *tmp; - spin_lock(&dcb_lock); + spin_lock_bh(&dcb_lock); list_for_each_entry_safe(itr, tmp, &dcb_app_list, list) { if (itr->ifindex == dev->ifindex) { @@ -2086,7 +2086,7 @@ static void dcbnl_flush_dev(struct net_device *dev) } } - spin_unlock(&dcb_lock); + spin_unlock_bh(&dcb_lock); } static int dcbnl_netdevice_event(struct notifier_block *nb, -- cgit v1.2.3-70-g09d2