From 046339eaab26804f52f6604877f5674f70815b26 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Fri, 22 Apr 2016 00:33:03 +0300 Subject: net/mlx5e: Device's mtu field is u16 and not int For set/query MTU port firmware commands the MTU field is 16 bits, here I changed all the "int mtu" parameters of the functions wrapping those firmware commands to be u16. Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/port.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index a1d145abd4eb..b30250ab7604 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -54,9 +54,9 @@ int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, int mlx5_query_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status *status); -int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu, u8 port); -void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu, u8 port); -void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu, +int mlx5_set_port_mtu(struct mlx5_core_dev *dev, u16 mtu, u8 port); +void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, u16 *max_mtu, u8 port); +void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, u16 *oper_mtu, u8 port); int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev, -- cgit v1.2.3-70-g09d2 From cd255efff9baadd654d6160e52d17ae7c568c9d3 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Fri, 22 Apr 2016 00:33:05 +0300 Subject: net/mlx5e: Use vport MTU rather than physical port MTU Set and report vport MTU rather than physical MTU, Driver will set both vport and physical port mtu and will rely on the query of vport mtu. SRIOV VFs have to report their MTU to their vport manager (PF), and this will allow them to work with any MTU they need without failing the request. Also for some cases where the PF is not a port owner, PF can work with MTU less than the physical port mtu if set physical port mtu didn't take effect. Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 44 ++++++++++++++++++----- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 40 +++++++++++++++++++++ include/linux/mlx5/vport.h | 2 ++ 3 files changed, 77 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 93e4ef43fa9f..85773f8573e5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1404,24 +1404,50 @@ static int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5e_priv *priv) return 0; } -static int mlx5e_set_dev_port_mtu(struct net_device *netdev) +static int mlx5e_set_mtu(struct mlx5e_priv *priv, u16 mtu) { - struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; - u16 hw_mtu; + u16 hw_mtu = MLX5E_SW2HW_MTU(mtu); int err; - err = mlx5_set_port_mtu(mdev, MLX5E_SW2HW_MTU(netdev->mtu), 1); + err = mlx5_set_port_mtu(mdev, hw_mtu, 1); if (err) return err; - mlx5_query_port_oper_mtu(mdev, &hw_mtu, 1); + /* Update vport context MTU */ + mlx5_modify_nic_vport_mtu(mdev, hw_mtu); + return 0; +} + +static void mlx5e_query_mtu(struct mlx5e_priv *priv, u16 *mtu) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u16 hw_mtu = 0; + int err; + + err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu); + if (err || !hw_mtu) /* fallback to port oper mtu */ + mlx5_query_port_oper_mtu(mdev, &hw_mtu, 1); + + *mtu = MLX5E_HW2SW_MTU(hw_mtu); +} + +static int mlx5e_set_dev_port_mtu(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + u16 mtu; + int err; + + err = mlx5e_set_mtu(priv, netdev->mtu); + if (err) + return err; - if (MLX5E_HW2SW_MTU(hw_mtu) != netdev->mtu) - netdev_warn(netdev, "%s: Port MTU %d is different than netdev mtu %d\n", - __func__, MLX5E_HW2SW_MTU(hw_mtu), netdev->mtu); + mlx5e_query_mtu(priv, &mtu); + if (mtu != netdev->mtu) + netdev_warn(netdev, "%s: VPort MTU %d is different than netdev mtu %d\n", + __func__, mtu, netdev->mtu); - netdev->mtu = MLX5E_HW2SW_MTU(hw_mtu); + netdev->mtu = mtu; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index bd518405859e..b69dadcfb897 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -196,6 +196,46 @@ int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev, } EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_address); +int mlx5_query_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 *mtu) +{ + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + u32 *out; + int err; + + out = mlx5_vzalloc(outlen); + if (!out) + return -ENOMEM; + + err = mlx5_query_nic_vport_context(mdev, 0, out, outlen); + if (!err) + *mtu = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.mtu); + + kvfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mtu); + +int mlx5_modify_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 mtu) +{ + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + void *in; + int err; + + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1); + MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu, mtu); + + err = mlx5_modify_nic_vport_context(mdev, in, inlen); + + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mtu); + int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, u32 vport, enum mlx5_list_type list_type, diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index bd93e6323603..301da4a5e6bf 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -45,6 +45,8 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u16 vport, u8 *addr); int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *dev, u16 vport, u8 *addr); +int mlx5_query_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 *mtu); +int mlx5_modify_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 mtu); int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev, u64 *system_image_guid); int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid); -- cgit v1.2.3-70-g09d2 From 5fc7197d3a256d9c5de3134870304b24892a4908 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Fri, 22 Apr 2016 00:33:07 +0300 Subject: net/mlx5: Add pci shutdown callback This patch introduces kexec support for mlx5. When switching kernels, kexec() calls shutdown, which unloads the driver and cleans its resources. In addition, remove unregister netdev from shutdown flow. This will allow a clean shutdown, even if some netdev clients did not release their reference from this netdev. Releasing The HW resources only is enough as the kernel is shutting down Signed-off-by: Majd Dibbiny Signed-off-by: Tariq Toukan Signed-off-by: Haggai Abramovsky Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 15 +++++++++++++-- drivers/net/ethernet/mellanox/mlx5/core/main.c | 23 +++++++++++++++++++---- include/linux/mlx5/driver.h | 7 ++++--- 3 files changed, 36 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 85773f8573e5..67d548b70e14 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2633,7 +2633,16 @@ static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, void *vpriv) schedule_work(&priv->set_rx_mode_work); mlx5e_disable_async_events(priv); flush_scheduled_work(); - unregister_netdev(netdev); + if (test_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &mdev->intf_state)) { + netif_device_detach(netdev); + mutex_lock(&priv->state_lock); + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + mlx5e_close_locked(netdev); + mutex_unlock(&priv->state_lock); + } else { + unregister_netdev(netdev); + } + mlx5e_tc_cleanup(priv); mlx5e_vxlan_cleanup(priv); mlx5e_destroy_flow_tables(priv); @@ -2646,7 +2655,9 @@ static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, void *vpriv) mlx5_core_dealloc_transport_domain(priv->mdev, priv->tdn); mlx5_core_dealloc_pd(priv->mdev, priv->pdn); mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar); - free_netdev(netdev); + + if (!test_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &mdev->intf_state)) + free_netdev(netdev); } static void *mlx5e_get_netdev(void *vpriv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index ddd352ae3dbd..6892746fd10d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -966,7 +966,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) int err; mutex_lock(&dev->intf_state_mutex); - if (dev->interface_state == MLX5_INTERFACE_STATE_UP) { + if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { dev_warn(&dev->pdev->dev, "%s: interface is up, NOP\n", __func__); goto out; @@ -1133,7 +1133,8 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) if (err) pr_info("failed request module on %s\n", MLX5_IB_MOD); - dev->interface_state = MLX5_INTERFACE_STATE_UP; + clear_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state); + set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); out: mutex_unlock(&dev->intf_state_mutex); @@ -1207,7 +1208,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) } mutex_lock(&dev->intf_state_mutex); - if (dev->interface_state == MLX5_INTERFACE_STATE_DOWN) { + if (test_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state)) { dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n", __func__); goto out; @@ -1241,7 +1242,8 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_cmd_cleanup(dev); out: - dev->interface_state = MLX5_INTERFACE_STATE_DOWN; + clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); + set_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state); mutex_unlock(&dev->intf_state_mutex); return err; } @@ -1452,6 +1454,18 @@ static const struct pci_error_handlers mlx5_err_handler = { .resume = mlx5_pci_resume }; +static void shutdown(struct pci_dev *pdev) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_priv *priv = &dev->priv; + + dev_info(&pdev->dev, "Shutdown was called\n"); + /* Notify mlx5 clients that the kernel is being shut down */ + set_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &dev->intf_state); + mlx5_unload_one(dev, priv); + mlx5_pci_disable_device(dev); +} + static const struct pci_device_id mlx5_core_pci_table[] = { { PCI_VDEVICE(MELLANOX, 0x1011) }, /* Connect-IB */ { PCI_VDEVICE(MELLANOX, 0x1012), MLX5_PCI_DEV_IS_VF}, /* Connect-IB VF */ @@ -1471,6 +1485,7 @@ static struct pci_driver mlx5_core_driver = { .id_table = mlx5_core_pci_table, .probe = init_one, .remove = remove_one, + .shutdown = shutdown, .err_handler = &mlx5_err_handler, .sriov_configure = mlx5_core_sriov_configure, }; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index dcd5ac8d3b14..369c837d40f5 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -519,8 +519,9 @@ enum mlx5_device_state { }; enum mlx5_interface_state { - MLX5_INTERFACE_STATE_DOWN, - MLX5_INTERFACE_STATE_UP, + MLX5_INTERFACE_STATE_DOWN = BIT(0), + MLX5_INTERFACE_STATE_UP = BIT(1), + MLX5_INTERFACE_STATE_SHUTDOWN = BIT(2), }; enum mlx5_pci_status { @@ -544,7 +545,7 @@ struct mlx5_core_dev { enum mlx5_device_state state; /* sync interface state */ struct mutex intf_state_mutex; - enum mlx5_interface_state interface_state; + unsigned long intf_state; void (*event) (struct mlx5_core_dev *dev, enum mlx5_dev_event event, unsigned long param); -- cgit v1.2.3-70-g09d2 From 841645b5f2dfceac69b78fcd0c9050868d41ea61 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 25 Apr 2016 15:33:55 -0400 Subject: ipv6: Revert optional address flusing on ifdown. This reverts the following three commits: 70af921db6f8835f4b11c65731116560adb00c14 799977d9aafbf0ca0b9c39b04cbfb16db71302c9 f1705ec197e705b79ea40fe7a2cc5acfa1d3bfac The feature was ill conceived, has terrible semantics, and has added nothing but regressions to the already fragile ipv6 stack. Fixes: f1705ec197e7 ("net: ipv6: Make address flushing on ifdown optional") Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 9 -- include/linux/ipv6.h | 1 - include/uapi/linux/ipv6.h | 1 - net/ipv6/addrconf.c | 162 +++------------------------------ 4 files changed, 12 insertions(+), 161 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index b183e2b606c8..e0ac25210f7c 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1568,15 +1568,6 @@ temp_prefered_lft - INTEGER Preferred lifetime (in seconds) for temporary addresses. Default: 86400 (1 day) -keep_addr_on_down - INTEGER - Keep all IPv6 addresses on an interface down event. If set static - global addresses with no expiration time are not flushed. - >0 : enabled - 0 : system default - <0 : disabled - - Default: 0 (addresses are removed) - max_desync_factor - INTEGER Maximum value for DESYNC_FACTOR, which is a random value that ensures that clients don't synchronize with each diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 7edc14fb66b6..4b2267e1b7c3 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -62,7 +62,6 @@ struct ipv6_devconf { struct in6_addr secret; } stable_secret; __s32 use_oif_addrs_only; - __s32 keep_addr_on_down; void *sysctl; }; diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 395876060f50..ec117b65d5a5 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -176,7 +176,6 @@ enum { DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN, DEVCONF_DROP_UNICAST_IN_L2_MULTICAST, DEVCONF_DROP_UNSOLICITED_NA, - DEVCONF_KEEP_ADDR_ON_DOWN, DEVCONF_MAX }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 23cec53b568a..d77ba395d593 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -216,7 +216,6 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { }, .use_oif_addrs_only = 0, .ignore_routes_with_linkdown = 0, - .keep_addr_on_down = 0, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -261,7 +260,6 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { }, .use_oif_addrs_only = 0, .ignore_routes_with_linkdown = 0, - .keep_addr_on_down = 0, }; /* Check if a valid qdisc is available */ @@ -3176,81 +3174,6 @@ static void addrconf_gre_config(struct net_device *dev) } #endif -#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) -/* If the host route is cached on the addr struct make sure it is associated - * with the proper table. e.g., enslavement can change and if so the cached - * host route needs to move to the new table. - */ -static void l3mdev_check_host_rt(struct inet6_dev *idev, - struct inet6_ifaddr *ifp) -{ - if (ifp->rt) { - u32 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL; - - if (tb_id != ifp->rt->rt6i_table->tb6_id) { - ip6_del_rt(ifp->rt); - ifp->rt = NULL; - } - } -} -#else -static void l3mdev_check_host_rt(struct inet6_dev *idev, - struct inet6_ifaddr *ifp) -{ -} -#endif - -static int fixup_permanent_addr(struct inet6_dev *idev, - struct inet6_ifaddr *ifp) -{ - l3mdev_check_host_rt(idev, ifp); - - if (!ifp->rt) { - struct rt6_info *rt; - - rt = addrconf_dst_alloc(idev, &ifp->addr, false); - if (unlikely(IS_ERR(rt))) - return PTR_ERR(rt); - - ifp->rt = rt; - } - - if (!(ifp->flags & IFA_F_NOPREFIXROUTE)) { - addrconf_prefix_route(&ifp->addr, ifp->prefix_len, - idev->dev, 0, 0); - } - - addrconf_dad_start(ifp); - - return 0; -} - -static void addrconf_permanent_addr(struct net_device *dev) -{ - struct inet6_ifaddr *ifp, *tmp; - struct inet6_dev *idev; - - idev = __in6_dev_get(dev); - if (!idev) - return; - - write_lock_bh(&idev->lock); - - list_for_each_entry_safe(ifp, tmp, &idev->addr_list, if_list) { - if ((ifp->flags & IFA_F_PERMANENT) && - fixup_permanent_addr(idev, ifp) < 0) { - write_unlock_bh(&idev->lock); - ipv6_del_addr(ifp); - write_lock_bh(&idev->lock); - - net_info_ratelimited("%s: Failed to add prefix route for address %pI6c; dropping\n", - idev->dev->name, &ifp->addr); - } - } - - write_unlock_bh(&idev->lock); -} - static int addrconf_notify(struct notifier_block *this, unsigned long event, void *ptr) { @@ -3337,9 +3260,6 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, run_pending = 1; } - /* restore routes for permanent addresses */ - addrconf_permanent_addr(dev); - switch (dev->type) { #if IS_ENABLED(CONFIG_IPV6_SIT) case ARPHRD_SIT: @@ -3448,20 +3368,11 @@ static void addrconf_type_change(struct net_device *dev, unsigned long event) ipv6_mc_unmap(idev); } -static bool addr_is_local(const struct in6_addr *addr) -{ - return ipv6_addr_type(addr) & - (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); -} - static int addrconf_ifdown(struct net_device *dev, int how) { struct net *net = dev_net(dev); struct inet6_dev *idev; - struct inet6_ifaddr *ifa, *tmp; - struct list_head del_list; - int _keep_addr; - bool keep_addr; + struct inet6_ifaddr *ifa; int state, i; ASSERT_RTNL(); @@ -3488,16 +3399,6 @@ static int addrconf_ifdown(struct net_device *dev, int how) } - /* aggregate the system setting and interface setting */ - _keep_addr = net->ipv6.devconf_all->keep_addr_on_down; - if (!_keep_addr) - _keep_addr = idev->cnf.keep_addr_on_down; - - /* combine the user config with event to determine if permanent - * addresses are to be removed from address hash table - */ - keep_addr = !(how || _keep_addr <= 0); - /* Step 2: clear hash table */ for (i = 0; i < IN6_ADDR_HSIZE; i++) { struct hlist_head *h = &inet6_addr_lst[i]; @@ -3506,16 +3407,9 @@ static int addrconf_ifdown(struct net_device *dev, int how) restart: hlist_for_each_entry_rcu(ifa, h, addr_lst) { if (ifa->idev == idev) { + hlist_del_init_rcu(&ifa->addr_lst); addrconf_del_dad_work(ifa); - /* combined flag + permanent flag decide if - * address is retained on a down event - */ - if (!keep_addr || - !(ifa->flags & IFA_F_PERMANENT) || - addr_is_local(&ifa->addr)) { - hlist_del_init_rcu(&ifa->addr_lst); - goto restart; - } + goto restart; } } spin_unlock_bh(&addrconf_hash_lock); @@ -3549,54 +3443,31 @@ restart: write_lock_bh(&idev->lock); } - /* re-combine the user config with event to determine if permanent - * addresses are to be removed from the interface list - */ - keep_addr = (!how && _keep_addr > 0); - - INIT_LIST_HEAD(&del_list); - list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) { + while (!list_empty(&idev->addr_list)) { + ifa = list_first_entry(&idev->addr_list, + struct inet6_ifaddr, if_list); addrconf_del_dad_work(ifa); - write_unlock_bh(&idev->lock); - spin_lock_bh(&ifa->lock); - - if (keep_addr && (ifa->flags & IFA_F_PERMANENT) && - !addr_is_local(&ifa->addr)) { - /* set state to skip the notifier below */ - state = INET6_IFADDR_STATE_DEAD; - ifa->state = 0; - if (!(ifa->flags & IFA_F_NODAD)) - ifa->flags |= IFA_F_TENTATIVE; - } else { - state = ifa->state; - ifa->state = INET6_IFADDR_STATE_DEAD; + list_del(&ifa->if_list); - list_del(&ifa->if_list); - list_add(&ifa->if_list, &del_list); - } + write_unlock_bh(&idev->lock); + spin_lock_bh(&ifa->lock); + state = ifa->state; + ifa->state = INET6_IFADDR_STATE_DEAD; spin_unlock_bh(&ifa->lock); if (state != INET6_IFADDR_STATE_DEAD) { __ipv6_ifa_notify(RTM_DELADDR, ifa); inet6addr_notifier_call_chain(NETDEV_DOWN, ifa); } + in6_ifa_put(ifa); write_lock_bh(&idev->lock); } write_unlock_bh(&idev->lock); - /* now clean up addresses to be removed */ - while (!list_empty(&del_list)) { - ifa = list_first_entry(&del_list, - struct inet6_ifaddr, if_list); - list_del(&ifa->if_list); - - in6_ifa_put(ifa); - } - /* Step 5: Discard anycast and multicast list */ if (how) { ipv6_ac_destroy_dev(idev); @@ -4861,7 +4732,6 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_USE_OIF_ADDRS_ONLY] = cnf->use_oif_addrs_only; array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = cnf->drop_unicast_in_l2_multicast; array[DEVCONF_DROP_UNSOLICITED_NA] = cnf->drop_unsolicited_na; - array[DEVCONF_KEEP_ADDR_ON_DOWN] = cnf->keep_addr_on_down; } static inline size_t inet6_ifla6_size(void) @@ -5949,14 +5819,6 @@ static struct addrconf_sysctl_table .mode = 0644, .proc_handler = proc_dointvec, }, - { - .procname = "keep_addr_on_down", - .data = &ipv6_devconf.keep_addr_on_down, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - - }, { /* sentinel */ } -- cgit v1.2.3-70-g09d2 From 6a923934c33c750a595868af6bef5f1a1fa90054 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 26 Apr 2016 11:47:41 -0400 Subject: Revert "ipv6: Revert optional address flusing on ifdown." This reverts commit 841645b5f2dfceac69b78fcd0c9050868d41ea61. Ok, this puts the feature back. I've decided to apply David A.'s bug fix and run with that rather than make everyone wait another whole release for this feature. Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 9 ++ include/linux/ipv6.h | 1 + include/uapi/linux/ipv6.h | 1 + net/ipv6/addrconf.c | 162 ++++++++++++++++++++++++++++++--- 4 files changed, 161 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index e0ac25210f7c..b183e2b606c8 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1568,6 +1568,15 @@ temp_prefered_lft - INTEGER Preferred lifetime (in seconds) for temporary addresses. Default: 86400 (1 day) +keep_addr_on_down - INTEGER + Keep all IPv6 addresses on an interface down event. If set static + global addresses with no expiration time are not flushed. + >0 : enabled + 0 : system default + <0 : disabled + + Default: 0 (addresses are removed) + max_desync_factor - INTEGER Maximum value for DESYNC_FACTOR, which is a random value that ensures that clients don't synchronize with each diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 4b2267e1b7c3..7edc14fb66b6 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -62,6 +62,7 @@ struct ipv6_devconf { struct in6_addr secret; } stable_secret; __s32 use_oif_addrs_only; + __s32 keep_addr_on_down; void *sysctl; }; diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index ec117b65d5a5..395876060f50 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -176,6 +176,7 @@ enum { DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN, DEVCONF_DROP_UNICAST_IN_L2_MULTICAST, DEVCONF_DROP_UNSOLICITED_NA, + DEVCONF_KEEP_ADDR_ON_DOWN, DEVCONF_MAX }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d77ba395d593..23cec53b568a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -216,6 +216,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { }, .use_oif_addrs_only = 0, .ignore_routes_with_linkdown = 0, + .keep_addr_on_down = 0, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -260,6 +261,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { }, .use_oif_addrs_only = 0, .ignore_routes_with_linkdown = 0, + .keep_addr_on_down = 0, }; /* Check if a valid qdisc is available */ @@ -3174,6 +3176,81 @@ static void addrconf_gre_config(struct net_device *dev) } #endif +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) +/* If the host route is cached on the addr struct make sure it is associated + * with the proper table. e.g., enslavement can change and if so the cached + * host route needs to move to the new table. + */ +static void l3mdev_check_host_rt(struct inet6_dev *idev, + struct inet6_ifaddr *ifp) +{ + if (ifp->rt) { + u32 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL; + + if (tb_id != ifp->rt->rt6i_table->tb6_id) { + ip6_del_rt(ifp->rt); + ifp->rt = NULL; + } + } +} +#else +static void l3mdev_check_host_rt(struct inet6_dev *idev, + struct inet6_ifaddr *ifp) +{ +} +#endif + +static int fixup_permanent_addr(struct inet6_dev *idev, + struct inet6_ifaddr *ifp) +{ + l3mdev_check_host_rt(idev, ifp); + + if (!ifp->rt) { + struct rt6_info *rt; + + rt = addrconf_dst_alloc(idev, &ifp->addr, false); + if (unlikely(IS_ERR(rt))) + return PTR_ERR(rt); + + ifp->rt = rt; + } + + if (!(ifp->flags & IFA_F_NOPREFIXROUTE)) { + addrconf_prefix_route(&ifp->addr, ifp->prefix_len, + idev->dev, 0, 0); + } + + addrconf_dad_start(ifp); + + return 0; +} + +static void addrconf_permanent_addr(struct net_device *dev) +{ + struct inet6_ifaddr *ifp, *tmp; + struct inet6_dev *idev; + + idev = __in6_dev_get(dev); + if (!idev) + return; + + write_lock_bh(&idev->lock); + + list_for_each_entry_safe(ifp, tmp, &idev->addr_list, if_list) { + if ((ifp->flags & IFA_F_PERMANENT) && + fixup_permanent_addr(idev, ifp) < 0) { + write_unlock_bh(&idev->lock); + ipv6_del_addr(ifp); + write_lock_bh(&idev->lock); + + net_info_ratelimited("%s: Failed to add prefix route for address %pI6c; dropping\n", + idev->dev->name, &ifp->addr); + } + } + + write_unlock_bh(&idev->lock); +} + static int addrconf_notify(struct notifier_block *this, unsigned long event, void *ptr) { @@ -3260,6 +3337,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, run_pending = 1; } + /* restore routes for permanent addresses */ + addrconf_permanent_addr(dev); + switch (dev->type) { #if IS_ENABLED(CONFIG_IPV6_SIT) case ARPHRD_SIT: @@ -3368,11 +3448,20 @@ static void addrconf_type_change(struct net_device *dev, unsigned long event) ipv6_mc_unmap(idev); } +static bool addr_is_local(const struct in6_addr *addr) +{ + return ipv6_addr_type(addr) & + (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); +} + static int addrconf_ifdown(struct net_device *dev, int how) { struct net *net = dev_net(dev); struct inet6_dev *idev; - struct inet6_ifaddr *ifa; + struct inet6_ifaddr *ifa, *tmp; + struct list_head del_list; + int _keep_addr; + bool keep_addr; int state, i; ASSERT_RTNL(); @@ -3399,6 +3488,16 @@ static int addrconf_ifdown(struct net_device *dev, int how) } + /* aggregate the system setting and interface setting */ + _keep_addr = net->ipv6.devconf_all->keep_addr_on_down; + if (!_keep_addr) + _keep_addr = idev->cnf.keep_addr_on_down; + + /* combine the user config with event to determine if permanent + * addresses are to be removed from address hash table + */ + keep_addr = !(how || _keep_addr <= 0); + /* Step 2: clear hash table */ for (i = 0; i < IN6_ADDR_HSIZE; i++) { struct hlist_head *h = &inet6_addr_lst[i]; @@ -3407,9 +3506,16 @@ static int addrconf_ifdown(struct net_device *dev, int how) restart: hlist_for_each_entry_rcu(ifa, h, addr_lst) { if (ifa->idev == idev) { - hlist_del_init_rcu(&ifa->addr_lst); addrconf_del_dad_work(ifa); - goto restart; + /* combined flag + permanent flag decide if + * address is retained on a down event + */ + if (!keep_addr || + !(ifa->flags & IFA_F_PERMANENT) || + addr_is_local(&ifa->addr)) { + hlist_del_init_rcu(&ifa->addr_lst); + goto restart; + } } } spin_unlock_bh(&addrconf_hash_lock); @@ -3443,31 +3549,54 @@ restart: write_lock_bh(&idev->lock); } - while (!list_empty(&idev->addr_list)) { - ifa = list_first_entry(&idev->addr_list, - struct inet6_ifaddr, if_list); - addrconf_del_dad_work(ifa); + /* re-combine the user config with event to determine if permanent + * addresses are to be removed from the interface list + */ + keep_addr = (!how && _keep_addr > 0); - list_del(&ifa->if_list); + INIT_LIST_HEAD(&del_list); + list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) { + addrconf_del_dad_work(ifa); write_unlock_bh(&idev->lock); - spin_lock_bh(&ifa->lock); - state = ifa->state; - ifa->state = INET6_IFADDR_STATE_DEAD; + + if (keep_addr && (ifa->flags & IFA_F_PERMANENT) && + !addr_is_local(&ifa->addr)) { + /* set state to skip the notifier below */ + state = INET6_IFADDR_STATE_DEAD; + ifa->state = 0; + if (!(ifa->flags & IFA_F_NODAD)) + ifa->flags |= IFA_F_TENTATIVE; + } else { + state = ifa->state; + ifa->state = INET6_IFADDR_STATE_DEAD; + + list_del(&ifa->if_list); + list_add(&ifa->if_list, &del_list); + } + spin_unlock_bh(&ifa->lock); if (state != INET6_IFADDR_STATE_DEAD) { __ipv6_ifa_notify(RTM_DELADDR, ifa); inet6addr_notifier_call_chain(NETDEV_DOWN, ifa); } - in6_ifa_put(ifa); write_lock_bh(&idev->lock); } write_unlock_bh(&idev->lock); + /* now clean up addresses to be removed */ + while (!list_empty(&del_list)) { + ifa = list_first_entry(&del_list, + struct inet6_ifaddr, if_list); + list_del(&ifa->if_list); + + in6_ifa_put(ifa); + } + /* Step 5: Discard anycast and multicast list */ if (how) { ipv6_ac_destroy_dev(idev); @@ -4732,6 +4861,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_USE_OIF_ADDRS_ONLY] = cnf->use_oif_addrs_only; array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = cnf->drop_unicast_in_l2_multicast; array[DEVCONF_DROP_UNSOLICITED_NA] = cnf->drop_unsolicited_na; + array[DEVCONF_KEEP_ADDR_ON_DOWN] = cnf->keep_addr_on_down; } static inline size_t inet6_ifla6_size(void) @@ -5819,6 +5949,14 @@ static struct addrconf_sysctl_table .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "keep_addr_on_down", + .data = &ipv6_devconf.keep_addr_on_down, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + + }, { /* sentinel */ } -- cgit v1.2.3-70-g09d2