diff options
author | David S. Miller <davem@davemloft.net> | 2017-11-03 15:40:42 +0900 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2017-11-03 15:40:42 +0900 |
commit | 6437b112f70612212c20359a708af106a564d17c (patch) | |
tree | 83492098d0c91c46eb68b578c6318efcbd23241d | |
parent | a882d20cdb7775ff8b4aac880255eff6a2c1c85e (diff) | |
parent | 28678f07f127d151354ff12b0d05557ae897e972 (diff) |
Merge branch 'mlxsw-Align-multipath-hash-parameters-with-kernels'
Jiri Pirko says:
====================
mlxsw: Align multipath hash parameters with kernel's
Ido says:
This set makes sure the device is using the same parameters as the
kernel when it computes the multipath hash during IP forwarding.
First patch adds a new netevent to let interested listeners know that
the multipath hash policy has changed.
Next two patches do small and non-functional changes in the mlxsw
driver.
Last patches configure the multipath hash policy upon driver
initialization and as a response to netevents.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/ethernet/mellanox/mlxsw/reg.h | 132 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 7 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 2 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 133 | ||||
-rw-r--r-- | include/net/netevent.h | 1 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 20 |
6 files changed, 270 insertions, 25 deletions
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index db6cd263dd61..5066553dd0b6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -5844,6 +5844,137 @@ static inline void mlxsw_reg_rigr2_erif_entry_pack(char *payload, int index, mlxsw_reg_rigr2_erif_entry_erif_set(payload, index, erif); } +/* RECR-V2 - Router ECMP Configuration Version 2 Register + * ------------------------------------------------------ + */ +#define MLXSW_REG_RECR2_ID 0x8025 +#define MLXSW_REG_RECR2_LEN 0x38 + +MLXSW_REG_DEFINE(recr2, MLXSW_REG_RECR2_ID, MLXSW_REG_RECR2_LEN); + +/* reg_recr2_pp + * Per-port configuration + * Access: Index + */ +MLXSW_ITEM32(reg, recr2, pp, 0x00, 24, 1); + +/* reg_recr2_sh + * Symmetric hash + * Access: RW + */ +MLXSW_ITEM32(reg, recr2, sh, 0x00, 8, 1); + +/* reg_recr2_seed + * Seed + * Access: RW + */ +MLXSW_ITEM32(reg, recr2, seed, 0x08, 0, 32); + +enum { + /* Enable IPv4 fields if packet is not TCP and not UDP */ + MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP = 3, + /* Enable IPv4 fields if packet is TCP or UDP */ + MLXSW_REG_RECR2_IPV4_EN_TCP_UDP = 4, + /* Enable IPv6 fields if packet is not TCP and not UDP */ + MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP = 5, + /* Enable IPv6 fields if packet is TCP or UDP */ + MLXSW_REG_RECR2_IPV6_EN_TCP_UDP = 6, + /* Enable TCP/UDP header fields if packet is IPv4 */ + MLXSW_REG_RECR2_TCP_UDP_EN_IPV4 = 7, + /* Enable TCP/UDP header fields if packet is IPv6 */ + MLXSW_REG_RECR2_TCP_UDP_EN_IPV6 = 8, +}; + +/* reg_recr2_outer_header_enables + * Bit mask where each bit enables a specific layer to be included in + * the hash calculation. + * Access: RW + */ +MLXSW_ITEM_BIT_ARRAY(reg, recr2, outer_header_enables, 0x10, 0x04, 1); + +enum { + /* IPv4 Source IP */ + MLXSW_REG_RECR2_IPV4_SIP0 = 9, + MLXSW_REG_RECR2_IPV4_SIP3 = 12, + /* IPv4 Destination IP */ + MLXSW_REG_RECR2_IPV4_DIP0 = 13, + MLXSW_REG_RECR2_IPV4_DIP3 = 16, + /* IP Protocol */ + MLXSW_REG_RECR2_IPV4_PROTOCOL = 17, + /* IPv6 Source IP */ + MLXSW_REG_RECR2_IPV6_SIP0_7 = 21, + MLXSW_REG_RECR2_IPV6_SIP8 = 29, + MLXSW_REG_RECR2_IPV6_SIP15 = 36, + /* IPv6 Destination IP */ + MLXSW_REG_RECR2_IPV6_DIP0_7 = 37, + MLXSW_REG_RECR2_IPV6_DIP8 = 45, + MLXSW_REG_RECR2_IPV6_DIP15 = 52, + /* IPv6 Next Header */ + MLXSW_REG_RECR2_IPV6_NEXT_HEADER = 53, + /* IPv6 Flow Label */ + MLXSW_REG_RECR2_IPV6_FLOW_LABEL = 57, + /* TCP/UDP Source Port */ + MLXSW_REG_RECR2_TCP_UDP_SPORT = 74, + /* TCP/UDP Destination Port */ + MLXSW_REG_RECR2_TCP_UDP_DPORT = 75, +}; + +/* reg_recr2_outer_header_fields_enable + * Packet fields to enable for ECMP hash subject to outer_header_enable. + * Access: RW + */ +MLXSW_ITEM_BIT_ARRAY(reg, recr2, outer_header_fields_enable, 0x14, 0x14, 1); + +static inline void mlxsw_reg_recr2_ipv4_sip_enable(char *payload) +{ + int i; + + for (i = MLXSW_REG_RECR2_IPV4_SIP0; i <= MLXSW_REG_RECR2_IPV4_SIP3; i++) + mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i, + true); +} + +static inline void mlxsw_reg_recr2_ipv4_dip_enable(char *payload) +{ + int i; + + for (i = MLXSW_REG_RECR2_IPV4_DIP0; i <= MLXSW_REG_RECR2_IPV4_DIP3; i++) + mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i, + true); +} + +static inline void mlxsw_reg_recr2_ipv6_sip_enable(char *payload) +{ + int i = MLXSW_REG_RECR2_IPV6_SIP0_7; + + mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i, true); + + i = MLXSW_REG_RECR2_IPV6_SIP8; + for (; i <= MLXSW_REG_RECR2_IPV6_SIP15; i++) + mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i, + true); +} + +static inline void mlxsw_reg_recr2_ipv6_dip_enable(char *payload) +{ + int i = MLXSW_REG_RECR2_IPV6_DIP0_7; + + mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i, true); + + i = MLXSW_REG_RECR2_IPV6_DIP8; + for (; i <= MLXSW_REG_RECR2_IPV6_DIP15; i++) + mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i, + true); +} + +static inline void mlxsw_reg_recr2_pack(char *payload, u32 seed) +{ + MLXSW_REG_ZERO(recr2, payload); + mlxsw_reg_recr2_pp_set(payload, false); + mlxsw_reg_recr2_sh_set(payload, true); + mlxsw_reg_recr2_seed_set(payload, seed); +} + /* RMFT-V2 - Router Multicast Forwarding Table Version 2 Register * -------------------------------------------------------------- * The RMFT_V2 register is used to configure and query the multicast table. @@ -7313,6 +7444,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(raleu), MLXSW_REG(rauhtd), MLXSW_REG(rigr2), + MLXSW_REG(recr2), MLXSW_REG(rmft2), MLXSW_REG(mfcr), MLXSW_REG(mfsc), diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 3f4be9556e56..52f38b480669 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -4574,10 +4574,6 @@ static struct notifier_block mlxsw_sp_inet6addr_nb __read_mostly = { .notifier_call = mlxsw_sp_inet6addr_event, }; -static struct notifier_block mlxsw_sp_router_netevent_nb __read_mostly = { - .notifier_call = mlxsw_sp_router_netevent_event, -}; - static const struct pci_device_id mlxsw_sp_pci_id_table[] = { {PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SPECTRUM), 0}, {0, }, @@ -4596,7 +4592,6 @@ static int __init mlxsw_sp_module_init(void) register_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); register_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb); register_inet6addr_notifier(&mlxsw_sp_inet6addr_nb); - register_netevent_notifier(&mlxsw_sp_router_netevent_nb); err = mlxsw_core_driver_register(&mlxsw_sp_driver); if (err) @@ -4611,7 +4606,6 @@ static int __init mlxsw_sp_module_init(void) err_pci_driver_register: mlxsw_core_driver_unregister(&mlxsw_sp_driver); err_core_driver_register: - unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb); unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb); unregister_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb); unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); @@ -4623,7 +4617,6 @@ static void __exit mlxsw_sp_module_exit(void) { mlxsw_pci_driver_unregister(&mlxsw_sp_pci_driver); mlxsw_core_driver_unregister(&mlxsw_sp_driver); - unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb); unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb); unregister_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb); unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index aa0cefb25e18..b2393bb8cef9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -385,8 +385,6 @@ static inline void mlxsw_sp_port_dcb_fini(struct mlxsw_sp_port *mlxsw_sp_port) /* spectrum_router.c */ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp); -int mlxsw_sp_router_netevent_event(struct notifier_block *unused, - unsigned long event, void *ptr); int mlxsw_sp_netdevice_router_port_event(struct net_device *dev); int mlxsw_sp_inetaddr_event(struct notifier_block *unused, unsigned long event, void *ptr); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 9fe4cdb23189..d657f01f2d79 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -47,6 +47,7 @@ #include <linux/socket.h> #include <linux/route.h> #include <linux/gcd.h> +#include <linux/random.h> #include <net/netevent.h> #include <net/neighbour.h> #include <net/arp.h> @@ -96,6 +97,7 @@ struct mlxsw_sp_router { struct list_head ipip_list; bool aborted; struct notifier_block fib_nb; + struct notifier_block netevent_nb; const struct mlxsw_sp_rif_ops **rif_ops_arr; const struct mlxsw_sp_ipip_ops **ipip_ops_arr; }; @@ -2025,7 +2027,7 @@ mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true); } -struct mlxsw_sp_neigh_event_work { +struct mlxsw_sp_netevent_work { struct work_struct work; struct mlxsw_sp *mlxsw_sp; struct neighbour *n; @@ -2033,11 +2035,11 @@ struct mlxsw_sp_neigh_event_work { static void mlxsw_sp_router_neigh_event_work(struct work_struct *work) { - struct mlxsw_sp_neigh_event_work *neigh_work = - container_of(work, struct mlxsw_sp_neigh_event_work, work); - struct mlxsw_sp *mlxsw_sp = neigh_work->mlxsw_sp; + struct mlxsw_sp_netevent_work *net_work = + container_of(work, struct mlxsw_sp_netevent_work, work); + struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp; struct mlxsw_sp_neigh_entry *neigh_entry; - struct neighbour *n = neigh_work->n; + struct neighbour *n = net_work->n; unsigned char ha[ETH_ALEN]; bool entry_connected; u8 nud_state, dead; @@ -2073,18 +2075,32 @@ static void mlxsw_sp_router_neigh_event_work(struct work_struct *work) out: rtnl_unlock(); neigh_release(n); - kfree(neigh_work); + kfree(net_work); } -int mlxsw_sp_router_netevent_event(struct notifier_block *unused, - unsigned long event, void *ptr) +static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp); + +static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work) +{ + struct mlxsw_sp_netevent_work *net_work = + container_of(work, struct mlxsw_sp_netevent_work, work); + struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp; + + mlxsw_sp_mp_hash_init(mlxsw_sp); + kfree(net_work); +} + +static int mlxsw_sp_router_netevent_event(struct notifier_block *nb, + unsigned long event, void *ptr) { - struct mlxsw_sp_neigh_event_work *neigh_work; + struct mlxsw_sp_netevent_work *net_work; struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_sp_router *router; struct mlxsw_sp *mlxsw_sp; unsigned long interval; struct neigh_parms *p; struct neighbour *n; + struct net *net; switch (event) { case NETEVENT_DELAY_PROBE_TIME_UPDATE: @@ -2118,24 +2134,39 @@ int mlxsw_sp_router_netevent_event(struct notifier_block *unused, if (!mlxsw_sp_port) return NOTIFY_DONE; - neigh_work = kzalloc(sizeof(*neigh_work), GFP_ATOMIC); - if (!neigh_work) { + net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC); + if (!net_work) { mlxsw_sp_port_dev_put(mlxsw_sp_port); return NOTIFY_BAD; } - INIT_WORK(&neigh_work->work, mlxsw_sp_router_neigh_event_work); - neigh_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - neigh_work->n = n; + INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work); + net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + net_work->n = n; /* Take a reference to ensure the neighbour won't be * destructed until we drop the reference in delayed * work. */ neigh_clone(n); - mlxsw_core_schedule_work(&neigh_work->work); + mlxsw_core_schedule_work(&net_work->work); mlxsw_sp_port_dev_put(mlxsw_sp_port); break; + case NETEVENT_MULTIPATH_HASH_UPDATE: + net = ptr; + + if (!net_eq(net, &init_net)) + return NOTIFY_DONE; + + net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC); + if (!net_work) + return NOTIFY_BAD; + + router = container_of(nb, struct mlxsw_sp_router, netevent_nb); + INIT_WORK(&net_work->work, mlxsw_sp_router_mp_hash_event_work); + net_work->mlxsw_sp = router->mlxsw_sp; + mlxsw_core_schedule_work(&net_work->work); + break; } return NOTIFY_DONE; @@ -6643,6 +6674,64 @@ static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb) mlxsw_sp_router_fib_flush(router->mlxsw_sp); } +#ifdef CONFIG_IP_ROUTE_MULTIPATH +static void mlxsw_sp_mp_hash_header_set(char *recr2_pl, int header) +{ + mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, header, true); +} + +static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field) +{ + mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true); +} + +static void mlxsw_sp_mp4_hash_init(char *recr2_pl) +{ + bool only_l3 = !init_net.ipv4.sysctl_fib_multipath_hash_policy; + + mlxsw_sp_mp_hash_header_set(recr2_pl, + MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP); + mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP); + mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl); + mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl); + if (only_l3) + return; + mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV4); + mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV4_PROTOCOL); + mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT); + mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT); +} + +static void mlxsw_sp_mp6_hash_init(char *recr2_pl) +{ + mlxsw_sp_mp_hash_header_set(recr2_pl, + MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP); + mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP); + mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl); + mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl); + mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_FLOW_LABEL); + mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER); +} + +static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp) +{ + char recr2_pl[MLXSW_REG_RECR2_LEN]; + u32 seed; + + get_random_bytes(&seed, sizeof(seed)); + mlxsw_reg_recr2_pack(recr2_pl, seed); + mlxsw_sp_mp4_hash_init(recr2_pl); + mlxsw_sp_mp6_hash_init(recr2_pl); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl); +} +#else +static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp) +{ + return 0; +} +#endif + static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) { char rgcr_pl[MLXSW_REG_RGCR_LEN]; @@ -6720,6 +6809,16 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) if (err) goto err_neigh_init; + mlxsw_sp->router->netevent_nb.notifier_call = + mlxsw_sp_router_netevent_event; + err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb); + if (err) + goto err_register_netevent_notifier; + + err = mlxsw_sp_mp_hash_init(mlxsw_sp); + if (err) + goto err_mp_hash_init; + mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event; err = register_fib_notifier(&mlxsw_sp->router->fib_nb, mlxsw_sp_router_fib_dump_flush); @@ -6729,6 +6828,9 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) return 0; err_register_fib_notifier: +err_mp_hash_init: + unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb); +err_register_netevent_notifier: mlxsw_sp_neigh_fini(mlxsw_sp); err_neigh_init: mlxsw_sp_vrs_fini(mlxsw_sp); @@ -6754,6 +6856,7 @@ err_router_init: void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) { unregister_fib_notifier(&mlxsw_sp->router->fib_nb); + unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb); mlxsw_sp_neigh_fini(mlxsw_sp); mlxsw_sp_vrs_fini(mlxsw_sp); mlxsw_sp_mr_fini(mlxsw_sp); diff --git a/include/net/netevent.h b/include/net/netevent.h index f440df172b56..e3f0e8f2f6e8 100644 --- a/include/net/netevent.h +++ b/include/net/netevent.h @@ -25,6 +25,7 @@ enum netevent_notif_type { NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */ + NETEVENT_MULTIPATH_HASH_UPDATE, /* arg is struct net ptr */ }; int register_netevent_notifier(struct notifier_block *nb); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 4602af6d5358..8dcc2b185fcc 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -25,6 +25,7 @@ #include <net/inet_frag.h> #include <net/ping.h> #include <net/protocol.h> +#include <net/netevent.h> static int zero; static int one = 1; @@ -385,6 +386,23 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl, return ret; } +#ifdef CONFIG_IP_ROUTE_MULTIPATH +static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + struct net *net = container_of(table->data, struct net, + ipv4.sysctl_fib_multipath_hash_policy); + int ret; + + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (write && ret == 0) + call_netevent_notifiers(NETEVENT_MULTIPATH_HASH_UPDATE, net); + + return ret; +} +#endif + static struct ctl_table ipv4_table[] = { { .procname = "tcp_max_orphans", @@ -907,7 +925,7 @@ static struct ctl_table ipv4_net_table[] = { .data = &init_net.ipv4.sysctl_fib_multipath_hash_policy, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_fib_multipath_hash_policy, .extra1 = &zero, .extra2 = &one, }, |