From fcd13f42c9d6ab7b1024b9b7125a2e8db3cc00b2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 24 Mar 2011 07:01:24 +0000 Subject: ipv4: fix fib metrics Alessandro Suardi reported that we could not change route metrics : ip ro change default .... advmss 1400 This regression came with commit 9c150e82ac50 (Allocate fib metrics dynamically). fib_metrics is no longer an array, but a pointer to an array. Reported-by: Alessandro Suardi Signed-off-by: Eric Dumazet Tested-by: Alessandro Suardi Signed-off-by: David S. Miller --- net/ipv4/fib_semantics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4/fib_semantics.c') diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 622ac4c95026..75b9fb5d5d37 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -251,7 +251,7 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi) nfi->fib_prefsrc == fi->fib_prefsrc && nfi->fib_priority == fi->fib_priority && memcmp(nfi->fib_metrics, fi->fib_metrics, - sizeof(fi->fib_metrics)) == 0 && + sizeof(u32) * RTAX_MAX) == 0 && ((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_F_DEAD) == 0 && (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0)) return fi; -- cgit v1.2.3-70-g09d2 From 436c3b66ec9824a633724ae42de1c416af4f2063 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Mar 2011 17:42:21 -0700 Subject: ipv4: Invalidate nexthop cache nh_saddr more correctly. Any operation that: 1) Brings up an interface 2) Adds an IP address to an interface 3) Deletes an IP address from an interface can potentially invalidate the nh_saddr value, requiring it to be recomputed. Perform the recomputation lazily using a generation ID. Reported-by: Julian Anastasov Signed-off-by: David S. Miller --- include/net/ip_fib.h | 12 ++++++++++-- include/net/netns/ipv4.h | 1 + net/ipv4/fib_frontend.c | 11 +++++++---- net/ipv4/fib_semantics.c | 32 +++++++++++--------------------- net/ipv4/route.c | 6 ++++-- 5 files changed, 33 insertions(+), 29 deletions(-) (limited to 'net/ipv4/fib_semantics.c') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index a1a858035913..cd92b923a578 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -62,6 +62,7 @@ struct fib_nh { int nh_oif; __be32 nh_gw; __be32 nh_saddr; + int nh_saddr_genid; }; /* @@ -141,12 +142,19 @@ struct fib_result_nl { #endif /* CONFIG_IP_ROUTE_MULTIPATH */ -#define FIB_RES_SADDR(res) (FIB_RES_NH(res).nh_saddr) +extern __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh); + +#define FIB_RES_SADDR(net, res) \ + ((FIB_RES_NH(res).nh_saddr_genid == \ + atomic_read(&(net)->ipv4.dev_addr_genid)) ? \ + FIB_RES_NH(res).nh_saddr : \ + fib_info_update_nh_saddr((net), &FIB_RES_NH(res))) #define FIB_RES_GW(res) (FIB_RES_NH(res).nh_gw) #define FIB_RES_DEV(res) (FIB_RES_NH(res).nh_dev) #define FIB_RES_OIF(res) (FIB_RES_NH(res).nh_oif) -#define FIB_RES_PREFSRC(res) ((res).fi->fib_prefsrc ? : FIB_RES_SADDR(res)) +#define FIB_RES_PREFSRC(net, res) ((res).fi->fib_prefsrc ? : \ + FIB_RES_SADDR(net, res)) struct fib_table { struct hlist_node tb_hlist; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index e2e2ef57eca2..542195d9469e 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -55,6 +55,7 @@ struct netns_ipv4 { int current_rt_cache_rebuild_count; atomic_t rt_genid; + atomic_t dev_addr_genid; #ifdef CONFIG_IP_MROUTE #ifndef CONFIG_IP_MROUTE_MULTIPLE_TABLES diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 02c3ba61884a..f116ce8f1b46 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -228,7 +228,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, if (res.type != RTN_LOCAL || !accept_local) goto e_inval; } - *spec_dst = FIB_RES_PREFSRC(res); + *spec_dst = FIB_RES_PREFSRC(net, res); fib_combine_itag(itag, &res); dev_match = false; @@ -258,7 +258,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, ret = 0; if (fib_lookup(net, &fl4, &res) == 0) { if (res.type == RTN_UNICAST) { - *spec_dst = FIB_RES_PREFSRC(res); + *spec_dst = FIB_RES_PREFSRC(net, res); ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; } } @@ -960,6 +960,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, { struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; struct net_device *dev = ifa->ifa_dev->dev; + struct net *net = dev_net(dev); switch (event) { case NETDEV_UP: @@ -967,12 +968,12 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, #ifdef CONFIG_IP_ROUTE_MULTIPATH fib_sync_up(dev); #endif - fib_update_nh_saddrs(dev); + atomic_inc(&net->ipv4.dev_addr_genid); rt_cache_flush(dev_net(dev), -1); break; case NETDEV_DOWN: fib_del_ifaddr(ifa, NULL); - fib_update_nh_saddrs(dev); + atomic_inc(&net->ipv4.dev_addr_genid); if (ifa->ifa_dev->ifa_list == NULL) { /* Last address was deleted from this interface. * Disable IP. @@ -990,6 +991,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo { struct net_device *dev = ptr; struct in_device *in_dev = __in_dev_get_rtnl(dev); + struct net *net = dev_net(dev); if (event == NETDEV_UNREGISTER) { fib_disable_ip(dev, 2, -1); @@ -1007,6 +1009,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo #ifdef CONFIG_IP_ROUTE_MULTIPATH fib_sync_up(dev); #endif + atomic_inc(&net->ipv4.dev_addr_genid); rt_cache_flush(dev_net(dev), -1); break; case NETDEV_DOWN: diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 75b9fb5d5d37..2d4bebca671a 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -695,6 +695,16 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash, fib_info_hash_free(old_laddrhash, bytes); } +__be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh) +{ + nh->nh_saddr = inet_select_addr(nh->nh_dev, + nh->nh_gw, + nh->nh_cfg_scope); + nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid); + + return nh->nh_saddr; +} + struct fib_info *fib_create_info(struct fib_config *cfg) { int err; @@ -855,9 +865,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) change_nexthops(fi) { nexthop_nh->nh_cfg_scope = cfg->fc_scope; - nexthop_nh->nh_saddr = inet_select_addr(nexthop_nh->nh_dev, - nexthop_nh->nh_gw, - nexthop_nh->nh_cfg_scope); + fib_info_update_nh_saddr(net, nexthop_nh); } endfor_nexthops(fi) link_it: @@ -1128,24 +1136,6 @@ out: return; } -void fib_update_nh_saddrs(struct net_device *dev) -{ - struct hlist_head *head; - struct hlist_node *node; - struct fib_nh *nh; - unsigned int hash; - - hash = fib_devindex_hashfn(dev->ifindex); - head = &fib_info_devhash[hash]; - hlist_for_each_entry(nh, node, head, nh_hash) { - if (nh->nh_dev != dev) - continue; - nh->nh_saddr = inet_select_addr(nh->nh_dev, - nh->nh_gw, - nh->nh_cfg_scope); - } -} - #ifdef CONFIG_IP_ROUTE_MULTIPATH /* diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 34921b0d3f8e..4b0c81180804 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1718,7 +1718,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) rcu_read_lock(); if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0) - src = FIB_RES_PREFSRC(res); + src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res); else src = inet_select_addr(rt->dst.dev, rt->rt_gateway, RT_SCOPE_UNIVERSE); @@ -2615,7 +2615,7 @@ static struct rtable *ip_route_output_slow(struct net *net, fib_select_default(&res); if (!fl4.saddr) - fl4.saddr = FIB_RES_PREFSRC(res); + fl4.saddr = FIB_RES_PREFSRC(net, res); dev_out = FIB_RES_DEV(res); fl4.flowi4_oif = dev_out->ifindex; @@ -3219,6 +3219,8 @@ static __net_init int rt_genid_init(struct net *net) { get_random_bytes(&net->ipv4.rt_genid, sizeof(net->ipv4.rt_genid)); + get_random_bytes(&net->ipv4.dev_addr_genid, + sizeof(net->ipv4.dev_addr_genid)); return 0; } -- cgit v1.2.3-70-g09d2 From 37e826c513883099c298317bad1b3b677b2905fb Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Mar 2011 18:06:47 -0700 Subject: ipv4: Fix nexthop caching wrt. scoping. Move the scope value out of the fib alias entries and into fib_info, so that we always use the correct scope when recomputing the nexthop cached source address. Reported-by: Julian Anastasov Signed-off-by: David S. Miller --- include/net/ip_fib.h | 6 +++--- net/ipv4/fib_lookup.h | 3 +-- net/ipv4/fib_semantics.c | 15 ++++++++------- net/ipv4/fib_trie.c | 12 ++++-------- 4 files changed, 16 insertions(+), 20 deletions(-) (limited to 'net/ipv4/fib_semantics.c') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index cd92b923a578..e5d66ec88cf6 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -51,7 +51,6 @@ struct fib_nh { struct fib_info *nh_parent; unsigned nh_flags; unsigned char nh_scope; - unsigned char nh_cfg_scope; #ifdef CONFIG_IP_ROUTE_MULTIPATH int nh_weight; int nh_power; @@ -75,9 +74,10 @@ struct fib_info { struct net *fib_net; int fib_treeref; atomic_t fib_clntref; - int fib_dead; unsigned fib_flags; - int fib_protocol; + unsigned char fib_dead; + unsigned char fib_protocol; + unsigned char fib_scope; __be32 fib_prefsrc; u32 fib_priority; u32 *fib_metrics; diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index 4ec323875a02..af0f14aba169 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -10,7 +10,6 @@ struct fib_alias { struct fib_info *fa_info; u8 fa_tos; u8 fa_type; - u8 fa_scope; u8 fa_state; struct rcu_head rcu; }; @@ -29,7 +28,7 @@ extern void fib_release_info(struct fib_info *); extern struct fib_info *fib_create_info(struct fib_config *cfg); extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi); extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, - u32 tb_id, u8 type, u8 scope, __be32 dst, + u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos, struct fib_info *fi, unsigned int); extern void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 2d4bebca671a..641a5a2a9f9c 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -222,7 +222,7 @@ static inline unsigned int fib_info_hashfn(const struct fib_info *fi) unsigned int mask = (fib_info_hash_size - 1); unsigned int val = fi->fib_nhs; - val ^= fi->fib_protocol; + val ^= (fi->fib_protocol << 8) | fi->fib_scope; val ^= (__force u32)fi->fib_prefsrc; val ^= fi->fib_priority; for_nexthops(fi) { @@ -248,6 +248,7 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi) if (fi->fib_nhs != nfi->fib_nhs) continue; if (nfi->fib_protocol == fi->fib_protocol && + nfi->fib_scope == fi->fib_scope && nfi->fib_prefsrc == fi->fib_prefsrc && nfi->fib_priority == fi->fib_priority && memcmp(nfi->fib_metrics, fi->fib_metrics, @@ -328,7 +329,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, goto errout; err = fib_dump_info(skb, info->pid, seq, event, tb_id, - fa->fa_type, fa->fa_scope, key, dst_len, + fa->fa_type, key, dst_len, fa->fa_tos, fa->fa_info, nlm_flags); if (err < 0) { /* -EMSGSIZE implies BUG in fib_nlmsg_size() */ @@ -699,7 +700,7 @@ __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh) { nh->nh_saddr = inet_select_addr(nh->nh_dev, nh->nh_gw, - nh->nh_cfg_scope); + nh->nh_parent->fib_scope); nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid); return nh->nh_saddr; @@ -763,6 +764,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) fi->fib_net = hold_net(net); fi->fib_protocol = cfg->fc_protocol; + fi->fib_scope = cfg->fc_scope; fi->fib_flags = cfg->fc_flags; fi->fib_priority = cfg->fc_priority; fi->fib_prefsrc = cfg->fc_prefsrc; @@ -864,7 +866,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg) } change_nexthops(fi) { - nexthop_nh->nh_cfg_scope = cfg->fc_scope; fib_info_update_nh_saddr(net, nexthop_nh); } endfor_nexthops(fi) @@ -914,7 +915,7 @@ failure: } int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, - u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos, + u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos, struct fib_info *fi, unsigned int flags) { struct nlmsghdr *nlh; @@ -936,7 +937,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, NLA_PUT_U32(skb, RTA_TABLE, tb_id); rtm->rtm_type = type; rtm->rtm_flags = fi->fib_flags; - rtm->rtm_scope = scope; + rtm->rtm_scope = fi->fib_scope; rtm->rtm_protocol = fi->fib_protocol; if (rtm->rtm_dst_len) @@ -1092,7 +1093,7 @@ void fib_select_default(struct fib_result *res) list_for_each_entry_rcu(fa, fa_head, fa_list) { struct fib_info *next_fi = fa->fa_info; - if (fa->fa_scope != res->scope || + if (next_fi->fib_scope != res->scope || fa->fa_type != RTN_UNICAST) continue; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index ac87a49ad50b..90a3ff605591 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1245,7 +1245,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) if (fa->fa_info->fib_priority != fi->fib_priority) break; if (fa->fa_type == cfg->fc_type && - fa->fa_scope == cfg->fc_scope && fa->fa_info == fi) { fa_match = fa; break; @@ -1271,7 +1270,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) new_fa->fa_tos = fa->fa_tos; new_fa->fa_info = fi; new_fa->fa_type = cfg->fc_type; - new_fa->fa_scope = cfg->fc_scope; state = fa->fa_state; new_fa->fa_state = state & ~FA_S_ACCESSED; @@ -1308,7 +1306,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) new_fa->fa_info = fi; new_fa->fa_tos = tos; new_fa->fa_type = cfg->fc_type; - new_fa->fa_scope = cfg->fc_scope; new_fa->fa_state = 0; /* * Insert new entry to the list. @@ -1362,7 +1359,7 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) continue; - if (fa->fa_scope < flp->flowi4_scope) + if (fa->fa_info->fib_scope < flp->flowi4_scope) continue; fib_alias_accessed(fa); err = fib_props[fa->fa_type].error; @@ -1388,7 +1385,7 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, res->prefixlen = plen; res->nh_sel = nhsel; res->type = fa->fa_type; - res->scope = fa->fa_scope; + res->scope = fa->fa_info->fib_scope; res->fi = fi; res->table = tb; res->fa_head = &li->falh; @@ -1664,7 +1661,7 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) && (cfg->fc_scope == RT_SCOPE_NOWHERE || - fa->fa_scope == cfg->fc_scope) && + fa->fa_info->fib_scope == cfg->fc_scope) && (!cfg->fc_prefsrc || fi->fib_prefsrc == cfg->fc_prefsrc) && (!cfg->fc_protocol || @@ -1863,7 +1860,6 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, RTM_NEWROUTE, tb->tb_id, fa->fa_type, - fa->fa_scope, xkey, plen, fa->fa_tos, @@ -2384,7 +2380,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) seq_indent(seq, iter->depth+1); seq_printf(seq, " /%d %s %s", li->plen, rtn_scope(buf1, sizeof(buf1), - fa->fa_scope), + fa->fa_info->fib_scope), rtn_type(buf2, sizeof(buf2), fa->fa_type)); if (fa->fa_tos) -- cgit v1.2.3-70-g09d2