From 09e05c3f78e9e82bda5958eb95bbf719f7a0ed6b Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Wed, 10 Sep 2014 16:41:56 +0300 Subject: net/mlx4: Set vlan stripping policy by the right command Changing the vlan stripping policy of the QP isn't supported by older firmware versions for the INIT2RTR command. Nevertheless, we've used it. Fix that by doing this policy change using INIT2RTR only if the firmware supports it, otherwise, we call UPDATE_QP command to do the task. Fixes: 7677fc9 ('net/mlx4: Strengthen VLAN tags/priorities enforcement in VST mode') Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 1 + include/linux/mlx4/qp.h | 12 ++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 511c6e0d21a9..a5b7d7cfcedf 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -209,6 +209,7 @@ enum { MLX4_BMME_FLAG_TYPE_2_WIN = 1 << 9, MLX4_BMME_FLAG_RESERVED_LKEY = 1 << 10, MLX4_BMME_FLAG_FAST_REG_WR = 1 << 11, + MLX4_BMME_FLAG_VSD_INIT2RTR = 1 << 28, }; enum mlx4_event { diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 7040dc98ff8b..5f4e36cf0091 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -56,7 +56,8 @@ enum mlx4_qp_optpar { MLX4_QP_OPTPAR_RNR_RETRY = 1 << 13, MLX4_QP_OPTPAR_ACK_TIMEOUT = 1 << 14, MLX4_QP_OPTPAR_SCHED_QUEUE = 1 << 16, - MLX4_QP_OPTPAR_COUNTER_INDEX = 1 << 20 + MLX4_QP_OPTPAR_COUNTER_INDEX = 1 << 20, + MLX4_QP_OPTPAR_VLAN_STRIPPING = 1 << 21, }; enum mlx4_qp_state { @@ -423,13 +424,20 @@ struct mlx4_wqe_inline_seg { enum mlx4_update_qp_attr { MLX4_UPDATE_QP_SMAC = 1 << 0, + MLX4_UPDATE_QP_VSD = 1 << 2, + MLX4_UPDATE_QP_SUPPORTED_ATTRS = (1 << 2) - 1 +}; + +enum mlx4_update_qp_params_flags { + MLX4_UPDATE_QP_PARAMS_FLAGS_VSD_ENABLE = 1 << 0, }; struct mlx4_update_qp_params { u8 smac_index; + u32 flags; }; -int mlx4_update_qp(struct mlx4_dev *dev, struct mlx4_qp *qp, +int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn, enum mlx4_update_qp_attr attr, struct mlx4_update_qp_params *params); int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, -- cgit v1.2.3-70-g09d2 From 381f4dca48d23e155b936b86ccd3ff12f073cf0f Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 10 Sep 2014 23:23:02 +0200 Subject: ipv6: clean up anycast when an interface is destroyed If we try to rmmod the driver for an interface while sockets with setsockopt(JOIN_ANYCAST) are alive, some refcounts aren't cleaned up and we get stuck on: unregister_netdevice: waiting for ens3 to become free. Usage count = 1 If we LEAVE_ANYCAST/close everything before rmmod'ing, there is no problem. We need to perform a cleanup similar to the one for multicast in addrconf_ifdown(how == 1). Signed-off-by: Sabrina Dubroca Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/addrconf.h | 1 + net/ipv6/addrconf.c | 8 +++++--- net/ipv6/anycast.c | 21 +++++++++++++++++++++ 3 files changed, 27 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index f679877bb601..ec51e673b4b6 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -204,6 +204,7 @@ void ipv6_sock_ac_close(struct sock *sk); int ipv6_dev_ac_inc(struct net_device *dev, const struct in6_addr *addr); int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr); +void ipv6_ac_destroy_dev(struct inet6_dev *idev); bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, const struct in6_addr *addr); bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index fc1fac2a0528..3342ee64f2e3 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3094,11 +3094,13 @@ static int addrconf_ifdown(struct net_device *dev, int how) write_unlock_bh(&idev->lock); - /* Step 5: Discard multicast list */ - if (how) + /* Step 5: Discard anycast and multicast list */ + if (how) { + ipv6_ac_destroy_dev(idev); ipv6_mc_destroy_dev(idev); - else + } else { ipv6_mc_down(idev); + } idev->tstamp = jiffies; diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index ff2de7d9d8e6..9a386842fd62 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -351,6 +351,27 @@ static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr) return __ipv6_dev_ac_dec(idev, addr); } +void ipv6_ac_destroy_dev(struct inet6_dev *idev) +{ + struct ifacaddr6 *aca; + + write_lock_bh(&idev->lock); + while ((aca = idev->ac_list) != NULL) { + idev->ac_list = aca->aca_next; + write_unlock_bh(&idev->lock); + + addrconf_leave_solict(idev, &aca->aca_addr); + + dst_hold(&aca->aca_rt->dst); + ip6_del_rt(aca->aca_rt); + + aca_put(aca); + + write_lock_bh(&idev->lock); + } + write_unlock_bh(&idev->lock); +} + /* * check if the interface has this anycast address * called with rcu_read_lock() -- cgit v1.2.3-70-g09d2 From f92ee61982d6da15a9e49664ecd6405a15a2ee56 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 16 Sep 2014 10:08:40 +0200 Subject: xfrm: Generate blackhole routes only from route lookup functions Currently we genarate a blackhole route route whenever we have matching policies but can not resolve the states. Here we assume that dst_output() is called to kill the balckholed packets. Unfortunately this assumption is not true in all cases, so it is possible that these packets leave the system unwanted. We fix this by generating blackhole routes only from the route lookup functions, here we can guarantee a call to dst_output() afterwards. Fixes: 2774c131b1d ("xfrm: Handle blackhole route creation via afinfo.") Reported-by: Konstantinos Kolelis Signed-off-by: Steffen Klassert --- include/net/dst.h | 15 ++++++++++++++- net/ipv4/route.c | 6 +++--- net/ipv6/ip6_output.c | 4 ++-- net/xfrm/xfrm_policy.c | 18 +++++++++++++++++- 4 files changed, 36 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index 71c60f42be48..fa11c904d219 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -490,7 +490,16 @@ static inline struct dst_entry *xfrm_lookup(struct net *net, int flags) { return dst_orig; -} +} + +static inline struct dst_entry *xfrm_lookup_route(struct net *net, + struct dst_entry *dst_orig, + const struct flowi *fl, + struct sock *sk, + int flags) +{ + return dst_orig; +} static inline struct xfrm_state *dst_xfrm(const struct dst_entry *dst) { @@ -502,6 +511,10 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, const struct flowi *fl, struct sock *sk, int flags); +struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig, + const struct flowi *fl, struct sock *sk, + int flags); + /* skb attached with this dst needs transformation if dst->xfrm is valid */ static inline struct xfrm_state *dst_xfrm(const struct dst_entry *dst) { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index eaa4b000c7b4..173e7ea54c70 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2265,9 +2265,9 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, return rt; if (flp4->flowi4_proto) - rt = (struct rtable *) xfrm_lookup(net, &rt->dst, - flowi4_to_flowi(flp4), - sk, 0); + rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst, + flowi4_to_flowi(flp4), + sk, 0); return rt; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 315a55d66079..0a3448b2888f 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1009,7 +1009,7 @@ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, if (final_dst) fl6->daddr = *final_dst; - return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); + return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); } EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); @@ -1041,7 +1041,7 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, if (final_dst) fl6->daddr = *final_dst; - return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); + return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); } EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index beeed602aeb3..7505674c9faa 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2138,7 +2138,7 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, xfrm_pols_put(pols, drop_pols); XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); - return make_blackhole(net, family, dst_orig); + return ERR_PTR(-EREMOTE); } err = -EAGAIN; @@ -2195,6 +2195,22 @@ dropdst: } EXPORT_SYMBOL(xfrm_lookup); +/* Callers of xfrm_lookup_route() must ensure a call to dst_output(). + * Otherwise we may send out blackholed packets. + */ +struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig, + const struct flowi *fl, + struct sock *sk, int flags) +{ + struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk, flags); + + if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE) + return make_blackhole(net, dst_orig->ops->family, dst_orig); + + return dst; +} +EXPORT_SYMBOL(xfrm_lookup_route); + static inline int xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl) { -- cgit v1.2.3-70-g09d2 From b8c203b2d2fc961bafd53b41d5396bbcdec55998 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 16 Sep 2014 10:08:49 +0200 Subject: xfrm: Generate queueing routes only from route lookup functions Currently we genarate a queueing route if we have matching policies but can not resolve the states and the sysctl xfrm_larval_drop is disabled. Here we assume that dst_output() is called to kill the queued packets. Unfortunately this assumption is not true in all cases, so it is possible that these packets leave the system unwanted. We fix this by generating queueing routes only from the route lookup functions, here we can guarantee a call to dst_output() afterwards. Fixes: a0073fe18e71 ("xfrm: Add a state resolution packet queue") Reported-by: Konstantinos Kolelis Signed-off-by: Steffen Klassert --- include/net/dst.h | 1 + net/xfrm/xfrm_policy.c | 32 ++++++++++++++++++++++++-------- 2 files changed, 25 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index fa11c904d219..a8ae4e760778 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -480,6 +480,7 @@ void dst_init(void); /* Flags for xfrm_lookup flags argument. */ enum { XFRM_LOOKUP_ICMP = 1 << 0, + XFRM_LOOKUP_QUEUE = 1 << 1, }; struct flowi; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 7505674c9faa..fdde51f4271a 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -39,6 +39,11 @@ #define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ)) #define XFRM_MAX_QUEUE_LEN 100 +struct xfrm_flo { + struct dst_entry *dst_orig; + u8 flags; +}; + static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock); static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO] __read_mostly; @@ -1877,13 +1882,14 @@ static int xdst_queue_output(struct sock *sk, struct sk_buff *skb) } static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net, - struct dst_entry *dst, + struct xfrm_flo *xflo, const struct flowi *fl, int num_xfrms, u16 family) { int err; struct net_device *dev; + struct dst_entry *dst; struct dst_entry *dst1; struct xfrm_dst *xdst; @@ -1891,9 +1897,12 @@ static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net, if (IS_ERR(xdst)) return xdst; - if (net->xfrm.sysctl_larval_drop || num_xfrms <= 0) + if (!(xflo->flags & XFRM_LOOKUP_QUEUE) || + net->xfrm.sysctl_larval_drop || + num_xfrms <= 0) return xdst; + dst = xflo->dst_orig; dst1 = &xdst->u.dst; dst_hold(dst); xdst->route = dst; @@ -1935,7 +1944,7 @@ static struct flow_cache_object * xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct flow_cache_object *oldflo, void *ctx) { - struct dst_entry *dst_orig = (struct dst_entry *)ctx; + struct xfrm_flo *xflo = (struct xfrm_flo *)ctx; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; struct xfrm_dst *xdst, *new_xdst; int num_pols = 0, num_xfrms = 0, i, err, pol_dead; @@ -1976,7 +1985,8 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, goto make_dummy_bundle; } - new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, dst_orig); + new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, + xflo->dst_orig); if (IS_ERR(new_xdst)) { err = PTR_ERR(new_xdst); if (err != -EAGAIN) @@ -2010,7 +2020,7 @@ make_dummy_bundle: /* We found policies, but there's no bundles to instantiate: * either because the policy blocks, has no transformations or * we could not build template (no xfrm_states).*/ - xdst = xfrm_create_dummy_bundle(net, dst_orig, fl, num_xfrms, family); + xdst = xfrm_create_dummy_bundle(net, xflo, fl, num_xfrms, family); if (IS_ERR(xdst)) { xfrm_pols_put(pols, num_pols); return ERR_CAST(xdst); @@ -2104,13 +2114,18 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, } if (xdst == NULL) { + struct xfrm_flo xflo; + + xflo.dst_orig = dst_orig; + xflo.flags = flags; + /* To accelerate a bit... */ if ((dst_orig->flags & DST_NOXFRM) || !net->xfrm.policy_count[XFRM_POLICY_OUT]) goto nopol; flo = flow_cache_lookup(net, fl, family, dir, - xfrm_bundle_lookup, dst_orig); + xfrm_bundle_lookup, &xflo); if (flo == NULL) goto nopol; if (IS_ERR(flo)) { @@ -2202,7 +2217,8 @@ struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig, const struct flowi *fl, struct sock *sk, int flags) { - struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk, flags); + struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk, + flags | XFRM_LOOKUP_QUEUE); if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE) return make_blackhole(net, dst_orig->ops->family, dst_orig); @@ -2476,7 +2492,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) skb_dst_force(skb); - dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, 0); + dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, XFRM_LOOKUP_QUEUE); if (IS_ERR(dst)) { res = 0; dst = NULL; -- cgit v1.2.3-70-g09d2 From 0d566379c5e15a2922dc6bb2ee6a4b7f7a3a0786 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 18 Sep 2014 10:31:03 +0200 Subject: genetlink: add function genl_has_listeners() This function is the counterpart of the function netlink_has_listeners(). Signed-off-by: Nicolas Dichtel Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/net/genetlink.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 93695f0e22a5..af10c2cf8a1d 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -394,4 +394,12 @@ static inline int genl_set_err(struct genl_family *family, struct net *net, return netlink_set_err(net->genl_sock, portid, group, code); } +static inline int genl_has_listeners(struct genl_family *family, + struct sock *sk, unsigned int group) +{ + if (WARN_ON_ONCE(group >= family->n_mcgrps)) + return -EINVAL; + group = family->mcgrp_offset + group; + return netlink_has_listeners(sk, group); +} #endif /* __NET_GENERIC_NETLINK_H */ -- cgit v1.2.3-70-g09d2 From 257117862634d89de33fec74858b1a0ba5ab444b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Sep 2014 08:02:05 -0700 Subject: net: sched: shrink struct qdisc_skb_cb to 28 bytes We cannot make struct qdisc_skb_cb bigger without impacting IPoIB, or increasing skb->cb[] size. Commit e0f31d849867 ("flow_keys: Record IP layer protocol in skb_flow_dissect()") broke IPoIB. Only current offender is sch_choke, and this one do not need an absolutely precise flow key. If we store 17 bytes of flow key, its more than enough. (Its the actual size of flow_keys if it was a packed structure, but we might add new fields at the end of it later) Signed-off-by: Eric Dumazet Fixes: e0f31d849867 ("flow_keys: Record IP layer protocol in skb_flow_dissect()") Signed-off-by: David S. Miller --- include/net/sch_generic.h | 3 ++- net/sched/sch_choke.c | 18 ++++++++++++++---- 2 files changed, 16 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index a3cfb8ebeb53..620e086c0cbe 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -231,7 +231,8 @@ struct qdisc_skb_cb { unsigned int pkt_len; u16 slave_dev_queue_mapping; u16 _pad; - unsigned char data[24]; +#define QDISC_CB_PRIV_LEN 20 + unsigned char data[QDISC_CB_PRIV_LEN]; }; static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index ed30e436128b..fb666d1e4de3 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -133,10 +133,16 @@ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx) --sch->q.qlen; } +/* private part of skb->cb[] that a qdisc is allowed to use + * is limited to QDISC_CB_PRIV_LEN bytes. + * As a flow key might be too large, we store a part of it only. + */ +#define CHOKE_K_LEN min_t(u32, sizeof(struct flow_keys), QDISC_CB_PRIV_LEN - 3) + struct choke_skb_cb { u16 classid; u8 keys_valid; - struct flow_keys keys; + u8 keys[QDISC_CB_PRIV_LEN - 3]; }; static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb) @@ -163,22 +169,26 @@ static u16 choke_get_classid(const struct sk_buff *skb) static bool choke_match_flow(struct sk_buff *skb1, struct sk_buff *skb2) { + struct flow_keys temp; + if (skb1->protocol != skb2->protocol) return false; if (!choke_skb_cb(skb1)->keys_valid) { choke_skb_cb(skb1)->keys_valid = 1; - skb_flow_dissect(skb1, &choke_skb_cb(skb1)->keys); + skb_flow_dissect(skb1, &temp); + memcpy(&choke_skb_cb(skb1)->keys, &temp, CHOKE_K_LEN); } if (!choke_skb_cb(skb2)->keys_valid) { choke_skb_cb(skb2)->keys_valid = 1; - skb_flow_dissect(skb2, &choke_skb_cb(skb2)->keys); + skb_flow_dissect(skb2, &temp); + memcpy(&choke_skb_cb(skb2)->keys, &temp, CHOKE_K_LEN); } return !memcmp(&choke_skb_cb(skb1)->keys, &choke_skb_cb(skb2)->keys, - sizeof(struct flow_keys)); + CHOKE_K_LEN); } /* -- cgit v1.2.3-70-g09d2