From b81f884a547b5c264c13fdfaa3b65cf994bf1dcf Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 1 Jun 2017 14:57:56 +0800 Subject: xfrm: fix xfrm_dev_event() missing when compile without CONFIG_XFRM_OFFLOAD In commit d77e38e612a0 ("xfrm: Add an IPsec hardware offloading API") we make xfrm_device.o only compiled when enable option CONFIG_XFRM_OFFLOAD. But this will make xfrm_dev_event() missing if we only enable default XFRM options. Then if we set down and unregister an interface with IPsec on it. there will no xfrm_garbage_collect(), which will cause dev usage count hold and get error like: unregister_netdevice: waiting for to become free. Usage count = 4 Fixes: d77e38e612a0 ("xfrm: Add an IPsec hardware offloading API") Signed-off-by: Hangbin Liu Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 7 ++----- net/xfrm/Makefile | 3 +-- net/xfrm/xfrm_device.c | 2 ++ 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 7e7e2b0d2915..62f5a259e597 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1850,8 +1850,9 @@ static inline struct xfrm_offload *xfrm_offload(struct sk_buff *skb) } #endif -#ifdef CONFIG_XFRM_OFFLOAD void __net_init xfrm_dev_init(void); + +#ifdef CONFIG_XFRM_OFFLOAD int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features); int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, struct xfrm_user_offload *xuo); @@ -1877,10 +1878,6 @@ static inline void xfrm_dev_state_free(struct xfrm_state *x) } } #else -static inline void __net_init xfrm_dev_init(void) -{ -} - static inline int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features) { return 0; diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index abf81b329dc1..55b2ac300995 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -4,8 +4,7 @@ obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \ xfrm_input.o xfrm_output.o \ - xfrm_sysctl.o xfrm_replay.o -obj-$(CONFIG_XFRM_OFFLOAD) += xfrm_device.o + xfrm_sysctl.o xfrm_replay.o xfrm_device.o obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o obj-$(CONFIG_XFRM_USER) += xfrm_user.o diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 574e6f32f94f..5aba03685d7d 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -22,6 +22,7 @@ #include #include +#ifdef CONFIG_XFRM_OFFLOAD int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features) { int err; @@ -137,6 +138,7 @@ ok: return true; } EXPORT_SYMBOL_GPL(xfrm_dev_offload_ok); +#endif int xfrm_dev_register(struct net_device *dev) { -- cgit v1.2.3-70-g09d2 From 138437f591dd9a42d53c6fed1a3c85e02678851c Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sun, 11 Jun 2017 09:44:20 +0800 Subject: xfrm: move xfrm_garbage_collect out of xfrm_policy_flush Now we will force to do garbage collection if any policy removed in xfrm_policy_flush(). But during xfrm_net_exit(). We call flow_cache_fini() first and set set fc->percpu to NULL. Then after we call xfrm_policy_fini() -> frxm_policy_flush() -> flow_cache_flush(), we will get NULL pointer dereference when check percpu_empty. The code path looks like: flow_cache_fini() - fc->percpu = NULL xfrm_policy_fini() - xfrm_policy_flush() - xfrm_garbage_collect() - flow_cache_flush() - flow_cache_percpu_empty() - fcp = per_cpu_ptr(fc->percpu, cpu) To reproduce, just add ipsec in netns and then remove the netns. v2: As Xin Long suggested, since only two other places need to call it. move xfrm_garbage_collect() outside xfrm_policy_flush(). v3: Fix subject mismatch after v2 fix. Fixes: 35db06912189 ("xfrm: do the garbage collection after flushing policy") Signed-off-by: Hangbin Liu Reviewed-by: Xin Long Signed-off-by: Steffen Klassert --- net/key/af_key.c | 2 ++ net/xfrm/xfrm_policy.c | 4 ---- net/xfrm/xfrm_user.c | 1 + 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/net/key/af_key.c b/net/key/af_key.c index 512dc43d0ce6..5103f92e2eb0 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2755,6 +2755,8 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad int err, err2; err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, true); + if (!err) + xfrm_garbage_collect(net); err2 = unicast_flush_resp(sk, hdr); if (err || err2) { if (err == -ESRCH) /* empty table - old silent behavior */ diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ed4e52d95172..643a18f72032 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1006,10 +1006,6 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) err = -ESRCH; out: spin_unlock_bh(&net->xfrm.xfrm_policy_lock); - - if (cnt) - xfrm_garbage_collect(net); - return err; } EXPORT_SYMBOL(xfrm_policy_flush); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 38614df33ec8..86116e9aaf3d 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2027,6 +2027,7 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, return 0; return err; } + xfrm_garbage_collect(net); c.data.type = type; c.event = nlh->nlmsg_type; -- cgit v1.2.3-70-g09d2 From 1e3d0c2c70cd3edb5deed186c5f5c75f2b84a633 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 14 Jun 2017 13:34:05 +0300 Subject: xfrm: Oops on error in pfkey_msg2xfrm_state() There are some missing error codes here so we accidentally return NULL instead of an error pointer. It results in a NULL pointer dereference. Fixes: df71837d5024 ("[LSM-IPSec]: Security association restriction.") Signed-off-by: Dan Carpenter Signed-off-by: Steffen Klassert --- net/key/af_key.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/net/key/af_key.c b/net/key/af_key.c index 5103f92e2eb0..2b82adac5d8e 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1168,8 +1168,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, if (key) keysize = (key->sadb_key_bits + 7) / 8; x->aalg = kmalloc(sizeof(*x->aalg) + keysize, GFP_KERNEL); - if (!x->aalg) + if (!x->aalg) { + err = -ENOMEM; goto out; + } strcpy(x->aalg->alg_name, a->name); x->aalg->alg_key_len = 0; if (key) { @@ -1188,8 +1190,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, goto out; } x->calg = kmalloc(sizeof(*x->calg), GFP_KERNEL); - if (!x->calg) + if (!x->calg) { + err = -ENOMEM; goto out; + } strcpy(x->calg->alg_name, a->name); x->props.calgo = sa->sadb_sa_encrypt; } else { @@ -1203,8 +1207,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, if (key) keysize = (key->sadb_key_bits + 7) / 8; x->ealg = kmalloc(sizeof(*x->ealg) + keysize, GFP_KERNEL); - if (!x->ealg) + if (!x->ealg) { + err = -ENOMEM; goto out; + } strcpy(x->ealg->alg_name, a->name); x->ealg->alg_key_len = 0; if (key) { @@ -1249,8 +1255,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, struct xfrm_encap_tmpl *natt; x->encap = kmalloc(sizeof(*x->encap), GFP_KERNEL); - if (!x->encap) + if (!x->encap) { + err = -ENOMEM; goto out; + } natt = x->encap; n_type = ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1]; -- cgit v1.2.3-70-g09d2 From e747f64336fc15e1c823344942923195b800aa1e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 14 Jun 2017 13:35:37 +0300 Subject: xfrm: NULL dereference on allocation failure The default error code in pfkey_msg2xfrm_state() is -ENOBUFS. We added a new call to security_xfrm_state_alloc() which sets "err" to zero so there several places where we can return ERR_PTR(0) if kmalloc() fails. The caller is expecting error pointers so it leads to a NULL dereference. Fixes: df71837d5024 ("[LSM-IPSec]: Security association restriction.") Signed-off-by: Dan Carpenter Signed-off-by: Steffen Klassert --- net/key/af_key.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/key/af_key.c b/net/key/af_key.c index 2b82adac5d8e..b1432b668033 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1157,6 +1157,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, goto out; } + err = -ENOBUFS; key = ext_hdrs[SADB_EXT_KEY_AUTH - 1]; if (sa->sadb_sa_auth) { int keysize = 0; -- cgit v1.2.3-70-g09d2 From 7c88e21aefcf86fb41b48b2e04528db5a30fbe18 Mon Sep 17 00:00:00 2001 From: Yossi Kuperman Date: Thu, 22 Jun 2017 11:37:10 +0300 Subject: xfrm6: Fix IPv6 payload_len in xfrm6_transport_finish IPv6 payload length indicates the size of the payload, including any extension headers. In xfrm6_transport_finish, ipv6_hdr(skb)->payload_len is set to the payload size only, regardless of the presence of any extension headers. After ESP GRO transport mode decapsulation, ipv6_rcv trims the packet according to the wrong payload_len, thus corrupting the packet. Set payload_len to account for extension headers as well. Fixes: 7785bba299a8 ("esp: Add a software GRO codepath") Signed-off-by: Yossi Kuperman Signed-off-by: Steffen Klassert --- net/ipv6/xfrm6_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 08a807b29298..3ef5d913e7a3 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -43,8 +43,8 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) return 1; #endif - ipv6_hdr(skb)->payload_len = htons(skb->len); __skb_push(skb, skb->data - skb_network_header(skb)); + ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); if (xo && (xo->flags & XFRM_GRO)) { skb_mac_header_rebuild(skb); -- cgit v1.2.3-70-g09d2 From ca3a1b856636f596c691ab5b3764045a142186db Mon Sep 17 00:00:00 2001 From: Yossi Kuperman Date: Thu, 22 Jun 2017 11:37:11 +0300 Subject: esp6_offload: Fix IP6CB(skb)->nhoff for ESP GRO IP6CB(skb)->nhoff is the offset of the nexthdr field in an IPv6 header, unless there are extension headers present, in which case nhoff points to the nexthdr field of the last extension header. In non-GRO code path, nhoff is set by ipv6_rcv before any XFRM code is executed. Conversely, in GRO code path (when esp6_offload is loaded), nhoff is not set. The following functions fail to read the correct value and eventually the packet is dropped: xfrm6_transport_finish xfrm6_tunnel_input xfrm6_rcv_tnl Set nhoff to the proper offset of nexthdr in esp6_gro_receive. Fixes: 7785bba299a8 ("esp: Add a software GRO codepath") Signed-off-by: Yossi Kuperman Signed-off-by: Steffen Klassert --- net/ipv6/esp6_offload.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index d950d43ba255..f02f131f6435 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -30,6 +30,25 @@ #include #include +static __u16 esp6_nexthdr_esp_offset(struct ipv6hdr *ipv6_hdr, int nhlen) +{ + int off = sizeof(struct ipv6hdr); + struct ipv6_opt_hdr *exthdr; + + if (likely(ipv6_hdr->nexthdr == NEXTHDR_ESP)) + return offsetof(struct ipv6hdr, nexthdr); + + while (off < nhlen) { + exthdr = (void *)ipv6_hdr + off; + if (exthdr->nexthdr == NEXTHDR_ESP) + return off; + + off += ipv6_optlen(exthdr); + } + + return 0; +} + static struct sk_buff **esp6_gro_receive(struct sk_buff **head, struct sk_buff *skb) { @@ -38,6 +57,7 @@ static struct sk_buff **esp6_gro_receive(struct sk_buff **head, struct xfrm_state *x; __be32 seq; __be32 spi; + int nhoff; int err; skb_pull(skb, offset); @@ -72,6 +92,11 @@ static struct sk_buff **esp6_gro_receive(struct sk_buff **head, xo->flags |= XFRM_GRO; + nhoff = esp6_nexthdr_esp_offset(ipv6_hdr(skb), offset); + if (!nhoff) + goto out; + + IP6CB(skb)->nhoff = nhoff; XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; XFRM_SPI_SKB_CB(skb)->family = AF_INET6; XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr); -- cgit v1.2.3-70-g09d2 From b866203d872d5deeafcecd25ea429d6748b5bd56 Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Tue, 20 Jun 2017 12:48:11 -0500 Subject: net/phy: micrel: configure intterupts after autoneg workaround The commit ("net/phy: micrel: Add workaround for bad autoneg") fixes an autoneg failure case by resetting the hardware. This turns off intterupts. Things will work themselves out if the phy polls, as it will figure out it's state during a poll. However if the phy uses only intterupts, the phy will stall, since interrupts are off. This patch fixes the issue by calling config_intr after resetting the phy. Fixes: d2fd719bcb0e ("net/phy: micrel: Add workaround for bad autoneg ") Signed-off-by: Zach Brown Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/micrel.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index b9252b8d81ff..8b2038844ba9 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -619,6 +619,8 @@ static int ksz9031_read_status(struct phy_device *phydev) if ((regval & 0xFF) == 0xFF) { phy_init_hw(phydev); phydev->link = 0; + if (phydev->drv->config_intr && phy_interrupt_is_valid(phydev)) + phydev->drv->config_intr(phydev); } return 0; -- cgit v1.2.3-70-g09d2 From 76da0704507bbc51875013f6557877ab308cfd0a Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 20 Jun 2017 11:42:27 -0700 Subject: ipv6: only call ip6_route_dev_notify() once for NETDEV_UNREGISTER In commit 242d3a49a2a1 ("ipv6: reorder ip6_route_dev_notifier after ipv6_dev_notf") I assumed NETDEV_REGISTER and NETDEV_UNREGISTER are paired, unfortunately, as reported by jeffy, netdev_wait_allrefs() could rebroadcast NETDEV_UNREGISTER event until all refs are gone. We have to add an additional check to avoid this corner case. For netdev_wait_allrefs() dev->reg_state is NETREG_UNREGISTERED, for dev_change_net_namespace(), dev->reg_state is NETREG_REGISTERED. So check for dev->reg_state != NETREG_UNREGISTERED. Fixes: 242d3a49a2a1 ("ipv6: reorder ip6_route_dev_notifier after ipv6_dev_notf") Reported-by: jeffy Cc: David Ahern Signed-off-by: Cong Wang Acked-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7cebd954d5bb..322bd62e688b 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3722,7 +3722,11 @@ static int ip6_route_dev_notify(struct notifier_block *this, net->ipv6.ip6_blk_hole_entry->dst.dev = dev; net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev); #endif - } else if (event == NETDEV_UNREGISTER) { + } else if (event == NETDEV_UNREGISTER && + dev->reg_state != NETREG_UNREGISTERED) { + /* NETDEV_UNREGISTER could be fired for multiple times by + * netdev_wait_allrefs(). Make sure we only call this once. + */ in6_dev_put(net->ipv6.ip6_null_entry->rt6i_idev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES in6_dev_put(net->ipv6.ip6_prohibit_entry->rt6i_idev); -- cgit v1.2.3-70-g09d2 From b88ff4f8c9ef0279f82bca0a83ed860b654f0f8d Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Wed, 21 Jun 2017 14:12:04 +0530 Subject: drivers: net: cpsw-common: Fix reading of mac address for am43 SoCs cpsw driver tries to get macid for am43xx SoCs using the compatible ti,am4372. But not all variants of am43x uses this complatible like epos evm uses ti,am438x. So use a generic compatible ti,am43 to get macid for all am43 based platforms. Reviewed-by: Dave Gerlach Signed-off-by: Lokesh Vutla Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw-common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/cpsw-common.c b/drivers/net/ethernet/ti/cpsw-common.c index 1562ab4151e1..56ba411421f0 100644 --- a/drivers/net/ethernet/ti/cpsw-common.c +++ b/drivers/net/ethernet/ti/cpsw-common.c @@ -90,7 +90,7 @@ int ti_cm_get_macid(struct device *dev, int slave, u8 *mac_addr) if (of_device_is_compatible(dev->of_node, "ti,dm816-emac")) return cpsw_am33xx_cm_get_macid(dev, 0x30, slave, mac_addr); - if (of_machine_is_compatible("ti,am4372")) + if (of_machine_is_compatible("ti,am43")) return cpsw_am33xx_cm_get_macid(dev, 0x630, slave, mac_addr); if (of_machine_is_compatible("ti,dra7")) -- cgit v1.2.3-70-g09d2 From 191cdb3822e5df6b3c8b9f8cb8c4bf93f6cc90c7 Mon Sep 17 00:00:00 2001 From: Serhey Popovych Date: Wed, 21 Jun 2017 12:12:24 +0300 Subject: veth: Be more robust on network device creation when no attributes There are number of problems with configuration peer network device in absence of IFLA_VETH_PEER attributes where attributes for main network device shared with peer. First it is not feasible to configure both network devices with same MAC address since this makes communication in such configuration problematic. This case can be reproduced with following sequence: # ip link add address 02:11:22:33:44:55 type veth # ip li sh ... 26: veth0@veth1: mtu 1500 qdisc \ noop state DOWN mode DEFAULT qlen 1000 link/ether 00:11:22:33:44:55 brd ff:ff:ff:ff:ff:ff 27: veth1@veth0: mtu 1500 qdisc \ noop state DOWN mode DEFAULT qlen 1000 link/ether 00:11:22:33:44:55 brd ff:ff:ff:ff:ff:ff Second it is not possible to register both main and peer network devices with same name, that happens when name for main interface is given with IFLA_IFNAME and same attribute reused for peer. This case can be reproduced with following sequence: # ip link add dev veth1a type veth RTNETLINK answers: File exists To fix both of the cases check if corresponding netlink attributes are taken from peer_tb when valid or name based on rtnl ops kind and random address is used. Signed-off-by: Serhey Popovych Signed-off-by: David S. Miller --- drivers/net/veth.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 0156fe8cac17..364fa9d11d1a 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -383,7 +383,7 @@ static int veth_newlink(struct net *src_net, struct net_device *dev, tbp = tb; } - if (tbp[IFLA_IFNAME]) { + if (ifmp && tbp[IFLA_IFNAME]) { nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ); name_assign_type = NET_NAME_USER; } else { @@ -402,7 +402,7 @@ static int veth_newlink(struct net *src_net, struct net_device *dev, return PTR_ERR(peer); } - if (tbp[IFLA_ADDRESS] == NULL) + if (!ifmp || !tbp[IFLA_ADDRESS]) eth_hw_addr_random(peer); if (ifmp && (dev->ifindex != 0)) -- cgit v1.2.3-70-g09d2 From dfa523ae9f2542bee4cddaea37b3be3e157f6e6b Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Wed, 21 Jun 2017 10:21:22 +0100 Subject: xen-netback: correctly schedule rate-limited queues Add a flag to indicate if a queue is rate-limited. Test the flag in NAPI poll handler and avoid rescheduling the queue if true, otherwise we risk locking up the host. The rescheduling will be done in the timer callback function. Reported-by: Jean-Louis Dupond Signed-off-by: Wei Liu Tested-by: Jean-Louis Dupond Reviewed-by: Paul Durrant Signed-off-by: David S. Miller --- drivers/net/xen-netback/common.h | 1 + drivers/net/xen-netback/interface.c | 6 +++++- drivers/net/xen-netback/netback.c | 6 +++++- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 530586be05b4..5b1d2e8402d9 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -199,6 +199,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */ unsigned long remaining_credit; struct timer_list credit_timeout; u64 credit_window_start; + bool rate_limited; /* Statistics */ struct xenvif_stats stats; diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 8397f6c92451..e322a862ddfe 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -106,7 +106,11 @@ static int xenvif_poll(struct napi_struct *napi, int budget) if (work_done < budget) { napi_complete_done(napi, work_done); - xenvif_napi_schedule_or_enable_events(queue); + /* If the queue is rate-limited, it shall be + * rescheduled in the timer callback. + */ + if (likely(!queue->rate_limited)) + xenvif_napi_schedule_or_enable_events(queue); } return work_done; diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 602d408fa25e..5042ff8d449a 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -180,6 +180,7 @@ static void tx_add_credit(struct xenvif_queue *queue) max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */ queue->remaining_credit = min(max_credit, max_burst); + queue->rate_limited = false; } void xenvif_tx_credit_callback(unsigned long data) @@ -686,8 +687,10 @@ static bool tx_credit_exceeded(struct xenvif_queue *queue, unsigned size) msecs_to_jiffies(queue->credit_usec / 1000); /* Timer could already be pending in rare cases. */ - if (timer_pending(&queue->credit_timeout)) + if (timer_pending(&queue->credit_timeout)) { + queue->rate_limited = true; return true; + } /* Passed the point where we can replenish credit? */ if (time_after_eq64(now, next_credit)) { @@ -702,6 +705,7 @@ static bool tx_credit_exceeded(struct xenvif_queue *queue, unsigned size) mod_timer(&queue->credit_timeout, next_credit); queue->credit_window_start = next_credit; + queue->rate_limited = true; return true; } -- cgit v1.2.3-70-g09d2 From e26f43faa0d79dd06e9e94829696b68b9940c2ee Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 21 Jun 2017 07:59:16 -0400 Subject: macvlan: Do not return error when setting the same mac address The user currently gets an EBUSY error when attempting to set the mac address on a macvlan device to the same value. This should really be a no-op as nothing changes. Catch the condition and return early. Signed-off-by: Vladislav Yasevich Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 67bf7ebae5c6..de214fbda173 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -703,6 +703,10 @@ static int macvlan_set_mac_address(struct net_device *dev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; + /* If the addresses are the same, this is a no-op */ + if (ether_addr_equal(dev->dev_addr, addr->sa_data)) + return 0; + if (vlan->mode == MACVLAN_MODE_PASSTHRU) { dev_set_mac_address(vlan->lowerdev, addr); return 0; -- cgit v1.2.3-70-g09d2 From e696cda7bd091411705a4e868ce480eaa3082dbf Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 21 Jun 2017 07:59:17 -0400 Subject: macvlan: Fix passthru macvlan mac address inheritance When a lower device of the passthru macvlan changes it's address, passthru macvlan is supposed to change it's own address as well. However, that doesn't happen correctly because the check in macvlan_addr_busy() will catch the fact that the lower level (port) mac address is the same as the address we are trying to assign to the macvlan, and return an error. As a reasult, the address of the passthru macvlan device is never changed. The same thing happens when the user attempts to change the mac address of the passthru macvlan. The simple solution appers to be to not check against the lower device in case of passthru macvlan device, since the 2 addresses are _supposed_ to be the same. Signed-off-by: Vladislav Yasevich Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index de214fbda173..0c9c6da3427a 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -185,7 +185,8 @@ static bool macvlan_addr_busy(const struct macvlan_port *port, * currently in use by the underlying device or * another macvlan. */ - if (ether_addr_equal_64bits(port->dev->dev_addr, addr)) + if (!port->passthru && + ether_addr_equal_64bits(port->dev->dev_addr, addr)) return true; if (macvlan_hash_lookup(port, addr)) -- cgit v1.2.3-70-g09d2 From 43c2d578a0ce0d6067a02b46461811aced551425 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 21 Jun 2017 07:59:18 -0400 Subject: macvlan: convert port passthru to flags. Convert the port passthru boolean into flags with accesor functions. Signed-off-by: Vladislav Yasevich Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 0c9c6da3427a..4d013ca79dae 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -39,13 +39,15 @@ #define MACVLAN_HASH_SIZE (1<flags & MACVLAN_F_PASSTHRU; +} + +static inline void macvlan_set_passthru(struct macvlan_port *port) +{ + port->flags |= MACVLAN_F_PASSTHRU; +} + /* Hash Ethernet address */ static u32 macvlan_eth_hash(const unsigned char *addr) { @@ -185,7 +197,7 @@ static bool macvlan_addr_busy(const struct macvlan_port *port, * currently in use by the underlying device or * another macvlan. */ - if (!port->passthru && + if (!macvlan_passthru(port) && ether_addr_equal_64bits(port->dev->dev_addr, addr)) return true; @@ -446,7 +458,7 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb) } macvlan_forward_source(skb, port, eth->h_source); - if (port->passthru) + if (macvlan_passthru(port)) vlan = list_first_or_null_rcu(&port->vlans, struct macvlan_dev, list); else @@ -575,7 +587,7 @@ static int macvlan_open(struct net_device *dev) struct net_device *lowerdev = vlan->lowerdev; int err; - if (vlan->port->passthru) { + if (macvlan_passthru(vlan->port)) { if (!(vlan->flags & MACVLAN_FLAG_NOPROMISC)) { err = dev_set_promiscuity(lowerdev, 1); if (err < 0) @@ -650,7 +662,7 @@ static int macvlan_stop(struct net_device *dev) dev_uc_unsync(lowerdev, dev); dev_mc_unsync(lowerdev, dev); - if (vlan->port->passthru) { + if (macvlan_passthru(vlan->port)) { if (!(vlan->flags & MACVLAN_FLAG_NOPROMISC)) dev_set_promiscuity(lowerdev, -1); goto hash_del; @@ -683,7 +695,7 @@ static int macvlan_sync_address(struct net_device *dev, unsigned char *addr) if (macvlan_addr_busy(vlan->port, addr)) return -EBUSY; - if (!vlan->port->passthru) { + if (!macvlan_passthru(vlan->port)) { err = dev_uc_add(lowerdev, addr); if (err) return err; @@ -933,7 +945,7 @@ static int macvlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], /* Support unicast filter only on passthru devices. * Multicast filter should be allowed on all devices. */ - if (!vlan->port->passthru && is_unicast_ether_addr(addr)) + if (!macvlan_passthru(vlan->port) && is_unicast_ether_addr(addr)) return -EOPNOTSUPP; if (flags & NLM_F_REPLACE) @@ -957,7 +969,7 @@ static int macvlan_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], /* Support unicast filter only on passthru devices. * Multicast filter should be allowed on all devices. */ - if (!vlan->port->passthru && is_unicast_ether_addr(addr)) + if (!macvlan_passthru(vlan->port) && is_unicast_ether_addr(addr)) return -EOPNOTSUPP; if (is_unicast_ether_addr(addr)) @@ -1125,7 +1137,6 @@ static int macvlan_port_create(struct net_device *dev) if (port == NULL) return -ENOMEM; - port->passthru = false; port->dev = dev; INIT_LIST_HEAD(&port->vlans); for (i = 0; i < MACVLAN_HASH_SIZE; i++) @@ -1331,7 +1342,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, port = macvlan_port_get_rtnl(lowerdev); /* Only 1 macvlan device can be created in passthru mode */ - if (port->passthru) { + if (macvlan_passthru(port)) { /* The macvlan port must be not created this time, * still goto destroy_macvlan_port for readability. */ @@ -1357,7 +1368,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, err = -EINVAL; goto destroy_macvlan_port; } - port->passthru = true; + macvlan_set_passthru(port); eth_hw_addr_inherit(dev, lowerdev); } @@ -1439,7 +1450,7 @@ static int macvlan_changelink(struct net_device *dev, if (data && data[IFLA_MACVLAN_FLAGS]) { __u16 flags = nla_get_u16(data[IFLA_MACVLAN_FLAGS]); bool promisc = (flags ^ vlan->flags) & MACVLAN_FLAG_NOPROMISC; - if (vlan->port->passthru && promisc) { + if (macvlan_passthru(vlan->port) && promisc) { int err; if (flags & MACVLAN_FLAG_NOPROMISC) @@ -1602,7 +1613,7 @@ static int macvlan_device_event(struct notifier_block *unused, } break; case NETDEV_CHANGEADDR: - if (!port->passthru) + if (!macvlan_passthru(port)) return NOTIFY_DONE; vlan = list_first_entry_or_null(&port->vlans, -- cgit v1.2.3-70-g09d2 From 18c8c54de9a619ba5533419e0170433e20c0ee3e Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 21 Jun 2017 07:59:19 -0400 Subject: macvlan: Let passthru macvlan correctly restore lower mac address Passthru macvlans directly change the mac address of the lower level device. That's OK, but after the macvlan is deleted, the lower device is left with changed address and one needs to reboot to bring back the origina HW addresses. This scenario is actually quite common with passthru macvtap devices. This patch attempts to solve this, by storing the mac address of the lower device in macvlan_port structure and keeping track of it through the changes. After this patch, any changes to the lower device mac address done trough the macvlan device, will be reverted back. Any changs done directly to the lower device mac address will be kept. Signed-off-by: Vladislav Yasevich Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 47 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 3 deletions(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 4d013ca79dae..72b801803aa4 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -40,6 +40,7 @@ #define MACVLAN_BC_QUEUE_LEN 1000 #define MACVLAN_F_PASSTHRU 1 +#define MACVLAN_F_ADDRCHANGE 2 struct macvlan_port { struct net_device *dev; @@ -51,6 +52,7 @@ struct macvlan_port { int count; struct hlist_head vlan_source_hash[MACVLAN_HASH_SIZE]; DECLARE_BITMAP(mc_filter, MACVLAN_MC_FILTER_SZ); + unsigned char perm_addr[ETH_ALEN]; }; struct macvlan_source_entry { @@ -78,6 +80,21 @@ static inline void macvlan_set_passthru(struct macvlan_port *port) port->flags |= MACVLAN_F_PASSTHRU; } +static inline bool macvlan_addr_change(const struct macvlan_port *port) +{ + return port->flags & MACVLAN_F_ADDRCHANGE; +} + +static inline void macvlan_set_addr_change(struct macvlan_port *port) +{ + port->flags |= MACVLAN_F_ADDRCHANGE; +} + +static inline void macvlan_clear_addr_change(struct macvlan_port *port) +{ + port->flags &= ~MACVLAN_F_ADDRCHANGE; +} + /* Hash Ethernet address */ static u32 macvlan_eth_hash(const unsigned char *addr) { @@ -193,11 +210,11 @@ static void macvlan_hash_change_addr(struct macvlan_dev *vlan, static bool macvlan_addr_busy(const struct macvlan_port *port, const unsigned char *addr) { - /* Test to see if the specified multicast address is + /* Test to see if the specified address is * currently in use by the underlying device or * another macvlan. */ - if (!macvlan_passthru(port) && + if (!macvlan_passthru(port) && !macvlan_addr_change(port) && ether_addr_equal_64bits(port->dev->dev_addr, addr)) return true; @@ -685,6 +702,7 @@ static int macvlan_sync_address(struct net_device *dev, unsigned char *addr) { struct macvlan_dev *vlan = netdev_priv(dev); struct net_device *lowerdev = vlan->lowerdev; + struct macvlan_port *port = vlan->port; int err; if (!(dev->flags & IFF_UP)) { @@ -695,7 +713,7 @@ static int macvlan_sync_address(struct net_device *dev, unsigned char *addr) if (macvlan_addr_busy(vlan->port, addr)) return -EBUSY; - if (!macvlan_passthru(vlan->port)) { + if (!macvlan_passthru(port)) { err = dev_uc_add(lowerdev, addr); if (err) return err; @@ -705,6 +723,15 @@ static int macvlan_sync_address(struct net_device *dev, unsigned char *addr) macvlan_hash_change_addr(vlan, addr); } + if (macvlan_passthru(port) && !macvlan_addr_change(port)) { + /* Since addr_change isn't set, we are here due to lower + * device change. Save the lower-dev address so we can + * restore it later. + */ + ether_addr_copy(vlan->port->perm_addr, + lowerdev->dev_addr); + } + macvlan_clear_addr_change(port); return 0; } @@ -721,6 +748,7 @@ static int macvlan_set_mac_address(struct net_device *dev, void *p) return 0; if (vlan->mode == MACVLAN_MODE_PASSTHRU) { + macvlan_set_addr_change(vlan->port); dev_set_mac_address(vlan->lowerdev, addr); return 0; } @@ -1138,6 +1166,7 @@ static int macvlan_port_create(struct net_device *dev) return -ENOMEM; port->dev = dev; + ether_addr_copy(port->perm_addr, dev->dev_addr); INIT_LIST_HEAD(&port->vlans); for (i = 0; i < MACVLAN_HASH_SIZE; i++) INIT_HLIST_HEAD(&port->vlan_hash[i]); @@ -1177,6 +1206,18 @@ static void macvlan_port_destroy(struct net_device *dev) kfree_skb(skb); } + /* If the lower device address has been changed by passthru + * macvlan, put it back. + */ + if (macvlan_passthru(port) && + !ether_addr_equal(port->dev->dev_addr, port->perm_addr)) { + struct sockaddr sa; + + sa.sa_family = port->dev->type; + memcpy(&sa.sa_data, port->perm_addr, port->dev->addr_len); + dev_set_mac_address(port->dev, &sa); + } + kfree(port); } -- cgit v1.2.3-70-g09d2 From 60abc0be96e00ca71bac083215ac91ad2e575096 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 21 Jun 2017 14:34:58 -0700 Subject: ipv6: avoid unregistering inet6_dev for loopback The per netns loopback_dev->ip6_ptr is unregistered and set to NULL when its mtu is set to smaller than IPV6_MIN_MTU, this leads to that we could set rt->rt6i_idev NULL after a rt6_uncached_list_flush_dev() and then crash after another call. In this case we should just bring its inet6_dev down, rather than unregistering it, at least prior to commit 176c39af29bc ("netns: fix addrconf_ifdown kernel panic") we always override the case for loopback. Thanks a lot to Andrey for finding a reliable reproducer. Fixes: 176c39af29bc ("netns: fix addrconf_ifdown kernel panic") Reported-by: Andrey Konovalov Cc: Andrey Konovalov Cc: Daniel Lezcano Cc: David Ahern Signed-off-by: Cong Wang Acked-by: David Ahern Tested-by: Andrey Konovalov Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 686c92375e81..1d2dbace42ff 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3369,6 +3369,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct netdev_notifier_changeupper_info *info; struct inet6_dev *idev = __in6_dev_get(dev); + struct net *net = dev_net(dev); int run_pending = 0; int err; @@ -3384,7 +3385,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, case NETDEV_CHANGEMTU: /* if MTU under IPV6_MIN_MTU stop IPv6 on this interface. */ if (dev->mtu < IPV6_MIN_MTU) { - addrconf_ifdown(dev, 1); + addrconf_ifdown(dev, dev != net->loopback_dev); break; } @@ -3500,7 +3501,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, * IPV6_MIN_MTU stop IPv6 on this interface. */ if (dev->mtu < IPV6_MIN_MTU) - addrconf_ifdown(dev, 1); + addrconf_ifdown(dev, dev != net->loopback_dev); } break; -- cgit v1.2.3-70-g09d2 From b92b7d3312033a08cae2c879b9243c42ad7de94b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 22 Jun 2017 00:16:37 +0200 Subject: netvsc: don't access netdev->num_rx_queues directly This structure member is hidden behind CONFIG_SYSFS, and we get a build error when that is disabled: drivers/net/hyperv/netvsc_drv.c: In function 'netvsc_set_channels': drivers/net/hyperv/netvsc_drv.c:754:49: error: 'struct net_device' has no member named 'num_rx_queues'; did you mean 'num_tx_queues'? drivers/net/hyperv/netvsc_drv.c: In function 'netvsc_set_rxfh': drivers/net/hyperv/netvsc_drv.c:1181:25: error: 'struct net_device' has no member named 'num_rx_queues'; did you mean 'num_tx_queues'? As the value is only set once to the argument of alloc_netdev_mq(), we can compare against that constant directly. Fixes: ff4a44199012 ("netvsc: allow get/set of RSS indirection table") Fixes: 2b01888d1b45 ("netvsc: allow more flexible setting of number of channels") Signed-off-by: Arnd Bergmann Reviewed-by: Haiyang Zhang Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 82d6c022ca85..643c539a08ba 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -776,7 +776,7 @@ static int netvsc_set_channels(struct net_device *net, channels->rx_count || channels->tx_count || channels->other_count) return -EINVAL; - if (count > net->num_tx_queues || count > net->num_rx_queues) + if (count > net->num_tx_queues || count > VRSS_CHANNEL_MAX) return -EINVAL; if (!nvdev || nvdev->destroy) @@ -1203,7 +1203,7 @@ static int netvsc_set_rxfh(struct net_device *dev, const u32 *indir, rndis_dev = ndev->extension; if (indir) { for (i = 0; i < ITAB_NUM; i++) - if (indir[i] >= dev->num_rx_queues) + if (indir[i] >= VRSS_CHANNEL_MAX) return -EINVAL; for (i = 0; i < ITAB_NUM; i++) -- cgit v1.2.3-70-g09d2 From bb53f4d4f5116d3dae76bb12fb16bc73771f958a Mon Sep 17 00:00:00 2001 From: Martin Habets Date: Thu, 22 Jun 2017 10:50:41 +0100 Subject: sfc: Fix MCDI command size for filter operations The 8000 series adapters uses catch-all filters for encapsulated traffic to support filtering VXLAN, NVGRE and GENEVE traffic. This new filter functionality requires a longer MCDI command. This patch increases the size of buffers on stack that were missed, which fixes a kernel panic from the stack protector. Fixes: 9b41080125176 ("sfc: insert catch-all filters for encapsulated traffic") Signed-off-by: Martin Habets Acked-by: Edward Cree Acked-by: Bert Kenward bkenward@solarflare.com Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef10.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 78efb2822b86..a8089667fc5b 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -4172,7 +4172,7 @@ found: * recipients */ if (is_mc_recip) { - MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_IN_LEN); + MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN); unsigned int depth, i; memset(inbuf, 0, sizeof(inbuf)); @@ -4320,7 +4320,7 @@ static int efx_ef10_filter_remove_internal(struct efx_nic *efx, efx_ef10_filter_set_entry(table, filter_idx, NULL, 0); } else { efx_mcdi_display_error(efx, MC_CMD_FILTER_OP, - MC_CMD_FILTER_OP_IN_LEN, + MC_CMD_FILTER_OP_EXT_IN_LEN, NULL, 0, rc); } } @@ -4453,7 +4453,7 @@ static s32 efx_ef10_filter_rfs_insert(struct efx_nic *efx, struct efx_filter_spec *spec) { struct efx_ef10_filter_table *table = efx->filter_state; - MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_IN_LEN); + MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN); struct efx_filter_spec *saved_spec; unsigned int hash, i, depth = 1; bool replacing = false; @@ -4940,7 +4940,7 @@ not_restored: static void efx_ef10_filter_table_remove(struct efx_nic *efx) { struct efx_ef10_filter_table *table = efx->filter_state; - MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_IN_LEN); + MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN); struct efx_filter_spec *spec; unsigned int filter_idx; int rc; -- cgit v1.2.3-70-g09d2 From a5cb659bbc1c8644efa0c3138a757a1e432a4880 Mon Sep 17 00:00:00 2001 From: Michal Kubeček Date: Mon, 19 Jun 2017 13:03:43 +0200 Subject: net: account for current skb length when deciding about UFO Our customer encountered stuck NFS writes for blocks starting at specific offsets w.r.t. page boundary caused by networking stack sending packets via UFO enabled device with wrong checksum. The problem can be reproduced by composing a long UDP datagram from multiple parts using MSG_MORE flag: sendto(sd, buff, 1000, MSG_MORE, ...); sendto(sd, buff, 1000, MSG_MORE, ...); sendto(sd, buff, 3000, 0, ...); Assume this packet is to be routed via a device with MTU 1500 and NETIF_F_UFO enabled. When second sendto() gets into __ip_append_data(), this condition is tested (among others) to decide whether to call ip_ufo_append_data(): ((length + fragheaderlen) > mtu) || (skb && skb_is_gso(skb)) At the moment, we already have skb with 1028 bytes of data which is not marked for GSO so that the test is false (fragheaderlen is usually 20). Thus we append second 1000 bytes to this skb without invoking UFO. Third sendto(), however, has sufficient length to trigger the UFO path so that we end up with non-UFO skb followed by a UFO one. Later on, udp_send_skb() uses udp_csum() to calculate the checksum but that assumes all fragments have correct checksum in skb->csum which is not true for UFO fragments. When checking against MTU, we need to add skb->len to length of new segment if we already have a partially filled skb and fragheaderlen only if there isn't one. In the IPv6 case, skb can only be null if this is the first segment so that we have to use headersize (length of the first IPv6 header) rather than fragheaderlen (length of IPv6 header of further fragments) for skb == NULL. Fixes: e89e9cf539a2 ("[IPv4/IPv6]: UFO Scatter-gather approach") Fixes: e4c5e13aa45c ("ipv6: Should use consistent conditional judgement for ip6 fragment between __ip6_append_data and ip6_finish_output") Signed-off-by: Michal Kubecek Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 3 ++- net/ipv6/ip6_output.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 7a3fd25e8913..532b36e9ce2a 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -964,7 +964,8 @@ static int __ip_append_data(struct sock *sk, csummode = CHECKSUM_PARTIAL; cork->length += length; - if ((((length + fragheaderlen) > mtu) || (skb && skb_is_gso(skb))) && + if ((((length + (skb ? skb->len : fragheaderlen)) > mtu) || + (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) && (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index bf8a58a1c32d..1699acb2fa2c 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1390,7 +1390,7 @@ emsgsize: */ cork->length += length; - if ((((length + fragheaderlen) > mtu) || + if ((((length + (skb ? skb->len : headersize)) > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) && -- cgit v1.2.3-70-g09d2 From 0ccc22f425e56c4ede9c66f1945846de8ac1f352 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 22 Jun 2017 15:29:33 -0700 Subject: sit: use __GFP_NOWARN for user controlled allocation The memory allocation size is controlled by user-space, if it is too large just fail silently and return NULL, not to mention there is a fallback allocation later. Reported-by: Andrey Konovalov Signed-off-by: Cong Wang Tested-by: Andrey Konovalov Signed-off-by: David S. Miller --- net/ipv6/sit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 2378503577b0..f8ad15891cd7 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -305,7 +305,7 @@ static int ipip6_tunnel_get_prl(struct ip_tunnel *t, * we try harder to allocate. */ kp = (cmax <= 1 || capable(CAP_NET_ADMIN)) ? - kcalloc(cmax, sizeof(*kp), GFP_KERNEL) : + kcalloc(cmax, sizeof(*kp), GFP_KERNEL | __GFP_NOWARN) : NULL; rcu_read_lock(); -- cgit v1.2.3-70-g09d2 From db9d8b29d19d2801793e4419f4c6272bf8951c62 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Fri, 23 Jun 2017 17:51:31 +0200 Subject: net: dp83640: Avoid NULL pointer dereference. The function, skb_complete_tx_timestamp(), used to allow passing in a NULL pointer for the time stamps, but that was changed in commit 62bccb8cdb69051b95a55ab0c489e3cab261c8ef ("net-timestamp: Make the clone operation stand-alone from phy timestamping"), and the existing call sites, all of which are in the dp83640 driver, were fixed up. Even though the kernel-doc was subsequently updated in commit 7a76a021cd5a292be875fbc616daf03eab1e6996 ("net-timestamp: Update skb_complete_tx_timestamp comment"), still a bug fix from Manfred Rudigier came into the driver using the old semantics. Probably Manfred derived that patch from an older kernel version. This fix should be applied to the stable trees as well. Fixes: 81e8f2e930fe ("net: dp83640: Fix tx timestamp overflow handling.") Signed-off-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/net/phy/dp83640.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index ed0d10f54f26..c3065236ffcc 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -908,7 +908,7 @@ static void decode_txts(struct dp83640_private *dp83640, if (overflow) { pr_debug("tx timestamp queue overflow, count %d\n", overflow); while (skb) { - skb_complete_tx_timestamp(skb, NULL); + kfree_skb(skb); skb = skb_dequeue(&dp83640->tx_queue); } return; -- cgit v1.2.3-70-g09d2 From 69c149e2e39e8d66437c9034bb4926ef2c1f7c23 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 23 Jun 2017 14:01:00 -0400 Subject: bnxt_en: Add missing logic to handle TPA end error conditions. When we get a TPA_END completion to handle a completed LRO packet, it is possible that hardware would indicate errors. The current code is not checking for the error condition. Define the proper error bits and the macro to check for this error and abort properly. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 7 ++++--- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 6 +++++- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 03f55daecb20..f5ba8ec1b67f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1301,10 +1301,11 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, cp_cons = NEXT_CMP(cp_cons); } - if (unlikely(agg_bufs > MAX_SKB_FRAGS)) { + if (unlikely(agg_bufs > MAX_SKB_FRAGS || TPA_END_ERRORS(tpa_end1))) { bnxt_abort_tpa(bp, bnapi, cp_cons, agg_bufs); - netdev_warn(bp->dev, "TPA frags %d exceeded MAX_SKB_FRAGS %d\n", - agg_bufs, (int)MAX_SKB_FRAGS); + if (agg_bufs > MAX_SKB_FRAGS) + netdev_warn(bp->dev, "TPA frags %d exceeded MAX_SKB_FRAGS %d\n", + agg_bufs, (int)MAX_SKB_FRAGS); return NULL; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 3ef42dbc6327..d46a85041083 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -374,12 +374,16 @@ struct rx_tpa_end_cmp_ext { __le32 rx_tpa_end_cmp_errors_v2; #define RX_TPA_END_CMP_V2 (0x1 << 0) - #define RX_TPA_END_CMP_ERRORS (0x7fff << 1) + #define RX_TPA_END_CMP_ERRORS (0x3 << 1) #define RX_TPA_END_CMPL_ERRORS_SHIFT 1 u32 rx_tpa_end_cmp_start_opaque; }; +#define TPA_END_ERRORS(rx_tpa_end_ext) \ + ((rx_tpa_end_ext)->rx_tpa_end_cmp_errors_v2 & \ + cpu_to_le32(RX_TPA_END_CMP_ERRORS)) + #define DB_IDX_MASK 0xffffff #define DB_IDX_VALID (0x1 << 26) #define DB_IRQ_DIS (0x1 << 27) -- cgit v1.2.3-70-g09d2 From 2270bc5da34979454e6f2eb133d800b635156174 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 23 Jun 2017 14:01:01 -0400 Subject: bnxt_en: Fix netpoll handling. To handle netpoll properly, the driver must only handle TX packets during NAPI. Handling RX events cause warnings and errors in netpoll mode. The ndo_poll_controller() method should call napi_schedule() directly so that a NAPI weight of zero will be used during netpoll mode. The bnxt_en driver supports 2 ring modes: combined, and separate rx/tx. In separate rx/tx mode, the ndo_poll_controller() method will only process the tx rings. In combined mode, the rx and tx completion entries are mixed in the completion ring and we need to drop the rx entries and recycle the rx buffers. Add a function bnxt_force_rx_discard() to handle this in netpoll mode when we see rx entries in combined ring mode. Reported-by: Calvin Owens Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 54 +++++++++++++++++++++++++++---- 1 file changed, 48 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index f5ba8ec1b67f..74e8e215524d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1563,6 +1563,45 @@ next_rx_no_prod: return rc; } +/* In netpoll mode, if we are using a combined completion ring, we need to + * discard the rx packets and recycle the buffers. + */ +static int bnxt_force_rx_discard(struct bnxt *bp, struct bnxt_napi *bnapi, + u32 *raw_cons, u8 *event) +{ + struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring; + u32 tmp_raw_cons = *raw_cons; + struct rx_cmp_ext *rxcmp1; + struct rx_cmp *rxcmp; + u16 cp_cons; + u8 cmp_type; + + cp_cons = RING_CMP(tmp_raw_cons); + rxcmp = (struct rx_cmp *) + &cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)]; + + tmp_raw_cons = NEXT_RAW_CMP(tmp_raw_cons); + cp_cons = RING_CMP(tmp_raw_cons); + rxcmp1 = (struct rx_cmp_ext *) + &cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)]; + + if (!RX_CMP_VALID(rxcmp1, tmp_raw_cons)) + return -EBUSY; + + cmp_type = RX_CMP_TYPE(rxcmp); + if (cmp_type == CMP_TYPE_RX_L2_CMP) { + rxcmp1->rx_cmp_cfa_code_errors_v2 |= + cpu_to_le32(RX_CMPL_ERRORS_CRC_ERROR); + } else if (cmp_type == CMP_TYPE_RX_L2_TPA_END_CMP) { + struct rx_tpa_end_cmp_ext *tpa_end1; + + tpa_end1 = (struct rx_tpa_end_cmp_ext *)rxcmp1; + tpa_end1->rx_tpa_end_cmp_errors_v2 |= + cpu_to_le32(RX_TPA_END_CMP_ERRORS); + } + return bnxt_rx_pkt(bp, bnapi, raw_cons, event); +} + #define BNXT_GET_EVENT_PORT(data) \ ((data) & \ ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_PORT_ID_MASK) @@ -1745,7 +1784,11 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) if (unlikely(tx_pkts > bp->tx_wake_thresh)) rx_pkts = budget; } else if ((TX_CMP_TYPE(txcmp) & 0x30) == 0x10) { - rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &event); + if (likely(budget)) + rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &event); + else + rc = bnxt_force_rx_discard(bp, bnapi, &raw_cons, + &event); if (likely(rc >= 0)) rx_pkts += rc; else if (rc == -EBUSY) /* partial completion */ @@ -6664,12 +6707,11 @@ static void bnxt_poll_controller(struct net_device *dev) struct bnxt *bp = netdev_priv(dev); int i; - for (i = 0; i < bp->cp_nr_rings; i++) { - struct bnxt_irq *irq = &bp->irq_tbl[i]; + /* Only process tx rings/combined rings in netpoll mode. */ + for (i = 0; i < bp->tx_nr_rings; i++) { + struct bnxt_tx_ring_info *txr = &bp->tx_ring[i]; - disable_irq(irq->vector); - irq->handler(irq->vector, bp->bnapi[i]); - enable_irq(irq->vector); + napi_schedule(&txr->bnapi->napi); } } #endif -- cgit v1.2.3-70-g09d2 From d0c32a16235aeacd32c9de6ff90f9219614d7e4e Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Sat, 24 Jun 2017 15:37:00 +0300 Subject: bnx2x: Don't log mc removal needlessly When mc configuration changes bnx2x_config_mcast() can return 0 for success, negative for failure and positive for benign reason preventing its immediate work, e.g., when the command awaits the completion of a previously sent command. When removing all configured macs on a 578xx adapter, if a positive value would be returned driver would errneously log it as an error. Fixes: c7b7b483ccc9 ("bnx2x: Don't flush multicast MACs") Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index a851f95c307a..349a46593abf 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -12729,7 +12729,7 @@ static int bnx2x_set_mc_list(struct bnx2x *bp) } else { /* If no mc addresses are required, flush the configuration */ rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_DEL); - if (rc) + if (rc < 0) BNX2X_ERR("Failed to clear multicast configuration %d\n", rc); } -- cgit v1.2.3-70-g09d2 From 85cb73ff9b74785a7fc752875d7f0fe17ca3ea7c Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 23 Jun 2017 15:25:37 -0700 Subject: net: ipv6: reset daddr and dport in sk if connect() fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In __ip6_datagram_connect(), reset sk->sk_v6_daddr and inet->dport if error occurs. In udp_v6_early_demux(), check for sk_state to make sure it is in TCP_ESTABLISHED state. Together, it makes sure unconnected UDP socket won't be considered as a valid candidate for early demux. v3: add TCP_ESTABLISHED state check in udp_v6_early_demux() v2: fix compilation error Fixes: 5425077d73e0 ("net: ipv6: Add early demux handler for UDP unicast") Signed-off-by: Wei Wang Acked-by: Maciej Żenczykowski Signed-off-by: David S. Miller --- net/ipv6/datagram.c | 8 +++++++- net/ipv6/udp.c | 3 ++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index e011122ebd43..5c786f5ab961 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -250,8 +250,14 @@ ipv4_connected: */ err = ip6_datagram_dst_update(sk, true); - if (err) + if (err) { + /* Reset daddr and dport so that udp_v6_early_demux() + * fails to find this socket + */ + memset(&sk->sk_v6_daddr, 0, sizeof(sk->sk_v6_daddr)); + inet->inet_dport = 0; goto out; + } sk->sk_state = TCP_ESTABLISHED; sk_set_txhash(sk); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 06ec39b79609..75703fda23e7 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -879,7 +879,8 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net, struct sock *sk; udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { - if (INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif)) + if (sk->sk_state == TCP_ESTABLISHED && + INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif)) return sk; /* Only check first socket in chain */ break; -- cgit v1.2.3-70-g09d2 From d747a7a51b00984127a88113cdbbc26f91e9d815 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Sat, 24 Jun 2017 23:50:30 -0700 Subject: tcp: reset sk_rx_dst in tcp_disconnect() We have to reset the sk->sk_rx_dst when we disconnect a TCP connection, because otherwise when we re-connect it this dst reference is simply overridden in tcp_finish_connect(). This fixes a dst leak which leads to a loopback dev refcnt leak. It is a long-standing bug, Kevin reported a very similar (if not same) bug before. Thanks to Andrei for providing such a reliable reproducer which greatly narrows down the problem. Fixes: 41063e9dd119 ("ipv4: Early TCP socket demux.") Reported-by: Andrei Vagin Reported-by: Kevin Xu Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b5ea036ca781..40aca7803cf2 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2330,6 +2330,8 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_init_send_head(sk); memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); __sk_dst_reset(sk); + dst_release(sk->sk_rx_dst); + sk->sk_rx_dst = NULL; tcp_saved_syn_free(tp); /* Clean up fastopen related fields */ -- cgit v1.2.3-70-g09d2 From 6f64ec74515925cced6df4571638b5a099a49aae Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 27 Jun 2017 07:02:20 -0700 Subject: net: prevent sign extension in dev_get_stats() Similar to the fix provided by Dominik Heidler in commit 9b3dc0a17d73 ("l2tp: cast l2tp traffic counter to unsigned") we need to take care of 32bit kernels in dev_get_stats(). When using atomic_long_read(), we add a 'long' to u64 and might misinterpret high order bit, unless we cast to unsigned. Fixes: caf586e5f23ce ("net: add a core netdev->rx_dropped counter") Fixes: 015f0688f57ca ("net: net: add a core netdev->tx_dropped counter") Fixes: 6e7333d315a76 ("net: add rx_nohandler stat counter") Signed-off-by: Eric Dumazet Cc: Jarod Wilson Signed-off-by: David S. Miller --- net/core/dev.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 7243421c9783..91bb55070533 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7783,9 +7783,9 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, } else { netdev_stats_to_stats64(storage, &dev->stats); } - storage->rx_dropped += atomic_long_read(&dev->rx_dropped); - storage->tx_dropped += atomic_long_read(&dev->tx_dropped); - storage->rx_nohandler += atomic_long_read(&dev->rx_nohandler); + storage->rx_dropped += (unsigned long)atomic_long_read(&dev->rx_dropped); + storage->tx_dropped += (unsigned long)atomic_long_read(&dev->tx_dropped); + storage->rx_nohandler += (unsigned long)atomic_long_read(&dev->rx_nohandler); return storage; } EXPORT_SYMBOL(dev_get_stats); -- cgit v1.2.3-70-g09d2 From 85688d9adf68557233d974ef28971e69b89fb690 Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Mon, 26 Jun 2017 18:47:00 +0300 Subject: fsl/fman: add dependency on HAS_DMA A previous commit (5567e989198b5a8d) inserted a dependency on DMA API that requires HAS_DMA to be added in Kconfig. Signed-off-by: Madalin Bucur Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fman/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/freescale/fman/Kconfig b/drivers/net/ethernet/freescale/fman/Kconfig index dc0850b3b517..8870a9a798ca 100644 --- a/drivers/net/ethernet/freescale/fman/Kconfig +++ b/drivers/net/ethernet/freescale/fman/Kconfig @@ -2,6 +2,7 @@ config FSL_FMAN tristate "FMan support" depends on FSL_SOC || ARCH_LAYERSCAPE || COMPILE_TEST select GENERIC_ALLOCATOR + depends on HAS_DMA select PHYLIB default n help -- cgit v1.2.3-70-g09d2 From e20bd60bf62a2448be873653c7febca1d4d73afc Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Mon, 26 Jun 2017 12:41:20 -0500 Subject: net: usb: asix88179_178a: Add support for the Belkin B2B128 The Belkin B2B128 is a USB 3.0 Hub + Gigabit Ethernet Adapter, the Ethernet adapter uses the ASIX AX88179 USB 3.0 to Gigabit Ethernet chip supported by this driver, add the USB ID for the same. This patch is based on work by Geoffrey Tran who has indicated they would like this upstreamed by someone more familiar with the upstreaming process. Signed-off-by: Andrew F. Davis Signed-off-by: David S. Miller --- drivers/net/usb/ax88179_178a.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 51cf60092a18..4037ab27734a 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1722,6 +1722,18 @@ static const struct driver_info lenovo_info = { .tx_fixup = ax88179_tx_fixup, }; +static const struct driver_info belkin_info = { + .description = "Belkin USB Ethernet Adapter", + .bind = ax88179_bind, + .unbind = ax88179_unbind, + .status = ax88179_status, + .link_reset = ax88179_link_reset, + .reset = ax88179_reset, + .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .rx_fixup = ax88179_rx_fixup, + .tx_fixup = ax88179_tx_fixup, +}; + static const struct usb_device_id products[] = { { /* ASIX AX88179 10/100/1000 */ @@ -1751,6 +1763,10 @@ static const struct usb_device_id products[] = { /* Lenovo OneLinkDock Gigabit LAN */ USB_DEVICE(0x17ef, 0x304b), .driver_info = (unsigned long)&lenovo_info, +}, { + /* Belkin B2B128 USB 3.0 Hub + Gigabit Ethernet Adapter */ + USB_DEVICE(0x050d, 0x0128), + .driver_info = (unsigned long)&belkin_info, }, { }, }; -- cgit v1.2.3-70-g09d2 From 713a98d90c5ea072c1bb00ef40617aee2cef2232 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 28 Jun 2017 09:51:03 +0800 Subject: virtio-net: serialize tx routine during reset We don't hold any tx lock when trying to disable TX during reset, this would lead a use after free since ndo_start_xmit() tries to access the virtqueue which has already been freed. Fix this by using netif_tx_disable() before freeing the vqs, this could make sure no tx after vq freeing. Reported-by: Jean-Philippe Menil Tested-by: Jean-Philippe Menil Fixes commit f600b6905015 ("virtio_net: Add XDP support") Cc: John Fastabend Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Acked-by: Robert McCabe Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index a871f45ecc79..143d8a95a60d 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1797,6 +1797,7 @@ static void virtnet_freeze_down(struct virtio_device *vdev) flush_work(&vi->config_work); netif_device_detach(vi->dev); + netif_tx_disable(vi->dev); cancel_delayed_work_sync(&vi->refill); if (netif_running(vi->dev)) { -- cgit v1.2.3-70-g09d2 From c1a4872ebfb83b1af7144f7b29ac8c4b344a12a8 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Wed, 28 Jun 2017 12:53:54 +0800 Subject: net: sched: Fix one possible panic when no destroy callback When qdisc fail to init, qdisc_create would invoke the destroy callback to cleanup. But there is no check if the callback exists really. So it would cause the panic if there is no real destroy callback like the qdisc codel, fq, and so on. Take codel as an example following: When a malicious user constructs one invalid netlink msg, it would cause codel_init->codel_change->nla_parse_nested failed. Then kernel would invoke the destroy callback directly but qdisc codel doesn't define one. It causes one panic as a result. Now add one the check for destroy to avoid the possible panic. Fixes: 87b60cfacf9f ("net_sched: fix error recovery at qdisc creation") Signed-off-by: Gao Feng Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_api.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index e88342fde1bc..cfdbfa18a95e 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1019,7 +1019,8 @@ static struct Qdisc *qdisc_create(struct net_device *dev, return sch; } /* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */ - ops->destroy(sch); + if (ops->destroy) + ops->destroy(sch); err_out3: dev_put(dev); kfree((char *) sch - sch->padded); -- cgit v1.2.3-70-g09d2 From 6b27c8adf27edf1dabe2cdcfaa101ef7e2712415 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 28 Jun 2017 09:03:12 +0300 Subject: mlxsw: spectrum_router: Fix NULL pointer dereference In case a VLAN device is enslaved to a bridge we shouldn't create a router interface (RIF) for it when it's configured with an IP address. This is already handled by the driver for other types of netdevs, such as physical ports and LAG devices. If this IP address is then removed and the interface is subsequently unlinked from the bridge, a NULL pointer dereference can happen, as the original 802.1d FID was replaced with an rFID which was then deleted. To reproduce: $ ip link set dev enp3s0np9 up $ ip link add name enp3s0np9.111 link enp3s0np9 type vlan id 111 $ ip link set dev enp3s0np9.111 up $ ip link add name br0 type bridge $ ip link set dev br0 up $ ip link set enp3s0np9.111 master br0 $ ip address add dev enp3s0np9.111 192.168.0.1/24 $ ip address del dev enp3s0np9.111 192.168.0.1/24 $ ip link set dev enp3s0np9.111 nomaster Fixes: 99724c18fc66 ("mlxsw: spectrum: Introduce support for router interfaces") Signed-off-by: Ido Schimmel Reported-by: Petr Machata Tested-by: Petr Machata Reviewed-by: Petr Machata Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 9f89c4137d21..0744452a0b18 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -3334,6 +3334,9 @@ static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev, struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(vlan_dev); u16 vid = vlan_dev_vlan_id(vlan_dev); + if (netif_is_bridge_port(vlan_dev)) + return 0; + if (mlxsw_sp_port_dev_check(real_dev)) return mlxsw_sp_inetaddr_vport_event(vlan_dev, real_dev, event, vid); -- cgit v1.2.3-70-g09d2 From acb4b7df48b539cb391287921de57e4e5fae3460 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 28 Jun 2017 14:44:21 +0300 Subject: rocker: move dereference before free My static checker complains that ofdpa_neigh_del() can sometimes free "found". It just makes sense to use it first before deleting it. Fixes: ecf244f753e0 ("rocker: fix maybe-uninitialized warning") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/ethernet/rocker/rocker_ofdpa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c index 2ae852454780..a9ce82d3e9cf 100644 --- a/drivers/net/ethernet/rocker/rocker_ofdpa.c +++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c @@ -1505,8 +1505,8 @@ static int ofdpa_port_ipv4_nh(struct ofdpa_port *ofdpa_port, *index = entry->index; resolved = false; } else if (removing) { - ofdpa_neigh_del(trans, found); *index = found->index; + ofdpa_neigh_del(trans, found); } else if (updating) { ofdpa_neigh_update(found, trans, NULL, false); resolved = !is_zero_ether_addr(found->eth_dst); -- cgit v1.2.3-70-g09d2 From 5b85840320151f61e04d83a23ef2567a07094503 Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Wed, 28 Jun 2017 18:28:33 +0200 Subject: arcnet: change irq handler to lock irqsave This patch prevents the arcnet driver from the following deadlock. [ 41.273910] ====================================================== [ 41.280397] [ INFO: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected ] [ 41.287433] 4.4.0-00034-gc0ae784 #536 Not tainted [ 41.292366] ------------------------------------------------------ [ 41.298863] arcecho/233 [HC0[0]:SC0[2]:HE0:SE0] is trying to acquire: [ 41.305628] (&(&lp->lock)->rlock){+.+...}, at: [] arcnet_send_packet+0x60/0x1c0 [arcnet] [ 41.315199] [ 41.315199] and this task is already holding: [ 41.321324] (_xmit_ARCNET#2){+.-...}, at: [] packet_direct_xmit+0xfc/0x1c8 [ 41.329593] which would create a new lock dependency: [ 41.334893] (_xmit_ARCNET#2){+.-...} -> (&(&lp->lock)->rlock){+.+...} [ 41.341801] [ 41.341801] but this new dependency connects a SOFTIRQ-irq-safe lock: [ 41.350108] (_xmit_ARCNET#2){+.-...} ... which became SOFTIRQ-irq-safe at: [ 41.357539] [] _raw_spin_lock+0x30/0x40 [ 41.362677] [] dev_watchdog+0x5c/0x264 [ 41.367723] [] call_timer_fn+0x6c/0xf4 [ 41.372759] [] run_timer_softirq+0x154/0x210 [ 41.378340] [] __do_softirq+0x144/0x298 [ 41.383469] [] irq_exit+0xcc/0x130 [ 41.388138] [] __handle_domain_irq+0x60/0xb4 [ 41.393728] [] __irq_svc+0x58/0x78 [ 41.398402] [] arch_cpu_idle+0x24/0x3c [ 41.403443] [] cpu_startup_entry+0x1f8/0x25c [ 41.409029] [] start_kernel+0x3c0/0x3cc [ 41.414170] [ 41.414170] to a SOFTIRQ-irq-unsafe lock: [ 41.419931] (&(&lp->lock)->rlock){+.+...} ... which became SOFTIRQ-irq-unsafe at: [ 41.427996] ... [] _raw_spin_lock+0x30/0x40 [ 41.433409] [] arcnet_interrupt+0x2c/0x800 [arcnet] [ 41.439646] [] handle_nested_irq+0x8c/0xec [ 41.445063] [] regmap_irq_thread+0x190/0x314 [ 41.450661] [] irq_thread_fn+0x1c/0x34 [ 41.455700] [] irq_thread+0x13c/0x1dc [ 41.460649] [] kthread+0xe4/0xf8 [ 41.465158] [] ret_from_fork+0x14/0x24 [ 41.470207] [ 41.470207] other info that might help us debug this: [ 41.470207] [ 41.478627] Possible interrupt unsafe locking scenario: [ 41.478627] [ 41.485763] CPU0 CPU1 [ 41.490521] ---- ---- [ 41.495279] lock(&(&lp->lock)->rlock); [ 41.499414] local_irq_disable(); [ 41.505636] lock(_xmit_ARCNET#2); [ 41.511967] lock(&(&lp->lock)->rlock); [ 41.518741] [ 41.521490] lock(_xmit_ARCNET#2); [ 41.525356] [ 41.525356] *** DEADLOCK *** [ 41.525356] [ 41.531587] 1 lock held by arcecho/233: [ 41.535617] #0: (_xmit_ARCNET#2){+.-...}, at: [] packet_direct_xmit+0xfc/0x1c8 [ 41.544355] the dependencies between SOFTIRQ-irq-safe lock and the holding lock: [ 41.552362] -> (_xmit_ARCNET#2){+.-...} ops: 27 { [ 41.557357] HARDIRQ-ON-W at: [ 41.560664] [] _raw_spin_lock+0x30/0x40 [ 41.567445] [] dev_deactivate_many+0x114/0x304 [ 41.574866] [] dev_deactivate+0x24/0x38 [ 41.581646] [] linkwatch_do_dev+0x40/0x74 [ 41.588613] [] __linkwatch_run_queue+0xec/0x140 [ 41.596120] [] linkwatch_event+0x2c/0x34 [ 41.602991] [] process_one_work+0x188/0x40c [ 41.610131] [] worker_thread+0x4c/0x480 [ 41.616912] [] kthread+0xe4/0xf8 [ 41.623048] [] ret_from_fork+0x14/0x24 [ 41.629735] IN-SOFTIRQ-W at: [ 41.633039] [] _raw_spin_lock+0x30/0x40 [ 41.639820] [] dev_watchdog+0x5c/0x264 [ 41.646508] [] call_timer_fn+0x6c/0xf4 [ 41.653190] [] run_timer_softirq+0x154/0x210 [ 41.660425] [] __do_softirq+0x144/0x298 [ 41.667201] [] irq_exit+0xcc/0x130 [ 41.673518] [] __handle_domain_irq+0x60/0xb4 [ 41.680754] [] __irq_svc+0x58/0x78 [ 41.687077] [] arch_cpu_idle+0x24/0x3c [ 41.693769] [] cpu_startup_entry+0x1f8/0x25c [ 41.701006] [] start_kernel+0x3c0/0x3cc [ 41.707791] INITIAL USE at: [ 41.711003] [] _raw_spin_lock+0x30/0x40 [ 41.717696] [] dev_deactivate_many+0x114/0x304 [ 41.725026] [] dev_deactivate+0x24/0x38 [ 41.731718] [] linkwatch_do_dev+0x40/0x74 [ 41.738593] [] __linkwatch_run_queue+0xec/0x140 [ 41.746011] [] linkwatch_event+0x2c/0x34 [ 41.752789] [] process_one_work+0x188/0x40c [ 41.759847] [] worker_thread+0x4c/0x480 [ 41.766541] [] kthread+0xe4/0xf8 [ 41.772596] [] ret_from_fork+0x14/0x24 [ 41.779198] } [ 41.780945] ... key at: [] netdev_xmit_lock_key+0x38/0x1c8 [ 41.788192] ... acquired at: [ 41.791309] [] lock_acquire+0x70/0x90 [ 41.796361] [] _raw_spin_lock_irqsave+0x40/0x54 [ 41.802324] [] arcnet_send_packet+0x60/0x1c0 [arcnet] [ 41.808844] [] packet_direct_xmit+0x130/0x1c8 [ 41.814622] [] packet_sendmsg+0x3b8/0x680 [ 41.820034] [] sock_sendmsg+0x14/0x24 [ 41.825091] [] SyS_sendto+0xb8/0xe0 [ 41.829956] [] SyS_send+0x18/0x20 [ 41.834638] [] ret_fast_syscall+0x0/0x1c [ 41.839954] [ 41.841514] the dependencies between the lock to be acquired and SOFTIRQ-irq-unsafe lock: [ 41.850302] -> (&(&lp->lock)->rlock){+.+...} ops: 5 { [ 41.855644] HARDIRQ-ON-W at: [ 41.858945] [] _raw_spin_lock+0x30/0x40 [ 41.865726] [] arcnet_interrupt+0x2c/0x800 [arcnet] [ 41.873607] [] handle_nested_irq+0x8c/0xec [ 41.880666] [] regmap_irq_thread+0x190/0x314 [ 41.887901] [] irq_thread_fn+0x1c/0x34 [ 41.894593] [] irq_thread+0x13c/0x1dc [ 41.901195] [] kthread+0xe4/0xf8 [ 41.907338] [] ret_from_fork+0x14/0x24 [ 41.914025] SOFTIRQ-ON-W at: [ 41.917328] [] _raw_spin_lock+0x30/0x40 [ 41.924106] [] arcnet_interrupt+0x2c/0x800 [arcnet] [ 41.931981] [] handle_nested_irq+0x8c/0xec [ 41.939028] [] regmap_irq_thread+0x190/0x314 [ 41.946264] [] irq_thread_fn+0x1c/0x34 [ 41.952954] [] irq_thread+0x13c/0x1dc [ 41.959548] [] kthread+0xe4/0xf8 [ 41.965689] [] ret_from_fork+0x14/0x24 [ 41.972379] INITIAL USE at: [ 41.975595] [] _raw_spin_lock+0x30/0x40 [ 41.982283] [] arcnet_interrupt+0x2c/0x800 [arcnet] [ 41.990063] [] handle_nested_irq+0x8c/0xec [ 41.997027] [] regmap_irq_thread+0x190/0x314 [ 42.004172] [] irq_thread_fn+0x1c/0x34 [ 42.010766] [] irq_thread+0x13c/0x1dc [ 42.017267] [] kthread+0xe4/0xf8 [ 42.023314] [] ret_from_fork+0x14/0x24 [ 42.029903] } [ 42.031648] ... key at: [] __key.42091+0x0/0xfffff0f8 [arcnet] [ 42.039255] ... acquired at: [ 42.042372] [] lock_acquire+0x70/0x90 [ 42.047413] [] _raw_spin_lock_irqsave+0x40/0x54 [ 42.053364] [] arcnet_send_packet+0x60/0x1c0 [arcnet] [ 42.059872] [] packet_direct_xmit+0x130/0x1c8 [ 42.065634] [] packet_sendmsg+0x3b8/0x680 [ 42.071030] [] sock_sendmsg+0x14/0x24 [ 42.076069] [] SyS_sendto+0xb8/0xe0 [ 42.080926] [] SyS_send+0x18/0x20 [ 42.085601] [] ret_fast_syscall+0x0/0x1c [ 42.090918] [ 42.092481] [ 42.092481] stack backtrace: [ 42.097065] CPU: 0 PID: 233 Comm: arcecho Not tainted 4.4.0-00034-gc0ae784 #536 [ 42.104751] Hardware name: Generic AM33XX (Flattened Device Tree) [ 42.111183] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 42.119337] [] (show_stack) from [] (dump_stack+0x8c/0x9c) [ 42.126937] [] (dump_stack) from [] (check_usage+0x4bc/0x63c) [ 42.134815] [] (check_usage) from [] (check_irq_usage+0x58/0xb0) [ 42.142964] [] (check_irq_usage) from [] (__lock_acquire+0x1524/0x20b0) [ 42.151740] [] (__lock_acquire) from [] (lock_acquire+0x70/0x90) [ 42.159886] [] (lock_acquire) from [] (_raw_spin_lock_irqsave+0x40/0x54) [ 42.168768] [] (_raw_spin_lock_irqsave) from [] (arcnet_send_packet+0x60/0x1c0 [arcnet]) [ 42.179115] [] (arcnet_send_packet [arcnet]) from [] (packet_direct_xmit+0x130/0x1c8) [ 42.189182] [] (packet_direct_xmit) from [] (packet_sendmsg+0x3b8/0x680) [ 42.198059] [] (packet_sendmsg) from [] (sock_sendmsg+0x14/0x24) [ 42.206199] [] (sock_sendmsg) from [] (SyS_sendto+0xb8/0xe0) [ 42.213978] [] (SyS_sendto) from [] (SyS_send+0x18/0x20) [ 42.221388] [] (SyS_send) from [] (ret_fast_syscall+0x0/0x1c) Signed-off-by: Michael Grzeschik --- v1 -> v2: removed unneeded zero assignment of flags Signed-off-by: David S. Miller --- drivers/net/arcnet/arcnet.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c index 62ee439d5882..53a1cb551def 100644 --- a/drivers/net/arcnet/arcnet.c +++ b/drivers/net/arcnet/arcnet.c @@ -756,6 +756,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) struct net_device *dev = dev_id; struct arcnet_local *lp; int recbuf, status, diagstatus, didsomething, boguscount; + unsigned long flags; int retval = IRQ_NONE; arc_printk(D_DURING, dev, "\n"); @@ -765,7 +766,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) lp = netdev_priv(dev); BUG_ON(!lp); - spin_lock(&lp->lock); + spin_lock_irqsave(&lp->lock, flags); /* RESET flag was enabled - if device is not running, we must * clear it right away (but nothing else). @@ -774,7 +775,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) if (lp->hw.status(dev) & RESETflag) lp->hw.command(dev, CFLAGScmd | RESETclear); lp->hw.intmask(dev, 0); - spin_unlock(&lp->lock); + spin_unlock_irqrestore(&lp->lock, flags); return retval; } @@ -998,7 +999,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) udelay(1); lp->hw.intmask(dev, lp->intmask); - spin_unlock(&lp->lock); + spin_unlock_irqrestore(&lp->lock, flags); return retval; } EXPORT_SYMBOL(arcnet_interrupt); -- cgit v1.2.3-70-g09d2 From 06908d7aee8d62a80cabfd134d0354dc4d2794bc Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 28 Jun 2017 18:28:34 +0200 Subject: Trivial fix to spelling mistake in arc_printk message Signed-off-by: Colin Ian King Signed-off-by: Michael Grzeschik Signed-off-by: David S. Miller --- drivers/net/arcnet/capmode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/arcnet/capmode.c b/drivers/net/arcnet/capmode.c index 2056878fb087..4fa2e46b48d3 100644 --- a/drivers/net/arcnet/capmode.c +++ b/drivers/net/arcnet/capmode.c @@ -212,7 +212,7 @@ static int ack_tx(struct net_device *dev, int acked) ackpkt->soft.cap.proto = 0; /* using protocol 0 for acknowledge */ ackpkt->soft.cap.mes.ack = acked; - arc_printk(D_PROTO, dev, "Ackknowledge for cap packet %x.\n", + arc_printk(D_PROTO, dev, "Acknowledge for cap packet %x.\n", *((int *)&ackpkt->soft.cap.cookie[0])); ackskb->protocol = cpu_to_be16(ETH_P_ARCNET); -- cgit v1.2.3-70-g09d2 From 0d494fcf867f95040b5b67e4bc5af739bcda37da Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Wed, 28 Jun 2017 18:28:35 +0200 Subject: arcnet: com20020: remove needless base_addr assignment The assignment is superfluous. Signed-off-by: Michael Grzeschik Signed-off-by: David S. Miller --- drivers/net/arcnet/com20020.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/arcnet/com20020.c b/drivers/net/arcnet/com20020.c index 13d9ad4b3f5c..78043a9c5981 100644 --- a/drivers/net/arcnet/com20020.c +++ b/drivers/net/arcnet/com20020.c @@ -246,8 +246,6 @@ int com20020_found(struct net_device *dev, int shared) return -ENODEV; } - dev->base_addr = ioaddr; - arc_printk(D_NORMAL, dev, "%s: station %02Xh found at %03lXh, IRQ %d.\n", lp->card_name, dev->dev_addr[0], dev->base_addr, dev->irq); -- cgit v1.2.3-70-g09d2 From cb108619f2fc77846bf7a7543517f3487f455b24 Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Wed, 28 Jun 2017 18:28:36 +0200 Subject: arcnet: com20020-pci: fix dev_id calculation The dev_id was miscalculated. Only the two bits 4-5 are relevant for the MA1 card. PCIARC1 and PCIFB2 use the four bits 4-7 for id selection. Signed-off-by: Michael Grzeschik Signed-off-by: David S. Miller --- drivers/net/arcnet/com20020-pci.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c index 239de38fbd6a..fec2df2c869f 100644 --- a/drivers/net/arcnet/com20020-pci.c +++ b/drivers/net/arcnet/com20020-pci.c @@ -135,6 +135,7 @@ static int com20020pci_probe(struct pci_dev *pdev, for (i = 0; i < ci->devcount; i++) { struct com20020_pci_channel_map *cm = &ci->chan_map_tbl[i]; struct com20020_dev *card; + int dev_id_mask = 0xf; dev = alloc_arcdev(device); if (!dev) { @@ -179,8 +180,8 @@ static int com20020pci_probe(struct pci_dev *pdev, /* Get the dev_id from the PLX rotary coder */ if (!strncmp(ci->name, "EAE PLX-PCI MA1", 15)) - dev->dev_id = 0xc; - dev->dev_id ^= inb(priv->misc + ci->rotary) >> 4; + dev_id_mask = 0x3; + dev->dev_id = (inb(priv->misc + ci->rotary) >> 4) & dev_id_mask; snprintf(dev->name, sizeof(dev->name), "arc%d-%d", dev->dev_id, i); -- cgit v1.2.3-70-g09d2 From 2a0ea04c83ab82c3852c9171d2fa5cd9a1432c9b Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Wed, 28 Jun 2017 18:28:37 +0200 Subject: arcnet: com20020-pci: add missing pdev setup in netdev structure We add the pdev data to the pci devices netdev structure. This way the interface get consistent device names in the userspace (udev). Signed-off-by: Michael Grzeschik Signed-off-by: David S. Miller --- drivers/net/arcnet/com20020-pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c index fec2df2c869f..47f80b83dcf4 100644 --- a/drivers/net/arcnet/com20020-pci.c +++ b/drivers/net/arcnet/com20020-pci.c @@ -167,6 +167,7 @@ static int com20020pci_probe(struct pci_dev *pdev, arcnet_outb(0x00, ioaddr, COM20020_REG_W_COMMAND); arcnet_inb(ioaddr, COM20020_REG_R_DIAGSTAT); + SET_NETDEV_DEV(dev, &pdev->dev); dev->base_addr = ioaddr; dev->dev_addr[0] = node; dev->irq = pdev->irq; -- cgit v1.2.3-70-g09d2 From 6bdf6abc56b53103324dfd270a86580306e1a232 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 29 Jun 2017 03:04:59 +0200 Subject: bpf: prevent leaking pointer via xadd on unpriviledged Leaking kernel addresses on unpriviledged is generally disallowed, for example, verifier rejects the following: 0: (b7) r0 = 0 1: (18) r2 = 0xffff897e82304400 3: (7b) *(u64 *)(r1 +48) = r2 R2 leaks addr into ctx Doing pointer arithmetic on them is also forbidden, so that they don't turn into unknown value and then get leaked out. However, there's xadd as a special case, where we don't check the src reg for being a pointer register, e.g. the following will pass: 0: (b7) r0 = 0 1: (7b) *(u64 *)(r1 +48) = r0 2: (18) r2 = 0xffff897e82304400 ; map 4: (db) lock *(u64 *)(r1 +48) += r2 5: (95) exit We could store the pointer into skb->cb, loose the type context, and then read it out from there again to leak it eventually out of a map value. Or more easily in a different variant, too: 0: (bf) r6 = r1 1: (7a) *(u64 *)(r10 -8) = 0 2: (bf) r2 = r10 3: (07) r2 += -8 4: (18) r1 = 0x0 6: (85) call bpf_map_lookup_elem#1 7: (15) if r0 == 0x0 goto pc+3 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R6=ctx R10=fp 8: (b7) r3 = 0 9: (7b) *(u64 *)(r0 +0) = r3 10: (db) lock *(u64 *)(r0 +0) += r6 11: (b7) r0 = 0 12: (95) exit from 7 to 11: R0=inv,min_value=0,max_value=0 R6=ctx R10=fp 11: (b7) r0 = 0 12: (95) exit Prevent this by checking xadd src reg for pointer types. Also add a couple of test cases related to this. Fixes: 1be7f75d1668 ("bpf: enable non-root eBPF programs") Fixes: 17a5267067f3 ("bpf: verifier (add verifier core)") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Acked-by: Edward Cree Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 5 +++ tools/testing/selftests/bpf/test_verifier.c | 66 +++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 339c8a1371de..a8a725697bed 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -989,6 +989,11 @@ static int check_xadd(struct bpf_verifier_env *env, struct bpf_insn *insn) if (err) return err; + if (is_pointer_value(env, insn->src_reg)) { + verbose("R%d leaks addr into mem\n", insn->src_reg); + return -EACCES; + } + /* check whether atomic_add can read the memory */ err = check_mem_access(env, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_READ, -1); diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index cabb19b1e371..0ff8c55c0464 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -3748,6 +3748,72 @@ static struct bpf_test tests[] = { .result = REJECT, .errstr = "invalid bpf_context access", }, + { + "leak pointer into ctx 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0])), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_STX_XADD(BPF_DW, BPF_REG_1, BPF_REG_2, + offsetof(struct __sk_buff, cb[0])), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 2 }, + .errstr_unpriv = "R2 leaks addr into mem", + .result_unpriv = REJECT, + .result = ACCEPT, + }, + { + "leak pointer into ctx 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0])), + BPF_STX_XADD(BPF_DW, BPF_REG_1, BPF_REG_10, + offsetof(struct __sk_buff, cb[0])), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "R10 leaks addr into mem", + .result_unpriv = REJECT, + .result = ACCEPT, + }, + { + "leak pointer into ctx 3", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, + offsetof(struct __sk_buff, cb[0])), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 1 }, + .errstr_unpriv = "R2 leaks addr into ctx", + .result_unpriv = REJECT, + .result = ACCEPT, + }, + { + "leak pointer into map val", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 4 }, + .errstr_unpriv = "R6 leaks addr into mem", + .result_unpriv = REJECT, + .result = ACCEPT, + }, { "helper access to map: full range", .insns = { -- cgit v1.2.3-70-g09d2 From e44699d2c28067f69698ccb68dd3ddeacfebc434 Mon Sep 17 00:00:00 2001 From: Michal Kubeček Date: Thu, 29 Jun 2017 11:13:36 +0200 Subject: net: handle NAPI_GRO_FREE_STOLEN_HEAD case also in napi_frags_finish() Recently I started seeing warnings about pages with refcount -1. The problem was traced to packets being reused after their head was merged into a GRO packet by skb_gro_receive(). While bisecting the issue pointed to commit c21b48cc1bbf ("net: adjust skb->truesize in ___pskb_trim()") and I have never seen it on a kernel with it reverted, I believe the real problem appeared earlier when the option to merge head frag in GRO was implemented. Handling NAPI_GRO_FREE_STOLEN_HEAD state was only added to GRO_MERGED_FREE branch of napi_skb_finish() so that if the driver uses napi_gro_frags() and head is merged (which in my case happens after the skb_condense() call added by the commit mentioned above), the skb is reused including the head that has been merged. As a result, we release the page reference twice and eventually end up with negative page refcount. To fix the problem, handle NAPI_GRO_FREE_STOLEN_HEAD in napi_frags_finish() the same way it's done in napi_skb_finish(). Fixes: d7e8883cfcf4 ("net: make GRO aware of skb->head_frag") Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- net/core/dev.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 91bb55070533..416137c64bf8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4767,6 +4767,13 @@ struct packet_offload *gro_find_complete_by_type(__be16 type) } EXPORT_SYMBOL(gro_find_complete_by_type); +static void napi_skb_free_stolen_head(struct sk_buff *skb) +{ + skb_dst_drop(skb); + secpath_reset(skb); + kmem_cache_free(skbuff_head_cache, skb); +} + static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) { switch (ret) { @@ -4780,13 +4787,10 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) break; case GRO_MERGED_FREE: - if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) { - skb_dst_drop(skb); - secpath_reset(skb); - kmem_cache_free(skbuff_head_cache, skb); - } else { + if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) + napi_skb_free_stolen_head(skb); + else __kfree_skb(skb); - } break; case GRO_HELD: @@ -4858,10 +4862,16 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, break; case GRO_DROP: - case GRO_MERGED_FREE: napi_reuse_skb(napi, skb); break; + case GRO_MERGED_FREE: + if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) + napi_skb_free_stolen_head(skb); + else + napi_reuse_skb(napi, skb); + break; + case GRO_MERGED: case GRO_CONSUMED: break; -- cgit v1.2.3-70-g09d2 From d58299a478c416c0b48e4b31c6332fe7beb63000 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 29 Jun 2017 16:50:06 +0100 Subject: sfc: fix attempt to translate invalid filter ID When filter insertion fails with no rollback, we were trying to convert EFX_EF10_FILTER_ID_INVALID to an id to store in 'ids' (which is either vlan->uc or vlan->mc). This would WARN_ON_ONCE and then record a bogus filter ID of 0x1fff, neither of which is a good thing. Fixes: 0ccb998bf46d ("sfc: fix filter_id misinterpretation in edge case") Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef10.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index a8089667fc5b..78f9e43420e0 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -5105,6 +5105,7 @@ static int efx_ef10_filter_insert_addr_list(struct efx_nic *efx, /* Insert/renew filters */ for (i = 0; i < addr_count; i++) { + EFX_WARN_ON_PARANOID(ids[i] != EFX_EF10_FILTER_ID_INVALID); efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, filter_flags, 0); efx_filter_set_eth_local(&spec, vlan->vid, addr_list[i].addr); rc = efx_ef10_filter_insert(efx, &spec, true); @@ -5122,11 +5123,11 @@ static int efx_ef10_filter_insert_addr_list(struct efx_nic *efx, } return rc; } else { - /* mark as not inserted, and carry on */ - rc = EFX_EF10_FILTER_ID_INVALID; + /* keep invalid ID, and carry on */ } + } else { + ids[i] = efx_ef10_filter_get_unsafe_id(rc); } - ids[i] = efx_ef10_filter_get_unsafe_id(rc); } if (multicast && rollback) { -- cgit v1.2.3-70-g09d2