From e551c32d57c88923f99f8f010e89ca7ed0735e83 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 28 Oct 2016 13:40:24 -0700 Subject: net: clear sk_err_soft in sk_clone_lock() At accept() time, it is possible the parent has a non zero sk_err_soft, leftover from a prior error. Make sure we do not leave this value in the child, as it makes future getsockopt(SO_ERROR) calls quite unreliable. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/core/sock.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/core') diff --git a/net/core/sock.c b/net/core/sock.c index c73e28fc9c2a..df171acfe232 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1543,6 +1543,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL); newsk->sk_err = 0; + newsk->sk_err_soft = 0; newsk->sk_priority = 0; newsk->sk_incoming_cpu = raw_smp_processor_id(); atomic64_set(&newsk->sk_cookie, 0); -- cgit v1.2.3-70-g09d2 From 4f2e4ad56a65f3b7d64c258e373cb71e8d2499f4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 29 Oct 2016 11:02:36 -0700 Subject: net: mangle zero checksum in skb_checksum_help() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sending zero checksum is ok for TCP, but not for UDP. UDPv6 receiver should by default drop a frame with a 0 checksum, and UDPv4 would not verify the checksum and might accept a corrupted packet. Simply replace such checksum by 0xffff, regardless of transport. This error was caught on SIT tunnels, but seems generic. Signed-off-by: Eric Dumazet Cc: Maciej Żenczykowski Cc: Willem de Bruijn Acked-by: Maciej Żenczykowski Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 820bac239738..eaad4c28069f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2484,7 +2484,7 @@ int skb_checksum_help(struct sk_buff *skb) goto out; } - *(__sum16 *)(skb->data + offset) = csum_fold(csum); + *(__sum16 *)(skb->data + offset) = csum_fold(csum) ?: CSUM_MANGLED_0; out_set_summed: skb->ip_summed = CHECKSUM_NONE; out: -- cgit v1.2.3-70-g09d2 From c3f24cfb3e508c70c26ee8569d537c8ca67a36c6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Nov 2016 17:14:41 -0700 Subject: dccp: do not release listeners too soon Andrey Konovalov reported following error while fuzzing with syzkaller : IPv4: Attempt to release alive inet socket ffff880068e98940 kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN Modules linked in: CPU: 1 PID: 3905 Comm: a.out Not tainted 4.9.0-rc3+ #333 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff88006b9e0000 task.stack: ffff880068770000 RIP: 0010:[] [] selinux_socket_sock_rcv_skb+0xff/0x6a0 security/selinux/hooks.c:4639 RSP: 0018:ffff8800687771c8 EFLAGS: 00010202 RAX: ffff88006b9e0000 RBX: 1ffff1000d0eee3f RCX: 1ffff1000d1d312a RDX: 1ffff1000d1d31a6 RSI: dffffc0000000000 RDI: 0000000000000010 RBP: ffff880068777360 R08: 0000000000000000 R09: 0000000000000002 R10: dffffc0000000000 R11: 0000000000000006 R12: ffff880068e98940 R13: 0000000000000002 R14: ffff880068777338 R15: 0000000000000000 FS: 00007f00ff760700(0000) GS:ffff88006cd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020008000 CR3: 000000006a308000 CR4: 00000000000006e0 Stack: ffff8800687771e0 ffffffff812508a5 ffff8800686f3168 0000000000000007 ffff88006ac8cdfc ffff8800665ea500 0000000041b58ab3 ffffffff847b5480 ffffffff819eac60 ffff88006b9e0860 ffff88006b9e0868 ffff88006b9e07f0 Call Trace: [] security_sock_rcv_skb+0x75/0xb0 security/security.c:1317 [] sk_filter_trim_cap+0x67/0x10e0 net/core/filter.c:81 [] __sk_receive_skb+0x30/0xa00 net/core/sock.c:460 [] dccp_v4_rcv+0xdb2/0x1910 net/dccp/ipv4.c:873 [] ip_local_deliver_finish+0x332/0xad0 net/ipv4/ip_input.c:216 [< inline >] NF_HOOK_THRESH ./include/linux/netfilter.h:232 [< inline >] NF_HOOK ./include/linux/netfilter.h:255 [] ip_local_deliver+0x1c2/0x4b0 net/ipv4/ip_input.c:257 [< inline >] dst_input ./include/net/dst.h:507 [] ip_rcv_finish+0x750/0x1c40 net/ipv4/ip_input.c:396 [< inline >] NF_HOOK_THRESH ./include/linux/netfilter.h:232 [< inline >] NF_HOOK ./include/linux/netfilter.h:255 [] ip_rcv+0x96f/0x12f0 net/ipv4/ip_input.c:487 [] __netif_receive_skb_core+0x1897/0x2a50 net/core/dev.c:4213 [] __netif_receive_skb+0x2a/0x170 net/core/dev.c:4251 [] netif_receive_skb_internal+0x1b3/0x390 net/core/dev.c:4279 [] netif_receive_skb+0x48/0x250 net/core/dev.c:4303 [] tun_get_user+0xbd5/0x28a0 drivers/net/tun.c:1308 [] tun_chr_write_iter+0xda/0x190 drivers/net/tun.c:1332 [< inline >] new_sync_write fs/read_write.c:499 [] __vfs_write+0x334/0x570 fs/read_write.c:512 [] vfs_write+0x17b/0x500 fs/read_write.c:560 [< inline >] SYSC_write fs/read_write.c:607 [] SyS_write+0xd4/0x1a0 fs/read_write.c:599 [] entry_SYSCALL_64_fastpath+0x1f/0xc2 It turns out DCCP calls __sk_receive_skb(), and this broke when lookups no longer took a reference on listeners. Fix this issue by adding a @refcounted parameter to __sk_receive_skb(), so that sock_put() is used only when needed. Fixes: 3b24d854cb35 ("tcp/dccp: do not touch listener sk_refcnt under synflood") Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: David S. Miller --- include/net/sock.h | 4 ++-- net/core/sock.c | 5 +++-- net/dccp/ipv4.c | 2 +- net/dccp/ipv6.c | 3 ++- 4 files changed, 8 insertions(+), 6 deletions(-) (limited to 'net/core') diff --git a/include/net/sock.h b/include/net/sock.h index 73c6b008f1b7..92b269709b9a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1596,11 +1596,11 @@ static inline void sock_put(struct sock *sk) void sock_gen_put(struct sock *sk); int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested, - unsigned int trim_cap); + unsigned int trim_cap, bool refcounted); static inline int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) { - return __sk_receive_skb(sk, skb, nested, 1); + return __sk_receive_skb(sk, skb, nested, 1, true); } static inline void sk_tx_queue_set(struct sock *sk, int tx_queue) diff --git a/net/core/sock.c b/net/core/sock.c index df171acfe232..5e3ca414357e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -453,7 +453,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) EXPORT_SYMBOL(sock_queue_rcv_skb); int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, - const int nested, unsigned int trim_cap) + const int nested, unsigned int trim_cap, bool refcounted) { int rc = NET_RX_SUCCESS; @@ -487,7 +487,8 @@ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, bh_unlock_sock(sk); out: - sock_put(sk); + if (refcounted) + sock_put(sk); return rc; discard_and_relse: kfree_skb(skb); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 345a3aeb8c7e..dff7cfab1da4 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -868,7 +868,7 @@ lookup: goto discard_and_relse; nf_reset(skb); - return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4); + return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4, refcounted); no_dccp_socket: if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 3828f94b234c..09c4e19aa285 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -738,7 +738,8 @@ lookup: if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; - return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4) ? -1 : 0; + return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4, + refcounted) ? -1 : 0; no_dccp_socket: if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) -- cgit v1.2.3-70-g09d2 From f567e950bf51290755a2539ff2aaef4c26f735d3 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Mon, 7 Nov 2016 23:22:19 +0100 Subject: rtnl: reset calcit fptr in rtnl_unregister() To avoid having dangling function pointers left behind, reset calcit in rtnl_unregister(), too. This is no issue so far, as only the rtnl core registers a netlink handler with a calcit hook which won't be unregistered, but may become one if new code makes use of the calcit hook. Fixes: c7ac8679bec9 ("rtnetlink: Compute and store minimum ifinfo...") Cc: Jeff Kirsher Cc: Greg Rose Signed-off-by: Mathias Krause Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/core') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index fb7348f13501..db313ec7af32 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -275,6 +275,7 @@ int rtnl_unregister(int protocol, int msgtype) rtnl_msg_handlers[protocol][msgindex].doit = NULL; rtnl_msg_handlers[protocol][msgindex].dumpit = NULL; + rtnl_msg_handlers[protocol][msgindex].calcit = NULL; return 0; } -- cgit v1.2.3-70-g09d2 From 4e3264d21b90984c2165e8fe5a7b64cf25bc2c2d Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 9 Nov 2016 15:36:33 -0800 Subject: bpf: Fix bpf_redirect to an ipip/ip6tnl dev If the bpf program calls bpf_redirect(dev, 0) and dev is an ipip/ip6tnl, it currently includes the mac header. e.g. If dev is ipip, the end result is IP-EthHdr-IP instead of IP-IP. The fix is to pull the mac header. At ingress, skb_postpull_rcsum() is not needed because the ethhdr should have been pulled once already and then got pushed back just before calling the bpf_prog. At egress, this patch calls skb_postpull_rcsum(). If bpf_redirect(dev, BPF_F_INGRESS) is called, it also fails now because it calls dev_forward_skb() which eventually calls eth_type_trans(skb, dev). The eth_type_trans() will set skb->type = PACKET_OTHERHOST because the mac address does not match the redirecting dev->dev_addr. The PACKET_OTHERHOST will eventually cause the ip_rcv() errors out. To fix this, ____dev_forward_skb() is added. Joint work with Daniel Borkmann. Fixes: cfc7381b3002 ("ip_tunnel: add collect_md mode to IPIP tunnel") Fixes: 8d79266bc48c ("ip6_tunnel: add collect_md mode to IPv6 tunnels") Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Martin KaFai Lau Signed-off-by: David S. Miller --- include/linux/netdevice.h | 15 +++++++++++ net/core/dev.c | 17 +++++------- net/core/filter.c | 68 +++++++++++++++++++++++++++++++++++++++++------ 3 files changed, 81 insertions(+), 19 deletions(-) (limited to 'net/core') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 91ee3643ccc8..bf04a46f6d5b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3354,6 +3354,21 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); bool is_skb_forwardable(const struct net_device *dev, const struct sk_buff *skb); +static __always_inline int ____dev_forward_skb(struct net_device *dev, + struct sk_buff *skb) +{ + if (skb_orphan_frags(skb, GFP_ATOMIC) || + unlikely(!is_skb_forwardable(dev, skb))) { + atomic_long_inc(&dev->rx_dropped); + kfree_skb(skb); + return NET_RX_DROP; + } + + skb_scrub_packet(skb, true); + skb->priority = 0; + return 0; +} + void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev); extern int netdev_budget; diff --git a/net/core/dev.c b/net/core/dev.c index eaad4c28069f..6666b28b6815 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1766,19 +1766,14 @@ EXPORT_SYMBOL_GPL(is_skb_forwardable); int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb) { - if (skb_orphan_frags(skb, GFP_ATOMIC) || - unlikely(!is_skb_forwardable(dev, skb))) { - atomic_long_inc(&dev->rx_dropped); - kfree_skb(skb); - return NET_RX_DROP; - } + int ret = ____dev_forward_skb(dev, skb); - skb_scrub_packet(skb, true); - skb->priority = 0; - skb->protocol = eth_type_trans(skb, dev); - skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); + if (likely(!ret)) { + skb->protocol = eth_type_trans(skb, dev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); + } - return 0; + return ret; } EXPORT_SYMBOL_GPL(__dev_forward_skb); diff --git a/net/core/filter.c b/net/core/filter.c index 00351cdf7d0c..b391209838ef 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1628,6 +1628,19 @@ static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb) return dev_forward_skb(dev, skb); } +static inline int __bpf_rx_skb_no_mac(struct net_device *dev, + struct sk_buff *skb) +{ + int ret = ____dev_forward_skb(dev, skb); + + if (likely(!ret)) { + skb->dev = dev; + ret = netif_rx(skb); + } + + return ret; +} + static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) { int ret; @@ -1647,6 +1660,51 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) return ret; } +static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev, + u32 flags) +{ + /* skb->mac_len is not set on normal egress */ + unsigned int mlen = skb->network_header - skb->mac_header; + + __skb_pull(skb, mlen); + + /* At ingress, the mac header has already been pulled once. + * At egress, skb_pospull_rcsum has to be done in case that + * the skb is originated from ingress (i.e. a forwarded skb) + * to ensure that rcsum starts at net header. + */ + if (!skb_at_tc_ingress(skb)) + skb_postpull_rcsum(skb, skb_mac_header(skb), mlen); + skb_pop_mac_header(skb); + skb_reset_mac_len(skb); + return flags & BPF_F_INGRESS ? + __bpf_rx_skb_no_mac(dev, skb) : __bpf_tx_skb(dev, skb); +} + +static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev, + u32 flags) +{ + bpf_push_mac_rcsum(skb); + return flags & BPF_F_INGRESS ? + __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb); +} + +static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev, + u32 flags) +{ + switch (dev->type) { + case ARPHRD_TUNNEL: + case ARPHRD_TUNNEL6: + case ARPHRD_SIT: + case ARPHRD_IPGRE: + case ARPHRD_VOID: + case ARPHRD_NONE: + return __bpf_redirect_no_mac(skb, dev, flags); + default: + return __bpf_redirect_common(skb, dev, flags); + } +} + BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags) { struct net_device *dev; @@ -1675,10 +1733,7 @@ BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags) return -ENOMEM; } - bpf_push_mac_rcsum(clone); - - return flags & BPF_F_INGRESS ? - __bpf_rx_skb(dev, clone) : __bpf_tx_skb(dev, clone); + return __bpf_redirect(clone, dev, flags); } static const struct bpf_func_proto bpf_clone_redirect_proto = { @@ -1722,10 +1777,7 @@ int skb_do_redirect(struct sk_buff *skb) return -EINVAL; } - bpf_push_mac_rcsum(skb); - - return ri->flags & BPF_F_INGRESS ? - __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb); + return __bpf_redirect(skb, dev, ri->flags); } static const struct bpf_func_proto bpf_redirect_proto = { -- cgit v1.2.3-70-g09d2 From 34fad54c2537f7c99d07375e50cb30aa3c23bd83 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Nov 2016 16:04:46 -0800 Subject: net: __skb_flow_dissect() must cap its return value After Tom patch, thoff field could point past the end of the buffer, this could fool some callers. If an skb was provided, skb->len should be the upper limit. If not, hlen is supposed to be the upper limit. Fixes: a6e544b0a88b ("flow_dissector: Jump to exit code in __skb_flow_dissect") Signed-off-by: Eric Dumazet Reported-by: Yibin Yang Acked-by: Willem de Bruijn Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/flow_dissector.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'net/core') diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index ab193e5def07..69e4463a4b1b 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -122,7 +122,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector_key_keyid *key_keyid; bool skip_vlan = false; u8 ip_proto = 0; - bool ret = false; + bool ret; if (!data) { data = skb->data; @@ -549,12 +549,17 @@ ip_proto_again: out_good: ret = true; -out_bad: + key_control->thoff = (u16)nhoff; +out: key_basic->n_proto = proto; key_basic->ip_proto = ip_proto; - key_control->thoff = (u16)nhoff; return ret; + +out_bad: + ret = false; + key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen); + goto out; } EXPORT_SYMBOL(__skb_flow_dissect); -- cgit v1.2.3-70-g09d2