From bc8e71314e8444c6315c482441f3204c032ab327 Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@mellanox.com>
Date: Sun, 12 Apr 2020 11:45:47 +0300
Subject: netfilter: flowtable: Free block_cb when being deleted

Free block_cb memory when asked to be deleted.

Fixes: 978703f42549 ("netfilter: flowtable: Add API for registering to flow table events")
Signed-off-by: Roi Dayan <roid@mellanox.com>
Reviewed-by: Paul Blakey <paulb@mellanox.com>
Reviewed-by: Oz Shlomo <ozsh@mellanox.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_flow_table_core.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index c0cb79495c35..4344e572b7f9 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -421,10 +421,12 @@ void nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table,
 
 	down_write(&flow_table->flow_block_lock);
 	block_cb = flow_block_cb_lookup(block, cb, cb_priv);
-	if (block_cb)
+	if (block_cb) {
 		list_del(&block_cb->list);
-	else
+		flow_block_cb_free(block_cb);
+	} else {
 		WARN_ON(true);
+	}
 	up_write(&flow_table->flow_block_lock);
 }
 EXPORT_SYMBOL_GPL(nf_flow_table_offload_del_cb);
-- 
cgit v1.2.3-70-g09d2


From d03f228470a8c0a22b774d1f8d47071e0de4f6dd Mon Sep 17 00:00:00 2001
From: Xiyu Yang <xiyuyang19@fudan.edu.cn>
Date: Wed, 15 Apr 2020 16:36:19 +0800
Subject: net: netrom: Fix potential nr_neigh refcnt leak in nr_add_node

nr_add_node() invokes nr_neigh_get_dev(), which returns a local
reference of the nr_neigh object to "nr_neigh" with increased refcnt.

When nr_add_node() returns, "nr_neigh" becomes invalid, so the refcount
should be decreased to keep refcount balanced.

The issue happens in one normal path of nr_add_node(), which forgets to
decrease the refcnt increased by nr_neigh_get_dev() and causes a refcnt
leak. It should decrease the refcnt before the function returns like
other normal paths do.

Fix this issue by calling nr_neigh_put() before the nr_add_node()
returns.

Signed-off-by: Xiyu Yang <xiyuyang19@fudan.edu.cn>
Signed-off-by: Xin Tan <tanxin.ctf@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netrom/nr_route.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index 79f12d8c7b86..0891ee02ca4f 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -208,6 +208,7 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic,
 		/* refcount initialized at 1 */
 		spin_unlock_bh(&nr_node_list_lock);
 
+		nr_neigh_put(nr_neigh);
 		return 0;
 	}
 	nr_node_lock(nr_node);
-- 
cgit v1.2.3-70-g09d2


From 441870ee4240cf67b5d3ab8e16216a9ff42eb5d6 Mon Sep 17 00:00:00 2001
From: Xiyu Yang <xiyuyang19@fudan.edu.cn>
Date: Wed, 15 Apr 2020 16:39:56 +0800
Subject: tipc: Fix potential tipc_aead refcnt leak in tipc_crypto_rcv

tipc_crypto_rcv() invokes tipc_aead_get(), which returns a reference of
the tipc_aead object to "aead" with increased refcnt.

When tipc_crypto_rcv() returns, the original local reference of "aead"
becomes invalid, so the refcount should be decreased to keep refcount
balanced.

The issue happens in one error path of tipc_crypto_rcv(). When TIPC
message decryption status is EINPROGRESS or EBUSY, the function forgets
to decrease the refcnt increased by tipc_aead_get() and causes a refcnt
leak.

Fix this issue by calling tipc_aead_put() on the error path when TIPC
message decryption status is EINPROGRESS or EBUSY.

Signed-off-by: Xiyu Yang <xiyuyang19@fudan.edu.cn>
Signed-off-by: Xin Tan <tanxin.ctf@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/crypto.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index c8c47fc72653..8c47ded2edb6 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -1712,6 +1712,7 @@ exit:
 	case -EBUSY:
 		this_cpu_inc(stats->stat[STAT_ASYNC]);
 		*skb = NULL;
+		tipc_aead_put(aead);
 		return rc;
 	default:
 		this_cpu_inc(stats->stat[STAT_NOK]);
-- 
cgit v1.2.3-70-g09d2


From de058420767df21e2b6b0f3bb36d1616fb962032 Mon Sep 17 00:00:00 2001
From: Xiyu Yang <xiyuyang19@fudan.edu.cn>
Date: Wed, 15 Apr 2020 16:40:28 +0800
Subject: tipc: Fix potential tipc_node refcnt leak in tipc_rcv

tipc_rcv() invokes tipc_node_find() twice, which returns a reference of
the specified tipc_node object to "n" with increased refcnt.

When tipc_rcv() returns or a new object is assigned to "n", the original
local reference of "n" becomes invalid, so the refcount should be
decreased to keep refcount balanced.

The issue happens in some paths of tipc_rcv(), which forget to decrease
the refcnt increased by tipc_node_find() and will cause a refcnt leak.

Fix this issue by calling tipc_node_put() before the original object
pointed by "n" becomes invalid.

Signed-off-by: Xiyu Yang <xiyuyang19@fudan.edu.cn>
Signed-off-by: Xin Tan <tanxin.ctf@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/node.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/tipc/node.c b/net/tipc/node.c
index 10292c942384..803a3a6d0f50 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -2038,6 +2038,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
 		n = tipc_node_find_by_id(net, ehdr->id);
 	}
 	tipc_crypto_rcv(net, (n) ? n->crypto_rx : NULL, &skb, b);
+	tipc_node_put(n);
 	if (!skb)
 		return;
 
@@ -2090,7 +2091,7 @@ rcv:
 	/* Check/update node state before receiving */
 	if (unlikely(skb)) {
 		if (unlikely(skb_linearize(skb)))
-			goto discard;
+			goto out_node_put;
 		tipc_node_write_lock(n);
 		if (tipc_node_check_state(n, skb, bearer_id, &xmitq)) {
 			if (le->link) {
@@ -2119,6 +2120,7 @@ rcv:
 	if (!skb_queue_empty(&xmitq))
 		tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr, n);
 
+out_node_put:
 	tipc_node_put(n);
 discard:
 	kfree_skb(skb);
-- 
cgit v1.2.3-70-g09d2


From 62e697767fac598518bc687d3e0dafdd7c2f09f5 Mon Sep 17 00:00:00 2001
From: Alexander Aring <alex.aring@gmail.com>
Date: Wed, 15 Apr 2020 09:06:53 -0400
Subject: ipv6: rpl: fix full address compression

This patch makes it impossible that cmpri or cmpre values are set to the
value 16 which is not possible, because these are 4 bit values. We
currently run in an overflow when assigning the value 16 to it.

According to the standard a value of 16 can be interpreted as a full
elided address which isn't possible to set as compression value. A reason
why this cannot be set is that the current ipv6 header destination address
should never show up inside the segments of the rpl header. In this case we
run in a overflow and the address will have no compression at all. Means
cmpri or compre is set to 0.

As we handle cmpri and cmpre sometimes as unsigned char or 4 bit value
inside the rpl header the current behaviour ends in an invalid header
format. This patch simple use the best compression method if we ever run
into the case that the destination address is showed up inside the rpl
segments. We avoid the overflow handling and the rpl header is still valid,
even when we have the destination address inside the rpl segments.

Signed-off-by: Alexander Aring <alex.aring@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/rpl.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/rpl.c b/net/ipv6/rpl.c
index d38b476fc7f2..307f336b5353 100644
--- a/net/ipv6/rpl.c
+++ b/net/ipv6/rpl.c
@@ -8,6 +8,7 @@
 #include <net/rpl.h>
 
 #define IPV6_PFXTAIL_LEN(x) (sizeof(struct in6_addr) - (x))
+#define IPV6_RPL_BEST_ADDR_COMPRESSION 15
 
 static void ipv6_rpl_addr_decompress(struct in6_addr *dst,
 				     const struct in6_addr *daddr,
@@ -73,7 +74,7 @@ static unsigned char ipv6_rpl_srh_calc_cmpri(const struct ipv6_rpl_sr_hdr *inhdr
 		}
 	}
 
-	return plen;
+	return IPV6_RPL_BEST_ADDR_COMPRESSION;
 }
 
 static unsigned char ipv6_rpl_srh_calc_cmpre(const struct in6_addr *daddr,
@@ -83,10 +84,10 @@ static unsigned char ipv6_rpl_srh_calc_cmpre(const struct in6_addr *daddr,
 
 	for (plen = 0; plen < sizeof(*daddr); plen++) {
 		if (daddr->s6_addr[plen] != last_segment->s6_addr[plen])
-			break;
+			return plen;
 	}
 
-	return plen;
+	return IPV6_RPL_BEST_ADDR_COMPRESSION;
 }
 
 void ipv6_rpl_srh_compress(struct ipv6_rpl_sr_hdr *outhdr,
-- 
cgit v1.2.3-70-g09d2


From df1036da90108b1a9969721beab34f4c76228bcc Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 17 Apr 2020 09:28:22 +0200
Subject: mptcp: fix splat when incoming connection is never accepted before
 exit/close

Following snippet (replicated from syzkaller reproducer) generates
warning: "IPv4: Attempt to release TCP socket in state 1".

int main(void) {
 struct sockaddr_in sin1 = { .sin_family = 2, .sin_port = 0x4e20,
                             .sin_addr.s_addr = 0x010000e0, };
 struct sockaddr_in sin2 = { .sin_family = 2,
	                     .sin_addr.s_addr = 0x0100007f, };
 struct sockaddr_in sin3 = { .sin_family = 2, .sin_port = 0x4e20,
	                     .sin_addr.s_addr = 0x0100007f, };
 int r0 = socket(0x2, 0x1, 0x106);
 int r1 = socket(0x2, 0x1, 0x106);

 bind(r1, (void *)&sin1, sizeof(sin1));
 connect(r1, (void *)&sin2, sizeof(sin2));
 listen(r1, 3);
 return connect(r0, (void *)&sin3, 0x4d);
}

Reason is that the newly generated mptcp socket is closed via the ulp
release of the tcp listener socket when its accept backlog gets purged.

To fix this, delay setting the ESTABLISHED state until after userspace
calls accept and via mptcp specific destructor.

Fixes: 58b09919626bf ("mptcp: create msk early")
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/9
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c |  1 +
 net/mptcp/subflow.c  | 25 ++++++++++++++++++++++++-
 2 files changed, 25 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 9936e33ac351..1c8b021b4537 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1431,6 +1431,7 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
 		newsk = new_mptcp_sock;
 		mptcp_copy_inaddrs(newsk, ssk);
 		list_add(&subflow->node, &msk->conn_list);
+		inet_sk_state_store(newsk, TCP_ESTABLISHED);
 
 		bh_unlock_sock(new_mptcp_sock);
 
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 50a8bea987c6..57a836fe4988 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -347,6 +347,29 @@ static bool subflow_hmac_valid(const struct request_sock *req,
 	return ret;
 }
 
+static void mptcp_sock_destruct(struct sock *sk)
+{
+	/* if new mptcp socket isn't accepted, it is free'd
+	 * from the tcp listener sockets request queue, linked
+	 * from req->sk.  The tcp socket is released.
+	 * This calls the ULP release function which will
+	 * also remove the mptcp socket, via
+	 * sock_put(ctx->conn).
+	 *
+	 * Problem is that the mptcp socket will not be in
+	 * SYN_RECV state and doesn't have SOCK_DEAD flag.
+	 * Both result in warnings from inet_sock_destruct.
+	 */
+
+	if (sk->sk_state == TCP_SYN_RECV) {
+		sk->sk_state = TCP_CLOSE;
+		WARN_ON_ONCE(sk->sk_socket);
+		sock_orphan(sk);
+	}
+
+	inet_sock_destruct(sk);
+}
+
 static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 					  struct sk_buff *skb,
 					  struct request_sock *req,
@@ -422,7 +445,7 @@ create_child:
 			/* new mpc subflow takes ownership of the newly
 			 * created mptcp socket
 			 */
-			inet_sk_state_store(new_msk, TCP_ESTABLISHED);
+			new_msk->sk_destruct = mptcp_sock_destruct;
 			mptcp_pm_new_connection(mptcp_sk(new_msk), 1);
 			ctx->conn = new_msk;
 			new_msk = NULL;
-- 
cgit v1.2.3-70-g09d2


From 9f5ca6a59816b406230adc440b6bb684fda90abe Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 17 Apr 2020 09:28:23 +0200
Subject: mptcp: fix 'Attempt to release TCP socket in state' warnings

We need to set sk_state to CLOSED, else we will get following:

IPv4: Attempt to release TCP socket in state 3 00000000b95f109e
IPv4: Attempt to release TCP socket in state 10 00000000b95f109e

First one is from inet_sock_destruct(), second one from
mptcp_sk_clone failure handling.  Setting sk_state to CLOSED isn't
enough, we also need to orphan sk so it has DEAD flag set.
Otherwise, a very similar warning is printed from inet_sock_destruct().

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 7 +++++--
 net/mptcp/subflow.c  | 8 +++++++-
 2 files changed, 12 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 1c8b021b4537..7e816c733ccb 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1355,12 +1355,15 @@ struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req)
 	msk->subflow = NULL;
 
 	if (unlikely(mptcp_token_new_accept(subflow_req->token, nsk))) {
+		nsk->sk_state = TCP_CLOSE;
 		bh_unlock_sock(nsk);
 
 		/* we can't call into mptcp_close() here - possible BH context
-		 * free the sock directly
+		 * free the sock directly.
+		 * sk_clone_lock() sets nsk refcnt to two, hence call sk_free()
+		 * too.
 		 */
-		nsk->sk_prot->destroy(nsk);
+		sk_common_release(nsk);
 		sk_free(nsk);
 		return NULL;
 	}
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 57a836fe4988..bc46b5091b9d 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -370,6 +370,12 @@ static void mptcp_sock_destruct(struct sock *sk)
 	inet_sock_destruct(sk);
 }
 
+static void mptcp_force_close(struct sock *sk)
+{
+	inet_sk_state_store(sk, TCP_CLOSE);
+	sk_common_release(sk);
+}
+
 static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 					  struct sk_buff *skb,
 					  struct request_sock *req,
@@ -467,7 +473,7 @@ create_child:
 out:
 	/* dispose of the left over mptcp master, if any */
 	if (unlikely(new_msk))
-		sock_put(new_msk);
+		mptcp_force_close(new_msk);
 	return child;
 
 close_child:
-- 
cgit v1.2.3-70-g09d2


From b4faef1739dd1f3b3981b8bf173a2266ea86b1eb Mon Sep 17 00:00:00 2001
From: Hillf Danton <hdanton@sina.com>
Date: Sat, 18 Apr 2020 16:28:32 +0800
Subject: netfilter: nat: fix error handling upon registering inet hook

A case of warning was reported by syzbot.

------------[ cut here ]------------
WARNING: CPU: 0 PID: 19934 at net/netfilter/nf_nat_core.c:1106
nf_nat_unregister_fn+0x532/0x5c0 net/netfilter/nf_nat_core.c:1106
Kernel panic - not syncing: panic_on_warn set ...
CPU: 0 PID: 19934 Comm: syz-executor.5 Not tainted 5.6.0-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x188/0x20d lib/dump_stack.c:118
 panic+0x2e3/0x75c kernel/panic.c:221
 __warn.cold+0x2f/0x35 kernel/panic.c:582
 report_bug+0x27b/0x2f0 lib/bug.c:195
 fixup_bug arch/x86/kernel/traps.c:175 [inline]
 fixup_bug arch/x86/kernel/traps.c:170 [inline]
 do_error_trap+0x12b/0x220 arch/x86/kernel/traps.c:267
 do_invalid_op+0x32/0x40 arch/x86/kernel/traps.c:286
 invalid_op+0x23/0x30 arch/x86/entry/entry_64.S:1027
RIP: 0010:nf_nat_unregister_fn+0x532/0x5c0 net/netfilter/nf_nat_core.c:1106
Code: ff df 48 c1 ea 03 80 3c 02 00 75 75 48 8b 44 24 10 4c 89 ef 48 c7 00 00 00 00 00 e8 e8 f8 53 fb e9 4d fe ff ff e8 ee 9c 16 fb <0f> 0b e9 41 fe ff ff e8 e2 45 54 fb e9 b5 fd ff ff 48 8b 7c 24 20
RSP: 0018:ffffc90005487208 EFLAGS: 00010246
RAX: 0000000000040000 RBX: 0000000000000004 RCX: ffffc9001444a000
RDX: 0000000000040000 RSI: ffffffff865c94a2 RDI: 0000000000000005
RBP: ffff88808b5cf000 R08: ffff8880a2620140 R09: fffffbfff14bcd79
R10: ffffc90005487208 R11: fffffbfff14bcd78 R12: 0000000000000000
R13: 0000000000000001 R14: 0000000000000001 R15: 0000000000000000
 nf_nat_ipv6_unregister_fn net/netfilter/nf_nat_proto.c:1017 [inline]
 nf_nat_inet_register_fn net/netfilter/nf_nat_proto.c:1038 [inline]
 nf_nat_inet_register_fn+0xfc/0x140 net/netfilter/nf_nat_proto.c:1023
 nf_tables_register_hook net/netfilter/nf_tables_api.c:224 [inline]
 nf_tables_addchain.constprop.0+0x82e/0x13c0 net/netfilter/nf_tables_api.c:1981
 nf_tables_newchain+0xf68/0x16a0 net/netfilter/nf_tables_api.c:2235
 nfnetlink_rcv_batch+0x83a/0x1610 net/netfilter/nfnetlink.c:433
 nfnetlink_rcv_skb_batch net/netfilter/nfnetlink.c:543 [inline]
 nfnetlink_rcv+0x3af/0x420 net/netfilter/nfnetlink.c:561
 netlink_unicast_kernel net/netlink/af_netlink.c:1303 [inline]
 netlink_unicast+0x537/0x740 net/netlink/af_netlink.c:1329
 netlink_sendmsg+0x882/0xe10 net/netlink/af_netlink.c:1918
 sock_sendmsg_nosec net/socket.c:652 [inline]
 sock_sendmsg+0xcf/0x120 net/socket.c:672
 ____sys_sendmsg+0x6bf/0x7e0 net/socket.c:2362
 ___sys_sendmsg+0x100/0x170 net/socket.c:2416
 __sys_sendmsg+0xec/0x1b0 net/socket.c:2449
 do_syscall_64+0xf6/0x7d0 arch/x86/entry/common.c:295
 entry_SYSCALL_64_after_hwframe+0x49/0xb3

and to quiesce it, unregister NFPROTO_IPV6 hook instead of NFPROTO_INET
in case of failing to register NFPROTO_IPV4 hook.

Reported-by: syzbot <syzbot+33e06702fd6cffc24c40@syzkaller.appspotmail.com>
Fixes: d164385ec572 ("netfilter: nat: add inet family nat support")
Cc: Florian Westphal <fw@strlen.de>
Cc: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: Hillf Danton <hdanton@sina.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_nat_proto.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c
index 64eedc17037a..3d816a1e5442 100644
--- a/net/netfilter/nf_nat_proto.c
+++ b/net/netfilter/nf_nat_proto.c
@@ -1035,8 +1035,8 @@ int nf_nat_inet_register_fn(struct net *net, const struct nf_hook_ops *ops)
 	ret = nf_nat_register_fn(net, NFPROTO_IPV4, ops, nf_nat_ipv4_ops,
 				 ARRAY_SIZE(nf_nat_ipv4_ops));
 	if (ret)
-		nf_nat_ipv6_unregister_fn(net, ops);
-
+		nf_nat_unregister_fn(net, NFPROTO_IPV6, ops,
+					ARRAY_SIZE(nf_nat_ipv6_ops));
 	return ret;
 }
 EXPORT_SYMBOL_GPL(nf_nat_inet_register_fn);
-- 
cgit v1.2.3-70-g09d2


From 27de77cec985233bdf6546437b9761853265c505 Mon Sep 17 00:00:00 2001
From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Date: Fri, 17 Apr 2020 02:57:31 +0800
Subject: net: openvswitch: ovs_ct_exit to be done under ovs_lock

syzbot wrote:
| =============================
| WARNING: suspicious RCU usage
| 5.7.0-rc1+ #45 Not tainted
| -----------------------------
| net/openvswitch/conntrack.c:1898 RCU-list traversed in non-reader section!!
|
| other info that might help us debug this:
| rcu_scheduler_active = 2, debug_locks = 1
| ...
|
| stack backtrace:
| Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-0-ga698c8995f-prebuilt.qemu.org 04/01/2014
| Workqueue: netns cleanup_net
| Call Trace:
| ...
| ovs_ct_exit
| ovs_exit_net
| ops_exit_list.isra.7
| cleanup_net
| process_one_work
| worker_thread

To avoid that warning, invoke the ovs_ct_exit under ovs_lock and add
lockdep_ovsl_is_held as optional lockdep expression.

Link: https://lore.kernel.org/lkml/000000000000e642a905a0cbee6e@google.com
Fixes: 11efd5cb04a1 ("openvswitch: Support conntrack zone limit")
Cc: Pravin B Shelar <pshelar@ovn.org>
Cc: Yi-Hung Wei <yihung.wei@gmail.com>
Reported-by: syzbot+7ef50afd3a211f879112@syzkaller.appspotmail.com
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/conntrack.c | 3 ++-
 net/openvswitch/datapath.c  | 4 +++-
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index e726159cfcfa..4340f25fe390 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -1895,7 +1895,8 @@ static void ovs_ct_limit_exit(struct net *net, struct ovs_net *ovs_net)
 		struct hlist_head *head = &info->limits[i];
 		struct ovs_ct_limit *ct_limit;
 
-		hlist_for_each_entry_rcu(ct_limit, head, hlist_node)
+		hlist_for_each_entry_rcu(ct_limit, head, hlist_node,
+					 lockdep_ovsl_is_held())
 			kfree_rcu(ct_limit, rcu);
 	}
 	kfree(ovs_net->ct_limit_info->limits);
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index d8ae541d22a8..94b024534987 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -2466,8 +2466,10 @@ static void __net_exit ovs_exit_net(struct net *dnet)
 	struct net *net;
 	LIST_HEAD(head);
 
-	ovs_ct_exit(dnet);
 	ovs_lock();
+
+	ovs_ct_exit(dnet);
+
 	list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
 		__dp_destroy(dp);
 
-- 
cgit v1.2.3-70-g09d2


From 82c9ae440857840c56e05d4fb1427ee032531346 Mon Sep 17 00:00:00 2001
From: John Haxby <john.haxby@oracle.com>
Date: Sat, 18 Apr 2020 16:30:49 +0100
Subject: ipv6: fix restrict IPV6_ADDRFORM operation

Commit b6f6118901d1 ("ipv6: restrict IPV6_ADDRFORM operation") fixed a
problem found by syzbot an unfortunate logic error meant that it
also broke IPV6_ADDRFORM.

Rearrange the checks so that the earlier test is just one of the series
of checks made before moving the socket from IPv6 to IPv4.

Fixes: b6f6118901d1 ("ipv6: restrict IPV6_ADDRFORM operation")
Signed-off-by: John Haxby <john.haxby@oracle.com>
Cc: stable@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ipv6_sockglue.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index debdaeba5d8c..18d05403d3b5 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -183,15 +183,14 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 					retv = -EBUSY;
 					break;
 				}
-			} else if (sk->sk_protocol == IPPROTO_TCP) {
-				if (sk->sk_prot != &tcpv6_prot) {
-					retv = -EBUSY;
-					break;
-				}
-				break;
-			} else {
+			}
+			if (sk->sk_protocol == IPPROTO_TCP &&
+			    sk->sk_prot != &tcpv6_prot) {
+				retv = -EBUSY;
 				break;
 			}
+			if (sk->sk_protocol != IPPROTO_TCP)
+				break;
 			if (sk->sk_state != TCP_ESTABLISHED) {
 				retv = -ENOTCONN;
 				break;
-- 
cgit v1.2.3-70-g09d2


From 5e20087d1b678965ae9df01eed03efedc1aef9f8 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 20 Apr 2020 16:25:04 +0200
Subject: mptcp: handle mptcp listener destruction via rcu

Following splat can occur during self test:

 BUG: KASAN: use-after-free in subflow_data_ready+0x156/0x160
 Read of size 8 at addr ffff888100c35c28 by task mptcp_connect/4808

  subflow_data_ready+0x156/0x160
  tcp_child_process+0x6a3/0xb30
  tcp_v4_rcv+0x2231/0x3730
  ip_protocol_deliver_rcu+0x5c/0x860
  ip_local_deliver_finish+0x220/0x360
  ip_local_deliver+0x1c8/0x4e0
  ip_rcv_finish+0x1da/0x2f0
  ip_rcv+0xd0/0x3c0
  __netif_receive_skb_one_core+0xf5/0x160
  __netif_receive_skb+0x27/0x1c0
  process_backlog+0x21e/0x780
  net_rx_action+0x35f/0xe90
  do_softirq+0x4c/0x50
  [..]

This occurs when accessing subflow_ctx->conn.

Problem is that tcp_child_process() calls listen sockets'
sk_data_ready() notification, but it doesn't hold the listener
lock.  Another cpu calling close() on the listener will then cause
transition of refcount to 0.

Fixes: 58b09919626bf ("mptcp: create msk early")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 7e816c733ccb..b57974a6b6cc 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1378,6 +1378,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req)
 		msk->ack_seq = ack_seq;
 	}
 
+	sock_reset_flag(nsk, SOCK_RCU_FREE);
 	/* will be fully established after successful MPC subflow creation */
 	inet_sk_state_store(nsk, TCP_SYN_RECV);
 	bh_unlock_sock(nsk);
@@ -1779,6 +1780,8 @@ static int mptcp_listen(struct socket *sock, int backlog)
 		goto unlock;
 	}
 
+	sock_set_flag(sock->sk, SOCK_RCU_FREE);
+
 	err = ssock->ops->listen(ssock, backlog);
 	inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk));
 	if (!err)
-- 
cgit v1.2.3-70-g09d2


From 4c8941de781cf71388d1490c6b85a02d1cec1ef4 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Mon, 20 Apr 2020 16:25:05 +0200
Subject: mptcp: avoid flipping mp_capable field in syn_recv_sock()

If multiple CPUs races on the same req_sock in syn_recv_sock(),
flipping such field can cause inconsistent child socket status.

When racing, the CPU losing the req ownership may still change
the mptcp request socket mp_capable flag while the CPU owning
the request is cloning the socket, leaving the child socket with
'is_mptcp' set but no 'mp_capable' flag.

Such socket will stay with 'conn' field cleared, heading to oops
in later mptcp callback.

Address the issue tracking the fallback status in a local variable.

Fixes: 58b09919626b ("mptcp: create msk early")
Co-developed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/subflow.c | 46 ++++++++++++++++++++++++++++++----------------
 1 file changed, 30 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index bc46b5091b9d..4fa190368507 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -376,6 +376,17 @@ static void mptcp_force_close(struct sock *sk)
 	sk_common_release(sk);
 }
 
+static void subflow_ulp_fallback(struct sock *sk,
+				 struct mptcp_subflow_context *old_ctx)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+
+	mptcp_subflow_tcp_fallback(sk, old_ctx);
+	icsk->icsk_ulp_ops = NULL;
+	rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
+	tcp_sk(sk)->is_mptcp = 0;
+}
+
 static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 					  struct sk_buff *skb,
 					  struct request_sock *req,
@@ -388,6 +399,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 	struct tcp_options_received opt_rx;
 	bool fallback_is_fatal = false;
 	struct sock *new_msk = NULL;
+	bool fallback = false;
 	struct sock *child;
 
 	pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);
@@ -412,14 +424,14 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 			subflow_req->remote_key = opt_rx.mptcp.sndr_key;
 			subflow_req->remote_key_valid = 1;
 		} else {
-			subflow_req->mp_capable = 0;
+			fallback = true;
 			goto create_child;
 		}
 
 create_msk:
 		new_msk = mptcp_sk_clone(listener->conn, req);
 		if (!new_msk)
-			subflow_req->mp_capable = 0;
+			fallback = true;
 	} else if (subflow_req->mp_join) {
 		fallback_is_fatal = true;
 		opt_rx.mptcp.mp_join = 0;
@@ -438,12 +450,18 @@ create_child:
 	if (child && *own_req) {
 		struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(child);
 
-		/* we have null ctx on TCP fallback, which is fatal on
-		 * MPJ handshake
+		/* we need to fallback on ctx allocation failure and on pre-reqs
+		 * checking above. In the latter scenario we additionally need
+		 * to reset the context to non MPTCP status.
 		 */
-		if (!ctx) {
+		if (!ctx || fallback) {
 			if (fallback_is_fatal)
 				goto close_child;
+
+			if (ctx) {
+				subflow_ulp_fallback(child, ctx);
+				kfree_rcu(ctx, rcu);
+			}
 			goto out;
 		}
 
@@ -474,6 +492,13 @@ out:
 	/* dispose of the left over mptcp master, if any */
 	if (unlikely(new_msk))
 		mptcp_force_close(new_msk);
+
+	/* check for expected invariant - should never trigger, just help
+	 * catching eariler subtle bugs
+	 */
+	WARN_ON_ONCE(*own_req && child && tcp_sk(child)->is_mptcp &&
+		     (!mptcp_subflow_ctx(child) ||
+		      !mptcp_subflow_ctx(child)->conn));
 	return child;
 
 close_child:
@@ -1076,17 +1101,6 @@ static void subflow_ulp_release(struct sock *sk)
 	kfree_rcu(ctx, rcu);
 }
 
-static void subflow_ulp_fallback(struct sock *sk,
-				 struct mptcp_subflow_context *old_ctx)
-{
-	struct inet_connection_sock *icsk = inet_csk(sk);
-
-	mptcp_subflow_tcp_fallback(sk, old_ctx);
-	icsk->icsk_ulp_ops = NULL;
-	rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
-	tcp_sk(sk)->is_mptcp = 0;
-}
-
 static void subflow_ulp_clone(const struct request_sock *req,
 			      struct sock *newsk,
 			      const gfp_t priority)
-- 
cgit v1.2.3-70-g09d2


From fca5c82c086ea3871b103618b80558c479c8e597 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Mon, 20 Apr 2020 16:25:06 +0200
Subject: mptcp: drop req socket remote_key* fields

We don't need them, as we can use the current ingress opt
data instead. Setting them in syn_recv_sock() may causes
inconsistent mptcp socket status, as per previous commit.

Fixes: cc7972ea1932 ("mptcp: parse and emit MP_CAPABLE option according to v1 spec")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c |  8 +++++---
 net/mptcp/protocol.h |  8 ++++----
 net/mptcp/subflow.c  | 20 ++++++++++----------
 3 files changed, 19 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index b57974a6b6cc..b22a63ba2348 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1332,7 +1332,9 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk)
 }
 #endif
 
-struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req)
+struct sock *mptcp_sk_clone(const struct sock *sk,
+			    const struct tcp_options_received *opt_rx,
+			    struct request_sock *req)
 {
 	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
 	struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC);
@@ -1370,9 +1372,9 @@ struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req)
 
 	msk->write_seq = subflow_req->idsn + 1;
 	atomic64_set(&msk->snd_una, msk->write_seq);
-	if (subflow_req->remote_key_valid) {
+	if (opt_rx->mptcp.mp_capable) {
 		msk->can_ack = true;
-		msk->remote_key = subflow_req->remote_key;
+		msk->remote_key = opt_rx->mptcp.sndr_key;
 		mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
 		ack_seq++;
 		msk->ack_seq = ack_seq;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 67448002a2d7..a2b3048037d0 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -206,12 +206,10 @@ struct mptcp_subflow_request_sock {
 	struct	tcp_request_sock sk;
 	u16	mp_capable : 1,
 		mp_join : 1,
-		backup : 1,
-		remote_key_valid : 1;
+		backup : 1;
 	u8	local_id;
 	u8	remote_id;
 	u64	local_key;
-	u64	remote_key;
 	u64	idsn;
 	u32	token;
 	u32	ssn_offset;
@@ -332,7 +330,9 @@ void mptcp_proto_init(void);
 int mptcp_proto_v6_init(void);
 #endif
 
-struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req);
+struct sock *mptcp_sk_clone(const struct sock *sk,
+			    const struct tcp_options_received *opt_rx,
+			    struct request_sock *req);
 void mptcp_get_options(const struct sk_buff *skb,
 		       struct tcp_options_received *opt_rx);
 
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 4fa190368507..fabd06f2ff45 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -133,7 +133,6 @@ static void subflow_init_req(struct request_sock *req,
 
 	subflow_req->mp_capable = 0;
 	subflow_req->mp_join = 0;
-	subflow_req->remote_key_valid = 0;
 
 #ifdef CONFIG_TCP_MD5SIG
 	/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
@@ -404,6 +403,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 
 	pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);
 
+	opt_rx.mptcp.mp_capable = 0;
 	if (tcp_rsk(req)->is_mptcp == 0)
 		goto create_child;
 
@@ -418,18 +418,14 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 			goto create_msk;
 		}
 
-		opt_rx.mptcp.mp_capable = 0;
 		mptcp_get_options(skb, &opt_rx);
-		if (opt_rx.mptcp.mp_capable) {
-			subflow_req->remote_key = opt_rx.mptcp.sndr_key;
-			subflow_req->remote_key_valid = 1;
-		} else {
+		if (!opt_rx.mptcp.mp_capable) {
 			fallback = true;
 			goto create_child;
 		}
 
 create_msk:
-		new_msk = mptcp_sk_clone(listener->conn, req);
+		new_msk = mptcp_sk_clone(listener->conn, &opt_rx, req);
 		if (!new_msk)
 			fallback = true;
 	} else if (subflow_req->mp_join) {
@@ -473,6 +469,13 @@ create_child:
 			mptcp_pm_new_connection(mptcp_sk(new_msk), 1);
 			ctx->conn = new_msk;
 			new_msk = NULL;
+
+			/* with OoO packets we can reach here without ingress
+			 * mpc option
+			 */
+			ctx->remote_key = opt_rx.mptcp.sndr_key;
+			ctx->fully_established = opt_rx.mptcp.mp_capable;
+			ctx->can_ack = opt_rx.mptcp.mp_capable;
 		} else if (ctx->mp_join) {
 			struct mptcp_sock *owner;
 
@@ -1134,9 +1137,6 @@ static void subflow_ulp_clone(const struct request_sock *req,
 		 * is fully established only after we receive the remote key
 		 */
 		new_ctx->mp_capable = 1;
-		new_ctx->fully_established = subflow_req->remote_key_valid;
-		new_ctx->can_ack = subflow_req->remote_key_valid;
-		new_ctx->remote_key = subflow_req->remote_key;
 		new_ctx->local_key = subflow_req->local_key;
 		new_ctx->token = subflow_req->token;
 		new_ctx->ssn_offset = subflow_req->ssn_offset;
-- 
cgit v1.2.3-70-g09d2


From 526f3d96b8f83b1b13d73bd0b5c79cc2c487ec8e Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Mon, 20 Apr 2020 09:04:24 +0200
Subject: cgroup, netclassid: remove double cond_resched

Commit 018d26fcd12a ("cgroup, netclassid: periodically release file_lock
on classid") added a second cond_resched to write_classid indirectly by
update_classid_task. Remove the one in write_classid.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Cc: Dmitry Yakunin <zeil@yandex-team.ru>
Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netclassid_cgroup.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index b4c87fe31be2..41b24cd31562 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -127,10 +127,8 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
 	cs->classid = (u32)value;
 
 	css_task_iter_start(css, 0, &it);
-	while ((p = css_task_iter_next(&it))) {
+	while ((p = css_task_iter_next(&it)))
 		update_classid_task(p, cs->classid);
-		cond_resched();
-	}
 	css_task_iter_end(&it);
 
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From 0c922a4850eba2e668f73a3f1153196e09abb251 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Mon, 20 Apr 2020 17:13:51 -0600
Subject: xfrm: Always set XFRM_TRANSFORMED in xfrm{4,6}_output_finish

IPSKB_XFRM_TRANSFORMED and IP6SKB_XFRM_TRANSFORMED are skb flags set by
xfrm code to tell other skb handlers that the packet has been passed
through the xfrm output functions. Simplify the code and just always
set them rather than conditionally based on netfilter enabled thus
making the flag available for other users.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/xfrm4_output.c | 2 --
 net/ipv6/xfrm6_output.c | 2 --
 2 files changed, 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 89ba7c87de5d..30ddb9dc9398 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -58,9 +58,7 @@ int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb)
 {
 	memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
 
-#ifdef CONFIG_NETFILTER
 	IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
-#endif
 
 	return xfrm_output(sk, skb);
 }
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index fbe51d40bd7e..e34167f790e6 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -111,9 +111,7 @@ int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb)
 {
 	memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
 
-#ifdef CONFIG_NETFILTER
 	IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
-#endif
 
 	return xfrm_output(sk, skb);
 }
-- 
cgit v1.2.3-70-g09d2


From a1211bf9a7774706722ba3b18c6157d980319f79 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 21 Apr 2020 10:00:28 -0700
Subject: sched: etf: do not assume all sockets are full blown

skb->sk does not always point to a full blown socket,
we need to use sk_fullsock() before accessing fields which
only make sense on full socket.

BUG: KASAN: use-after-free in report_sock_error+0x286/0x300 net/sched/sch_etf.c:141
Read of size 1 at addr ffff88805eb9b245 by task syz-executor.5/9630

CPU: 1 PID: 9630 Comm: syz-executor.5 Not tainted 5.7.0-rc2-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 <IRQ>
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x188/0x20d lib/dump_stack.c:118
 print_address_description.constprop.0.cold+0xd3/0x315 mm/kasan/report.c:382
 __kasan_report.cold+0x35/0x4d mm/kasan/report.c:511
 kasan_report+0x33/0x50 mm/kasan/common.c:625
 report_sock_error+0x286/0x300 net/sched/sch_etf.c:141
 etf_enqueue_timesortedlist+0x389/0x740 net/sched/sch_etf.c:170
 __dev_xmit_skb net/core/dev.c:3710 [inline]
 __dev_queue_xmit+0x154a/0x30a0 net/core/dev.c:4021
 neigh_hh_output include/net/neighbour.h:499 [inline]
 neigh_output include/net/neighbour.h:508 [inline]
 ip6_finish_output2+0xfb5/0x25b0 net/ipv6/ip6_output.c:117
 __ip6_finish_output+0x442/0xab0 net/ipv6/ip6_output.c:143
 ip6_finish_output+0x34/0x1f0 net/ipv6/ip6_output.c:153
 NF_HOOK_COND include/linux/netfilter.h:296 [inline]
 ip6_output+0x239/0x810 net/ipv6/ip6_output.c:176
 dst_output include/net/dst.h:435 [inline]
 NF_HOOK include/linux/netfilter.h:307 [inline]
 NF_HOOK include/linux/netfilter.h:301 [inline]
 ip6_xmit+0xe1a/0x2090 net/ipv6/ip6_output.c:280
 tcp_v6_send_synack+0x4e7/0x960 net/ipv6/tcp_ipv6.c:521
 tcp_rtx_synack+0x10d/0x1a0 net/ipv4/tcp_output.c:3916
 inet_rtx_syn_ack net/ipv4/inet_connection_sock.c:669 [inline]
 reqsk_timer_handler+0x4c2/0xb40 net/ipv4/inet_connection_sock.c:763
 call_timer_fn+0x1ac/0x780 kernel/time/timer.c:1405
 expire_timers kernel/time/timer.c:1450 [inline]
 __run_timers kernel/time/timer.c:1774 [inline]
 __run_timers kernel/time/timer.c:1741 [inline]
 run_timer_softirq+0x623/0x1600 kernel/time/timer.c:1787
 __do_softirq+0x26c/0x9f7 kernel/softirq.c:292
 invoke_softirq kernel/softirq.c:373 [inline]
 irq_exit+0x192/0x1d0 kernel/softirq.c:413
 exiting_irq arch/x86/include/asm/apic.h:546 [inline]
 smp_apic_timer_interrupt+0x19e/0x600 arch/x86/kernel/apic/apic.c:1140
 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:829
 </IRQ>
RIP: 0010:des_encrypt+0x157/0x9c0 lib/crypto/des.c:792
Code: 85 22 06 00 00 41 31 dc 41 8b 4d 04 44 89 e2 41 83 e4 3f 4a 8d 3c a5 60 72 72 88 81 e2 3f 3f 3f 3f 48 89 f8 48 c1 e8 03 31 d9 <0f> b6 34 28 48 89 f8 c1 c9 04 83 e0 07 83 c0 03 40 38 f0 7c 09 40
RSP: 0018:ffffc90003b5f6c0 EFLAGS: 00000282 ORIG_RAX: ffffffffffffff13
RAX: 1ffffffff10e4e55 RBX: 00000000d2f846d0 RCX: 00000000d2f846d0
RDX: 0000000012380612 RSI: ffffffff839863ca RDI: ffffffff887272a8
RBP: dffffc0000000000 R08: ffff888091d0a380 R09: 0000000000800081
R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000012
R13: ffff8880a8ae8078 R14: 00000000c545c93e R15: 0000000000000006
 cipher_crypt_one crypto/cipher.c:75 [inline]
 crypto_cipher_encrypt_one+0x124/0x210 crypto/cipher.c:82
 crypto_cbcmac_digest_update+0x1b5/0x250 crypto/ccm.c:830
 crypto_shash_update+0xc4/0x120 crypto/shash.c:119
 shash_ahash_update+0xa3/0x110 crypto/shash.c:246
 crypto_ahash_update include/crypto/hash.h:547 [inline]
 hash_sendmsg+0x518/0xad0 crypto/algif_hash.c:102
 sock_sendmsg_nosec net/socket.c:652 [inline]
 sock_sendmsg+0xcf/0x120 net/socket.c:672
 ____sys_sendmsg+0x308/0x7e0 net/socket.c:2362
 ___sys_sendmsg+0x100/0x170 net/socket.c:2416
 __sys_sendmmsg+0x195/0x480 net/socket.c:2506
 __do_sys_sendmmsg net/socket.c:2535 [inline]
 __se_sys_sendmmsg net/socket.c:2532 [inline]
 __x64_sys_sendmmsg+0x99/0x100 net/socket.c:2532
 do_syscall_64+0xf6/0x7d0 arch/x86/entry/common.c:295
 entry_SYSCALL_64_after_hwframe+0x49/0xb3
RIP: 0033:0x45c829
Code: 0d b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 db b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:00007f6d9528ec78 EFLAGS: 00000246 ORIG_RAX: 0000000000000133
RAX: ffffffffffffffda RBX: 00000000004fc080 RCX: 000000000045c829
RDX: 0000000000000001 RSI: 0000000020002640 RDI: 0000000000000004
RBP: 000000000078bf00 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff
R13: 00000000000008d7 R14: 00000000004cb7aa R15: 00007f6d9528f6d4

Fixes: 4b15c7075352 ("net/sched: Make etf report drops on error_queue")
Fixes: 25db26a91364 ("net/sched: Introduce the ETF Qdisc")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Cc: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Reviewed-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_etf.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c
index b1da5589a0c6..c48f91075b5c 100644
--- a/net/sched/sch_etf.c
+++ b/net/sched/sch_etf.c
@@ -82,7 +82,7 @@ static bool is_packet_valid(struct Qdisc *sch, struct sk_buff *nskb)
 	if (q->skip_sock_check)
 		goto skip;
 
-	if (!sk)
+	if (!sk || !sk_fullsock(sk))
 		return false;
 
 	if (!sock_flag(sk, SOCK_TXTIME))
@@ -137,8 +137,9 @@ static void report_sock_error(struct sk_buff *skb, u32 err, u8 code)
 	struct sock_exterr_skb *serr;
 	struct sk_buff *clone;
 	ktime_t txtime = skb->tstamp;
+	struct sock *sk = skb->sk;
 
-	if (!skb->sk || !(skb->sk->sk_txtime_report_errors))
+	if (!sk || !sk_fullsock(sk) || !(sk->sk_txtime_report_errors))
 		return;
 
 	clone = skb_clone(skb, GFP_ATOMIC);
@@ -154,7 +155,7 @@ static void report_sock_error(struct sk_buff *skb, u32 err, u8 code)
 	serr->ee.ee_data = (txtime >> 32); /* high part of tstamp */
 	serr->ee.ee_info = txtime; /* low part of tstamp */
 
-	if (sock_queue_err_skb(skb->sk, clone))
+	if (sock_queue_err_skb(sk, clone))
 		kfree_skb(clone);
 }
 
-- 
cgit v1.2.3-70-g09d2


From 72579e14a1d3d3d561039dfe7e5f47aaf22e3fd3 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 21 Apr 2020 20:18:53 +0300
Subject: net: dsa: don't fail to probe if we couldn't set the MTU

There is no reason to fail the probing of the switch if the MTU couldn't
be configured correctly (either the switch port itself, or the host
port) for whatever reason. MTU-sized traffic probably won't work, sure,
but we can still probably limp on and support some form of communication
anyway, which the users would probably appreciate more.

Fixes: bfcb813203e6 ("net: dsa: configure the MTU for switch ports")
Reported-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/slave.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index e94eb1aac602..d1068803cd11 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1770,11 +1770,9 @@ int dsa_slave_create(struct dsa_port *port)
 	rtnl_lock();
 	ret = dsa_slave_change_mtu(slave_dev, ETH_DATA_LEN);
 	rtnl_unlock();
-	if (ret && ret != -EOPNOTSUPP) {
-		dev_err(ds->dev, "error %d setting MTU on port %d\n",
-			ret, port->index);
-		goto out_free;
-	}
+	if (ret)
+		dev_warn(ds->dev, "nonfatal error %d setting MTU on port %d\n",
+			 ret, port->index);
 
 	netif_carrier_off(slave_dev);
 
-- 
cgit v1.2.3-70-g09d2


From 145cb2f7177d94bc54563ed26027e952ee0ae03c Mon Sep 17 00:00:00 2001
From: Jere Leppänen <jere.leppanen@nokia.com>
Date: Tue, 21 Apr 2020 22:03:41 +0300
Subject: sctp: Fix bundling of SHUTDOWN with COOKIE-ACK
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When we start shutdown in sctp_sf_do_dupcook_a(), we want to bundle
the SHUTDOWN with the COOKIE-ACK to ensure that the peer receives them
at the same time and in the correct order. This bundling was broken by
commit 4ff40b86262b ("sctp: set chunk transport correctly when it's a
new asoc"), which assigns a transport for the COOKIE-ACK, but not for
the SHUTDOWN.

Fix this by passing a reference to the COOKIE-ACK chunk as an argument
to sctp_sf_do_9_2_start_shutdown() and onward to
sctp_make_shutdown(). This way the SHUTDOWN chunk is assigned the same
transport as the COOKIE-ACK chunk, which allows them to be bundled.

In sctp_sf_do_9_2_start_shutdown(), the void *arg parameter was
previously unused. Now that we're taking it into use, it must be a
valid pointer to a chunk, or NULL. There is only one call site where
it's not, in sctp_sf_autoclose_timer_expire(). Fix that too.

Fixes: 4ff40b86262b ("sctp: set chunk transport correctly when it's a new asoc")
Signed-off-by: Jere Leppänen <jere.leppanen@nokia.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/sm_statefuns.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 6a16af4b1ef6..26788f4a3b9e 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -1865,7 +1865,7 @@ static enum sctp_disposition sctp_sf_do_dupcook_a(
 		 */
 		sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
 		return sctp_sf_do_9_2_start_shutdown(net, ep, asoc,
-						     SCTP_ST_CHUNK(0), NULL,
+						     SCTP_ST_CHUNK(0), repl,
 						     commands);
 	} else {
 		sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
@@ -5470,7 +5470,7 @@ enum sctp_disposition sctp_sf_do_9_2_start_shutdown(
 	 * in the Cumulative TSN Ack field the last sequential TSN it
 	 * has received from the peer.
 	 */
-	reply = sctp_make_shutdown(asoc, NULL);
+	reply = sctp_make_shutdown(asoc, arg);
 	if (!reply)
 		goto nomem;
 
@@ -6068,7 +6068,7 @@ enum sctp_disposition sctp_sf_autoclose_timer_expire(
 	disposition = SCTP_DISPOSITION_CONSUME;
 	if (sctp_outq_is_empty(&asoc->outqueue)) {
 		disposition = sctp_sf_do_9_2_start_shutdown(net, ep, asoc, type,
-							    arg, commands);
+							    NULL, commands);
 	}
 
 	return disposition;
-- 
cgit v1.2.3-70-g09d2


From 12dfd78e3a74825e6f0bc8df7ef9f938fbc6bfe3 Mon Sep 17 00:00:00 2001
From: Jere Leppänen <jere.leppanen@nokia.com>
Date: Tue, 21 Apr 2020 22:03:42 +0300
Subject: sctp: Fix SHUTDOWN CTSN Ack in the peer restart case
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When starting shutdown in sctp_sf_do_dupcook_a(), get the value for
SHUTDOWN Cumulative TSN Ack from the new association, which is
reconstructed from the cookie, instead of the old association, which
the peer doesn't have anymore.

Otherwise the SHUTDOWN is either ignored or replied to with an ABORT
by the peer because CTSN Ack doesn't match the peer's Initial TSN.

Fixes: bdf6fa52f01b ("sctp: handle association restarts when the socket is closed.")
Signed-off-by: Jere Leppänen <jere.leppanen@nokia.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/sm_make_chunk.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 09050c1d5517..f7cb0b7faec2 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -858,7 +858,11 @@ struct sctp_chunk *sctp_make_shutdown(const struct sctp_association *asoc,
 	struct sctp_chunk *retval;
 	__u32 ctsn;
 
-	ctsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map);
+	if (chunk && chunk->asoc)
+		ctsn = sctp_tsnmap_get_ctsn(&chunk->asoc->peer.tsn_map);
+	else
+		ctsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map);
+
 	shut.cum_tsn_ack = htonl(ctsn);
 
 	retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN, 0,
-- 
cgit v1.2.3-70-g09d2


From 9a19371bf029d784aa37ee623ce175205f43ccfd Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Wed, 22 Apr 2020 18:24:56 +0200
Subject: mptcp: fix data_fin handing in RX path

The data fin flag is set only via a DSS option, but
mptcp_incoming_options() copies it unconditionally from the
provided RX options.

Since we do not clear all the mptcp sock RX options in a
socket free/alloc cycle, we can end-up with a stray data_fin
value while parsing e.g. MPC packets.

That would lead to mapping data corruption and will trigger
a few WARN_ON() in the RX path.

Instead of adding a costly memset(), fetch the data_fin flag
only for DSS packets - when we always explicitly initialize
such bit at option parsing time.

Fixes: 648ef4b88673 ("mptcp: Implement MPTCP receive path")
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/options.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index faf57585b892..4a7c467b99db 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -876,12 +876,11 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
 			mpext->data_seq = mp_opt->data_seq;
 			mpext->subflow_seq = mp_opt->subflow_seq;
 			mpext->dsn64 = mp_opt->dsn64;
+			mpext->data_fin = mp_opt->data_fin;
 		}
 		mpext->data_len = mp_opt->data_len;
 		mpext->use_map = 1;
 	}
-
-	mpext->data_fin = mp_opt->data_fin;
 }
 
 void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts)
-- 
cgit v1.2.3-70-g09d2


From c0259664c6879e1045d6d6703f37501690f6760f Mon Sep 17 00:00:00 2001
From: Salvatore Bonaccorso <carnil@debian.org>
Date: Wed, 22 Apr 2020 21:07:53 +0200
Subject: netlabel: Kconfig: Update reference for NetLabel Tools project

The NetLabel Tools project has moved from http://netlabel.sf.net to a
GitHub project. Update to directly refer to the new home for the tools.

Signed-off-by: Salvatore Bonaccorso <carnil@debian.org>
Acked-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlabel/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netlabel/Kconfig b/net/netlabel/Kconfig
index 64280a1d3906..07b03c306f28 100644
--- a/net/netlabel/Kconfig
+++ b/net/netlabel/Kconfig
@@ -14,6 +14,6 @@ config NETLABEL
 	  Documentation/netlabel as well as the NetLabel SourceForge project
 	  for configuration tools and additional documentation.
 
-	   * http://netlabel.sf.net
+	   * https://github.com/netlabel/netlabel_tools
 
 	  If you are unsure, say N.
-- 
cgit v1.2.3-70-g09d2


From 7c74b0bec918c1e0ca0b4208038c156eacf8f13f Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Wed, 22 Apr 2020 15:40:20 -0600
Subject: ipv4: Update fib_select_default to handle nexthop objects

A user reported [0] hitting the WARN_ON in fib_info_nh:

    [ 8633.839816] ------------[ cut here ]------------
    [ 8633.839819] WARNING: CPU: 0 PID: 1719 at include/net/nexthop.h:251 fib_select_path+0x303/0x381
    ...
    [ 8633.839846] RIP: 0010:fib_select_path+0x303/0x381
    ...
    [ 8633.839848] RSP: 0018:ffffb04d407f7d00 EFLAGS: 00010286
    [ 8633.839850] RAX: 0000000000000000 RBX: ffff9460b9897ee8 RCX: 00000000000000fe
    [ 8633.839851] RDX: 0000000000000000 RSI: 00000000ffffffff RDI: 0000000000000000
    [ 8633.839852] RBP: ffff946076049850 R08: 0000000059263a83 R09: ffff9460840e4000
    [ 8633.839853] R10: 0000000000000014 R11: 0000000000000000 R12: ffffb04d407f7dc0
    [ 8633.839854] R13: ffffffffa4ce3240 R14: 0000000000000000 R15: ffff9460b7681f60
    [ 8633.839857] FS:  00007fcac2e02700(0000) GS:ffff9460bdc00000(0000) knlGS:0000000000000000
    [ 8633.839858] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
    [ 8633.839859] CR2: 00007f27beb77e28 CR3: 0000000077734000 CR4: 00000000000006f0
    [ 8633.839867] Call Trace:
    [ 8633.839871]  ip_route_output_key_hash_rcu+0x421/0x890
    [ 8633.839873]  ip_route_output_key_hash+0x5e/0x80
    [ 8633.839876]  ip_route_output_flow+0x1a/0x50
    [ 8633.839878]  __ip4_datagram_connect+0x154/0x310
    [ 8633.839880]  ip4_datagram_connect+0x28/0x40
    [ 8633.839882]  __sys_connect+0xd6/0x100
    ...

The WARN_ON is triggered in fib_select_default which is invoked when
there are multiple default routes. Update the function to use
fib_info_nhc and convert the nexthop checks to use fib_nh_common.

Add test case that covers the affected code path.

[0] https://github.com/FRRouting/frr/issues/6089

Fixes: 493ced1ac47c ("ipv4: Allow routes to use nexthop objects")
Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_semantics.c                    |  6 +++---
 tools/testing/selftests/net/fib_nexthops.sh | 23 +++++++++++++++++++++++
 2 files changed, 26 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 6ed8c9317179..55ca2e521828 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -2014,7 +2014,7 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
 
 	hlist_for_each_entry_rcu(fa, fa_head, fa_list) {
 		struct fib_info *next_fi = fa->fa_info;
-		struct fib_nh *nh;
+		struct fib_nh_common *nhc;
 
 		if (fa->fa_slen != slen)
 			continue;
@@ -2037,8 +2037,8 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
 		    fa->fa_type != RTN_UNICAST)
 			continue;
 
-		nh = fib_info_nh(next_fi, 0);
-		if (!nh->fib_nh_gw4 || nh->fib_nh_scope != RT_SCOPE_LINK)
+		nhc = fib_info_nhc(next_fi, 0);
+		if (!nhc->nhc_gw_family || nhc->nhc_scope != RT_SCOPE_LINK)
 			continue;
 
 		fib_alias_accessed(fa);
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index 796670ebc65b..6560ed796ac4 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -749,6 +749,29 @@ ipv4_fcnal_runtime()
 	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
 	log_test $? 0 "Ping - multipath"
 
+	run_cmd "$IP ro delete 172.16.101.1/32 nhid 122"
+
+	#
+	# multiple default routes
+	# - tests fib_select_default
+	run_cmd "$IP nexthop add id 501 via 172.16.1.2 dev veth1"
+	run_cmd "$IP ro add default nhid 501"
+	run_cmd "$IP ro add default via 172.16.1.3 dev veth1 metric 20"
+	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	log_test $? 0 "Ping - multiple default routes, nh first"
+
+	# flip the order
+	run_cmd "$IP ro del default nhid 501"
+	run_cmd "$IP ro del default via 172.16.1.3 dev veth1 metric 20"
+	run_cmd "$IP ro add default via 172.16.1.2 dev veth1 metric 20"
+	run_cmd "$IP nexthop replace id 501 via 172.16.1.3 dev veth1"
+	run_cmd "$IP ro add default nhid 501 metric 20"
+	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	log_test $? 0 "Ping - multiple default routes, nh second"
+
+	run_cmd "$IP nexthop delete nhid 501"
+	run_cmd "$IP ro del default"
+
 	#
 	# IPv4 with blackhole nexthops
 	#
-- 
cgit v1.2.3-70-g09d2


From b4e0f9a926ec557cc0b91216957afd1b711bd45f Mon Sep 17 00:00:00 2001
From: Bo YU <tsu.yubo@gmail.com>
Date: Thu, 23 Apr 2020 10:10:03 +0800
Subject: mptcp/pm_netlink.c : add check for nla_put_in/6_addr

Normal there should be checked for nla_put_in6_addr like other
usage in net.

Detected by CoverityScan, CID# 1461639

Fixes: 01cacb00b35c ("mptcp: add netlink-based PM")
Signed-off-by: Bo YU <tsu.yubo@gmail.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/pm_netlink.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 86d61ab34c7c..b78edf237ba0 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -599,12 +599,14 @@ static int mptcp_nl_fill_addr(struct sk_buff *skb,
 	    nla_put_s32(skb, MPTCP_PM_ADDR_ATTR_IF_IDX, entry->ifindex))
 		goto nla_put_failure;
 
-	if (addr->family == AF_INET)
-		nla_put_in_addr(skb, MPTCP_PM_ADDR_ATTR_ADDR4,
-				addr->addr.s_addr);
+	if (addr->family == AF_INET &&
+	    nla_put_in_addr(skb, MPTCP_PM_ADDR_ATTR_ADDR4,
+			    addr->addr.s_addr))
+		goto nla_put_failure;
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
-	else if (addr->family == AF_INET6)
-		nla_put_in6_addr(skb, MPTCP_PM_ADDR_ATTR_ADDR6, &addr->addr6);
+	else if (addr->family == AF_INET6 &&
+		 nla_put_in6_addr(skb, MPTCP_PM_ADDR_ATTR_ADDR6, &addr->addr6))
+		goto nla_put_failure;
 #endif
 	nla_nest_end(skb, attr);
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From f35d12971b4d814cdb2f659d76b42f0c545270b6 Mon Sep 17 00:00:00 2001
From: Xiyu Yang <xiyuyang19@fudan.edu.cn>
Date: Thu, 23 Apr 2020 13:13:03 +0800
Subject: net/x25: Fix x25_neigh refcnt leak when receiving frame

x25_lapb_receive_frame() invokes x25_get_neigh(), which returns a
reference of the specified x25_neigh object to "nb" with increased
refcnt.

When x25_lapb_receive_frame() returns, local variable "nb" becomes
invalid, so the refcount should be decreased to keep refcount balanced.

The reference counting issue happens in one path of
x25_lapb_receive_frame(). When pskb_may_pull() returns false, the
function forgets to decrease the refcnt increased by x25_get_neigh(),
causing a refcnt leak.

Fix this issue by calling x25_neigh_put() when pskb_may_pull() returns
false.

Fixes: cb101ed2c3c7 ("x25: Handle undersized/fragmented skbs")
Signed-off-by: Xiyu Yang <xiyuyang19@fudan.edu.cn>
Signed-off-by: Xin Tan <tanxin.ctf@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/x25/x25_dev.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c
index 00e782335cb0..25bf72ee6cad 100644
--- a/net/x25/x25_dev.c
+++ b/net/x25/x25_dev.c
@@ -115,8 +115,10 @@ int x25_lapb_receive_frame(struct sk_buff *skb, struct net_device *dev,
 		goto drop;
 	}
 
-	if (!pskb_may_pull(skb, 1))
+	if (!pskb_may_pull(skb, 1)) {
+		x25_neigh_put(nb);
 		return 0;
+	}
 
 	switch (skb->data[0]) {
 
-- 
cgit v1.2.3-70-g09d2


From 6cb5f3ea4654faf8c28b901266e960b1a4787b26 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 23 Apr 2020 11:13:49 +0200
Subject: mac80211: populate debugfs only after cfg80211 init

When fixing the initialization race, we neglected to account for
the fact that debugfs is initialized in wiphy_register(), and
some debugfs things went missing (or rather were rerooted to the
global debugfs root).

Fix this by adding debugfs entries only after wiphy_register().
This requires some changes in the rate control code since it
currently adds debugfs at alloc time, which can no longer be
done after the reordering.

Reported-by: Jouni Malinen <j@w1.fi>
Reported-by: kernel test robot <rong.a.chen@intel.com>
Reported-by: Hauke Mehrtens <hauke@hauke-m.de>
Reported-by: Felix Fietkau <nbd@nbd.name>
Cc: stable@vger.kernel.org
Fixes: 52e04b4ce5d0 ("mac80211: fix race in ieee80211_register_hw()")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Acked-by: Sumit Garg <sumit.garg@linaro.org>
Link: https://lore.kernel.org/r/20200423111344.0e00d3346f12.Iadc76a03a55093d94391fc672e996a458702875d@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/intel/iwlegacy/3945-rs.c |  2 +-
 drivers/net/wireless/intel/iwlegacy/4965-rs.c |  2 +-
 drivers/net/wireless/intel/iwlwifi/dvm/rs.c   |  2 +-
 drivers/net/wireless/intel/iwlwifi/mvm/rs.c   |  2 +-
 drivers/net/wireless/realtek/rtlwifi/rc.c     |  2 +-
 include/net/mac80211.h                        |  4 +++-
 net/mac80211/main.c                           |  5 +++--
 net/mac80211/rate.c                           | 15 ++++-----------
 net/mac80211/rate.h                           | 23 +++++++++++++++++++++++
 net/mac80211/rc80211_minstrel_ht.c            | 19 +++++++++++++------
 10 files changed, 51 insertions(+), 25 deletions(-)

(limited to 'net')

diff --git a/drivers/net/wireless/intel/iwlegacy/3945-rs.c b/drivers/net/wireless/intel/iwlegacy/3945-rs.c
index 6209f85a71dd..0af9e997c9f6 100644
--- a/drivers/net/wireless/intel/iwlegacy/3945-rs.c
+++ b/drivers/net/wireless/intel/iwlegacy/3945-rs.c
@@ -374,7 +374,7 @@ out:
 }
 
 static void *
-il3945_rs_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir)
+il3945_rs_alloc(struct ieee80211_hw *hw)
 {
 	return hw->priv;
 }
diff --git a/drivers/net/wireless/intel/iwlegacy/4965-rs.c b/drivers/net/wireless/intel/iwlegacy/4965-rs.c
index 7c6e2c863497..0a02d8aca320 100644
--- a/drivers/net/wireless/intel/iwlegacy/4965-rs.c
+++ b/drivers/net/wireless/intel/iwlegacy/4965-rs.c
@@ -2474,7 +2474,7 @@ il4965_rs_fill_link_cmd(struct il_priv *il, struct il_lq_sta *lq_sta,
 }
 
 static void *
-il4965_rs_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir)
+il4965_rs_alloc(struct ieee80211_hw *hw)
 {
 	return hw->priv;
 }
diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/rs.c b/drivers/net/wireless/intel/iwlwifi/dvm/rs.c
index 226165db7dfd..dac809df7f1d 100644
--- a/drivers/net/wireless/intel/iwlwifi/dvm/rs.c
+++ b/drivers/net/wireless/intel/iwlwifi/dvm/rs.c
@@ -3019,7 +3019,7 @@ static void rs_fill_link_cmd(struct iwl_priv *priv,
 			cpu_to_le16(priv->lib->bt_params->agg_time_limit);
 }
 
-static void *rs_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir)
+static void *rs_alloc(struct ieee80211_hw *hw)
 {
 	return hw->priv;
 }
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
index c1aba2bf73cf..00e7fdbaeb7f 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
@@ -3665,7 +3665,7 @@ static void rs_fill_lq_cmd(struct iwl_mvm *mvm,
 			cpu_to_le16(iwl_mvm_coex_agg_time_limit(mvm, sta));
 }
 
-static void *rs_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir)
+static void *rs_alloc(struct ieee80211_hw *hw)
 {
 	return hw->priv;
 }
diff --git a/drivers/net/wireless/realtek/rtlwifi/rc.c b/drivers/net/wireless/realtek/rtlwifi/rc.c
index 0c7d74902d33..4b5ea0ec9109 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rc.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rc.c
@@ -261,7 +261,7 @@ static void rtl_rate_update(void *ppriv,
 {
 }
 
-static void *rtl_rate_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir)
+static void *rtl_rate_alloc(struct ieee80211_hw *hw)
 {
 	struct rtl_priv *rtlpriv = rtl_priv(hw);
 	return rtlpriv;
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index b6b4de0e4b5e..97fec4d310ac 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -6007,7 +6007,9 @@ enum rate_control_capabilities {
 struct rate_control_ops {
 	unsigned long capa;
 	const char *name;
-	void *(*alloc)(struct ieee80211_hw *hw, struct dentry *debugfsdir);
+	void *(*alloc)(struct ieee80211_hw *hw);
+	void (*add_debugfs)(struct ieee80211_hw *hw, void *priv,
+			    struct dentry *debugfsdir);
 	void (*free)(void *priv);
 
 	void *(*alloc_sta)(void *priv, struct ieee80211_sta *sta, gfp_t gfp);
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 0e9ad60fb2b3..6423173bb87e 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1183,8 +1183,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	local->tx_headroom = max_t(unsigned int , local->hw.extra_tx_headroom,
 				   IEEE80211_TX_STATUS_HEADROOM);
 
-	debugfs_hw_add(local);
-
 	/*
 	 * if the driver doesn't specify a max listen interval we
 	 * use 5 which should be a safe default
@@ -1273,6 +1271,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	if (result < 0)
 		goto fail_wiphy_register;
 
+	debugfs_hw_add(local);
+	rate_control_add_debugfs(local);
+
 	rtnl_lock();
 
 	/* add one default STA interface if supported */
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index a1e9fc7878aa..b051f125d3af 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -214,17 +214,16 @@ static ssize_t rcname_read(struct file *file, char __user *userbuf,
 				       ref->ops->name, len);
 }
 
-static const struct file_operations rcname_ops = {
+const struct file_operations rcname_ops = {
 	.read = rcname_read,
 	.open = simple_open,
 	.llseek = default_llseek,
 };
 #endif
 
-static struct rate_control_ref *rate_control_alloc(const char *name,
-					    struct ieee80211_local *local)
+static struct rate_control_ref *
+rate_control_alloc(const char *name, struct ieee80211_local *local)
 {
-	struct dentry *debugfsdir = NULL;
 	struct rate_control_ref *ref;
 
 	ref = kmalloc(sizeof(struct rate_control_ref), GFP_KERNEL);
@@ -234,13 +233,7 @@ static struct rate_control_ref *rate_control_alloc(const char *name,
 	if (!ref->ops)
 		goto free;
 
-#ifdef CONFIG_MAC80211_DEBUGFS
-	debugfsdir = debugfs_create_dir("rc", local->hw.wiphy->debugfsdir);
-	local->debugfs.rcdir = debugfsdir;
-	debugfs_create_file("name", 0400, debugfsdir, ref, &rcname_ops);
-#endif
-
-	ref->priv = ref->ops->alloc(&local->hw, debugfsdir);
+	ref->priv = ref->ops->alloc(&local->hw);
 	if (!ref->priv)
 		goto free;
 	return ref;
diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h
index 5397c6dad056..79b44d3db171 100644
--- a/net/mac80211/rate.h
+++ b/net/mac80211/rate.h
@@ -60,6 +60,29 @@ static inline void rate_control_add_sta_debugfs(struct sta_info *sta)
 #endif
 }
 
+extern const struct file_operations rcname_ops;
+
+static inline void rate_control_add_debugfs(struct ieee80211_local *local)
+{
+#ifdef CONFIG_MAC80211_DEBUGFS
+	struct dentry *debugfsdir;
+
+	if (!local->rate_ctrl)
+		return;
+
+	if (!local->rate_ctrl->ops->add_debugfs)
+		return;
+
+	debugfsdir = debugfs_create_dir("rc", local->hw.wiphy->debugfsdir);
+	local->debugfs.rcdir = debugfsdir;
+	debugfs_create_file("name", 0400, debugfsdir,
+			    local->rate_ctrl, &rcname_ops);
+
+	local->rate_ctrl->ops->add_debugfs(&local->hw, local->rate_ctrl->priv,
+					   debugfsdir);
+#endif
+}
+
 void ieee80211_check_rate_mask(struct ieee80211_sub_if_data *sdata);
 
 /* Get a reference to the rate control algorithm. If `name' is NULL, get the
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 694a31978a04..5dc3e5bc4e64 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -1635,7 +1635,7 @@ minstrel_ht_init_cck_rates(struct minstrel_priv *mp)
 }
 
 static void *
-minstrel_ht_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir)
+minstrel_ht_alloc(struct ieee80211_hw *hw)
 {
 	struct minstrel_priv *mp;
 
@@ -1673,7 +1673,17 @@ minstrel_ht_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir)
 	mp->update_interval = HZ / 10;
 	mp->new_avg = true;
 
+	minstrel_ht_init_cck_rates(mp);
+
+	return mp;
+}
+
 #ifdef CONFIG_MAC80211_DEBUGFS
+static void minstrel_ht_add_debugfs(struct ieee80211_hw *hw, void *priv,
+				    struct dentry *debugfsdir)
+{
+	struct minstrel_priv *mp = priv;
+
 	mp->fixed_rate_idx = (u32) -1;
 	debugfs_create_u32("fixed_rate_idx", S_IRUGO | S_IWUGO, debugfsdir,
 			   &mp->fixed_rate_idx);
@@ -1681,12 +1691,8 @@ minstrel_ht_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir)
 			   &mp->sample_switch);
 	debugfs_create_bool("new_avg", S_IRUGO | S_IWUSR, debugfsdir,
 			   &mp->new_avg);
-#endif
-
-	minstrel_ht_init_cck_rates(mp);
-
-	return mp;
 }
+#endif
 
 static void
 minstrel_ht_free(void *priv)
@@ -1725,6 +1731,7 @@ static const struct rate_control_ops mac80211_minstrel_ht = {
 	.alloc = minstrel_ht_alloc,
 	.free = minstrel_ht_free,
 #ifdef CONFIG_MAC80211_DEBUGFS
+	.add_debugfs = minstrel_ht_add_debugfs,
 	.add_sta_debugfs = minstrel_ht_add_sta_debugfs,
 #endif
 	.get_expected_throughput = minstrel_ht_get_expected_throughput,
-- 
cgit v1.2.3-70-g09d2


From 8ca47eb9f9e4e10e7e7fa695731a88941732c38d Mon Sep 17 00:00:00 2001
From: Madhuparna Bhowmik <madhuparnabhowmik10@gmail.com>
Date: Thu, 9 Apr 2020 13:59:06 +0530
Subject: mac80211: sta_info: Add lockdep condition for RCU list usage

The function sta_info_get_by_idx() uses RCU list primitive.
It is called with  local->sta_mtx held from mac80211/cfg.c.
Add lockdep expression to avoid any false positive RCU list warnings.

Signed-off-by: Madhuparna Bhowmik <madhuparnabhowmik10@gmail.com>
Link: https://lore.kernel.org/r/20200409082906.27427-1-madhuparnabhowmik10@gmail.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/sta_info.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index f8d5c2515829..cd8487bc6fc2 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -231,7 +231,8 @@ struct sta_info *sta_info_get_by_idx(struct ieee80211_sub_if_data *sdata,
 	struct sta_info *sta;
 	int i = 0;
 
-	list_for_each_entry_rcu(sta, &local->sta_list, list) {
+	list_for_each_entry_rcu(sta, &local->sta_list, list,
+				lockdep_is_held(&local->sta_mtx)) {
 		if (sdata != sta->sdata)
 			continue;
 		if (i < idx) {
-- 
cgit v1.2.3-70-g09d2