25 files changed, 3988 insertions, 54 deletions
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 8cf22b3c2563..6f81130605d7 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -3,6 +3,7 @@ socket
 psock_fanout
 psock_snd
 psock_tpacket
+reuseport_addr_any
 reuseport_bpf
 reuseport_bpf_cpu
 reuseport_bpf_numa
@@ -14,4 +15,5 @@ udpgso_bench_rx
 udpgso_bench_tx
 tcp_inq
 tls
+txring_overwrite
 ip_defrag
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 923570a9708a..f8f3e90700c0 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -4,14 +4,16 @@
 CFLAGS =  -Wall -Wl,--no-as-needed -O2 -g
 CFLAGS += -I../../../../usr/include/
 
-TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
+TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh \
+	      rtnetlink.sh xfrm_policy.sh
 TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh
 TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh
+TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh
 TEST_PROGS += test_vxlan_fdb_changelink.sh
 TEST_PROGS_EXTENDED := in_netns.sh
 TEST_GEN_FILES =  socket
-TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
-TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd
+TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
+TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite
 TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag
 TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
 TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index cd3a2f1545b5..5821bdd98d20 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -14,3 +14,17 @@ CONFIG_IPV6_VTI=y
 CONFIG_DUMMY=y
 CONFIG_BRIDGE=y
 CONFIG_VLAN_8021Q=y
+CONFIG_NETFILTER=y
+CONFIG_NETFILTER_ADVANCED=y
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_NAT_IPV6=m
+CONFIG_NF_NAT_IPV4=m
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP6_NF_NAT=m
+CONFIG_IP_NF_NAT=m
+CONFIG_NF_TABLES=m
+CONFIG_NF_TABLES_IPV6=y
+CONFIG_NF_TABLES_IPV4=y
+CONFIG_NFT_CHAIN_NAT_IPV6=m
+CONFIG_NFT_CHAIN_NAT_IPV4=m
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 85d253546684..3f248d1f5b91 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -15,6 +15,8 @@ PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
 PAUSE_ON_CLEANUP=${PAUSE_ON_CLEANUP:=no}
 NETIF_TYPE=${NETIF_TYPE:=veth}
 NETIF_CREATE=${NETIF_CREATE:=yes}
+MCD=${MCD:=smcrouted}
+MC_CLI=${MC_CLI:=smcroutectl}
 
 relative_path="${BASH_SOURCE%/*}"
 if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then
@@ -104,7 +106,7 @@ create_netif_veth()
 {
 	local i
 
-	for i in $(eval echo {1..$NUM_NETIFS}); do
+	for ((i = 1; i <= NUM_NETIFS; ++i)); do
 		local j=$((i+1))
 
 		ip link show dev ${NETIFS[p$i]} &> /dev/null
@@ -135,7 +137,7 @@ if [[ "$NETIF_CREATE" = "yes" ]]; then
 	create_netif
 fi
 
-for i in $(eval echo {1..$NUM_NETIFS}); do
+for ((i = 1; i <= NUM_NETIFS; ++i)); do
 	ip link show dev ${NETIFS[p$i]} &> /dev/null
 	if [[ $? -ne 0 ]]; then
 		echo "SKIP: could not find all required interfaces"
@@ -477,11 +479,24 @@ master_name_get()
 	ip -j link show dev $if_name | jq -r '.[]["master"]'
 }
 
+link_stats_get()
+{
+	local if_name=$1; shift
+	local dir=$1; shift
+	local stat=$1; shift
+
+	ip -j -s link show dev $if_name \
+		| jq '.[]["stats64"]["'$dir'"]["'$stat'"]'
+}
+
 link_stats_tx_packets_get()
 {
-       local if_name=$1
+	link_stats_get $1 tx packets
+}
 
-       ip -j -s link show dev $if_name | jq '.[]["stats64"]["tx"]["packets"]'
+link_stats_rx_errors_get()
+{
+	link_stats_get $1 rx errors
 }
 
 tc_rule_stats_get()
@@ -783,6 +798,17 @@ multipath_eval()
 	log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio"
 }
 
+in_ns()
+{
+	local name=$1; shift
+
+	ip netns exec $name bash <<-EOF
+		NUM_NETIFS=0
+		source lib.sh
+		$(for a in "$@"; do printf "%q${IFS:0:1}" "$a"; done)
+	EOF
+}
+
 ##############################################################################
 # Tests
 
@@ -790,10 +816,11 @@ ping_do()
 {
 	local if_name=$1
 	local dip=$2
+	local args=$3
 	local vrf_name
 
 	vrf_name=$(master_name_get $if_name)
-	ip vrf exec $vrf_name $PING $dip -c 10 -i 0.1 -w 2 &> /dev/null
+	ip vrf exec $vrf_name $PING $args $dip -c 10 -i 0.1 -w 2 &> /dev/null
 }
 
 ping_test()
@@ -802,17 +829,18 @@ ping_test()
 
 	ping_do $1 $2
 	check_err $?
-	log_test "ping"
+	log_test "ping$3"
 }
 
 ping6_do()
 {
 	local if_name=$1
 	local dip=$2
+	local args=$3
 	local vrf_name
 
 	vrf_name=$(master_name_get $if_name)
-	ip vrf exec $vrf_name $PING6 $dip -c 10 -i 0.1 -w 2 &> /dev/null
+	ip vrf exec $vrf_name $PING6 $args $dip -c 10 -i 0.1 -w 2 &> /dev/null
 }
 
 ping6_test()
@@ -821,7 +849,7 @@ ping6_test()
 
 	ping6_do $1 $2
 	check_err $?
-	log_test "ping6"
+	log_test "ping6$3"
 }
 
 learning_test()
diff --git a/tools/testing/selftests/net/forwarding/router_multicast.sh b/tools/testing/selftests/net/forwarding/router_multicast.sh
new file mode 100755
index 000000000000..109e6d785169
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_multicast.sh
@@ -0,0 +1,311 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +------------------+
+# | H1 (v$h1)        |
+# | 2001:db8:1::2/64 |
+# | 198.51.100.2/28  |
+# |         $h1 +    |
+# +-------------|----+
+#               |
+# +-------------|-------------------------------+
+# | SW1         |                               |
+# |        $rp1 +                               |
+# | 198.51.100.1/28                             |
+# | 2001:db8:1::1/64                            |
+# |                                             |
+# | 2001:db8:2::1/64           2001:db8:3::1/64 |
+# | 198.51.100.17/28           198.51.100.33/28 |
+# |         $rp2 +                     $rp3 +   |
+# +--------------|--------------------------|---+
+#                |                          |
+#                |                          |
+# +--------------|---+       +--------------|---+
+# | H2 (v$h2)    |   |       | H3 (v$h3)    |   |
+# |          $h2 +   |       |          $h3 +   |
+# | 198.51.100.18/28 |       | 198.51.100.34/28 |
+# | 2001:db8:2::2/64 |       | 2001:db8:3::2/64 |
+# +------------------+       +------------------+
+#
+
+ALL_TESTS="mcast_v4 mcast_v6"
+NUM_NETIFS=6
+source lib.sh
+source tc_common.sh
+
+require_command $MCD
+require_command $MC_CLI
+table_name=selftests
+
+h1_create()
+{
+	simple_if_init $h1 198.51.100.2/28 2001:db8:1::2/64
+
+	ip route add 198.51.100.16/28 vrf v$h1 nexthop via 198.51.100.1
+	ip route add 198.51.100.32/28 vrf v$h1 nexthop via 198.51.100.1
+
+	ip route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::1
+	ip route add 2001:db8:3::/64 vrf v$h1 nexthop via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+	ip route del 2001:db8:3::/64 vrf v$h1
+	ip route del 2001:db8:2::/64 vrf v$h1
+
+	ip route del 198.51.100.32/28 vrf v$h1
+	ip route del 198.51.100.16/28 vrf v$h1
+
+	simple_if_fini $h1 198.51.100.2/28 2001:db8:1::2/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 198.51.100.18/28 2001:db8:2::2/64
+
+	ip route add 198.51.100.0/28 vrf v$h2 nexthop via 198.51.100.17
+	ip route add 198.51.100.32/28 vrf v$h2 nexthop via 198.51.100.17
+
+	ip route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::1
+	ip route add 2001:db8:3::/64 vrf v$h2 nexthop via 2001:db8:2::1
+
+	tc qdisc add dev $h2 ingress
+}
+
+h2_destroy()
+{
+	tc qdisc del dev $h2 ingress
+
+	ip route del 2001:db8:3::/64 vrf v$h2
+	ip route del 2001:db8:1::/64 vrf v$h2
+
+	ip route del 198.51.100.32/28 vrf v$h2
+	ip route del 198.51.100.0/28 vrf v$h2
+
+	simple_if_fini $h2 198.51.100.18/28 2001:db8:2::2/64
+}
+
+h3_create()
+{
+	simple_if_init $h3 198.51.100.34/28 2001:db8:3::2/64
+
+	ip route add 198.51.100.0/28 vrf v$h3 nexthop via 198.51.100.33
+	ip route add 198.51.100.16/28 vrf v$h3 nexthop via 198.51.100.33
+
+	ip route add 2001:db8:1::/64 vrf v$h3 nexthop via 2001:db8:3::1
+	ip route add 2001:db8:2::/64 vrf v$h3 nexthop via 2001:db8:3::1
+
+	tc qdisc add dev $h3 ingress
+}
+
+h3_destroy()
+{
+	tc qdisc del dev $h3 ingress
+
+	ip route del 2001:db8:2::/64 vrf v$h3
+	ip route del 2001:db8:1::/64 vrf v$h3
+
+	ip route del 198.51.100.16/28 vrf v$h3
+	ip route del 198.51.100.0/28 vrf v$h3
+
+	simple_if_fini $h3 198.51.100.34/28 2001:db8:3::2/64
+}
+
+router_create()
+{
+	ip link set dev $rp1 up
+	ip link set dev $rp2 up
+	ip link set dev $rp3 up
+
+	ip address add 198.51.100.1/28 dev $rp1
+	ip address add 198.51.100.17/28 dev $rp2
+	ip address add 198.51.100.33/28 dev $rp3
+
+	ip address add 2001:db8:1::1/64 dev $rp1
+	ip address add 2001:db8:2::1/64 dev $rp2
+	ip address add 2001:db8:3::1/64 dev $rp3
+}
+
+router_destroy()
+{
+	ip address del 2001:db8:3::1/64 dev $rp3
+	ip address del 2001:db8:2::1/64 dev $rp2
+	ip address del 2001:db8:1::1/64 dev $rp1
+
+	ip address del 198.51.100.33/28 dev $rp3
+	ip address del 198.51.100.17/28 dev $rp2
+	ip address del 198.51.100.1/28 dev $rp1
+
+	ip link set dev $rp3 down
+	ip link set dev $rp2 down
+	ip link set dev $rp1 down
+}
+
+start_mcd()
+{
+	SMCROUTEDIR="$(mktemp -d)"
+
+	for ((i = 1; i <= $NUM_NETIFS; ++i)); do
+		echo "phyint ${NETIFS[p$i]} enable" >> \
+			$SMCROUTEDIR/$table_name.conf
+	done
+
+	$MCD -N -I $table_name -f $SMCROUTEDIR/$table_name.conf \
+		-P $SMCROUTEDIR/$table_name.pid
+}
+
+kill_mcd()
+{
+	pkill $MCD
+	rm -rf $SMCROUTEDIR
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp1=${NETIFS[p2]}
+
+	rp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	rp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	start_mcd
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+	h3_create
+
+	router_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	router_destroy
+
+	h3_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+
+	kill_mcd
+}
+
+create_mcast_sg()
+{
+	local if_name=$1; shift
+	local s_addr=$1; shift
+	local mcast=$1; shift
+	local dest_ifs=${@}
+
+	$MC_CLI -I $table_name add $if_name $s_addr $mcast $dest_ifs
+}
+
+delete_mcast_sg()
+{
+	local if_name=$1; shift
+	local s_addr=$1; shift
+	local mcast=$1; shift
+	local dest_ifs=${@}
+
+        $MC_CLI -I $table_name remove $if_name $s_addr $mcast $dest_ifs
+}
+
+mcast_v4()
+{
+	# Add two interfaces to an MC group, send a packet to the MC group and
+	# verify packets are received on both. Then delete the route and verify
+	# packets are no longer received.
+
+	RET=0
+
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 122 flower \
+		dst_ip 225.1.2.3 action drop
+	tc filter add dev $h3 ingress protocol ip pref 1 handle 133 flower \
+		dst_ip 225.1.2.3 action drop
+
+	create_mcast_sg $rp1 198.51.100.2 225.1.2.3 $rp2 $rp3
+
+	# Send frames with the corresponding L2 destination address.
+	$MZ $h1 -c 5 -p 128 -t udp -a 00:11:22:33:44:55 -b 01:00:5e:01:02:03 \
+		-A 198.51.100.2 -B 225.1.2.3 -q
+
+	tc_check_packets "dev $h2 ingress" 122 5
+	check_err $? "Multicast not received on first host"
+	tc_check_packets "dev $h3 ingress" 133 5
+	check_err $? "Multicast not received on second host"
+
+	delete_mcast_sg $rp1 198.51.100.2 225.1.2.3 $rp2 $rp3
+
+	$MZ $h1 -c 5 -p 128 -t udp -a 00:11:22:33:44:55 -b 01:00:5e:01:02:03 \
+		-A 198.51.100.2 -B 225.1.2.3 -q
+
+	tc_check_packets "dev $h2 ingress" 122 5
+	check_err $? "Multicast received on host although deleted"
+	tc_check_packets "dev $h3 ingress" 133 5
+	check_err $? "Multicast received on second host although deleted"
+
+	tc filter del dev $h3 ingress protocol ip pref 1 handle 133 flower
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 122 flower
+
+	log_test "mcast IPv4"
+}
+
+mcast_v6()
+{
+	# Add two interfaces to an MC group, send a packet to the MC group and
+	# verify packets are received on both. Then delete the route and verify
+	# packets are no longer received.
+
+	RET=0
+
+	tc filter add dev $h2 ingress protocol ipv6 pref 1 handle 122 flower \
+		dst_ip ff0e::3 action drop
+	tc filter add dev $h3 ingress protocol ipv6 pref 1 handle 133 flower \
+		dst_ip ff0e::3 action drop
+
+	create_mcast_sg $rp1 2001:db8:1::2 ff0e::3 $rp2 $rp3
+
+	# Send frames with the corresponding L2 destination address.
+	$MZ $h1 -6 -c 5 -p 128 -t udp -a 00:11:22:33:44:55 \
+		-b 33:33:00:00:00:03 -A 2001:db8:1::2 -B ff0e::3 -q
+
+	tc_check_packets "dev $h2 ingress" 122 5
+	check_err $? "Multicast not received on first host"
+	tc_check_packets "dev $h3 ingress" 133 5
+	check_err $? "Multicast not received on second host"
+
+	delete_mcast_sg $rp1 2001:db8:1::2 ff0e::3 $rp2 $rp3
+
+	$MZ $h1 -6 -c 5 -p 128 -t udp -a 00:11:22:33:44:55 \
+		-b 33:33:00:00:00:03 -A 2001:db8:1::2 -B ff0e::3 -q
+
+	tc_check_packets "dev $h2 ingress" 122 5
+	check_err $? "Multicast received on first host although deleted"
+	tc_check_packets "dev $h3 ingress" 133 5
+	check_err $? "Multicast received on second host although deleted"
+
+	tc filter del dev $h3 ingress protocol ipv6 pref 1 handle 133 flower
+	tc filter del dev $h2 ingress protocol ipv6 pref 1 handle 122 flower
+
+	log_test "mcast IPv6"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_vid_1.sh b/tools/testing/selftests/net/forwarding/router_vid_1.sh
new file mode 100755
index 000000000000..a7306c7ac06d
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_vid_1.sh
@@ -0,0 +1,135 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="ping_ipv4 ping_ipv6"
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+	vrf_create "vrf-h1"
+	ip link set dev vrf-h1 up
+
+	ip link set dev $h1 up
+	vlan_create $h1 1 vrf-h1 192.0.2.2/24 2001:db8:1::2/64
+
+	ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+	ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+	ip route del 2001:db8:2::/64 vrf vrf-h1
+	ip route del 198.51.100.0/24 vrf vrf-h1
+
+	vlan_destroy $h1 1
+	ip link set dev $h1 down
+
+	ip link set dev vrf-h1 down
+	vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+	vrf_create "vrf-h2"
+	ip link set dev vrf-h2 up
+
+	ip link set dev $h2 up
+	vlan_create $h2 1 vrf-h2 198.51.100.2/24 2001:db8:2::2/64
+
+	ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+	ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+	ip route del 2001:db8:1::/64 vrf vrf-h2
+	ip route del 192.0.2.0/24 vrf vrf-h2
+
+	vlan_destroy $h2 1
+	ip link set dev $h2 down
+
+	ip link set dev vrf-h2 down
+	vrf_destroy "vrf-h2"
+}
+
+router_create()
+{
+	ip link set dev $rp1 up
+	ip link add link $rp1 name $rp1.1 up type vlan id 1
+
+	ip address add 192.0.2.1/24 dev $rp1.1
+	ip address add 2001:db8:1::1/64 dev $rp1.1
+
+	ip link set dev $rp2 up
+	ip link add link $rp2 name $rp2.1 up type vlan id 1
+
+	ip address add 198.51.100.1/24 dev $rp2.1
+	ip address add 2001:db8:2::1/64 dev $rp2.1
+}
+
+router_destroy()
+{
+	ip address del 2001:db8:2::1/64 dev $rp2.1
+	ip address del 198.51.100.1/24 dev $rp2.1
+
+	ip link del dev $rp2.1
+	ip link set dev $rp2 down
+
+	ip address del 2001:db8:1::1/64 dev $rp1.1
+	ip address del 192.0.2.1/24 dev $rp1.1
+
+	ip link del dev $rp1.1
+	ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp1=${NETIFS[p2]}
+
+	rp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	router_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	router_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1.1 198.51.100.2
+}
+
+ping_ipv6()
+{
+	ping6_test $h1.1 2001:db8:2::2
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
new file mode 100755
index 000000000000..56cef3b1c194
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
@@ -0,0 +1,786 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +--------------------+                               +----------------------+
+# | H1 (vrf)           |                               |             H2 (vrf) |
+# |    + $h1           |                               |  + $h2               |
+# |    | 192.0.2.1/28  |                               |  | 192.0.2.2/28      |
+# +----|---------------+                               +--|-------------------+
+#      |                                                  |
+# +----|--------------------------------------------------|-------------------+
+# | SW |                                                  |                   |
+# | +--|--------------------------------------------------|-----------------+ |
+# | |  + $swp1                   BR1 (802.1d)             + $swp2           | |
+# | |                                                                       | |
+# | |  + vx1 (vxlan)                                                        | |
+# | |    local 192.0.2.17                                                   | |
+# | |    remote 192.0.2.34 192.0.2.50                                       | |
+# | |    id 1000 dstport $VXPORT                                            | |
+# | +-----------------------------------------------------------------------+ |
+# |                                                                           |
+# |  192.0.2.32/28 via 192.0.2.18                                             |
+# |  192.0.2.48/28 via 192.0.2.18                                             |
+# |                                                                           |
+# |    + $rp1                                                                 |
+# |    | 192.0.2.17/28                                                        |
+# +----|----------------------------------------------------------------------+
+#      |
+# +----|--------------------------------------------------------+
+# |    |                                             VRP2 (vrf) |
+# |    + $rp2                                                   |
+# |      192.0.2.18/28                                          |
+# |                                                             |   (maybe) HW
+# =============================================================================
+# |                                                             |  (likely) SW
+# |    + v1 (veth)                             + v3 (veth)      |
+# |    | 192.0.2.33/28                         | 192.0.2.49/28  |
+# +----|---------------------------------------|----------------+
+#      |                                       |
+# +----|------------------------------+   +----|------------------------------+
+# |    + v2 (veth)        NS1 (netns) |   |    + v4 (veth)        NS2 (netns) |
+# |      192.0.2.34/28                |   |      192.0.2.50/28                |
+# |                                   |   |                                   |
+# |   192.0.2.16/28 via 192.0.2.33    |   |   192.0.2.16/28 via 192.0.2.49    |
+# |   192.0.2.50/32 via 192.0.2.33    |   |   192.0.2.34/32 via 192.0.2.49    |
+# |                                   |   |                                   |
+# | +-------------------------------+ |   | +-------------------------------+ |
+# | |                  BR2 (802.1d) | |   | |                  BR2 (802.1d) | |
+# | |  + vx2 (vxlan)                | |   | |  + vx2 (vxlan)                | |
+# | |    local 192.0.2.34           | |   | |    local 192.0.2.50           | |
+# | |    remote 192.0.2.17          | |   | |    remote 192.0.2.17          | |
+# | |    remote 192.0.2.50          | |   | |    remote 192.0.2.34          | |
+# | |    id 1000 dstport $VXPORT    | |   | |    id 1000 dstport $VXPORT    | |
+# | |                               | |   | |                               | |
+# | |  + w1 (veth)                  | |   | |  + w1 (veth)                  | |
+# | +--|----------------------------+ |   | +--|----------------------------+ |
+# |    |                              |   |    |                              |
+# | +--|----------------------------+ |   | +--|----------------------------+ |
+# | |  |                  VW2 (vrf) | |   | |  |                  VW2 (vrf) | |
+# | |  + w2 (veth)                  | |   | |  + w2 (veth)                  | |
+# | |    192.0.2.3/28               | |   | |    192.0.2.4/28               | |
+# | +-------------------------------+ |   | +-------------------------------+ |
+# +-----------------------------------+   +-----------------------------------+
+
+: ${VXPORT:=4789}
+export VXPORT
+
+: ${ALL_TESTS:="
+	ping_ipv4
+	test_flood
+	test_unicast
+	test_ttl
+	test_tos
+	test_ecn_encap
+	test_ecn_decap
+	reapply_config
+	ping_ipv4
+	test_flood
+	test_unicast
+	test_learning
+    "}
+
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28
+	tc qdisc add dev $h1 clsact
+}
+
+h1_destroy()
+{
+	tc qdisc del dev $h1 clsact
+	simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/28
+	tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+	tc qdisc del dev $h2 clsact
+	simple_if_fini $h2 192.0.2.2/28
+}
+
+rp1_set_addr()
+{
+	ip address add dev $rp1 192.0.2.17/28
+
+	ip route add 192.0.2.32/28 nexthop via 192.0.2.18
+	ip route add 192.0.2.48/28 nexthop via 192.0.2.18
+}
+
+rp1_unset_addr()
+{
+	ip route del 192.0.2.48/28 nexthop via 192.0.2.18
+	ip route del 192.0.2.32/28 nexthop via 192.0.2.18
+
+	ip address del dev $rp1 192.0.2.17/28
+}
+
+switch_create()
+{
+	ip link add name br1 type bridge vlan_filtering 0 mcast_snooping 0
+	# Make sure the bridge uses the MAC address of the local port and not
+	# that of the VxLAN's device.
+	ip link set dev br1 address $(mac_get $swp1)
+	ip link set dev br1 up
+
+	ip link set dev $rp1 up
+	rp1_set_addr
+
+	ip link add name vx1 type vxlan id 1000		\
+		local 192.0.2.17 dstport "$VXPORT"	\
+		nolearning noudpcsum tos inherit ttl 100
+	ip link set dev vx1 up
+
+	ip link set dev vx1 master br1
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+
+	ip link set dev $swp2 master br1
+	ip link set dev $swp2 up
+
+	bridge fdb append dev vx1 00:00:00:00:00:00 dst 192.0.2.34 self
+	bridge fdb append dev vx1 00:00:00:00:00:00 dst 192.0.2.50 self
+}
+
+switch_destroy()
+{
+	rp1_unset_addr
+	ip link set dev $rp1 down
+
+	bridge fdb del dev vx1 00:00:00:00:00:00 dst 192.0.2.50 self
+	bridge fdb del dev vx1 00:00:00:00:00:00 dst 192.0.2.34 self
+
+	ip link set dev vx1 nomaster
+	ip link set dev vx1 down
+	ip link del dev vx1
+
+	ip link set dev $swp2 down
+	ip link set dev $swp2 nomaster
+
+	ip link set dev $swp1 down
+	ip link set dev $swp1 nomaster
+
+	ip link set dev br1 down
+	ip link del dev br1
+}
+
+vrp2_create()
+{
+	simple_if_init $rp2 192.0.2.18/28
+	__simple_if_init v1 v$rp2 192.0.2.33/28
+	__simple_if_init v3 v$rp2 192.0.2.49/28
+	tc qdisc add dev v1 clsact
+}
+
+vrp2_destroy()
+{
+	tc qdisc del dev v1 clsact
+	__simple_if_fini v3 192.0.2.49/28
+	__simple_if_fini v1 192.0.2.33/28
+	simple_if_fini $rp2 192.0.2.18/28
+}
+
+ns_init_common()
+{
+	local in_if=$1; shift
+	local in_addr=$1; shift
+	local other_in_addr=$1; shift
+	local nh_addr=$1; shift
+	local host_addr=$1; shift
+
+	ip link set dev $in_if up
+	ip address add dev $in_if $in_addr/28
+	tc qdisc add dev $in_if clsact
+
+	ip link add name br2 type bridge vlan_filtering 0
+	ip link set dev br2 up
+
+	ip link add name w1 type veth peer name w2
+
+	ip link set dev w1 master br2
+	ip link set dev w1 up
+
+	ip link add name vx2 type vxlan id 1000 local $in_addr dstport "$VXPORT"
+	ip link set dev vx2 up
+	bridge fdb append dev vx2 00:00:00:00:00:00 dst 192.0.2.17 self
+	bridge fdb append dev vx2 00:00:00:00:00:00 dst $other_in_addr self
+
+	ip link set dev vx2 master br2
+	tc qdisc add dev vx2 clsact
+
+	simple_if_init w2 $host_addr/28
+
+	ip route add 192.0.2.16/28 nexthop via $nh_addr
+	ip route add $other_in_addr/32 nexthop via $nh_addr
+}
+export -f ns_init_common
+
+ns1_create()
+{
+	ip netns add ns1
+	ip link set dev v2 netns ns1
+	in_ns ns1 \
+	      ns_init_common v2 192.0.2.34 192.0.2.50 192.0.2.33 192.0.2.3
+}
+
+ns1_destroy()
+{
+	ip netns exec ns1 ip link set dev v2 netns 1
+	ip netns del ns1
+}
+
+ns2_create()
+{
+	ip netns add ns2
+	ip link set dev v4 netns ns2
+	in_ns ns2 \
+	      ns_init_common v4 192.0.2.50 192.0.2.34 192.0.2.49 192.0.2.4
+}
+
+ns2_destroy()
+{
+	ip netns exec ns2 ip link set dev v4 netns 1
+	ip netns del ns2
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	rp1=${NETIFS[p5]}
+	rp2=${NETIFS[p6]}
+
+	vrf_prepare
+	forwarding_enable
+
+	h1_create
+	h2_create
+	switch_create
+
+	ip link add name v1 type veth peer name v2
+	ip link add name v3 type veth peer name v4
+	vrp2_create
+	ns1_create
+	ns2_create
+
+	r1_mac=$(in_ns ns1 mac_get w2)
+	r2_mac=$(in_ns ns2 mac_get w2)
+	h2_mac=$(mac_get $h2)
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ns2_destroy
+	ns1_destroy
+	vrp2_destroy
+	ip link del dev v3
+	ip link del dev v1
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+
+	forwarding_restore
+	vrf_cleanup
+}
+
+# For the first round of tests, vx1 is the first device to get attached to the
+# bridge, and that at the point that the local IP is already configured. Try the
+# other scenario of attaching the device to an already-offloaded bridge, and
+# only then attach the local IP.
+reapply_config()
+{
+	echo "Reapplying configuration"
+
+	bridge fdb del dev vx1 00:00:00:00:00:00 dst 192.0.2.50 self
+	bridge fdb del dev vx1 00:00:00:00:00:00 dst 192.0.2.34 self
+	rp1_unset_addr
+	ip link set dev vx1 nomaster
+	sleep 5
+
+	ip link set dev vx1 master br1
+	bridge fdb append dev vx1 00:00:00:00:00:00 dst 192.0.2.34 self
+	bridge fdb append dev vx1 00:00:00:00:00:00 dst 192.0.2.50 self
+	sleep 1
+	rp1_set_addr
+	sleep 5
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.2 ": local->local"
+	ping_test $h1 192.0.2.3 ": local->remote 1"
+	ping_test $h1 192.0.2.4 ": local->remote 2"
+}
+
+maybe_in_ns()
+{
+	echo ${1:+in_ns} $1
+}
+
+__flood_counter_add_del()
+{
+	local add_del=$1; shift
+	local dev=$1; shift
+	local ns=$1; shift
+
+	# Putting the ICMP capture both to HW and to SW will end up
+	# double-counting the packets that are trapped to slow path, such as for
+	# the unicast test. Adding either skip_hw or skip_sw fixes this problem,
+	# but with skip_hw, the flooded packets are not counted at all, because
+	# those are dropped due to MAC address mismatch; and skip_sw is a no-go
+	# for veth-based topologies.
+	#
+	# So try to install with skip_sw and fall back to skip_sw if that fails.
+
+	$(maybe_in_ns $ns) __icmp_capture_add_del          \
+			   $add_del 100 "" $dev skip_sw 2>/dev/null || \
+	$(maybe_in_ns $ns) __icmp_capture_add_del          \
+			   $add_del 100 "" $dev skip_hw
+}
+
+flood_counter_install()
+{
+	__flood_counter_add_del add "$@"
+}
+
+flood_counter_uninstall()
+{
+	__flood_counter_add_del del "$@"
+}
+
+flood_fetch_stat()
+{
+	local dev=$1; shift
+	local ns=$1; shift
+
+	$(maybe_in_ns $ns) tc_rule_stats_get $dev 100 ingress
+}
+
+flood_fetch_stats()
+{
+	local counters=("${@}")
+	local counter
+
+	for counter in "${counters[@]}"; do
+		flood_fetch_stat $counter
+	done
+}
+
+vxlan_flood_test()
+{
+	local mac=$1; shift
+	local dst=$1; shift
+	local -a expects=("${@}")
+
+	local -a counters=($h2 "vx2 ns1" "vx2 ns2")
+	local counter
+	local key
+
+	for counter in "${counters[@]}"; do
+		flood_counter_install $counter
+	done
+
+	local -a t0s=($(flood_fetch_stats "${counters[@]}"))
+	$MZ $h1 -c 10 -d 100msec -p 64 -b $mac -B $dst -t icmp -q
+	sleep 1
+	local -a t1s=($(flood_fetch_stats "${counters[@]}"))
+
+	for key in ${!t0s[@]}; do
+		local delta=$((t1s[$key] - t0s[$key]))
+		local expect=${expects[$key]}
+
+		((expect == delta))
+		check_err $? "${counters[$key]}: Expected to capture $expect packets, got $delta."
+	done
+
+	for counter in "${counters[@]}"; do
+		flood_counter_uninstall $counter
+	done
+}
+
+__test_flood()
+{
+	local mac=$1; shift
+	local dst=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	vxlan_flood_test $mac $dst 10 10 10
+
+	log_test "VXLAN: $what"
+}
+
+test_flood()
+{
+	__test_flood de:ad:be:ef:13:37 192.0.2.100 "flood"
+}
+
+vxlan_fdb_add_del()
+{
+	local add_del=$1; shift
+	local mac=$1; shift
+	local dev=$1; shift
+	local dst=$1; shift
+
+	bridge fdb $add_del dev $dev $mac self static permanent \
+		${dst:+dst} $dst 2>/dev/null
+	bridge fdb $add_del dev $dev $mac master static 2>/dev/null
+}
+
+__test_unicast()
+{
+	local mac=$1; shift
+	local dst=$1; shift
+	local hit_idx=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	local -a expects=(0 0 0)
+	expects[$hit_idx]=10
+
+	vxlan_flood_test $mac $dst "${expects[@]}"
+
+	log_test "VXLAN: $what"
+}
+
+test_unicast()
+{
+	local -a targets=("$h2_mac $h2"
+			  "$r1_mac vx1 192.0.2.34"
+			  "$r2_mac vx1 192.0.2.50")
+	local target
+
+	for target in "${targets[@]}"; do
+		vxlan_fdb_add_del add $target
+	done
+
+	__test_unicast $h2_mac 192.0.2.2 0 "local MAC unicast"
+	__test_unicast $r1_mac 192.0.2.3 1 "remote MAC 1 unicast"
+	__test_unicast $r2_mac 192.0.2.4 2 "remote MAC 2 unicast"
+
+	for target in "${targets[@]}"; do
+		vxlan_fdb_add_del del $target
+	done
+}
+
+vxlan_ping_test()
+{
+	local ping_dev=$1; shift
+	local ping_dip=$1; shift
+	local ping_args=$1; shift
+	local capture_dev=$1; shift
+	local capture_dir=$1; shift
+	local capture_pref=$1; shift
+	local expect=$1; shift
+
+	local t0=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir)
+	ping_do $ping_dev $ping_dip "$ping_args"
+	local t1=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir)
+	local delta=$((t1 - t0))
+
+	# Tolerate a couple stray extra packets.
+	((expect <= delta && delta <= expect + 2))
+	check_err $? "$capture_dev: Expected to capture $expect packets, got $delta."
+}
+
+test_ttl()
+{
+	RET=0
+
+	tc filter add dev v1 egress pref 77 prot ip \
+		flower ip_ttl 99 action pass
+	vxlan_ping_test $h1 192.0.2.3 "" v1 egress 77 10
+	tc filter del dev v1 egress pref 77 prot ip
+
+	log_test "VXLAN: envelope TTL"
+}
+
+test_tos()
+{
+	RET=0
+
+	tc filter add dev v1 egress pref 77 prot ip \
+		flower ip_tos 0x40 action pass
+	vxlan_ping_test $h1 192.0.2.3 "-Q 0x40" v1 egress 77 10
+	vxlan_ping_test $h1 192.0.2.3 "-Q 0x30" v1 egress 77 0
+	tc filter del dev v1 egress pref 77 prot ip
+
+	log_test "VXLAN: envelope TOS inheritance"
+}
+
+__test_ecn_encap()
+{
+	local q=$1; shift
+	local tos=$1; shift
+
+	RET=0
+
+	tc filter add dev v1 egress pref 77 prot ip \
+		flower ip_tos $tos action pass
+	sleep 1
+	vxlan_ping_test $h1 192.0.2.3 "-Q $q" v1 egress 77 10
+	tc filter del dev v1 egress pref 77 prot ip
+
+	log_test "VXLAN: ECN encap: $q->$tos"
+}
+
+test_ecn_encap()
+{
+	# In accordance with INET_ECN_encapsulate()
+	__test_ecn_encap 0x00 0x00
+	__test_ecn_encap 0x01 0x01
+	__test_ecn_encap 0x02 0x02
+	__test_ecn_encap 0x03 0x02
+}
+
+vxlan_encapped_ping_do()
+{
+	local count=$1; shift
+	local dev=$1; shift
+	local next_hop_mac=$1; shift
+	local dest_ip=$1; shift
+	local dest_mac=$1; shift
+	local inner_tos=$1; shift
+	local outer_tos=$1; shift
+
+	$MZ $dev -c $count -d 100msec -q \
+		-b $next_hop_mac -B $dest_ip \
+		-t udp tos=$outer_tos,sp=23456,dp=$VXPORT,p=$(:
+		    )"08:"$(                      : VXLAN flags
+		    )"00:00:00:"$(                : VXLAN reserved
+		    )"00:03:e8:"$(                : VXLAN VNI
+		    )"00:"$(                      : VXLAN reserved
+		    )"$dest_mac:"$(               : ETH daddr
+		    )"$(mac_get w2):"$(           : ETH saddr
+		    )"08:00:"$(                   : ETH type
+		    )"45:"$(                      : IP version + IHL
+		    )"$inner_tos:"$(              : IP TOS
+		    )"00:54:"$(                   : IP total length
+		    )"99:83:"$(                   : IP identification
+		    )"40:00:"$(                   : IP flags + frag off
+		    )"40:"$(                      : IP TTL
+		    )"01:"$(                      : IP proto
+		    )"00:00:"$(                   : IP header csum
+		    )"c0:00:02:03:"$(             : IP saddr: 192.0.2.3
+		    )"c0:00:02:01:"$(             : IP daddr: 192.0.2.1
+		    )"08:"$(                      : ICMP type
+		    )"00:"$(                      : ICMP code
+		    )"8b:f2:"$(                   : ICMP csum
+		    )"1f:6a:"$(                   : ICMP request identifier
+		    )"00:01:"$(                   : ICMP request sequence number
+		    )"4f:ff:c5:5b:00:00:00:00:"$( : ICMP payload
+		    )"6d:74:0b:00:00:00:00:00:"$( :
+		    )"10:11:12:13:14:15:16:17:"$( :
+		    )"18:19:1a:1b:1c:1d:1e:1f:"$( :
+		    )"20:21:22:23:24:25:26:27:"$( :
+		    )"28:29:2a:2b:2c:2d:2e:2f:"$( :
+		    )"30:31:32:33:34:35:36:37"
+}
+export -f vxlan_encapped_ping_do
+
+vxlan_encapped_ping_test()
+{
+	local ping_dev=$1; shift
+	local nh_dev=$1; shift
+	local ping_dip=$1; shift
+	local inner_tos=$1; shift
+	local outer_tos=$1; shift
+	local stat_get=$1; shift
+	local expect=$1; shift
+
+	local t0=$($stat_get)
+
+	in_ns ns1 \
+		vxlan_encapped_ping_do 10 $ping_dev $(mac_get $nh_dev) \
+			$ping_dip $(mac_get $h1) \
+			$inner_tos $outer_tos
+
+	local t1=$($stat_get)
+	local delta=$((t1 - t0))
+
+	# Tolerate a couple stray extra packets.
+	((expect <= delta && delta <= expect + 2))
+	check_err $? "Expected to capture $expect packets, got $delta."
+}
+export -f vxlan_encapped_ping_test
+
+__test_ecn_decap()
+{
+	local orig_inner_tos=$1; shift
+	local orig_outer_tos=$1; shift
+	local decapped_tos=$1; shift
+
+	RET=0
+
+	tc filter add dev $h1 ingress pref 77 prot ip \
+		flower ip_tos $decapped_tos action pass
+	sleep 1
+	vxlan_encapped_ping_test v2 v1 192.0.2.17 \
+				 $orig_inner_tos $orig_outer_tos \
+				 "tc_rule_stats_get $h1 77 ingress" 10
+	tc filter del dev $h1 ingress pref 77
+
+	log_test "VXLAN: ECN decap: $orig_outer_tos/$orig_inner_tos->$decapped_tos"
+}
+
+test_ecn_decap_error()
+{
+	local orig_inner_tos=00
+	local orig_outer_tos=03
+
+	RET=0
+
+	vxlan_encapped_ping_test v2 v1 192.0.2.17 \
+				 $orig_inner_tos $orig_outer_tos \
+				 "link_stats_rx_errors_get vx1" 10
+
+	log_test "VXLAN: ECN decap: $orig_outer_tos/$orig_inner_tos->error"
+}
+
+test_ecn_decap()
+{
+	# In accordance with INET_ECN_decapsulate()
+	__test_ecn_decap 00 00 0x00
+	__test_ecn_decap 01 01 0x01
+	__test_ecn_decap 02 01 0x02
+	__test_ecn_decap 01 03 0x03
+	__test_ecn_decap 02 03 0x03
+	test_ecn_decap_error
+}
+
+test_learning()
+{
+	local mac=de:ad:be:ef:13:37
+	local dst=192.0.2.100
+
+	# Enable learning on the VxLAN device and set ageing time to 10 seconds
+	ip link set dev br1 type bridge ageing_time 1000
+	ip link set dev vx1 type vxlan ageing 10
+	ip link set dev vx1 type vxlan learning
+	reapply_config
+
+	# Check that flooding works
+	RET=0
+
+	vxlan_flood_test $mac $dst 10 10 10
+
+	log_test "VXLAN: flood before learning"
+
+	# Send a packet with source mac set to $mac from host w2 and check that
+	# a corresponding entry is created in VxLAN device vx1
+	RET=0
+
+	in_ns ns1 $MZ w2 -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff -B $dst \
+		-t icmp -q
+	sleep 1
+
+	bridge fdb show brport vx1 | grep $mac | grep -q self
+	check_err $?
+	bridge fdb show brport vx1 | grep $mac | grep -q -v self
+	check_err $?
+
+	log_test "VXLAN: show learned FDB entry"
+
+	# Repeat first test and check that packets only reach host w2 in ns1
+	RET=0
+
+	vxlan_flood_test $mac $dst 0 10 0
+
+	log_test "VXLAN: learned FDB entry"
+
+	# Delete the learned FDB entry from the VxLAN and bridge devices and
+	# check that packets are flooded
+	RET=0
+
+	bridge fdb del dev vx1 $mac master self
+	sleep 1
+
+	vxlan_flood_test $mac $dst 10 10 10
+
+	log_test "VXLAN: deletion of learned FDB entry"
+
+	# Re-learn the first FDB entry and check that it is correctly aged-out
+	RET=0
+
+	in_ns ns1 $MZ w2 -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff -B $dst \
+		-t icmp -q
+	sleep 1
+
+	bridge fdb show brport vx1 | grep $mac | grep -q self
+	check_err $?
+	bridge fdb show brport vx1 | grep $mac | grep -q -v self
+	check_err $?
+
+	vxlan_flood_test $mac $dst 0 10 0
+
+	sleep 20
+
+	bridge fdb show brport vx1 | grep $mac | grep -q self
+	check_fail $?
+	bridge fdb show brport vx1 | grep $mac | grep -q -v self
+	check_fail $?
+
+	vxlan_flood_test $mac $dst 10 10 10
+
+	log_test "VXLAN: Ageing of learned FDB entry"
+
+	# Toggle learning on the bridge port and check that the bridge's FDB
+	# is populated only when it should
+	RET=0
+
+	ip link set dev vx1 type bridge_slave learning off
+
+	in_ns ns1 $MZ w2 -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff -B $dst \
+		-t icmp -q
+	sleep 1
+
+	bridge fdb show brport vx1 | grep $mac | grep -q -v self
+	check_fail $?
+
+	ip link set dev vx1 type bridge_slave learning on
+
+	in_ns ns1 $MZ w2 -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff -B $dst \
+		-t icmp -q
+	sleep 1
+
+	bridge fdb show brport vx1 | grep $mac | grep -q -v self
+	check_err $?
+
+	log_test "VXLAN: learning toggling on bridge port"
+
+	# Restore previous settings
+	ip link set dev vx1 type vxlan nolearning
+	ip link set dev vx1 type vxlan ageing 300
+	ip link set dev br1 type bridge ageing_time 30000
+	reapply_config
+}
+
+test_all()
+{
+	echo "Running tests with UDP port $VXPORT"
+	tests_run
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+test_all
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_port_8472.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_port_8472.sh
new file mode 100755
index 000000000000..3bf3da69195f
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_port_8472.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# A wrapper to run VXLAN tests with an unusual port number.
+
+VXPORT=8472
+ALL_TESTS="
+	ping_ipv4
+"
+source vxlan_bridge_1d.sh
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh
new file mode 100755
index 000000000000..a5789721ba92
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh
@@ -0,0 +1,860 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------+                          +------------------------+
+# | H1 (vrf)              |                          | H2 (vrf)               |
+# |  + $h1.10             |                          |  + $h2.10              |
+# |  | 192.0.2.1/28       |                          |  | 192.0.2.2/28        |
+# |  |                    |                          |  |                     |
+# |  | + $h1.20           |                          |  | + $h2.20            |
+# |  \ | 198.51.100.1/24  |                          |  \ | 198.51.100.2/24   |
+# |   \|                  |                          |   \|                   |
+# |    + $h1              |                          |    + $h2               |
+# +----|------------------+                          +----|-------------------+
+#      |                                                  |
+# +----|--------------------------------------------------|-------------------+
+# | SW |                                                  |                   |
+# | +--|--------------------------------------------------|-----------------+ |
+# | |  + $swp1                   BR1 (802.1q)             + $swp2           | |
+# | |     vid 10                                             vid 10         | |
+# | |     vid 20                                             vid 20         | |
+# | |                                                                       | |
+# | |  + vx10 (vxlan)                        + vx20 (vxlan)                 | |
+# | |    local 192.0.2.17                      local 192.0.2.17             | |
+# | |    remote 192.0.2.34 192.0.2.50          remote 192.0.2.34 192.0.2.50 | |
+# | |    id 1000 dstport $VXPORT               id 2000 dstport $VXPORT      | |
+# | |    vid 10 pvid untagged                  vid 20 pvid untagged         | |
+# | +-----------------------------------------------------------------------+ |
+# |                                                                           |
+# |  192.0.2.32/28 via 192.0.2.18                                             |
+# |  192.0.2.48/28 via 192.0.2.18                                             |
+# |                                                                           |
+# |    + $rp1                                                                 |
+# |    | 192.0.2.17/28                                                        |
+# +----|----------------------------------------------------------------------+
+#      |
+# +----|--------------------------------------------------------+
+# |    |                                             VRP2 (vrf) |
+# |    + $rp2                                                   |
+# |      192.0.2.18/28                                          |
+# |                                                             |   (maybe) HW
+# =============================================================================
+# |                                                             |  (likely) SW
+# |    + v1 (veth)                             + v3 (veth)      |
+# |    | 192.0.2.33/28                         | 192.0.2.49/28  |
+# +----|---------------------------------------|----------------+
+#      |                                       |
+# +----|------------------------------+   +----|------------------------------+
+# |    + v2 (veth)        NS1 (netns) |   |    + v4 (veth)        NS2 (netns) |
+# |      192.0.2.34/28                |   |      192.0.2.50/28                |
+# |                                   |   |                                   |
+# |   192.0.2.16/28 via 192.0.2.33    |   |   192.0.2.16/28 via 192.0.2.49    |
+# |   192.0.2.50/32 via 192.0.2.33    |   |   192.0.2.34/32 via 192.0.2.49    |
+# |                                   |   |                                   |
+# | +-------------------------------+ |   | +-------------------------------+ |
+# | |                  BR2 (802.1q) | |   | |                  BR2 (802.1q) | |
+# | |  + vx10 (vxlan)               | |   | |  + vx10 (vxlan)               | |
+# | |    local 192.0.2.34           | |   | |    local 192.0.2.50           | |
+# | |    remote 192.0.2.17          | |   | |    remote 192.0.2.17          | |
+# | |    remote 192.0.2.50          | |   | |    remote 192.0.2.34          | |
+# | |    id 1000 dstport $VXPORT    | |   | |    id 1000 dstport $VXPORT    | |
+# | |    vid 10 pvid untagged       | |   | |    vid 10 pvid untagged       | |
+# | |                               | |   | |                               | |
+# | |  + vx20 (vxlan)               | |   | |  + vx20 (vxlan)               | |
+# | |    local 192.0.2.34           | |   | |    local 192.0.2.50           | |
+# | |    remote 192.0.2.17          | |   | |    remote 192.0.2.17          | |
+# | |    remote 192.0.2.50          | |   | |    remote 192.0.2.34          | |
+# | |    id 2000 dstport $VXPORT    | |   | |    id 2000 dstport $VXPORT    | |
+# | |    vid 20 pvid untagged       | |   | |    vid 20 pvid untagged       | |
+# | |                               | |   | |                               | |
+# | |  + w1 (veth)                  | |   | |  + w1 (veth)                  | |
+# | |  | vid 10                     | |   | |  | vid 10                     | |
+# | |  | vid 20                     | |   | |  | vid 20                     | |
+# | +--|----------------------------+ |   | +--|----------------------------+ |
+# |    |                              |   |    |                              |
+# | +--|----------------------------+ |   | +--|----------------------------+ |
+# | |  + w2 (veth)        VW2 (vrf) | |   | |  + w2 (veth)        VW2 (vrf) | |
+# | |  |\                           | |   | |  |\                           | |
+# | |  | + w2.10                    | |   | |  | + w2.10                    | |
+# | |  |   192.0.2.3/28             | |   | |  |   192.0.2.4/28             | |
+# | |  |                            | |   | |  |                            | |
+# | |  + w2.20                      | |   | |  + w2.20                      | |
+# | |    198.51.100.3/24            | |   | |    198.51.100.4/24            | |
+# | +-------------------------------+ |   | +-------------------------------+ |
+# +-----------------------------------+   +-----------------------------------+
+
+: ${VXPORT:=4789}
+export VXPORT
+
+: ${ALL_TESTS:="
+	ping_ipv4
+	test_flood
+	test_unicast
+	reapply_config
+	ping_ipv4
+	test_flood
+	test_unicast
+	test_learning
+	test_pvid
+    "}
+
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1
+	tc qdisc add dev $h1 clsact
+	vlan_create $h1 10 v$h1 192.0.2.1/28
+	vlan_create $h1 20 v$h1 198.51.100.1/24
+}
+
+h1_destroy()
+{
+	vlan_destroy $h1 20
+	vlan_destroy $h1 10
+	tc qdisc del dev $h1 clsact
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2
+	tc qdisc add dev $h2 clsact
+	vlan_create $h2 10 v$h2 192.0.2.2/28
+	vlan_create $h2 20 v$h2 198.51.100.2/24
+}
+
+h2_destroy()
+{
+	vlan_destroy $h2 20
+	vlan_destroy $h2 10
+	tc qdisc del dev $h2 clsact
+	simple_if_fini $h2
+}
+
+rp1_set_addr()
+{
+	ip address add dev $rp1 192.0.2.17/28
+
+	ip route add 192.0.2.32/28 nexthop via 192.0.2.18
+	ip route add 192.0.2.48/28 nexthop via 192.0.2.18
+}
+
+rp1_unset_addr()
+{
+	ip route del 192.0.2.48/28 nexthop via 192.0.2.18
+	ip route del 192.0.2.32/28 nexthop via 192.0.2.18
+
+	ip address del dev $rp1 192.0.2.17/28
+}
+
+switch_create()
+{
+	ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \
+		mcast_snooping 0
+	# Make sure the bridge uses the MAC address of the local port and not
+	# that of the VxLAN's device.
+	ip link set dev br1 address $(mac_get $swp1)
+	ip link set dev br1 up
+
+	ip link set dev $rp1 up
+	rp1_set_addr
+
+	ip link add name vx10 type vxlan id 1000	\
+		local 192.0.2.17 dstport "$VXPORT"	\
+		nolearning noudpcsum tos inherit ttl 100
+	ip link set dev vx10 up
+
+	ip link set dev vx10 master br1
+	bridge vlan add vid 10 dev vx10 pvid untagged
+
+	ip link add name vx20 type vxlan id 2000	\
+		local 192.0.2.17 dstport "$VXPORT"	\
+		nolearning noudpcsum tos inherit ttl 100
+	ip link set dev vx20 up
+
+	ip link set dev vx20 master br1
+	bridge vlan add vid 20 dev vx20 pvid untagged
+
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+	bridge vlan add vid 10 dev $swp1
+	bridge vlan add vid 20 dev $swp1
+
+	ip link set dev $swp2 master br1
+	ip link set dev $swp2 up
+	bridge vlan add vid 10 dev $swp2
+	bridge vlan add vid 20 dev $swp2
+
+	bridge fdb append dev vx10 00:00:00:00:00:00 dst 192.0.2.34 self
+	bridge fdb append dev vx10 00:00:00:00:00:00 dst 192.0.2.50 self
+
+	bridge fdb append dev vx20 00:00:00:00:00:00 dst 192.0.2.34 self
+	bridge fdb append dev vx20 00:00:00:00:00:00 dst 192.0.2.50 self
+}
+
+switch_destroy()
+{
+	bridge fdb del dev vx20 00:00:00:00:00:00 dst 192.0.2.50 self
+	bridge fdb del dev vx20 00:00:00:00:00:00 dst 192.0.2.34 self
+
+	bridge fdb del dev vx10 00:00:00:00:00:00 dst 192.0.2.50 self
+	bridge fdb del dev vx10 00:00:00:00:00:00 dst 192.0.2.34 self
+
+	bridge vlan del vid 20 dev $swp2
+	bridge vlan del vid 10 dev $swp2
+	ip link set dev $swp2 down
+	ip link set dev $swp2 nomaster
+
+	bridge vlan del vid 20 dev $swp1
+	bridge vlan del vid 10 dev $swp1
+	ip link set dev $swp1 down
+	ip link set dev $swp1 nomaster
+
+	bridge vlan del vid 20 dev vx20
+	ip link set dev vx20 nomaster
+
+	ip link set dev vx20 down
+	ip link del dev vx20
+
+	bridge vlan del vid 10 dev vx10
+	ip link set dev vx10 nomaster
+
+	ip link set dev vx10 down
+	ip link del dev vx10
+
+	rp1_unset_addr
+	ip link set dev $rp1 down
+
+	ip link set dev br1 down
+	ip link del dev br1
+}
+
+vrp2_create()
+{
+	simple_if_init $rp2 192.0.2.18/28
+	__simple_if_init v1 v$rp2 192.0.2.33/28
+	__simple_if_init v3 v$rp2 192.0.2.49/28
+	tc qdisc add dev v1 clsact
+}
+
+vrp2_destroy()
+{
+	tc qdisc del dev v1 clsact
+	__simple_if_fini v3 192.0.2.49/28
+	__simple_if_fini v1 192.0.2.33/28
+	simple_if_fini $rp2 192.0.2.18/28
+}
+
+ns_init_common()
+{
+	local in_if=$1; shift
+	local in_addr=$1; shift
+	local other_in_addr=$1; shift
+	local nh_addr=$1; shift
+	local host_addr1=$1; shift
+	local host_addr2=$1; shift
+
+	ip link set dev $in_if up
+	ip address add dev $in_if $in_addr/28
+	tc qdisc add dev $in_if clsact
+
+	ip link add name br2 type bridge vlan_filtering 1 vlan_default_pvid 0
+	ip link set dev br2 up
+
+	ip link add name w1 type veth peer name w2
+
+	ip link set dev w1 master br2
+	ip link set dev w1 up
+
+	bridge vlan add vid 10 dev w1
+	bridge vlan add vid 20 dev w1
+
+	ip link add name vx10 type vxlan id 1000 local $in_addr \
+		dstport "$VXPORT"
+	ip link set dev vx10 up
+	bridge fdb append dev vx10 00:00:00:00:00:00 dst 192.0.2.17 self
+	bridge fdb append dev vx10 00:00:00:00:00:00 dst $other_in_addr self
+
+	ip link set dev vx10 master br2
+	tc qdisc add dev vx10 clsact
+
+	bridge vlan add vid 10 dev vx10 pvid untagged
+
+	ip link add name vx20 type vxlan id 2000 local $in_addr \
+		dstport "$VXPORT"
+	ip link set dev vx20 up
+	bridge fdb append dev vx20 00:00:00:00:00:00 dst 192.0.2.17 self
+	bridge fdb append dev vx20 00:00:00:00:00:00 dst $other_in_addr self
+
+	ip link set dev vx20 master br2
+	tc qdisc add dev vx20 clsact
+
+	bridge vlan add vid 20 dev vx20 pvid untagged
+
+	simple_if_init w2
+        vlan_create w2 10 vw2 $host_addr1/28
+        vlan_create w2 20 vw2 $host_addr2/24
+
+	ip route add 192.0.2.16/28 nexthop via $nh_addr
+	ip route add $other_in_addr/32 nexthop via $nh_addr
+}
+export -f ns_init_common
+
+ns1_create()
+{
+	ip netns add ns1
+	ip link set dev v2 netns ns1
+	in_ns ns1 \
+	      ns_init_common v2 192.0.2.34 192.0.2.50 192.0.2.33 192.0.2.3 \
+	      198.51.100.3
+}
+
+ns1_destroy()
+{
+	ip netns exec ns1 ip link set dev v2 netns 1
+	ip netns del ns1
+}
+
+ns2_create()
+{
+	ip netns add ns2
+	ip link set dev v4 netns ns2
+	in_ns ns2 \
+	      ns_init_common v4 192.0.2.50 192.0.2.34 192.0.2.49 192.0.2.4 \
+	      198.51.100.4
+}
+
+ns2_destroy()
+{
+	ip netns exec ns2 ip link set dev v4 netns 1
+	ip netns del ns2
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	rp1=${NETIFS[p5]}
+	rp2=${NETIFS[p6]}
+
+	vrf_prepare
+	forwarding_enable
+
+	h1_create
+	h2_create
+	switch_create
+
+	ip link add name v1 type veth peer name v2
+	ip link add name v3 type veth peer name v4
+	vrp2_create
+	ns1_create
+	ns2_create
+
+	r1_mac=$(in_ns ns1 mac_get w2)
+	r2_mac=$(in_ns ns2 mac_get w2)
+	h2_mac=$(mac_get $h2)
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ns2_destroy
+	ns1_destroy
+	vrp2_destroy
+	ip link del dev v3
+	ip link del dev v1
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+
+	forwarding_restore
+	vrf_cleanup
+}
+
+# For the first round of tests, vx10 and vx20 were the first devices to get
+# attached to the bridge, and that at the point that the local IP is already
+# configured. Try the other scenario of attaching these devices to a bridge
+# that already has local ports members, and only then assign the local IP.
+reapply_config()
+{
+	log_info "Reapplying configuration"
+
+	bridge fdb del dev vx20 00:00:00:00:00:00 dst 192.0.2.50 self
+	bridge fdb del dev vx20 00:00:00:00:00:00 dst 192.0.2.34 self
+
+	bridge fdb del dev vx10 00:00:00:00:00:00 dst 192.0.2.50 self
+	bridge fdb del dev vx10 00:00:00:00:00:00 dst 192.0.2.34 self
+
+	ip link set dev vx20 nomaster
+	ip link set dev vx10 nomaster
+
+	rp1_unset_addr
+	sleep 5
+
+	ip link set dev vx10 master br1
+	bridge vlan add vid 10 dev vx10 pvid untagged
+
+	ip link set dev vx20 master br1
+	bridge vlan add vid 20 dev vx20 pvid untagged
+
+	bridge fdb append dev vx10 00:00:00:00:00:00 dst 192.0.2.34 self
+	bridge fdb append dev vx10 00:00:00:00:00:00 dst 192.0.2.50 self
+
+	bridge fdb append dev vx20 00:00:00:00:00:00 dst 192.0.2.34 self
+	bridge fdb append dev vx20 00:00:00:00:00:00 dst 192.0.2.50 self
+
+	rp1_set_addr
+	sleep 5
+}
+
+ping_ipv4()
+{
+	ping_test $h1.10 192.0.2.2 ": local->local vid 10"
+	ping_test $h1.20 198.51.100.2 ": local->local vid 20"
+	ping_test $h1.10 192.0.2.3 ": local->remote 1 vid 10"
+	ping_test $h1.10 192.0.2.4 ": local->remote 2 vid 10"
+	ping_test $h1.20 198.51.100.3 ": local->remote 1 vid 20"
+	ping_test $h1.20 198.51.100.4 ": local->remote 2 vid 20"
+}
+
+maybe_in_ns()
+{
+	echo ${1:+in_ns} $1
+}
+
+__flood_counter_add_del()
+{
+	local add_del=$1; shift
+	local dev=$1; shift
+	local ns=$1; shift
+
+	# Putting the ICMP capture both to HW and to SW will end up
+	# double-counting the packets that are trapped to slow path, such as for
+	# the unicast test. Adding either skip_hw or skip_sw fixes this problem,
+	# but with skip_hw, the flooded packets are not counted at all, because
+	# those are dropped due to MAC address mismatch; and skip_sw is a no-go
+	# for veth-based topologies.
+	#
+	# So try to install with skip_sw and fall back to skip_sw if that fails.
+
+	$(maybe_in_ns $ns) __icmp_capture_add_del          \
+			   $add_del 100 "" $dev skip_sw 2>/dev/null || \
+	$(maybe_in_ns $ns) __icmp_capture_add_del          \
+			   $add_del 100 "" $dev skip_hw
+}
+
+flood_counter_install()
+{
+	__flood_counter_add_del add "$@"
+}
+
+flood_counter_uninstall()
+{
+	__flood_counter_add_del del "$@"
+}
+
+flood_fetch_stat()
+{
+	local dev=$1; shift
+	local ns=$1; shift
+
+	$(maybe_in_ns $ns) tc_rule_stats_get $dev 100 ingress
+}
+
+flood_fetch_stats()
+{
+	local counters=("${@}")
+	local counter
+
+	for counter in "${counters[@]}"; do
+		flood_fetch_stat $counter
+	done
+}
+
+vxlan_flood_test()
+{
+	local mac=$1; shift
+	local dst=$1; shift
+	local vid=$1; shift
+	local -a expects=("${@}")
+
+	local -a counters=($h2 "vx10 ns1" "vx20 ns1" "vx10 ns2" "vx20 ns2")
+	local counter
+	local key
+
+	# Packets reach the local host tagged whereas they reach the VxLAN
+	# devices untagged. In order to be able to use the same filter for
+	# all counters, make sure the packets also reach the local host
+	# untagged
+	bridge vlan add vid $vid dev $swp2 untagged
+	for counter in "${counters[@]}"; do
+		flood_counter_install $counter
+	done
+
+	local -a t0s=($(flood_fetch_stats "${counters[@]}"))
+	$MZ $h1 -Q $vid -c 10 -d 100msec -p 64 -b $mac -B $dst -t icmp -q
+	sleep 1
+	local -a t1s=($(flood_fetch_stats "${counters[@]}"))
+
+	for key in ${!t0s[@]}; do
+		local delta=$((t1s[$key] - t0s[$key]))
+		local expect=${expects[$key]}
+
+		((expect == delta))
+		check_err $? "${counters[$key]}: Expected to capture $expect packets, got $delta."
+	done
+
+	for counter in "${counters[@]}"; do
+		flood_counter_uninstall $counter
+	done
+	bridge vlan add vid $vid dev $swp2
+}
+
+__test_flood()
+{
+	local mac=$1; shift
+	local dst=$1; shift
+	local vid=$1; shift
+	local what=$1; shift
+	local -a expects=("${@}")
+
+	RET=0
+
+	vxlan_flood_test $mac $dst $vid "${expects[@]}"
+
+	log_test "VXLAN: $what"
+}
+
+test_flood()
+{
+	__test_flood de:ad:be:ef:13:37 192.0.2.100 10 "flood vlan 10" \
+		10 10 0 10 0
+	__test_flood ca:fe:be:ef:13:37 198.51.100.100 20 "flood vlan 20" \
+		10 0 10 0 10
+}
+
+vxlan_fdb_add_del()
+{
+	local add_del=$1; shift
+	local vid=$1; shift
+	local mac=$1; shift
+	local dev=$1; shift
+	local dst=$1; shift
+
+	bridge fdb $add_del dev $dev $mac self static permanent \
+		${dst:+dst} $dst 2>/dev/null
+	bridge fdb $add_del dev $dev $mac master static vlan $vid 2>/dev/null
+}
+
+__test_unicast()
+{
+	local mac=$1; shift
+	local dst=$1; shift
+	local hit_idx=$1; shift
+	local vid=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	local -a expects=(0 0 0 0 0)
+	expects[$hit_idx]=10
+
+	vxlan_flood_test $mac $dst $vid "${expects[@]}"
+
+	log_test "VXLAN: $what"
+}
+
+test_unicast()
+{
+	local -a targets=("$h2_mac $h2"
+			  "$r1_mac vx10 192.0.2.34"
+			  "$r2_mac vx10 192.0.2.50")
+	local target
+
+	log_info "unicast vlan 10"
+
+	for target in "${targets[@]}"; do
+		vxlan_fdb_add_del add 10 $target
+	done
+
+	__test_unicast $h2_mac 192.0.2.2 0 10 "local MAC unicast"
+	__test_unicast $r1_mac 192.0.2.3 1 10 "remote MAC 1 unicast"
+	__test_unicast $r2_mac 192.0.2.4 3 10 "remote MAC 2 unicast"
+
+	for target in "${targets[@]}"; do
+		vxlan_fdb_add_del del 10 $target
+	done
+
+	log_info "unicast vlan 20"
+
+	targets=("$h2_mac $h2" "$r1_mac vx20 192.0.2.34" \
+		 "$r2_mac vx20 192.0.2.50")
+
+	for target in "${targets[@]}"; do
+		vxlan_fdb_add_del add 20 $target
+	done
+
+	__test_unicast $h2_mac 198.51.100.2 0 20 "local MAC unicast"
+	__test_unicast $r1_mac 198.51.100.3 2 20 "remote MAC 1 unicast"
+	__test_unicast $r2_mac 198.51.100.4 4 20 "remote MAC 2 unicast"
+
+	for target in "${targets[@]}"; do
+		vxlan_fdb_add_del del 20 $target
+	done
+}
+
+test_pvid()
+{
+	local -a expects=(0 0 0 0 0)
+	local mac=de:ad:be:ef:13:37
+	local dst=192.0.2.100
+	local vid=10
+
+	# Check that flooding works
+	RET=0
+
+	expects[0]=10; expects[1]=10; expects[3]=10
+	vxlan_flood_test $mac $dst $vid "${expects[@]}"
+
+	log_test "VXLAN: flood before pvid off"
+
+	# Toggle PVID off and test that flood to remote hosts does not work
+	RET=0
+
+	bridge vlan add vid 10 dev vx10
+
+	expects[0]=10; expects[1]=0; expects[3]=0
+	vxlan_flood_test $mac $dst $vid "${expects[@]}"
+
+	log_test "VXLAN: flood after pvid off"
+
+	# Toggle PVID on and test that flood to remote hosts does work
+	RET=0
+
+	bridge vlan add vid 10 dev vx10 pvid untagged
+
+	expects[0]=10; expects[1]=10; expects[3]=10
+	vxlan_flood_test $mac $dst $vid "${expects[@]}"
+
+	log_test "VXLAN: flood after pvid on"
+
+	# Add a new VLAN and test that it does not affect flooding
+	RET=0
+
+	bridge vlan add vid 30 dev vx10
+
+	expects[0]=10; expects[1]=10; expects[3]=10
+	vxlan_flood_test $mac $dst $vid "${expects[@]}"
+
+	bridge vlan del vid 30 dev vx10
+
+	log_test "VXLAN: flood after vlan add"
+
+	# Remove currently mapped VLAN and test that flood to remote hosts does
+	# not work
+	RET=0
+
+	bridge vlan del vid 10 dev vx10
+
+	expects[0]=10; expects[1]=0; expects[3]=0
+	vxlan_flood_test $mac $dst $vid "${expects[@]}"
+
+	log_test "VXLAN: flood after vlan delete"
+
+	# Re-add the VLAN and test that flood to remote hosts does work
+	RET=0
+
+	bridge vlan add vid 10 dev vx10 pvid untagged
+
+	expects[0]=10; expects[1]=10; expects[3]=10
+	vxlan_flood_test $mac $dst $vid "${expects[@]}"
+
+	log_test "VXLAN: flood after vlan re-add"
+}
+
+vxlan_ping_test()
+{
+	local ping_dev=$1; shift
+	local ping_dip=$1; shift
+	local ping_args=$1; shift
+	local capture_dev=$1; shift
+	local capture_dir=$1; shift
+	local capture_pref=$1; shift
+	local expect=$1; shift
+
+	local t0=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir)
+	ping_do $ping_dev $ping_dip "$ping_args"
+	local t1=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir)
+	local delta=$((t1 - t0))
+
+	# Tolerate a couple stray extra packets.
+	((expect <= delta && delta <= expect + 2))
+	check_err $? "$capture_dev: Expected to capture $expect packets, got $delta."
+}
+
+__test_learning()
+{
+	local -a expects=(0 0 0 0 0)
+	local mac=$1; shift
+	local dst=$1; shift
+	local vid=$1; shift
+	local idx1=$1; shift
+	local idx2=$1; shift
+	local vx=vx$vid
+
+	# Check that flooding works
+	RET=0
+
+	expects[0]=10; expects[$idx1]=10; expects[$idx2]=10
+	vxlan_flood_test $mac $dst $vid "${expects[@]}"
+
+	log_test "VXLAN: flood before learning"
+
+	# Send a packet with source mac set to $mac from host w2 and check that
+	# a corresponding entry is created in the VxLAN device
+	RET=0
+
+	in_ns ns1 $MZ w2 -Q $vid -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff \
+		-B $dst -t icmp -q
+	sleep 1
+
+	bridge fdb show brport $vx | grep $mac | grep -q self
+	check_err $?
+	bridge fdb show brport $vx | grep $mac | grep "vlan $vid" \
+		| grep -q -v self
+	check_err $?
+
+	log_test "VXLAN: show learned FDB entry"
+
+	# Repeat first test and check that packets only reach host w2 in ns1
+	RET=0
+
+	expects[0]=0; expects[$idx1]=10; expects[$idx2]=0
+	vxlan_flood_test $mac $dst $vid "${expects[@]}"
+
+	log_test "VXLAN: learned FDB entry"
+
+	# Delete the learned FDB entry from the VxLAN and bridge devices and
+	# check that packets are flooded
+	RET=0
+
+	bridge fdb del dev $vx $mac master self vlan $vid
+	sleep 1
+
+	expects[0]=10; expects[$idx1]=10; expects[$idx2]=10
+	vxlan_flood_test $mac $dst $vid "${expects[@]}"
+
+	log_test "VXLAN: deletion of learned FDB entry"
+
+	# Re-learn the first FDB entry and check that it is correctly aged-out
+	RET=0
+
+	in_ns ns1 $MZ w2 -Q $vid -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff \
+		-B $dst -t icmp -q
+	sleep 1
+
+	bridge fdb show brport $vx | grep $mac | grep -q self
+	check_err $?
+	bridge fdb show brport $vx | grep $mac | grep "vlan $vid" \
+		| grep -q -v self
+	check_err $?
+
+	expects[0]=0; expects[$idx1]=10; expects[$idx2]=0
+	vxlan_flood_test $mac $dst $vid "${expects[@]}"
+
+	sleep 20
+
+	bridge fdb show brport $vx | grep $mac | grep -q self
+	check_fail $?
+	bridge fdb show brport $vx | grep $mac | grep "vlan $vid" \
+		| grep -q -v self
+	check_fail $?
+
+	expects[0]=10; expects[$idx1]=10; expects[$idx2]=10
+	vxlan_flood_test $mac $dst $vid "${expects[@]}"
+
+	log_test "VXLAN: Ageing of learned FDB entry"
+
+	# Toggle learning on the bridge port and check that the bridge's FDB
+	# is populated only when it should
+	RET=0
+
+	ip link set dev $vx type bridge_slave learning off
+
+	in_ns ns1 $MZ w2 -Q $vid -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff \
+		-B $dst -t icmp -q
+	sleep 1
+
+	bridge fdb show brport $vx | grep $mac | grep "vlan $vid" \
+		| grep -q -v self
+	check_fail $?
+
+	ip link set dev $vx type bridge_slave learning on
+
+	in_ns ns1 $MZ w2 -Q $vid -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff \
+		-B $dst -t icmp -q
+	sleep 1
+
+	bridge fdb show brport $vx | grep $mac | grep "vlan $vid" \
+		| grep -q -v self
+	check_err $?
+
+	log_test "VXLAN: learning toggling on bridge port"
+}
+
+test_learning()
+{
+	local mac=de:ad:be:ef:13:37
+	local dst=192.0.2.100
+	local vid=10
+
+	# Enable learning on the VxLAN devices and set ageing time to 10 seconds
+	ip link set dev br1 type bridge ageing_time 1000
+	ip link set dev vx10 type vxlan ageing 10
+	ip link set dev vx10 type vxlan learning
+	ip link set dev vx20 type vxlan ageing 10
+	ip link set dev vx20 type vxlan learning
+	reapply_config
+
+	log_info "learning vlan 10"
+
+	__test_learning $mac $dst $vid 1 3
+
+	log_info "learning vlan 20"
+
+	mac=ca:fe:be:ef:13:37
+	dst=198.51.100.100
+	vid=20
+
+	__test_learning $mac $dst $vid 2 4
+
+	# Restore previous settings
+	ip link set dev vx20 type vxlan nolearning
+	ip link set dev vx20 type vxlan ageing 300
+	ip link set dev vx10 type vxlan nolearning
+	ip link set dev vx10 type vxlan ageing 300
+	ip link set dev br1 type bridge ageing_time 30000
+	reapply_config
+}
+
+test_all()
+{
+	log_info "Running tests with UDP port $VXPORT"
+	tests_run
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+test_all
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_port_8472.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_port_8472.sh
new file mode 100755
index 000000000000..b1b2d1a3164f
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_port_8472.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# A wrapper to run VXLAN tests with an unusual port number.
+
+VXPORT=8472
+ALL_TESTS="
+	ping_ipv4
+"
+source vxlan_bridge_1q.sh
diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
index 406cc70c571d..4b02933cab8a 100644
--- a/tools/testing/selftests/net/msg_zerocopy.c
+++ b/tools/testing/selftests/net/msg_zerocopy.c
@@ -651,12 +651,13 @@ static void do_flush_datagram(int fd, int type)
 
 static void do_rx(int domain, int type, int protocol)
 {
+	const int cfg_receiver_wait_ms = 400;
 	uint64_t tstop;
 	int fd;
 
 	fd = do_setup_rx(domain, type, protocol);
 
-	tstop = gettimeofday_ms() + cfg_runtime_ms;
+	tstop = gettimeofday_ms() + cfg_runtime_ms + cfg_receiver_wait_ms;
 	do {
 		if (type == SOCK_STREAM)
 			do_flush_tcp(fd);
diff --git a/tools/testing/selftests/net/msg_zerocopy.sh b/tools/testing/selftests/net/msg_zerocopy.sh
index c43c6debda06..825ffec85cea 100755
--- a/tools/testing/selftests/net/msg_zerocopy.sh
+++ b/tools/testing/selftests/net/msg_zerocopy.sh
@@ -25,6 +25,8 @@ readonly path_sysctl_mem="net.core.optmem_max"
 if [[ "$#" -eq "0" ]]; then
 	$0 4 tcp -t 1
 	$0 6 tcp -t 1
+	$0 4 udp -t 1
+	$0 6 udp -t 1
 	echo "OK. All tests passed"
 	exit 0
 fi
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index a369d616b390..e2c94e47707c 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -26,6 +26,47 @@
 # - pmtu_ipv6
 #	Same as pmtu_ipv4, except for locked PMTU tests, using IPv6
 #
+# - pmtu_ipv4_vxlan4_exception
+#	Set up the same network topology as pmtu_ipv4, create a VXLAN tunnel
+#	over IPv4 between A and B, routed via R1. On the link between R1 and B,
+#	set a MTU lower than the VXLAN MTU and the MTU on the link between A and
+#	R1. Send IPv4 packets, exceeding the MTU between R1 and B, over VXLAN
+#	from A to B and check that the PMTU exception is created with the right
+#	value on A
+#
+# - pmtu_ipv6_vxlan4_exception
+#	Same as pmtu_ipv4_vxlan4_exception, but send IPv6 packets from A to B
+#
+# - pmtu_ipv4_vxlan6_exception
+#	Same as pmtu_ipv4_vxlan4_exception, but use IPv6 transport from A to B
+#
+# - pmtu_ipv6_vxlan6_exception
+#	Same as pmtu_ipv4_vxlan6_exception, but send IPv6 packets from A to B
+#
+# - pmtu_ipv4_geneve4_exception
+#	Same as pmtu_ipv4_vxlan4_exception, but using a GENEVE tunnel instead of
+#	VXLAN
+#
+# - pmtu_ipv6_geneve4_exception
+#	Same as pmtu_ipv6_vxlan4_exception, but using a GENEVE tunnel instead of
+#	VXLAN
+#
+# - pmtu_ipv4_geneve6_exception
+#	Same as pmtu_ipv4_vxlan6_exception, but using a GENEVE tunnel instead of
+#	VXLAN
+#
+# - pmtu_ipv6_geneve6_exception
+#	Same as pmtu_ipv6_vxlan6_exception, but using a GENEVE tunnel instead of
+#	VXLAN
+#
+# - pmtu_ipv{4,6}_fou{4,6}_exception
+#	Same as pmtu_ipv4_vxlan4, but using a direct IPv4/IPv6 encapsulation
+#	(FoU) over IPv4/IPv6, instead of VXLAN
+#
+# - pmtu_ipv{4,6}_fou{4,6}_exception
+#	Same as pmtu_ipv4_vxlan4, but using a generic UDP IPv4/IPv6
+#	encapsulation (GUE) over IPv4/IPv6, instead of VXLAN
+#
 # - pmtu_vti4_exception
 #	Set up vti tunnel on top of veth, with xfrm states and policies, in two
 #	namespaces with matching endpoints. Check that route exception is not
@@ -72,6 +113,22 @@ which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
 tests="
 	pmtu_ipv4_exception		ipv4: PMTU exceptions
 	pmtu_ipv6_exception		ipv6: PMTU exceptions
+	pmtu_ipv4_vxlan4_exception	IPv4 over vxlan4: PMTU exceptions
+	pmtu_ipv6_vxlan4_exception	IPv6 over vxlan4: PMTU exceptions
+	pmtu_ipv4_vxlan6_exception	IPv4 over vxlan6: PMTU exceptions
+	pmtu_ipv6_vxlan6_exception	IPv6 over vxlan6: PMTU exceptions
+	pmtu_ipv4_geneve4_exception	IPv4 over geneve4: PMTU exceptions
+	pmtu_ipv6_geneve4_exception	IPv6 over geneve4: PMTU exceptions
+	pmtu_ipv4_geneve6_exception	IPv4 over geneve6: PMTU exceptions
+	pmtu_ipv6_geneve6_exception	IPv6 over geneve6: PMTU exceptions
+	pmtu_ipv4_fou4_exception	IPv4 over fou4: PMTU exceptions
+	pmtu_ipv6_fou4_exception	IPv6 over fou4: PMTU exceptions
+	pmtu_ipv4_fou6_exception	IPv4 over fou6: PMTU exceptions
+	pmtu_ipv6_fou6_exception	IPv6 over fou6: PMTU exceptions
+	pmtu_ipv4_gue4_exception	IPv4 over gue4: PMTU exceptions
+	pmtu_ipv6_gue4_exception	IPv6 over gue4: PMTU exceptions
+	pmtu_ipv4_gue6_exception	IPv4 over gue6: PMTU exceptions
+	pmtu_ipv6_gue6_exception	IPv6 over gue6: PMTU exceptions
 	pmtu_vti6_exception		vti6: PMTU exceptions
 	pmtu_vti4_exception		vti4: PMTU exceptions
 	pmtu_vti4_default_mtu		vti4: default MTU assignment
@@ -95,8 +152,8 @@ ns_r2="ip netns exec ${NS_R2}"
 # Addresses are:
 # - IPv4: PREFIX4.SEGMENT.ID (/24)
 # - IPv6: PREFIX6:SEGMENT::ID (/64)
-prefix4="192.168"
-prefix6="fd00"
+prefix4="10.0"
+prefix6="fc00"
 a_r1=1
 a_r2=2
 b_r1=3
@@ -129,12 +186,12 @@ veth6_a_addr="fd00:1::a"
 veth6_b_addr="fd00:1::b"
 veth6_mask="64"
 
-vti4_a_addr="192.168.2.1"
-vti4_b_addr="192.168.2.2"
-vti4_mask="24"
-vti6_a_addr="fd00:2::a"
-vti6_b_addr="fd00:2::b"
-vti6_mask="64"
+tunnel4_a_addr="192.168.2.1"
+tunnel4_b_addr="192.168.2.2"
+tunnel4_mask="24"
+tunnel6_a_addr="fd00:2::a"
+tunnel6_b_addr="fd00:2::b"
+tunnel6_mask="64"
 
 dummy6_0_addr="fc00:1000::0"
 dummy6_1_addr="fc00:1001::0"
@@ -159,6 +216,89 @@ nsname() {
 	eval echo \$NS_$1
 }
 
+setup_fou_or_gue() {
+	outer="${1}"
+	inner="${2}"
+	encap="${3}"
+
+	if [ "${outer}" = "4" ]; then
+		modprobe fou || return 2
+		a_addr="${prefix4}.${a_r1}.1"
+		b_addr="${prefix4}.${b_r1}.1"
+		if [ "${inner}" = "4" ]; then
+			type="ipip"
+			ipproto="4"
+		else
+			type="sit"
+			ipproto="41"
+		fi
+	else
+		modprobe fou6 || return 2
+		a_addr="${prefix6}:${a_r1}::1"
+		b_addr="${prefix6}:${b_r1}::1"
+		if [ "${inner}" = "4" ]; then
+			type="ip6tnl"
+			mode="mode ipip6"
+			ipproto="4 -6"
+		else
+			type="ip6tnl"
+			mode="mode ip6ip6"
+			ipproto="41 -6"
+		fi
+	fi
+
+	${ns_a} ip fou add port 5555 ipproto ${ipproto} || return 2
+	${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return 2
+
+	${ns_b} ip fou add port 5556 ipproto ${ipproto}
+	${ns_b} ip link add ${encap}_b type ${type} ${mode} local ${b_addr} remote ${a_addr} encap ${encap} encap-sport auto encap-dport 5555
+
+	if [ "${inner}" = "4" ]; then
+		${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${encap}_a
+		${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${encap}_b
+	else
+		${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${encap}_a
+		${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${encap}_b
+	fi
+
+	${ns_a} ip link set ${encap}_a up
+	${ns_b} ip link set ${encap}_b up
+
+	sleep 1
+}
+
+setup_fou44() {
+	setup_fou_or_gue 4 4 fou
+}
+
+setup_fou46() {
+	setup_fou_or_gue 4 6 fou
+}
+
+setup_fou64() {
+	setup_fou_or_gue 6 4 fou
+}
+
+setup_fou66() {
+	setup_fou_or_gue 6 6 fou
+}
+
+setup_gue44() {
+	setup_fou_or_gue 4 4 gue
+}
+
+setup_gue46() {
+	setup_fou_or_gue 4 6 gue
+}
+
+setup_gue64() {
+	setup_fou_or_gue 6 4 gue
+}
+
+setup_gue66() {
+	setup_fou_or_gue 6 6 gue
+}
+
 setup_namespaces() {
 	for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
 		ip netns add ${n} || return 1
@@ -202,11 +342,57 @@ setup_vti() {
 }
 
 setup_vti4() {
-	setup_vti 4 ${veth4_a_addr} ${veth4_b_addr} ${vti4_a_addr} ${vti4_b_addr} ${vti4_mask}
+	setup_vti 4 ${veth4_a_addr} ${veth4_b_addr} ${tunnel4_a_addr} ${tunnel4_b_addr} ${tunnel4_mask}
 }
 
 setup_vti6() {
-	setup_vti 6 ${veth6_a_addr} ${veth6_b_addr} ${vti6_a_addr} ${vti6_b_addr} ${vti6_mask}
+	setup_vti 6 ${veth6_a_addr} ${veth6_b_addr} ${tunnel6_a_addr} ${tunnel6_b_addr} ${tunnel6_mask}
+}
+
+setup_vxlan_or_geneve() {
+	type="${1}"
+	a_addr="${2}"
+	b_addr="${3}"
+	opts="${4}"
+
+	if [ "${type}" = "vxlan" ]; then
+		opts="${opts} ttl 64 dstport 4789"
+		opts_a="local ${a_addr}"
+		opts_b="local ${b_addr}"
+	else
+		opts_a=""
+		opts_b=""
+	fi
+
+	${ns_a} ip link add ${type}_a type ${type} id 1 ${opts_a} remote ${b_addr} ${opts} || return 1
+	${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} remote ${a_addr} ${opts}
+
+	${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${type}_a
+	${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b
+
+	${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${type}_a
+	${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b
+
+	${ns_a} ip link set ${type}_a up
+	${ns_b} ip link set ${type}_b up
+
+	sleep 1
+}
+
+setup_geneve4() {
+	setup_vxlan_or_geneve geneve ${prefix4}.${a_r1}.1  ${prefix4}.${b_r1}.1  "df set"
+}
+
+setup_vxlan4() {
+	setup_vxlan_or_geneve vxlan  ${prefix4}.${a_r1}.1  ${prefix4}.${b_r1}.1  "df set"
+}
+
+setup_geneve6() {
+	setup_vxlan_or_geneve geneve ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1
+}
+
+setup_vxlan6() {
+	setup_vxlan_or_geneve vxlan  ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1
 }
 
 setup_xfrm() {
@@ -465,6 +651,161 @@ test_pmtu_ipv6_exception() {
 	test_pmtu_ipvX 6
 }
 
+test_pmtu_ipvX_over_vxlanY_or_geneveY_exception() {
+	type=${1}
+	family=${2}
+	outer_family=${3}
+	ll_mtu=4000
+
+	if [ ${outer_family} -eq 4 ]; then
+		setup namespaces routing ${type}4 || return 2
+		#                      IPv4 header   UDP header   VXLAN/GENEVE header   Ethernet header
+		exp_mtu=$((${ll_mtu} - 20          - 8          - 8                   - 14))
+	else
+		setup namespaces routing ${type}6 || return 2
+		#                      IPv6 header   UDP header   VXLAN/GENEVE header   Ethernet header
+		exp_mtu=$((${ll_mtu} - 40          - 8          - 8                   - 14))
+	fi
+
+	trace "${ns_a}" ${type}_a    "${ns_b}"  ${type}_b \
+	      "${ns_a}" veth_A-R1    "${ns_r1}" veth_R1-A \
+	      "${ns_b}" veth_B-R1    "${ns_r1}" veth_R1-B
+
+	if [ ${family} -eq 4 ]; then
+		ping=ping
+		dst=${tunnel4_b_addr}
+	else
+		ping=${ping6}
+		dst=${tunnel6_b_addr}
+	fi
+
+	# Create route exception by exceeding link layer MTU
+	mtu "${ns_a}"  veth_A-R1 $((${ll_mtu} + 1000))
+	mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
+	mtu "${ns_b}"  veth_B-R1 ${ll_mtu}
+	mtu "${ns_r1}" veth_R1-B ${ll_mtu}
+
+	mtu "${ns_a}" ${type}_a $((${ll_mtu} + 1000))
+	mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000))
+	${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s $((${ll_mtu} + 500)) ${dst} > /dev/null
+
+	# Check that exception was created
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
+	check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on ${type} interface"
+}
+
+test_pmtu_ipv4_vxlan4_exception() {
+	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception vxlan  4 4
+}
+
+test_pmtu_ipv6_vxlan4_exception() {
+	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception vxlan  6 4
+}
+
+test_pmtu_ipv4_geneve4_exception() {
+	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 4 4
+}
+
+test_pmtu_ipv6_geneve4_exception() {
+	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 6 4
+}
+
+test_pmtu_ipv4_vxlan6_exception() {
+	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception vxlan  4 6
+}
+
+test_pmtu_ipv6_vxlan6_exception() {
+	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception vxlan  6 6
+}
+
+test_pmtu_ipv4_geneve6_exception() {
+	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 4 6
+}
+
+test_pmtu_ipv6_geneve6_exception() {
+	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 6 6
+}
+
+test_pmtu_ipvX_over_fouY_or_gueY() {
+	inner_family=${1}
+	outer_family=${2}
+	encap=${3}
+	ll_mtu=4000
+
+	setup namespaces routing ${encap}${outer_family}${inner_family} || return 2
+	trace "${ns_a}" ${encap}_a   "${ns_b}"  ${encap}_b \
+	      "${ns_a}" veth_A-R1    "${ns_r1}" veth_R1-A \
+	      "${ns_b}" veth_B-R1    "${ns_r1}" veth_R1-B
+
+	if [ ${inner_family} -eq 4 ]; then
+		ping=ping
+		dst=${tunnel4_b_addr}
+	else
+		ping=${ping6}
+		dst=${tunnel6_b_addr}
+	fi
+
+	if [ "${encap}" = "gue" ]; then
+		encap_overhead=4
+	else
+		encap_overhead=0
+	fi
+
+	if [ ${outer_family} -eq 4 ]; then
+		#                      IPv4 header   UDP header
+		exp_mtu=$((${ll_mtu} - 20          - 8         - ${encap_overhead}))
+	else
+		#                      IPv6 header   Option 4   UDP header
+		exp_mtu=$((${ll_mtu} - 40          - 8        - 8       - ${encap_overhead}))
+	fi
+
+	# Create route exception by exceeding link layer MTU
+	mtu "${ns_a}"  veth_A-R1 $((${ll_mtu} + 1000))
+	mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
+	mtu "${ns_b}"  veth_B-R1 ${ll_mtu}
+	mtu "${ns_r1}" veth_R1-B ${ll_mtu}
+
+	mtu "${ns_a}" ${encap}_a $((${ll_mtu} + 1000))
+	mtu "${ns_b}" ${encap}_b $((${ll_mtu} + 1000))
+	${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s $((${ll_mtu} + 500)) ${dst} > /dev/null
+
+	# Check that exception was created
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
+	check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on ${encap} interface"
+}
+
+test_pmtu_ipv4_fou4_exception() {
+	test_pmtu_ipvX_over_fouY_or_gueY 4 4 fou
+}
+
+test_pmtu_ipv6_fou4_exception() {
+	test_pmtu_ipvX_over_fouY_or_gueY 6 4 fou
+}
+
+test_pmtu_ipv4_fou6_exception() {
+	test_pmtu_ipvX_over_fouY_or_gueY 4 6 fou
+}
+
+test_pmtu_ipv6_fou6_exception() {
+	test_pmtu_ipvX_over_fouY_or_gueY 6 6 fou
+}
+
+test_pmtu_ipv4_gue4_exception() {
+	test_pmtu_ipvX_over_fouY_or_gueY 4 4 gue
+}
+
+test_pmtu_ipv6_gue4_exception() {
+	test_pmtu_ipvX_over_fouY_or_gueY 6 4 gue
+}
+
+test_pmtu_ipv4_gue6_exception() {
+	test_pmtu_ipvX_over_fouY_or_gueY 4 6 gue
+}
+
+test_pmtu_ipv6_gue6_exception() {
+	test_pmtu_ipvX_over_fouY_or_gueY 6 6 gue
+}
+
 test_pmtu_vti4_exception() {
 	setup namespaces veth vti4 xfrm4 || return 2
 	trace "${ns_a}" veth_a    "${ns_b}" veth_b \
@@ -484,14 +825,14 @@ test_pmtu_vti4_exception() {
 
 	# Send DF packet without exceeding link layer MTU, check that no
 	# exception is created
-	${ns_a} ping -q -M want -i 0.1 -w 2 -s ${ping_payload} ${vti4_b_addr} > /dev/null
-	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})"
+	${ns_a} ping -q -M want -i 0.1 -w 2 -s ${ping_payload} ${tunnel4_b_addr} > /dev/null
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
 	check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1
 
 	# Now exceed link layer MTU by one byte, check that exception is created
 	# with the right PMTU value
-	${ns_a} ping -q -M want -i 0.1 -w 2 -s $((ping_payload + 1)) ${vti4_b_addr} > /dev/null
-	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})"
+	${ns_a} ping -q -M want -i 0.1 -w 2 -s $((ping_payload + 1)) ${tunnel4_b_addr} > /dev/null
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
 	check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))"
 }
 
@@ -506,20 +847,20 @@ test_pmtu_vti6_exception() {
 	mtu "${ns_b}" veth_b 4000
 	mtu "${ns_a}" vti6_a 5000
 	mtu "${ns_b}" vti6_b 5000
-	${ns_a} ${ping6} -q -i 0.1 -w 2 -s 60000 ${vti6_b_addr} > /dev/null
+	${ns_a} ${ping6} -q -i 0.1 -w 2 -s 60000 ${tunnel6_b_addr} > /dev/null
 
 	# Check that exception was created
-	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})"
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
 	check_pmtu_value any "${pmtu}" "creating tunnel exceeding link layer MTU" || return 1
 
 	# Decrease tunnel MTU, check for PMTU decrease in route exception
 	mtu "${ns_a}" vti6_a 3000
-	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})"
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
 	check_pmtu_value "3000" "${pmtu}" "decreasing tunnel MTU" || fail=1
 
 	# Increase tunnel MTU, check for PMTU increase in route exception
 	mtu "${ns_a}" vti6_a 9000
-	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})"
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
 	check_pmtu_value "9000" "${pmtu}" "increasing tunnel MTU" || fail=1
 
 	return ${fail}
diff --git a/tools/testing/selftests/net/reuseport_addr_any.c b/tools/testing/selftests/net/reuseport_addr_any.c
new file mode 100644
index 000000000000..c6233935fed1
--- /dev/null
+++ b/tools/testing/selftests/net/reuseport_addr_any.c
@@ -0,0 +1,274 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Test that sockets listening on a specific address are preferred
+ * over sockets listening on addr_any.
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <linux/dccp.h>
+#include <linux/in.h>
+#include <linux/unistd.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+static const char *IP4_ADDR = "127.0.0.1";
+static const char *IP6_ADDR = "::1";
+static const char *IP4_MAPPED6 = "::ffff:127.0.0.1";
+
+static const int PORT = 8888;
+
+static void build_rcv_fd(int family, int proto, int *rcv_fds, int count,
+			 const char *addr_str)
+{
+	struct sockaddr_in  addr4 = {0};
+	struct sockaddr_in6 addr6 = {0};
+	struct sockaddr *addr;
+	int opt, i, sz;
+
+	memset(&addr, 0, sizeof(addr));
+
+	switch (family) {
+	case AF_INET:
+		addr4.sin_family = family;
+		if (!addr_str)
+			addr4.sin_addr.s_addr = htonl(INADDR_ANY);
+		else if (!inet_pton(family, addr_str, &addr4.sin_addr.s_addr))
+			error(1, errno, "inet_pton failed: %s", addr_str);
+		addr4.sin_port = htons(PORT);
+		sz = sizeof(addr4);
+		addr = (struct sockaddr *)&addr4;
+		break;
+	case AF_INET6:
+		addr6.sin6_family = AF_INET6;
+		if (!addr_str)
+			addr6.sin6_addr = in6addr_any;
+		else if (!inet_pton(family, addr_str, &addr6.sin6_addr))
+			error(1, errno, "inet_pton failed: %s", addr_str);
+		addr6.sin6_port = htons(PORT);
+		sz = sizeof(addr6);
+		addr = (struct sockaddr *)&addr6;
+		break;
+	default:
+		error(1, 0, "Unsupported family %d", family);
+		/* clang does not recognize error() above as terminating
+		 * the program, so it complains that saddr, sz are
+		 * not initialized when this code path is taken. Silence it.
+		 */
+		return;
+	}
+
+	for (i = 0; i < count; ++i) {
+		rcv_fds[i] = socket(family, proto, 0);
+		if (rcv_fds[i] < 0)
+			error(1, errno, "failed to create receive socket");
+
+		opt = 1;
+		if (setsockopt(rcv_fds[i], SOL_SOCKET, SO_REUSEPORT, &opt,
+			       sizeof(opt)))
+			error(1, errno, "failed to set SO_REUSEPORT");
+
+		if (bind(rcv_fds[i], addr, sz))
+			error(1, errno, "failed to bind receive socket");
+
+		if (proto == SOCK_STREAM && listen(rcv_fds[i], 10))
+			error(1, errno, "tcp: failed to listen on receive port");
+		else if (proto == SOCK_DCCP) {
+			if (setsockopt(rcv_fds[i], SOL_DCCP,
+					DCCP_SOCKOPT_SERVICE,
+					&(int) {htonl(42)}, sizeof(int)))
+				error(1, errno, "failed to setsockopt");
+
+			if (listen(rcv_fds[i], 10))
+				error(1, errno, "dccp: failed to listen on receive port");
+		}
+	}
+}
+
+static int connect_and_send(int family, int proto)
+{
+	struct sockaddr_in  saddr4 = {0};
+	struct sockaddr_in  daddr4 = {0};
+	struct sockaddr_in6 saddr6 = {0};
+	struct sockaddr_in6 daddr6 = {0};
+	struct sockaddr *saddr, *daddr;
+	int fd, sz;
+
+	switch (family) {
+	case AF_INET:
+		saddr4.sin_family = AF_INET;
+		saddr4.sin_addr.s_addr = htonl(INADDR_ANY);
+		saddr4.sin_port = 0;
+
+		daddr4.sin_family = AF_INET;
+		if (!inet_pton(family, IP4_ADDR, &daddr4.sin_addr.s_addr))
+			error(1, errno, "inet_pton failed: %s", IP4_ADDR);
+		daddr4.sin_port = htons(PORT);
+
+		sz = sizeof(saddr4);
+		saddr = (struct sockaddr *)&saddr4;
+		daddr = (struct sockaddr *)&daddr4;
+	break;
+	case AF_INET6:
+		saddr6.sin6_family = AF_INET6;
+		saddr6.sin6_addr = in6addr_any;
+
+		daddr6.sin6_family = AF_INET6;
+		if (!inet_pton(family, IP6_ADDR, &daddr6.sin6_addr))
+			error(1, errno, "inet_pton failed: %s", IP6_ADDR);
+		daddr6.sin6_port = htons(PORT);
+
+		sz = sizeof(saddr6);
+		saddr = (struct sockaddr *)&saddr6;
+		daddr = (struct sockaddr *)&daddr6;
+	break;
+	default:
+		error(1, 0, "Unsupported family %d", family);
+		/* clang does not recognize error() above as terminating
+		 * the program, so it complains that saddr, daddr, sz are
+		 * not initialized when this code path is taken. Silence it.
+		 */
+		return -1;
+	}
+
+	fd = socket(family, proto, 0);
+	if (fd < 0)
+		error(1, errno, "failed to create send socket");
+
+	if (proto == SOCK_DCCP &&
+		setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
+				&(int){htonl(42)}, sizeof(int)))
+		error(1, errno, "failed to setsockopt");
+
+	if (bind(fd, saddr, sz))
+		error(1, errno, "failed to bind send socket");
+
+	if (connect(fd, daddr, sz))
+		error(1, errno, "failed to connect send socket");
+
+	if (send(fd, "a", 1, 0) < 0)
+		error(1, errno, "failed to send message");
+
+	return fd;
+}
+
+static int receive_once(int epfd, int proto)
+{
+	struct epoll_event ev;
+	int i, fd;
+	char buf[8];
+
+	i = epoll_wait(epfd, &ev, 1, 3);
+	if (i < 0)
+		error(1, errno, "epoll_wait failed");
+
+	if (proto == SOCK_STREAM || proto == SOCK_DCCP) {
+		fd = accept(ev.data.fd, NULL, NULL);
+		if (fd < 0)
+			error(1, errno, "failed to accept");
+		i = recv(fd, buf, sizeof(buf), 0);
+		close(fd);
+	} else {
+		i = recv(ev.data.fd, buf, sizeof(buf), 0);
+	}
+
+	if (i < 0)
+		error(1, errno, "failed to recv");
+
+	return ev.data.fd;
+}
+
+static void test(int *rcv_fds, int count, int family, int proto, int fd)
+{
+	struct epoll_event ev;
+	int epfd, i, send_fd, recv_fd;
+
+	epfd = epoll_create(1);
+	if (epfd < 0)
+		error(1, errno, "failed to create epoll");
+
+	ev.events = EPOLLIN;
+	for (i = 0; i < count; ++i) {
+		ev.data.fd = rcv_fds[i];
+		if (epoll_ctl(epfd, EPOLL_CTL_ADD, rcv_fds[i], &ev))
+			error(1, errno, "failed to register sock epoll");
+	}
+
+	send_fd = connect_and_send(family, proto);
+
+	recv_fd = receive_once(epfd, proto);
+	if (recv_fd != fd)
+		error(1, 0, "received on an unexpected socket");
+
+	close(send_fd);
+	close(epfd);
+}
+
+
+static void run_one_test(int fam_send, int fam_rcv, int proto,
+			 const char *addr_str)
+{
+	/* Below we test that a socket listening on a specific address
+	 * is always selected in preference over a socket listening
+	 * on addr_any. Bugs where this is not the case often result
+	 * in sockets created first or last to get picked. So below
+	 * we make sure that there are always addr_any sockets created
+	 * before and after a specific socket is created.
+	 */
+	int rcv_fds[10], i;
+
+	build_rcv_fd(AF_INET, proto, rcv_fds, 2, NULL);
+	build_rcv_fd(AF_INET6, proto, rcv_fds + 2, 2, NULL);
+	build_rcv_fd(fam_rcv, proto, rcv_fds + 4, 1, addr_str);
+	build_rcv_fd(AF_INET, proto, rcv_fds + 5, 2, NULL);
+	build_rcv_fd(AF_INET6, proto, rcv_fds + 7, 2, NULL);
+	test(rcv_fds, 9, fam_send, proto, rcv_fds[4]);
+	for (i = 0; i < 9; ++i)
+		close(rcv_fds[i]);
+	fprintf(stderr, "pass\n");
+}
+
+static void test_proto(int proto, const char *proto_str)
+{
+	if (proto == SOCK_DCCP) {
+		int test_fd;
+
+		test_fd = socket(AF_INET, proto, 0);
+		if (test_fd < 0) {
+			if (errno == ESOCKTNOSUPPORT) {
+				fprintf(stderr, "DCCP not supported: skipping DCCP tests\n");
+				return;
+			} else
+				error(1, errno, "failed to create a DCCP socket");
+		}
+		close(test_fd);
+	}
+
+	fprintf(stderr, "%s IPv4 ... ", proto_str);
+	run_one_test(AF_INET, AF_INET, proto, IP4_ADDR);
+
+	fprintf(stderr, "%s IPv6 ... ", proto_str);
+	run_one_test(AF_INET6, AF_INET6, proto, IP6_ADDR);
+
+	fprintf(stderr, "%s IPv4 mapped to IPv6 ... ", proto_str);
+	run_one_test(AF_INET, AF_INET6, proto, IP4_MAPPED6);
+}
+
+int main(void)
+{
+	test_proto(SOCK_DGRAM, "UDP");
+	test_proto(SOCK_STREAM, "TCP");
+	test_proto(SOCK_DCCP, "DCCP");
+
+	fprintf(stderr, "SUCCESS\n");
+	return 0;
+}
diff --git a/tools/testing/selftests/net/reuseport_addr_any.sh b/tools/testing/selftests/net/reuseport_addr_any.sh
new file mode 100755
index 000000000000..104592f62ad4
--- /dev/null
+++ b/tools/testing/selftests/net/reuseport_addr_any.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+./in_netns.sh ./reuseport_addr_any
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index e101af52d1d6..78fc593dfe40 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -205,6 +205,8 @@ kci_test_polrouting()
 
 kci_test_route_get()
 {
+	local hash_policy=$(sysctl -n net.ipv4.fib_multipath_hash_policy)
+
 	ret=0
 
 	ip route get 127.0.0.1 > /dev/null
@@ -223,6 +225,19 @@ kci_test_route_get()
 	check_err $?
 	ip route get 10.23.7.11 from 10.23.7.12 iif "$devdummy" > /dev/null
 	check_err $?
+	ip route add 10.23.8.0/24 \
+		nexthop via 10.23.7.13 dev "$devdummy" \
+		nexthop via 10.23.7.14 dev "$devdummy"
+	check_err $?
+	sysctl -wq net.ipv4.fib_multipath_hash_policy=0
+	ip route get 10.23.8.11 > /dev/null
+	check_err $?
+	sysctl -wq net.ipv4.fib_multipath_hash_policy=1
+	ip route get 10.23.8.11 > /dev/null
+	check_err $?
+	sysctl -wq net.ipv4.fib_multipath_hash_policy="$hash_policy"
+	ip route del 10.23.8.0/24
+	check_err $?
 	ip addr del dev "$devdummy" 10.23.7.11/24
 	check_err $?
 
@@ -955,6 +970,111 @@ kci_test_ip6erspan()
 	ip netns del "$testns"
 }
 
+kci_test_fdb_get()
+{
+	IP="ip -netns testns"
+	BRIDGE="bridge -netns testns"
+	brdev="test-br0"
+	vxlandev="vxlan10"
+	test_mac=de:ad:be:ef:13:37
+	localip="10.0.2.2"
+	dstip="10.0.2.3"
+	ret=0
+
+	bridge fdb help 2>&1 |grep -q 'bridge fdb get'
+	if [ $? -ne 0 ];then
+		echo "SKIP: fdb get tests: iproute2 too old"
+		return $ksft_skip
+	fi
+
+	ip netns add testns
+	if [ $? -ne 0 ]; then
+		echo "SKIP fdb get tests: cannot add net namespace $testns"
+		return $ksft_skip
+	fi
+
+	$IP link add "$vxlandev" type vxlan id 10 local $localip \
+                dstport 4789 2>/dev/null
+	check_err $?
+	$IP link add name "$brdev" type bridge &>/dev/null
+	check_err $?
+	$IP link set dev "$vxlandev" master "$brdev" &>/dev/null
+	check_err $?
+	$BRIDGE fdb add $test_mac dev "$vxlandev" master &>/dev/null
+	check_err $?
+	$BRIDGE fdb add $test_mac dev "$vxlandev" dst $dstip self &>/dev/null
+	check_err $?
+
+	$BRIDGE fdb get $test_mac brport "$vxlandev" 2>/dev/null | grep -q "dev $vxlandev master $brdev"
+	check_err $?
+	$BRIDGE fdb get $test_mac br "$brdev" 2>/dev/null | grep -q "dev $vxlandev master $brdev"
+	check_err $?
+	$BRIDGE fdb get $test_mac dev "$vxlandev" self 2>/dev/null | grep -q "dev $vxlandev dst $dstip"
+	check_err $?
+
+	ip netns del testns &>/dev/null
+
+	if [ $ret -ne 0 ]; then
+		echo "FAIL: bridge fdb get"
+		return 1
+	fi
+
+	echo "PASS: bridge fdb get"
+}
+
+kci_test_neigh_get()
+{
+	dstmac=de:ad:be:ef:13:37
+	dstip=10.0.2.4
+	dstip6=dead::2
+	ret=0
+
+	ip neigh help 2>&1 |grep -q 'ip neigh get'
+	if [ $? -ne 0 ];then
+		echo "SKIP: fdb get tests: iproute2 too old"
+		return $ksft_skip
+	fi
+
+	# ipv4
+	ip neigh add $dstip lladdr $dstmac dev "$devdummy"  > /dev/null
+	check_err $?
+	ip neigh get $dstip dev "$devdummy" 2> /dev/null | grep -q "$dstmac"
+	check_err $?
+	ip neigh del $dstip lladdr $dstmac dev "$devdummy"  > /dev/null
+	check_err $?
+
+	# ipv4 proxy
+	ip neigh add proxy $dstip dev "$devdummy" > /dev/null
+	check_err $?
+	ip neigh get proxy $dstip dev "$devdummy" 2>/dev/null | grep -q "$dstip"
+	check_err $?
+	ip neigh del proxy $dstip dev "$devdummy" > /dev/null
+	check_err $?
+
+	# ipv6
+	ip neigh add $dstip6 lladdr $dstmac dev "$devdummy"  > /dev/null
+	check_err $?
+	ip neigh get $dstip6 dev "$devdummy" 2> /dev/null | grep -q "$dstmac"
+	check_err $?
+	ip neigh del $dstip6 lladdr $dstmac dev "$devdummy"  > /dev/null
+	check_err $?
+
+	# ipv6 proxy
+	ip neigh add proxy $dstip6 dev "$devdummy" > /dev/null
+	check_err $?
+	ip neigh get proxy $dstip6 dev "$devdummy" 2>/dev/null | grep -q "$dstip6"
+	check_err $?
+	ip neigh del proxy $dstip6 dev "$devdummy" > /dev/null
+	check_err $?
+
+	if [ $ret -ne 0 ];then
+		echo "FAIL: neigh get"
+		return 1
+	fi
+
+	echo "PASS: neigh get"
+}
+
 kci_test_rtnl()
 {
 	kci_add_dummy
@@ -979,6 +1099,8 @@ kci_test_rtnl()
 	kci_test_macsec
 	kci_test_ipsec
 	kci_test_ipsec_offload
+	kci_test_fdb_get
+	kci_test_neigh_get
 
 	kci_del_dummy
 }
diff --git a/tools/testing/selftests/net/run_afpackettests b/tools/testing/selftests/net/run_afpackettests
index bea079edc278..2dc95fda7ef7 100755
--- a/tools/testing/selftests/net/run_afpackettests
+++ b/tools/testing/selftests/net/run_afpackettests
@@ -25,3 +25,13 @@ if [ $? -ne 0 ]; then
 else
 	echo "[PASS]"
 fi
+
+echo "--------------------"
+echo "running txring_overwrite test"
+echo "--------------------"
+./in_netns.sh ./txring_overwrite
+if [ $? -ne 0 ]; then
+	echo "[FAIL]"
+else
+	echo "[PASS]"
+fi
diff --git a/tools/testing/selftests/net/test_vxlan_under_vrf.sh b/tools/testing/selftests/net/test_vxlan_under_vrf.sh
new file mode 100755
index 000000000000..09f9ed92cbe4
--- /dev/null
+++ b/tools/testing/selftests/net/test_vxlan_under_vrf.sh
@@ -0,0 +1,129 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for checking VXLAN underlay in a non-default VRF.
+#
+# It simulates two hypervisors running a VM each using four network namespaces:
+# two for the HVs, two for the VMs.
+# A small VXLAN tunnel is made between the two hypervisors to have the two vms
+# in the same virtual L2:
+#
+# +-------------------+                                    +-------------------+
+# |                   |                                    |                   |
+# |    vm-1 netns     |                                    |    vm-2 netns     |
+# |                   |                                    |                   |
+# |  +-------------+  |                                    |  +-------------+  |
+# |  |   veth-hv   |  |                                    |  |   veth-hv   |  |
+# |  | 10.0.0.1/24 |  |                                    |  | 10.0.0.2/24 |  |
+# |  +-------------+  |                                    |  +-------------+  |
+# |        .          |                                    |         .         |
+# +-------------------+                                    +-------------------+
+#          .                                                         .
+#          .                                                         .
+#          .                                                         .
+# +-----------------------------------+   +------------------------------------+
+# |        .                          |   |                          .         |
+# |  +----------+                     |   |                     +----------+   |
+# |  | veth-tap |                     |   |                     | veth-tap |   |
+# |  +----+-----+                     |   |                     +----+-----+   |
+# |       |                           |   |                          |         |
+# |    +--+--+      +--------------+  |   |  +--------------+     +--+--+      |
+# |    | br0 |      | vrf-underlay |  |   |  | vrf-underlay |     | br0 |      |
+# |    +--+--+      +-------+------+  |   |  +------+-------+     +--+--+      |
+# |       |                 |         |   |         |                |         |
+# |   +---+----+    +-------+-------+ |   | +-------+-------+    +---+----+    |
+# |   | vxlan0 |....|     veth0     |.|...|.|     veth0     |....| vxlan0 |    |
+# |   +--------+    | 172.16.0.1/24 | |   | | 172.16.0.2/24 |    +--------+    |
+# |                 +---------------+ |   | +---------------+                  |
+# |                                   |   |                                    |
+# |             hv-1 netns            |   |           hv-2 netns               |
+# |                                   |   |                                    |
+# +-----------------------------------+   +------------------------------------+
+#
+# This tests both the connectivity between vm-1 and vm-2, and that the underlay
+# can be moved in and out of the vrf by unsetting and setting veth0's master.
+
+set -e
+
+cleanup() {
+    ip link del veth-hv-1 2>/dev/null || true
+    ip link del veth-tap 2>/dev/null || true
+
+    for ns in hv-1 hv-2 vm-1 vm-2; do
+        ip netns del $ns || true
+    done
+}
+
+# Clean start
+cleanup &> /dev/null
+
+[[ $1 == "clean" ]] && exit 0
+
+trap cleanup EXIT
+
+# Setup "Hypervisors" simulated with netns
+ip link add veth-hv-1 type veth peer name veth-hv-2
+setup-hv-networking() {
+    hv=$1
+
+    ip netns add hv-$hv
+    ip link set veth-hv-$hv netns hv-$hv
+    ip -netns hv-$hv link set veth-hv-$hv name veth0
+
+    ip -netns hv-$hv link add vrf-underlay type vrf table 1
+    ip -netns hv-$hv link set vrf-underlay up
+    ip -netns hv-$hv addr add 172.16.0.$hv/24 dev veth0
+    ip -netns hv-$hv link set veth0 up
+
+    ip -netns hv-$hv link add br0 type bridge
+    ip -netns hv-$hv link set br0 up
+
+    ip -netns hv-$hv link add vxlan0 type vxlan id 10 local 172.16.0.$hv dev veth0 dstport 4789
+    ip -netns hv-$hv link set vxlan0 master br0
+    ip -netns hv-$hv link set vxlan0 up
+}
+setup-hv-networking 1
+setup-hv-networking 2
+
+# Check connectivity between HVs by pinging hv-2 from hv-1
+echo -n "Checking HV connectivity                                           "
+ip netns exec hv-1 ping -c 1 -W 1 172.16.0.2 &> /dev/null || (echo "[FAIL]"; false)
+echo "[ OK ]"
+
+# Setups a "VM" simulated by a netns an a veth pair
+setup-vm() {
+    id=$1
+
+    ip netns add vm-$id
+    ip link add veth-tap type veth peer name veth-hv
+
+    ip link set veth-tap netns hv-$id
+    ip -netns hv-$id link set veth-tap master br0
+    ip -netns hv-$id link set veth-tap up
+
+    ip link set veth-hv netns vm-$id
+    ip -netns vm-$id addr add 10.0.0.$id/24 dev veth-hv
+    ip -netns vm-$id link set veth-hv up
+}
+setup-vm 1
+setup-vm 2
+
+# Setup VTEP routes to make ARP work
+bridge -netns hv-1 fdb add 00:00:00:00:00:00 dev vxlan0 dst 172.16.0.2 self permanent
+bridge -netns hv-2 fdb add 00:00:00:00:00:00 dev vxlan0 dst 172.16.0.1 self permanent
+
+echo -n "Check VM connectivity through VXLAN (underlay in the default VRF)  "
+ip netns exec vm-1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false)
+echo "[ OK ]"
+
+# Move the underlay to a non-default VRF
+ip -netns hv-1 link set veth0 vrf vrf-underlay
+ip -netns hv-1 link set veth0 down
+ip -netns hv-1 link set veth0 up
+ip -netns hv-2 link set veth0 vrf vrf-underlay
+ip -netns hv-2 link set veth0 down
+ip -netns hv-2 link set veth0 up
+
+echo -n "Check VM connectivity through VXLAN (underlay in a VRF)            "
+ip netns exec vm-1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false)
+echo "[ OK ]"
diff --git a/tools/testing/selftests/net/txring_overwrite.c b/tools/testing/selftests/net/txring_overwrite.c
new file mode 100644
index 000000000000..fd8b1c663c39
--- /dev/null
+++ b/tools/testing/selftests/net/txring_overwrite.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Verify that consecutive sends over packet tx_ring are mirrored
+ * with their original content intact.
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <assert.h>
+#include <error.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/filter.h>
+#include <linux/if_packet.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <pthread.h>
+#include <sched.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/utsname.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+const int eth_off = TPACKET_HDRLEN - sizeof(struct sockaddr_ll);
+const int cfg_frame_size = 1000;
+
+static void build_packet(void *buffer, size_t blen, char payload_char)
+{
+	struct udphdr *udph;
+	struct ethhdr *eth;
+	struct iphdr *iph;
+	size_t off = 0;
+
+	memset(buffer, 0, blen);
+
+	eth = buffer;
+	eth->h_proto = htons(ETH_P_IP);
+
+	off += sizeof(*eth);
+	iph = buffer + off;
+	iph->ttl	= 8;
+	iph->ihl	= 5;
+	iph->version	= 4;
+	iph->saddr	= htonl(INADDR_LOOPBACK);
+	iph->daddr	= htonl(INADDR_LOOPBACK + 1);
+	iph->protocol	= IPPROTO_UDP;
+	iph->tot_len	= htons(blen - off);
+	iph->check	= 0;
+
+	off += sizeof(*iph);
+	udph = buffer + off;
+	udph->dest	= htons(8000);
+	udph->source	= htons(8001);
+	udph->len	= htons(blen - off);
+	udph->check	= 0;
+
+	off += sizeof(*udph);
+	memset(buffer + off, payload_char, blen - off);
+}
+
+static int setup_rx(void)
+{
+	int fdr;
+
+	fdr = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_IP));
+	if (fdr == -1)
+		error(1, errno, "socket r");
+
+	return fdr;
+}
+
+static int setup_tx(char **ring)
+{
+	struct sockaddr_ll laddr = {};
+	struct tpacket_req req = {};
+	int fdt;
+
+	fdt = socket(PF_PACKET, SOCK_RAW, 0);
+	if (fdt == -1)
+		error(1, errno, "socket t");
+
+	laddr.sll_family = AF_PACKET;
+	laddr.sll_protocol = htons(0);
+	laddr.sll_ifindex = if_nametoindex("lo");
+	if (!laddr.sll_ifindex)
+		error(1, errno, "if_nametoindex");
+
+	if (bind(fdt, (void *)&laddr, sizeof(laddr)))
+		error(1, errno, "bind fdt");
+
+	req.tp_block_size = getpagesize();
+	req.tp_block_nr   = 1;
+	req.tp_frame_size = getpagesize();
+	req.tp_frame_nr   = 1;
+
+	if (setsockopt(fdt, SOL_PACKET, PACKET_TX_RING,
+		       (void *)&req, sizeof(req)))
+		error(1, errno, "setsockopt ring");
+
+	*ring = mmap(0, req.tp_block_size * req.tp_block_nr,
+		     PROT_READ | PROT_WRITE, MAP_SHARED, fdt, 0);
+	if (!*ring)
+		error(1, errno, "mmap");
+
+	return fdt;
+}
+
+static void send_pkt(int fdt, void *slot, char payload_char)
+{
+	struct tpacket_hdr *header = slot;
+	int ret;
+
+	while (header->tp_status != TP_STATUS_AVAILABLE)
+		usleep(1000);
+
+	build_packet(slot + eth_off, cfg_frame_size, payload_char);
+
+	header->tp_len = cfg_frame_size;
+	header->tp_status = TP_STATUS_SEND_REQUEST;
+
+	ret = sendto(fdt, NULL, 0, 0, NULL, 0);
+	if (ret == -1)
+		error(1, errno, "kick tx");
+}
+
+static int read_verify_pkt(int fdr, char payload_char)
+{
+	char buf[100];
+	int ret;
+
+	ret = read(fdr, buf, sizeof(buf));
+	if (ret != sizeof(buf))
+		error(1, errno, "read");
+
+	if (buf[60] != payload_char) {
+		printf("wrong pattern: 0x%x != 0x%x\n", buf[60], payload_char);
+		return 1;
+	}
+
+	printf("read: %c (0x%x)\n", buf[60], buf[60]);
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	const char payload_patterns[] = "ab";
+	char *ring;
+	int fdr, fdt, ret = 0;
+
+	fdr = setup_rx();
+	fdt = setup_tx(&ring);
+
+	send_pkt(fdt, ring, payload_patterns[0]);
+	send_pkt(fdt, ring, payload_patterns[1]);
+
+	ret |= read_verify_pkt(fdr, payload_patterns[0]);
+	ret |= read_verify_pkt(fdr, payload_patterns[1]);
+
+	if (close(fdt))
+		error(1, errno, "close t");
+	if (close(fdr))
+		error(1, errno, "close r");
+
+	return ret;
+}
diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh
new file mode 100755
index 000000000000..aeac53a99aeb
--- /dev/null
+++ b/tools/testing/selftests/net/udpgro.sh
@@ -0,0 +1,182 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a series of udpgro functional tests.
+
+readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
+
+cleanup() {
+	local -r jobs="$(jobs -p)"
+	local -r ns="$(ip netns list|grep $PEER_NS)"
+
+	[ -n "${jobs}" ] && kill -1 ${jobs} 2>/dev/null
+	[ -n "$ns" ] && ip netns del $ns 2>/dev/null
+}
+trap cleanup EXIT
+
+cfg_veth() {
+	ip netns add "${PEER_NS}"
+	ip -netns "${PEER_NS}" link set lo up
+	ip link add type veth
+	ip link set dev veth0 up
+	ip addr add dev veth0 192.168.1.2/24
+	ip addr add dev veth0 2001:db8::2/64 nodad
+
+	ip link set dev veth1 netns "${PEER_NS}"
+	ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24
+	ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad
+	ip -netns "${PEER_NS}" link set dev veth1 up
+	ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp_dummy
+}
+
+run_one() {
+	# use 'rx' as separator between sender args and receiver args
+	local -r all="$@"
+	local -r tx_args=${all%rx*}
+	local -r rx_args=${all#*rx}
+
+	cfg_veth
+
+	ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} && \
+		echo "ok" || \
+		echo "failed" &
+
+	# Hack: let bg programs complete the startup
+	sleep 0.1
+	./udpgso_bench_tx ${tx_args}
+	wait $(jobs -p)
+}
+
+run_test() {
+	local -r args=$@
+
+	printf " %-40s" "$1"
+	./in_netns.sh $0 __subprocess $2 rx -G -r $3
+}
+
+run_one_nat() {
+	# use 'rx' as separator between sender args and receiver args
+	local addr1 addr2 pid family="" ipt_cmd=ip6tables
+	local -r all="$@"
+	local -r tx_args=${all%rx*}
+	local -r rx_args=${all#*rx}
+
+	if [[ ${tx_args} = *-4* ]]; then
+		ipt_cmd=iptables
+		family=-4
+		addr1=192.168.1.1
+		addr2=192.168.1.3/24
+	else
+		addr1=2001:db8::1
+		addr2="2001:db8::3/64 nodad"
+	fi
+
+	cfg_veth
+	ip -netns "${PEER_NS}" addr add dev veth1 ${addr2}
+
+	# fool the GRO engine changing the destination address ...
+	ip netns exec "${PEER_NS}" $ipt_cmd -t nat -I PREROUTING -d ${addr1} -j DNAT --to-destination ${addr2%/*}
+
+	# ... so that GRO will match the UDP_GRO enabled socket, but packets
+	# will land on the 'plain' one
+	ip netns exec "${PEER_NS}" ./udpgso_bench_rx -G ${family} -b ${addr1} -n 0 &
+	pid=$!
+	ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${family} -b ${addr2%/*} ${rx_args} && \
+		echo "ok" || \
+		echo "failed"&
+
+	sleep 0.1
+	./udpgso_bench_tx ${tx_args}
+	kill -INT $pid
+	wait $(jobs -p)
+}
+
+run_one_2sock() {
+	# use 'rx' as separator between sender args and receiver args
+	local -r all="$@"
+	local -r tx_args=${all%rx*}
+	local -r rx_args=${all#*rx}
+
+	cfg_veth
+
+	ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -p 12345 &
+	ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} && \
+		echo "ok" || \
+		echo "failed" &
+
+	# Hack: let bg programs complete the startup
+	sleep 0.1
+	./udpgso_bench_tx ${tx_args} -p 12345
+	sleep 0.1
+	# first UDP GSO socket should be closed at this point
+	./udpgso_bench_tx ${tx_args}
+	wait $(jobs -p)
+}
+
+run_nat_test() {
+	local -r args=$@
+
+	printf " %-40s" "$1"
+	./in_netns.sh $0 __subprocess_nat $2 rx -r $3
+}
+
+run_2sock_test() {
+	local -r args=$@
+
+	printf " %-40s" "$1"
+	./in_netns.sh $0 __subprocess_2sock $2 rx -G -r $3
+}
+
+run_all() {
+	local -r core_args="-l 4"
+	local -r ipv4_args="${core_args} -4 -D 192.168.1.1"
+	local -r ipv6_args="${core_args} -6 -D 2001:db8::1"
+
+	echo "ipv4"
+	run_test "no GRO" "${ipv4_args} -M 10 -s 1400" "-4 -n 10 -l 1400"
+
+	# explicitly check we are not receiving UDP_SEGMENT cmsg (-S -1)
+	# when GRO does not take place
+	run_test "no GRO chk cmsg" "${ipv4_args} -M 10 -s 1400" "-4 -n 10 -l 1400 -S -1"
+
+	# the GSO packets are aggregated because:
+	# * veth schedule napi after each xmit
+	# * segmentation happens in BH context, veth napi poll is delayed after
+	#   the transmission of the last segment
+	run_test "GRO" "${ipv4_args} -M 1 -s 14720 -S 0 " "-4 -n 1 -l 14720"
+	run_test "GRO chk cmsg" "${ipv4_args} -M 1 -s 14720 -S 0 " "-4 -n 1 -l 14720 -S 1472"
+	run_test "GRO with custom segment size" "${ipv4_args} -M 1 -s 14720 -S 500 " "-4 -n 1 -l 14720"
+	run_test "GRO with custom segment size cmsg" "${ipv4_args} -M 1 -s 14720 -S 500 " "-4 -n 1 -l 14720 -S 500"
+
+	run_nat_test "bad GRO lookup" "${ipv4_args} -M 1 -s 14720 -S 0" "-n 10 -l 1472"
+	run_2sock_test "multiple GRO socks" "${ipv4_args} -M 1 -s 14720 -S 0 " "-4 -n 1 -l 14720 -S 1472"
+
+	echo "ipv6"
+	run_test "no GRO" "${ipv6_args} -M 10 -s 1400" "-n 10 -l 1400"
+	run_test "no GRO chk cmsg" "${ipv6_args} -M 10 -s 1400" "-n 10 -l 1400 -S -1"
+	run_test "GRO" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 1 -l 14520"
+	run_test "GRO chk cmsg" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 1 -l 14520 -S 1452"
+	run_test "GRO with custom segment size" "${ipv6_args} -M 1 -s 14520 -S 500" "-n 1 -l 14520"
+	run_test "GRO with custom segment size cmsg" "${ipv6_args} -M 1 -s 14520 -S 500" "-n 1 -l 14520 -S 500"
+
+	run_nat_test "bad GRO lookup" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 10 -l 1452"
+	run_2sock_test "multiple GRO socks" "${ipv6_args} -M 1 -s 14520 -S 0 " "-n 1 -l 14520 -S 1452"
+}
+
+if [ ! -f ../bpf/xdp_dummy.o ]; then
+	echo "Missing xdp_dummy helper. Build bpf selftest first"
+	exit -1
+fi
+
+if [[ $# -eq 0 ]]; then
+	run_all
+elif [[ $1 == "__subprocess" ]]; then
+	shift
+	run_one $@
+elif [[ $1 == "__subprocess_nat" ]]; then
+	shift
+	run_one_nat $@
+elif [[ $1 == "__subprocess_2sock" ]]; then
+	shift
+	run_one_2sock $@
+fi
diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh
new file mode 100755
index 000000000000..820bc50f6b68
--- /dev/null
+++ b/tools/testing/selftests/net/udpgro_bench.sh
@@ -0,0 +1,95 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a series of udpgro benchmarks
+
+readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
+
+cleanup() {
+	local -r jobs="$(jobs -p)"
+	local -r ns="$(ip netns list|grep $PEER_NS)"
+
+	[ -n "${jobs}" ] && kill -INT ${jobs} 2>/dev/null
+	[ -n "$ns" ] && ip netns del $ns 2>/dev/null
+}
+trap cleanup EXIT
+
+run_one() {
+	# use 'rx' as separator between sender args and receiver args
+	local -r all="$@"
+	local -r tx_args=${all%rx*}
+	local rx_args=${all#*rx}
+
+	[[ "${tx_args}" == *"-4"* ]] && rx_args="${rx_args} -4"
+
+	ip netns add "${PEER_NS}"
+	ip -netns "${PEER_NS}" link set lo up
+	ip link add type veth
+	ip link set dev veth0 up
+	ip addr add dev veth0 192.168.1.2/24
+	ip addr add dev veth0 2001:db8::2/64 nodad
+
+	ip link set dev veth1 netns "${PEER_NS}"
+	ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24
+	ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad
+	ip -netns "${PEER_NS}" link set dev veth1 up
+
+	ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp_dummy
+	ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r &
+	ip netns exec "${PEER_NS}" ./udpgso_bench_rx -t ${rx_args} -r &
+
+	# Hack: let bg programs complete the startup
+	sleep 0.1
+	./udpgso_bench_tx ${tx_args}
+}
+
+run_in_netns() {
+	local -r args=$@
+
+	./in_netns.sh $0 __subprocess ${args}
+}
+
+run_udp() {
+	local -r args=$@
+
+	echo "udp gso - over veth touching data"
+	run_in_netns ${args} -S 0 rx
+
+	echo "udp gso and gro - over veth touching data"
+	run_in_netns ${args} -S 0 rx -G
+}
+
+run_tcp() {
+	local -r args=$@
+
+	echo "tcp - over veth touching data"
+	run_in_netns ${args} -t rx
+}
+
+run_all() {
+	local -r core_args="-l 4"
+	local -r ipv4_args="${core_args} -4 -D 192.168.1.1"
+	local -r ipv6_args="${core_args} -6 -D 2001:db8::1"
+
+	echo "ipv4"
+	run_tcp "${ipv4_args}"
+	run_udp "${ipv4_args}"
+
+	echo "ipv6"
+	run_tcp "${ipv4_args}"
+	run_udp "${ipv6_args}"
+}
+
+if [ ! -f ../bpf/xdp_dummy.o ]; then
+	echo "Missing xdp_dummy helper. Build bpf selftest first"
+	exit -1
+fi
+
+if [[ $# -eq 0 ]]; then
+	run_all
+elif [[ $1 == "__subprocess" ]]; then
+	shift
+	run_one $@
+else
+	run_in_netns $@
+fi
diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh
index 99e537ab5ad9..5670a9ffd8eb 100755
--- a/tools/testing/selftests/net/udpgso_bench.sh
+++ b/tools/testing/selftests/net/udpgso_bench.sh
@@ -34,7 +34,10 @@ run_udp() {
 	run_in_netns ${args}
 
 	echo "udp gso"
-	run_in_netns ${args} -S
+	run_in_netns ${args} -S 0
+
+	echo "udp gso zerocopy"
+	run_in_netns ${args} -S 0 -z
 }
 
 run_tcp() {
diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c
index 727cf67a3f75..0c960f673324 100644
--- a/tools/testing/selftests/net/udpgso_bench_rx.c
+++ b/tools/testing/selftests/net/udpgso_bench_rx.c
@@ -31,9 +31,21 @@
 #include <sys/wait.h>
 #include <unistd.h>
 
+#ifndef UDP_GRO
+#define UDP_GRO		104
+#endif
+
 static int  cfg_port		= 8000;
 static bool cfg_tcp;
 static bool cfg_verify;
+static bool cfg_read_all;
+static bool cfg_gro_segment;
+static int  cfg_family		= PF_INET6;
+static int  cfg_alen 		= sizeof(struct sockaddr_in6);
+static int  cfg_expected_pkt_nr;
+static int  cfg_expected_pkt_len;
+static int  cfg_expected_gso_size;
+static struct sockaddr_storage cfg_bind_addr;
 
 static bool interrupted;
 static unsigned long packets, bytes;
@@ -44,6 +56,29 @@ static void sigint_handler(int signum)
 		interrupted = true;
 }
 
+static void setup_sockaddr(int domain, const char *str_addr, void *sockaddr)
+{
+	struct sockaddr_in6 *addr6 = (void *) sockaddr;
+	struct sockaddr_in *addr4 = (void *) sockaddr;
+
+	switch (domain) {
+	case PF_INET:
+		addr4->sin_family = AF_INET;
+		addr4->sin_port = htons(cfg_port);
+		if (inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
+			error(1, 0, "ipv4 parse error: %s", str_addr);
+		break;
+	case PF_INET6:
+		addr6->sin6_family = AF_INET6;
+		addr6->sin6_port = htons(cfg_port);
+		if (inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
+			error(1, 0, "ipv6 parse error: %s", str_addr);
+		break;
+	default:
+		error(1, 0, "illegal domain");
+	}
+}
+
 static unsigned long gettimeofday_ms(void)
 {
 	struct timeval tv;
@@ -63,6 +98,8 @@ static void do_poll(int fd)
 
 	do {
 		ret = poll(&pfd, 1, 10);
+		if (interrupted)
+			break;
 		if (ret == -1)
 			error(1, errno, "poll");
 		if (ret == 0)
@@ -70,15 +107,14 @@ static void do_poll(int fd)
 		if (pfd.revents != POLLIN)
 			error(1, errno, "poll: 0x%x expected 0x%x\n",
 					pfd.revents, POLLIN);
-	} while (!ret && !interrupted);
+	} while (!ret);
 }
 
 static int do_socket(bool do_tcp)
 {
-	struct sockaddr_in6 addr = {0};
 	int fd, val;
 
-	fd = socket(PF_INET6, cfg_tcp ? SOCK_STREAM : SOCK_DGRAM, 0);
+	fd = socket(cfg_family, cfg_tcp ? SOCK_STREAM : SOCK_DGRAM, 0);
 	if (fd == -1)
 		error(1, errno, "socket");
 
@@ -89,10 +125,7 @@ static int do_socket(bool do_tcp)
 	if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val)))
 		error(1, errno, "setsockopt reuseport");
 
-	addr.sin6_family =	PF_INET6;
-	addr.sin6_port =	htons(cfg_port);
-	addr.sin6_addr =	in6addr_any;
-	if (bind(fd, (void *) &addr, sizeof(addr)))
+	if (bind(fd, (void *)&cfg_bind_addr, cfg_alen))
 		error(1, errno, "bind");
 
 	if (do_tcp) {
@@ -102,6 +135,8 @@ static int do_socket(bool do_tcp)
 			error(1, errno, "listen");
 
 		do_poll(accept_fd);
+		if (interrupted)
+			exit(0);
 
 		fd = accept(accept_fd, NULL, NULL);
 		if (fd == -1)
@@ -164,51 +199,123 @@ static void do_verify_udp(const char *data, int len)
 	}
 }
 
+static int recv_msg(int fd, char *buf, int len, int *gso_size)
+{
+	char control[CMSG_SPACE(sizeof(uint16_t))] = {0};
+	struct msghdr msg = {0};
+	struct iovec iov = {0};
+	struct cmsghdr *cmsg;
+	uint16_t *gsosizeptr;
+	int ret;
+
+	iov.iov_base = buf;
+	iov.iov_len = len;
+
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+
+	msg.msg_control = control;
+	msg.msg_controllen = sizeof(control);
+
+	*gso_size = -1;
+	ret = recvmsg(fd, &msg, MSG_TRUNC | MSG_DONTWAIT);
+	if (ret != -1) {
+		for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
+		     cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+			if (cmsg->cmsg_level == SOL_UDP
+			    && cmsg->cmsg_type == UDP_GRO) {
+				gsosizeptr = (uint16_t *) CMSG_DATA(cmsg);
+				*gso_size = *gsosizeptr;
+				break;
+			}
+		}
+	}
+	return ret;
+}
+
 /* Flush all outstanding datagrams. Verify first few bytes of each. */
 static void do_flush_udp(int fd)
 {
-	static char rbuf[ETH_DATA_LEN];
-	int ret, len, budget = 256;
+	static char rbuf[ETH_MAX_MTU];
+	int ret, len, gso_size, budget = 256;
 
-	len = cfg_verify ? sizeof(rbuf) : 0;
+	len = cfg_read_all ? sizeof(rbuf) : 0;
 	while (budget--) {
 		/* MSG_TRUNC will make return value full datagram length */
-		ret = recv(fd, rbuf, len, MSG_TRUNC | MSG_DONTWAIT);
+		if (!cfg_expected_gso_size)
+			ret = recv(fd, rbuf, len, MSG_TRUNC | MSG_DONTWAIT);
+		else
+			ret = recv_msg(fd, rbuf, len, &gso_size);
 		if (ret == -1 && errno == EAGAIN)
-			return;
+			break;
 		if (ret == -1)
 			error(1, errno, "recv");
-		if (len) {
+		if (cfg_expected_pkt_len && ret != cfg_expected_pkt_len)
+			error(1, 0, "recv: bad packet len, got %d,"
+			      " expected %d\n", ret, cfg_expected_pkt_len);
+		if (len && cfg_verify) {
 			if (ret == 0)
 				error(1, errno, "recv: 0 byte datagram\n");
 
 			do_verify_udp(rbuf, ret);
 		}
+		if (cfg_expected_gso_size && cfg_expected_gso_size != gso_size)
+			error(1, 0, "recv: bad gso size, got %d, expected %d "
+			      "(-1 == no gso cmsg))\n", gso_size,
+			      cfg_expected_gso_size);
 
 		packets++;
 		bytes += ret;
+		if (cfg_expected_pkt_nr && packets >= cfg_expected_pkt_nr)
+			break;
 	}
 }
 
 static void usage(const char *filepath)
 {
-	error(1, 0, "Usage: %s [-tv] [-p port]", filepath);
+	error(1, 0, "Usage: %s [-Grtv] [-b addr] [-p port] [-l pktlen] [-n packetnr] [-S gsosize]", filepath);
 }
 
 static void parse_opts(int argc, char **argv)
 {
 	int c;
 
-	while ((c = getopt(argc, argv, "ptv")) != -1) {
+	/* bind to any by default */
+	setup_sockaddr(PF_INET6, "::", &cfg_bind_addr);
+	while ((c = getopt(argc, argv, "4b:Gl:n:p:rS:tv")) != -1) {
 		switch (c) {
+		case '4':
+			cfg_family = PF_INET;
+			cfg_alen = sizeof(struct sockaddr_in);
+			setup_sockaddr(PF_INET, "0.0.0.0", &cfg_bind_addr);
+			break;
+		case 'b':
+			setup_sockaddr(cfg_family, optarg, &cfg_bind_addr);
+			break;
+		case 'G':
+			cfg_gro_segment = true;
+			break;
+		case 'l':
+			cfg_expected_pkt_len = strtoul(optarg, NULL, 0);
+			break;
+		case 'n':
+			cfg_expected_pkt_nr = strtoul(optarg, NULL, 0);
+			break;
 		case 'p':
-			cfg_port = htons(strtoul(optarg, NULL, 0));
+			cfg_port = strtoul(optarg, NULL, 0);
+			break;
+		case 'r':
+			cfg_read_all = true;
+			break;
+		case 'S':
+			cfg_expected_gso_size = strtol(optarg, NULL, 0);
 			break;
 		case 't':
 			cfg_tcp = true;
 			break;
 		case 'v':
 			cfg_verify = true;
+			cfg_read_all = true;
 			break;
 		}
 	}
@@ -223,12 +330,23 @@ static void parse_opts(int argc, char **argv)
 static void do_recv(void)
 {
 	unsigned long tnow, treport;
-	int fd;
+	int fd, loop = 0;
 
 	fd = do_socket(cfg_tcp);
 
+	if (cfg_gro_segment && !cfg_tcp) {
+		int val = 1;
+		if (setsockopt(fd, IPPROTO_UDP, UDP_GRO, &val, sizeof(val)))
+			error(1, errno, "setsockopt UDP_GRO");
+	}
+
 	treport = gettimeofday_ms() + 1000;
 	do {
+		/* force termination after the second poll(); this cope both
+		 * with sender slower than receiver and missing packet errors
+		 */
+		if (cfg_expected_pkt_nr && loop++)
+			interrupted = true;
 		do_poll(fd);
 
 		if (cfg_tcp)
@@ -249,6 +367,10 @@ static void do_recv(void)
 
 	} while (!interrupted);
 
+	if (cfg_expected_pkt_nr && (packets != cfg_expected_pkt_nr))
+		error(1, 0, "wrong packet number! got %ld, expected %d\n",
+		      packets, cfg_expected_pkt_nr);
+
 	if (close(fd))
 		error(1, errno, "close");
 }
diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c
index e821564053cf..4074538b5df5 100644
--- a/tools/testing/selftests/net/udpgso_bench_tx.c
+++ b/tools/testing/selftests/net/udpgso_bench_tx.c
@@ -52,6 +52,8 @@ static bool	cfg_segment;
 static bool	cfg_sendmmsg;
 static bool	cfg_tcp;
 static bool	cfg_zerocopy;
+static int	cfg_msg_nr;
+static uint16_t	cfg_gso_size;
 
 static socklen_t cfg_alen;
 static struct sockaddr_storage cfg_dst_addr;
@@ -205,14 +207,14 @@ static void send_udp_segment_cmsg(struct cmsghdr *cm)
 
 	cm->cmsg_level = SOL_UDP;
 	cm->cmsg_type = UDP_SEGMENT;
-	cm->cmsg_len = CMSG_LEN(sizeof(cfg_mss));
+	cm->cmsg_len = CMSG_LEN(sizeof(cfg_gso_size));
 	valp = (void *)CMSG_DATA(cm);
-	*valp = cfg_mss;
+	*valp = cfg_gso_size;
 }
 
 static int send_udp_segment(int fd, char *data)
 {
-	char control[CMSG_SPACE(sizeof(cfg_mss))] = {0};
+	char control[CMSG_SPACE(sizeof(cfg_gso_size))] = {0};
 	struct msghdr msg = {0};
 	struct iovec iov = {0};
 	int ret;
@@ -241,7 +243,7 @@ static int send_udp_segment(int fd, char *data)
 
 static void usage(const char *filepath)
 {
-	error(1, 0, "Usage: %s [-46cmStuz] [-C cpu] [-D dst ip] [-l secs] [-p port] [-s sendsize]",
+	error(1, 0, "Usage: %s [-46cmtuz] [-C cpu] [-D dst ip] [-l secs] [-m messagenr] [-p port] [-s sendsize] [-S gsosize]",
 		    filepath);
 }
 
@@ -250,7 +252,7 @@ static void parse_opts(int argc, char **argv)
 	int max_len, hdrlen;
 	int c;
 
-	while ((c = getopt(argc, argv, "46cC:D:l:mp:s:Stuz")) != -1) {
+	while ((c = getopt(argc, argv, "46cC:D:l:mM:p:s:S:tuz")) != -1) {
 		switch (c) {
 		case '4':
 			if (cfg_family != PF_UNSPEC)
@@ -279,6 +281,9 @@ static void parse_opts(int argc, char **argv)
 		case 'm':
 			cfg_sendmmsg = true;
 			break;
+		case 'M':
+			cfg_msg_nr = strtoul(optarg, NULL, 10);
+			break;
 		case 'p':
 			cfg_port = strtoul(optarg, NULL, 0);
 			break;
@@ -286,6 +291,7 @@ static void parse_opts(int argc, char **argv)
 			cfg_payload_len = strtoul(optarg, NULL, 0);
 			break;
 		case 'S':
+			cfg_gso_size = strtoul(optarg, NULL, 0);
 			cfg_segment = true;
 			break;
 		case 't':
@@ -317,6 +323,8 @@ static void parse_opts(int argc, char **argv)
 
 	cfg_mss = ETH_DATA_LEN - hdrlen;
 	max_len = ETH_MAX_MTU - hdrlen;
+	if (!cfg_gso_size)
+		cfg_gso_size = cfg_mss;
 
 	if (cfg_payload_len > max_len)
 		error(1, 0, "payload length %u exceeds max %u",
@@ -392,10 +400,12 @@ int main(int argc, char **argv)
 		else
 			num_sends += send_udp(fd, buf[i]);
 		num_msgs++;
-
 		if (cfg_zerocopy && ((num_msgs & 0xF) == 0))
 			flush_zerocopy(fd);
 
+		if (cfg_msg_nr && num_msgs >= cfg_msg_nr)
+			break;
+
 		tnow = gettimeofday_ms();
 		if (tnow > treport) {
 			fprintf(stderr,
diff --git a/tools/testing/selftests/net/xfrm_policy.sh b/tools/testing/selftests/net/xfrm_policy.sh
new file mode 100755
index 000000000000..8db35b99457c
--- /dev/null
+++ b/tools/testing/selftests/net/xfrm_policy.sh
@@ -0,0 +1,302 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Check xfrm policy resolution.  Topology:
+#
+# 1.2   1.1   3.1  3.10    2.1   2.2
+# eth1  eth1 veth0 veth0 eth1   eth1
+# ns1 ---- ns3 ----- ns4 ---- ns2
+#
+# ns3 and ns4 are connected via ipsec tunnel.
+# pings from ns1 to ns2 (and vice versa) are supposed to work like this:
+# ns1: ping 10.0.2.2: passes via ipsec tunnel.
+# ns2: ping 10.0.1.2: passes via ipsec tunnel.
+
+# ns1: ping 10.0.1.253: passes via ipsec tunnel (direct policy)
+# ns2: ping 10.0.2.253: passes via ipsec tunnel (direct policy)
+#
+# ns1: ping 10.0.2.254: does NOT pass via ipsec tunnel (exception)
+# ns2: ping 10.0.1.254: does NOT pass via ipsec tunnel (exception)
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+policy_checks_ok=1
+
+KEY_SHA=0xdeadbeef1234567890abcdefabcdefabcdefabcd
+KEY_AES=0x0123456789abcdef0123456789012345
+SPI1=0x1
+SPI2=0x2
+
+do_esp() {
+    local ns=$1
+    local me=$2
+    local remote=$3
+    local lnet=$4
+    local rnet=$5
+    local spi_out=$6
+    local spi_in=$7
+
+    ip -net $ns xfrm state add src $remote dst $me proto esp spi $spi_in  enc aes $KEY_AES  auth sha1 $KEY_SHA  mode tunnel sel src $rnet dst $lnet
+    ip -net $ns xfrm state add src $me  dst $remote proto esp spi $spi_out enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $lnet dst $rnet
+
+    # to encrypt packets as they go out (includes forwarded packets that need encapsulation)
+    ip -net $ns xfrm policy add src $lnet dst $rnet dir out tmpl src $me dst $remote proto esp mode tunnel priority 100 action allow
+    # to fwd decrypted packets after esp processing:
+    ip -net $ns xfrm policy add src $rnet dst $lnet dir fwd tmpl src $remote dst $me proto esp mode tunnel priority 100 action allow
+}
+
+do_esp_policy_get_check() {
+    local ns=$1
+    local lnet=$2
+    local rnet=$3
+
+    ip -net $ns xfrm policy get src $lnet dst $rnet dir out > /dev/null
+    if [ $? -ne 0 ] && [ $policy_checks_ok -eq 1 ] ;then
+        policy_checks_ok=0
+        echo "FAIL: ip -net $ns xfrm policy get src $lnet dst $rnet dir out"
+        ret=1
+    fi
+
+    ip -net $ns xfrm policy get src $rnet dst $lnet dir fwd > /dev/null
+    if [ $? -ne 0 ] && [ $policy_checks_ok -eq 1 ] ;then
+        policy_checks_ok=0
+        echo "FAIL: ip -net $ns xfrm policy get src $rnet dst $lnet dir fwd"
+        ret=1
+    fi
+}
+
+do_exception() {
+    local ns=$1
+    local me=$2
+    local remote=$3
+    local encryptip=$4
+    local plain=$5
+
+    # network $plain passes without tunnel
+    ip -net $ns xfrm policy add dst $plain dir out priority 10 action allow
+
+    # direct policy for $encryptip, use tunnel, higher prio takes precedence
+    ip -net $ns xfrm policy add dst $encryptip dir out tmpl src $me dst $remote proto esp mode tunnel priority 1 action allow
+}
+
+# policies that are not supposed to match any packets generated in this test.
+do_dummies4() {
+    local ns=$1
+
+    for i in $(seq 10 16);do
+      # dummy policy with wildcard src/dst.
+      echo netns exec $ns ip xfrm policy add src 0.0.0.0/0 dst 10.$i.99.0/30 dir out action block
+      echo netns exec $ns ip xfrm policy add src 10.$i.99.0/30 dst 0.0.0.0/0 dir out action block
+      for j in $(seq 32 64);do
+        echo netns exec $ns ip xfrm policy add src 10.$i.1.0/30 dst 10.$i.$j.0/30 dir out action block
+        # silly, as it encompasses the one above too, but its allowed:
+        echo netns exec $ns ip xfrm policy add src 10.$i.1.0/29 dst 10.$i.$j.0/29 dir out action block
+        # and yet again, even more broad one.
+        echo netns exec $ns ip xfrm policy add src 10.$i.1.0/24 dst 10.$i.$j.0/24 dir out action block
+        echo netns exec $ns ip xfrm policy add src 10.$i.$j.0/24 dst 10.$i.1.0/24 dir fwd action block
+      done
+    done | ip -batch /dev/stdin
+}
+
+do_dummies6() {
+    local ns=$1
+
+    for i in $(seq 10 16);do
+      for j in $(seq 32 64);do
+       echo netns exec $ns ip xfrm policy add src dead:$i::/64 dst dead:$i:$j::/64 dir out action block
+       echo netns exec $ns ip xfrm policy add src dead:$i:$j::/64 dst dead:$i::/24 dir fwd action block
+      done
+    done | ip -batch /dev/stdin
+}
+
+check_ipt_policy_count()
+{
+	ns=$1
+
+	ip netns exec $ns iptables-save -c |grep policy | ( read c rest
+		ip netns exec $ns iptables -Z
+		if [ x"$c" = x'[0:0]' ]; then
+			exit 0
+		elif [ x"$c" = x ]; then
+			echo "ERROR: No counters"
+			ret=1
+			exit 111
+		else
+			exit 1
+		fi
+	)
+}
+
+check_xfrm() {
+	# 0: iptables -m policy rule count == 0
+	# 1: iptables -m policy rule count != 0
+	rval=$1
+	ip=$2
+	lret=0
+
+	ip netns exec ns1 ping -q -c 1 10.0.2.$ip > /dev/null
+
+	check_ipt_policy_count ns3
+	if [ $? -ne $rval ] ; then
+		lret=1
+	fi
+	check_ipt_policy_count ns4
+	if [ $? -ne $rval ] ; then
+		lret=1
+	fi
+
+	ip netns exec ns2 ping -q -c 1 10.0.1.$ip > /dev/null
+
+	check_ipt_policy_count ns3
+	if [ $? -ne $rval ] ; then
+		lret=1
+	fi
+	check_ipt_policy_count ns4
+	if [ $? -ne $rval ] ; then
+		lret=1
+	fi
+
+	return $lret
+}
+
+#check for needed privileges
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit $ksft_skip
+fi
+
+ip -Version 2>/dev/null >/dev/null
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without the ip tool"
+	exit $ksft_skip
+fi
+
+# needed to check if policy lookup got valid ipsec result
+iptables --version 2>/dev/null >/dev/null
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without iptables tool"
+	exit $ksft_skip
+fi
+
+for i in 1 2 3 4; do
+    ip netns add ns$i
+    ip -net ns$i link set lo up
+done
+
+DEV=veth0
+ip link add $DEV netns ns1 type veth peer name eth1 netns ns3
+ip link add $DEV netns ns2 type veth peer name eth1 netns ns4
+
+ip link add $DEV netns ns3 type veth peer name veth0 netns ns4
+
+DEV=veth0
+for i in 1 2; do
+    ip -net ns$i link set $DEV up
+    ip -net ns$i addr add 10.0.$i.2/24 dev $DEV
+    ip -net ns$i addr add dead:$i::2/64 dev $DEV
+
+    ip -net ns$i addr add 10.0.$i.253 dev $DEV
+    ip -net ns$i addr add 10.0.$i.254 dev $DEV
+    ip -net ns$i addr add dead:$i::fd dev $DEV
+    ip -net ns$i addr add dead:$i::fe dev $DEV
+done
+
+for i in 3 4; do
+ip -net ns$i link set eth1 up
+ip -net ns$i link set veth0 up
+done
+
+ip -net ns1 route add default via 10.0.1.1
+ip -net ns2 route add default via 10.0.2.1
+
+ip -net ns3 addr add 10.0.1.1/24 dev eth1
+ip -net ns3 addr add 10.0.3.1/24 dev veth0
+ip -net ns3 addr add 2001:1::1/64 dev eth1
+ip -net ns3 addr add 2001:3::1/64 dev veth0
+
+ip -net ns3 route add default via 10.0.3.10
+
+ip -net ns4 addr add 10.0.2.1/24 dev eth1
+ip -net ns4 addr add 10.0.3.10/24 dev veth0
+ip -net ns4 addr add 2001:2::1/64 dev eth1
+ip -net ns4 addr add 2001:3::10/64 dev veth0
+ip -net ns4 route add default via 10.0.3.1
+
+for j in 4 6; do
+	for i in 3 4;do
+		ip netns exec ns$i sysctl net.ipv$j.conf.eth1.forwarding=1 > /dev/null
+		ip netns exec ns$i sysctl net.ipv$j.conf.veth0.forwarding=1 > /dev/null
+	done
+done
+
+# abuse iptables rule counter to check if ping matches a policy
+ip netns exec ns3 iptables -p icmp -A FORWARD -m policy --dir out --pol ipsec
+ip netns exec ns4 iptables -p icmp -A FORWARD -m policy --dir out --pol ipsec
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not insert iptables rule"
+	for i in 1 2 3 4;do ip netns del ns$i;done
+	exit $ksft_skip
+fi
+
+#          localip  remoteip  localnet    remotenet
+do_esp ns3 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2
+do_esp ns3 dead:3::1 dead:3::10 dead:1::/64 dead:2::/64 $SPI1 $SPI2
+do_esp ns4 10.0.3.10 10.0.3.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1
+do_esp ns4 dead:3::10 dead:3::1 dead:2::/64 dead:1::/64 $SPI2 $SPI1
+
+do_dummies4 ns3
+do_dummies6 ns4
+
+do_esp_policy_get_check ns3 10.0.1.0/24 10.0.2.0/24
+do_esp_policy_get_check ns4 10.0.2.0/24 10.0.1.0/24
+do_esp_policy_get_check ns3 dead:1::/64 dead:2::/64
+do_esp_policy_get_check ns4 dead:2::/64 dead:1::/64
+
+# ping to .254 should use ipsec, exception is not installed.
+check_xfrm 1 254
+if [ $? -ne 0 ]; then
+	echo "FAIL: expected ping to .254 to use ipsec tunnel"
+	ret=1
+else
+	echo "PASS: policy before exception matches"
+fi
+
+# installs exceptions
+#                localip  remoteip   encryptdst  plaindst
+do_exception ns3 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28
+do_exception ns4 10.0.3.10 10.0.3.1 10.0.1.253 10.0.1.240/28
+
+do_exception ns3 dead:3::1 dead:3::10 dead:2::fd  dead:2:f0::/96
+do_exception ns4 dead:3::10 dead:3::1 dead:1::fd  dead:1:f0::/96
+
+# ping to .254 should now be excluded from the tunnel
+check_xfrm 0 254
+if [ $? -ne 0 ]; then
+	echo "FAIL: expected ping to .254 to fail"
+	ret=1
+else
+	echo "PASS: ping to .254 bypassed ipsec tunnel"
+fi
+
+# ping to .253 should use use ipsec due to direct policy exception.
+check_xfrm 1 253
+if [ $? -ne 0 ]; then
+	echo "FAIL: expected ping to .253 to use ipsec tunnel"
+	ret=1
+else
+	echo "PASS: direct policy matches"
+fi
+
+# ping to .2 should use ipsec.
+check_xfrm 1 2
+if [ $? -ne 0 ]; then
+	echo "FAIL: expected ping to .2 to use ipsec tunnel"
+	ret=1
+else
+	echo "PASS: policy matches"
+fi
+
+for i in 1 2 3 4;do ip netns del ns$i;done
+
+exit $ret