From 875e8939953483d856de226b72d14c6a000f9457 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 4 Dec 2018 08:15:10 +0000 Subject: skbuff: Rename 'offload_mr_fwd_mark' to 'offload_l3_fwd_mark' Commit abf4bb6b63d0 ("skbuff: Add the offload_mr_fwd_mark field") added the 'offload_mr_fwd_mark' field to indicate that a packet has already undergone L3 multicast routing by a capable device. The field is used to prevent the kernel from forwarding a packet through a netdev through which the device has already forwarded the packet. Currently, no unicast packet is routed by both the device and the kernel, but this is about to change by subsequent patches and we need to be able to mark such packets, so that they will no be forwarded twice. Instead of adding yet another field to 'struct sk_buff', we can just rename 'offload_mr_fwd_mark' to 'offload_l3_fwd_mark', as a packet either has a multicast or a unicast destination IP. While at it, add a comment about both 'offload_fwd_mark' and 'offload_l3_fwd_mark'. Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 10 +++++----- include/linux/skbuff.h | 4 +++- net/core/skbuff.c | 2 +- net/ipv4/ipmr.c | 2 +- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index c293ff1eed63..920085fbbf2a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3554,10 +3554,10 @@ static void mlxsw_sp_rx_listener_mark_func(struct sk_buff *skb, u8 local_port, return mlxsw_sp_rx_listener_no_mark_func(skb, local_port, priv); } -static void mlxsw_sp_rx_listener_mr_mark_func(struct sk_buff *skb, +static void mlxsw_sp_rx_listener_l3_mark_func(struct sk_buff *skb, u8 local_port, void *priv) { - skb->offload_mr_fwd_mark = 1; + skb->offload_l3_fwd_mark = 1; skb->offload_fwd_mark = 1; return mlxsw_sp_rx_listener_no_mark_func(skb, local_port, priv); } @@ -3605,8 +3605,8 @@ out: MLXSW_RXL(mlxsw_sp_rx_listener_mark_func, _trap_id, _action, \ _is_ctrl, SP_##_trap_group, DISCARD) -#define MLXSW_SP_RXL_MR_MARK(_trap_id, _action, _trap_group, _is_ctrl) \ - MLXSW_RXL(mlxsw_sp_rx_listener_mr_mark_func, _trap_id, _action, \ +#define MLXSW_SP_RXL_L3_MARK(_trap_id, _action, _trap_group, _is_ctrl) \ + MLXSW_RXL(mlxsw_sp_rx_listener_l3_mark_func, _trap_id, _action, \ _is_ctrl, SP_##_trap_group, DISCARD) #define MLXSW_SP_EVENTL(_func, _trap_id) \ @@ -3683,7 +3683,7 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { MLXSW_SP_RXL_MARK(IPV6_PIM, TRAP_TO_CPU, PIM, false), MLXSW_SP_RXL_MARK(RPF, TRAP_TO_CPU, RPF, false), MLXSW_SP_RXL_MARK(ACL1, TRAP_TO_CPU, MULTICAST, false), - MLXSW_SP_RXL_MR_MARK(ACL2, TRAP_TO_CPU, MULTICAST, false), + MLXSW_SP_RXL_L3_MARK(ACL2, TRAP_TO_CPU, MULTICAST, false), /* NVE traps */ MLXSW_SP_RXL_MARK(NVE_ENCAP_ARP, TRAP_TO_CPU, ARP, false), }; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 75d50ab7997c..b1831a5ca173 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -616,6 +616,8 @@ typedef unsigned char *sk_buff_data_t; * @pkt_type: Packet class * @fclone: skbuff clone status * @ipvs_property: skbuff is owned by ipvs + * @offload_fwd_mark: Packet was L2-forwarded in hardware + * @offload_l3_fwd_mark: Packet was L3-forwarded in hardware * @tc_skip_classify: do not classify packet. set by IFB device * @tc_at_ingress: used within tc_classify to distinguish in/egress * @tc_redirected: packet was redirected by a tc action @@ -799,7 +801,7 @@ struct sk_buff { __u8 remcsum_offload:1; #ifdef CONFIG_NET_SWITCHDEV __u8 offload_fwd_mark:1; - __u8 offload_mr_fwd_mark:1; + __u8 offload_l3_fwd_mark:1; #endif #ifdef CONFIG_NET_CLS_ACT __u8 tc_skip_classify:1; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c78ce114537e..40552547c69a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4885,7 +4885,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) #ifdef CONFIG_NET_SWITCHDEV skb->offload_fwd_mark = 0; - skb->offload_mr_fwd_mark = 0; + skb->offload_l3_fwd_mark = 0; #endif if (!xnet) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a6defbec4f1b..5cbc749a50aa 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1802,7 +1802,7 @@ static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt, struct vif_device *out_vif = &mrt->vif_table[out_vifi]; struct vif_device *in_vif = &mrt->vif_table[in_vifi]; - if (!skb->offload_mr_fwd_mark) + if (!skb->offload_l3_fwd_mark) return false; if (!out_vif->dev_parent_id.id_len || !in_vif->dev_parent_id.id_len) return false; -- cgit v1.2.3-70-g09d2 From f839a6c92504cff92a10f522cf686b51ff18dd35 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 4 Dec 2018 08:15:11 +0000 Subject: net: Do not route unicast IP packets twice Packets marked with 'offload_l3_fwd_mark' were already forwarded by a capable device and should not be forwarded again by the kernel. Therefore, have the kernel consume them. The check is performed in ip{,6}_forward_finish() in order to allow the kernel to process such packets in ip{,6}_forward() and generate required exceptions. For example, ICMP redirects. Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- net/ipv4/ip_forward.c | 7 +++++++ net/ipv6/ip6_output.c | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 32662e9e5d21..06ee4696703c 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -69,6 +69,13 @@ static int ip_forward_finish(struct net *net, struct sock *sk, struct sk_buff *s __IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS); __IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len); +#ifdef CONFIG_NET_SWITCHDEV + if (skb->offload_l3_fwd_mark) { + consume_skb(skb); + return 0; + } +#endif + if (unlikely(opt->optlen)) ip_forward_options(skb); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index ec8c235ea891..6592a39e5ec1 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -378,6 +378,13 @@ static inline int ip6_forward_finish(struct net *net, struct sock *sk, __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len); +#ifdef CONFIG_NET_SWITCHDEV + if (skb->offload_l3_fwd_mark) { + consume_skb(skb); + return 0; + } +#endif + return dst_output(net, sk, skb); } -- cgit v1.2.3-70-g09d2 From 2f4f44946b748918de6345dd12d0c418c0206002 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 4 Dec 2018 08:15:12 +0000 Subject: mlxsw: spectrum: Mirror loopbacked packets instead of trapping them When the ASIC detects that a unicast packet is routed through the same router interface (RIF) from which it ingressed (iRIF == eRIF), it raises a trap called loopback error (LBERROR). Thus far, this trap was configured to send a sole copy of the packet to the CPU so that ICMP redirect packets could be potentially generated by the kernel. This is problematic as the CPU cannot forward packets at 3.2Tb/s and there are scenarios (e.g., "one-armed router") where iRIF == eRIF is not an exception. Solve this by changing the trap to send a copy of the packet to the CPU. To prevent the kernel from forwarding the packet again, it is marked with 'offload_l3_fwd_mark'. The trap is configured in a trap group of its own with a dedicated policer in order not to prevent packets trapped by other traps from reaching the CPU. Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 1 + drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 5c8a38f61a93..b9c37b0bb175 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -5072,6 +5072,7 @@ enum mlxsw_reg_htgt_trap_group { MLXSW_REG_HTGT_TRAP_GROUP_SP_EVENT, MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD, MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND, + MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR, }; /* reg_htgt_trap_group diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 920085fbbf2a..0f37b203103a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3639,7 +3639,7 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { /* L3 traps */ MLXSW_SP_RXL_MARK(MTUERROR, TRAP_TO_CPU, ROUTER_EXP, false), MLXSW_SP_RXL_MARK(TTLERROR, TRAP_TO_CPU, ROUTER_EXP, false), - MLXSW_SP_RXL_MARK(LBERROR, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_L3_MARK(LBERROR, MIRROR_TO_CPU, LBERROR, false), MLXSW_SP_RXL_MARK(IP2ME, TRAP_TO_CPU, IP2ME, false), MLXSW_SP_RXL_MARK(IPV6_UNSPECIFIED_ADDRESS, TRAP_TO_CPU, ROUTER_EXP, false), @@ -3713,6 +3713,7 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF: case MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM: case MLXSW_REG_HTGT_TRAP_GROUP_SP_RPF: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR: rate = 128; burst_size = 7; break; @@ -3798,6 +3799,7 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core) case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE: case MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR: priority = 1; tc = 1; break; -- cgit v1.2.3-70-g09d2 From b6f153d3e5a5bfbda17b997bd9e258143aa11809 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 4 Dec 2018 08:15:14 +0000 Subject: selftests: mlxsw: Add one-armed router test Construct a "one-armed router" topology and test that packets are forwarded by the ASIC and that a copy of the packet is sent to the kernel, which does not forward the packet again. Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../drivers/net/mlxsw/one_armed_router.sh | 259 +++++++++++++++++++++ 1 file changed, 259 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh b/tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh new file mode 100755 index 000000000000..f02d83e94576 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh @@ -0,0 +1,259 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test a "one-armed router" [1] scenario. Packets forwarded between H1 and H2 +# should be forwarded by the ASIC, but also trapped so that ICMP redirect +# packets could be potentially generated. +# +# 1. https://en.wikipedia.org/wiki/One-armed_router +# +# +---------------------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 192.0.2.1/24 | +# | | 2001:db8:1::1/64 | +# | | | +# | | default via 192.0.2.2 | +# | | default via 2001:db8:1::2 | +# +----|----------------------------+ +# | +# +----|----------------------------------------------------------------------+ +# | SW | | +# | +--|--------------------------------------------------------------------+ | +# | | + $swp1 BR0 (802.1d) | | +# | | | | +# | | 192.0.2.2/24 | | +# | | 2001:db8:1::2/64 | | +# | | 198.51.100.2/24 | | +# | | 2001:db8:2::2/64 | | +# | | | | +# | | + $swp2 | | +# | +--|--------------------------------------------------------------------+ | +# | | | +# +----|----------------------------------------------------------------------+ +# | +# +----|----------------------------+ +# | | default via 198.51.100.2 | +# | | default via 2001:db8:2::2 | +# | | | +# | | 2001:db8:2::1/64 | +# | | 198.51.100.1/24 | +# | + $h2 | +# | H2 (vrf) | +# +---------------------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS="ping_ipv4 ping_ipv6 fwd_mark_ipv4 fwd_mark_ipv6" +NUM_NETIFS=4 +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64 + + ip -4 route add default vrf v$h1 nexthop via 192.0.2.2 + ip -6 route add default vrf v$h1 nexthop via 2001:db8:1::2 +} + +h1_destroy() +{ + ip -6 route del default vrf v$h1 nexthop via 2001:db8:1::2 + ip -4 route del default vrf v$h1 nexthop via 192.0.2.2 + + simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 198.51.100.1/24 2001:db8:2::1/64 + + ip -4 route add default vrf v$h2 nexthop via 198.51.100.2 + ip -6 route add default vrf v$h2 nexthop via 2001:db8:2::2 +} + +h2_destroy() +{ + ip -6 route del default vrf v$h2 nexthop via 2001:db8:2::2 + ip -4 route del default vrf v$h2 nexthop via 198.51.100.2 + + simple_if_fini $h2 198.51.100.1/24 2001:db8:2::1/64 +} + +switch_create() +{ + ip link add name br0 type bridge mcast_snooping 0 + ip link set dev br0 up + + ip link set dev $swp1 master br0 + ip link set dev $swp1 up + ip link set dev $swp2 master br0 + ip link set dev $swp2 up + + tc qdisc add dev $swp1 clsact + tc qdisc add dev $swp2 clsact + + __addr_add_del br0 add 192.0.2.2/24 2001:db8:1::2/64 + __addr_add_del br0 add 198.51.100.2/24 2001:db8:2::2/64 +} + +switch_destroy() +{ + __addr_add_del br0 del 198.51.100.2/24 2001:db8:2::2/64 + __addr_add_del br0 del 192.0.2.2/24 2001:db8:1::2/64 + + tc qdisc del dev $swp2 clsact + tc qdisc del dev $swp1 clsact + + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + ip link set dev br0 down + ip link del dev br0 +} + +ping_ipv4() +{ + ping_test $h1 198.51.100.1 ": h1->h2" +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::1 ": h1->h2" +} + +fwd_mark_ipv4() +{ + # Transmit packets from H1 to H2 and make sure they are trapped at + # swp1 due to loopback error, but only forwarded by the ASIC through + # swp2 + + tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \ + skip_hw dst_ip 198.51.100.1 ip_proto udp dst_port 52768 \ + action pass + + tc filter add dev $swp2 egress protocol ip pref 1 handle 101 flower \ + skip_hw dst_ip 198.51.100.1 ip_proto udp dst_port 52768 \ + action pass + + tc filter add dev $swp2 egress protocol ip pref 2 handle 102 flower \ + skip_sw dst_ip 198.51.100.1 ip_proto udp dst_port 52768 \ + action pass + + ip vrf exec v$h1 $MZ $h1 -c 10 -d 100msec -p 64 -A 192.0.2.1 \ + -B 198.51.100.1 -t udp dp=52768,sp=42768 -q + + RET=0 + + tc_check_packets "dev $swp1 ingress" 101 10 + check_err $? + + log_test "fwd mark: trapping IPv4 packets due to LBERROR" + + RET=0 + + tc_check_packets "dev $swp2 egress" 101 0 + check_err $? + + log_test "fwd mark: forwarding IPv4 packets in software" + + RET=0 + + tc_check_packets "dev $swp2 egress" 102 10 + check_err $? + + log_test "fwd mark: forwarding IPv4 packets in hardware" + + tc filter del dev $swp2 egress protocol ip pref 2 handle 102 flower + tc filter del dev $swp2 egress protocol ip pref 1 handle 101 flower + tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower +} + +fwd_mark_ipv6() +{ + tc filter add dev $swp1 ingress protocol ipv6 pref 1 handle 101 flower \ + skip_hw dst_ip 2001:db8:2::1 ip_proto udp dst_port 52768 \ + action pass + + tc filter add dev $swp2 egress protocol ipv6 pref 1 handle 101 flower \ + skip_hw dst_ip 2001:db8:2::1 ip_proto udp dst_port 52768 \ + action pass + + tc filter add dev $swp2 egress protocol ipv6 pref 2 handle 102 flower \ + skip_sw dst_ip 2001:db8:2::1 ip_proto udp dst_port 52768 \ + action pass + + ip vrf exec v$h1 $MZ $h1 -6 -c 10 -d 100msec -p 64 -A 2001:db8:1::1 \ + -B 2001:db8:2::1 -t udp dp=52768,sp=42768 -q + + RET=0 + + tc_check_packets "dev $swp1 ingress" 101 10 + check_err $? + + log_test "fwd mark: trapping IPv6 packets due to LBERROR" + + RET=0 + + tc_check_packets "dev $swp2 egress" 101 0 + check_err $? + + log_test "fwd mark: forwarding IPv6 packets in software" + + RET=0 + + tc_check_packets "dev $swp2 egress" 102 10 + check_err $? + + log_test "fwd mark: forwarding IPv6 packets in hardware" + + tc filter del dev $swp2 egress protocol ipv6 pref 2 handle 102 flower + tc filter del dev $swp2 egress protocol ipv6 pref 1 handle 101 flower + tc filter del dev $swp1 ingress protocol ipv6 pref 1 handle 101 flower +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + forwarding_enable + + sysctl_set net.ipv4.conf.all.accept_redirects 0 + sysctl_set net.ipv6.conf.all.accept_redirects 0 + + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + + sysctl_restore net.ipv6.conf.all.accept_redirects + sysctl_restore net.ipv4.conf.all.accept_redirects + + forwarding_restore + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS -- cgit v1.2.3-70-g09d2