summaryrefslogtreecommitdiff
path: root/net/sched
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-06-03 16:27:18 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-06-03 16:27:18 -0700
commitcb8e59cc87201af93dfbb6c3dccc8fcad72a09c2 (patch)
treea334db9022f89654b777bbce8c4c6632e65b9031 /net/sched
parent2e63f6ce7ed2c4ff83ba30ad9ccad422289a6c63 (diff)
parent065fcfd49763ec71ae345bb5c5a74f961031e70e (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from David Miller: 1) Allow setting bluetooth L2CAP modes via socket option, from Luiz Augusto von Dentz. 2) Add GSO partial support to igc, from Sasha Neftin. 3) Several cleanups and improvements to r8169 from Heiner Kallweit. 4) Add IF_OPER_TESTING link state and use it when ethtool triggers a device self-test. From Andrew Lunn. 5) Start moving away from custom driver versions, use the globally defined kernel version instead, from Leon Romanovsky. 6) Support GRO vis gro_cells in DSA layer, from Alexander Lobakin. 7) Allow hard IRQ deferral during NAPI, from Eric Dumazet. 8) Add sriov and vf support to hinic, from Luo bin. 9) Support Media Redundancy Protocol (MRP) in the bridging code, from Horatiu Vultur. 10) Support netmap in the nft_nat code, from Pablo Neira Ayuso. 11) Allow UDPv6 encapsulation of ESP in the ipsec code, from Sabrina Dubroca. Also add ipv6 support for espintcp. 12) Lots of ReST conversions of the networking documentation, from Mauro Carvalho Chehab. 13) Support configuration of ethtool rxnfc flows in bcmgenet driver, from Doug Berger. 14) Allow to dump cgroup id and filter by it in inet_diag code, from Dmitry Yakunin. 15) Add infrastructure to export netlink attribute policies to userspace, from Johannes Berg. 16) Several optimizations to sch_fq scheduler, from Eric Dumazet. 17) Fallback to the default qdisc if qdisc init fails because otherwise a packet scheduler init failure will make a device inoperative. From Jesper Dangaard Brouer. 18) Several RISCV bpf jit optimizations, from Luke Nelson. 19) Correct the return type of the ->ndo_start_xmit() method in several drivers, it's netdev_tx_t but many drivers were using 'int'. From Yunjian Wang. 20) Add an ethtool interface for PHY master/slave config, from Oleksij Rempel. 21) Add BPF iterators, from Yonghang Song. 22) Add cable test infrastructure, including ethool interfaces, from Andrew Lunn. Marvell PHY driver is the first to support this facility. 
23) Remove zero-length arrays all over, from Gustavo A. R. Silva. 24) Calculate and maintain an explicit frame size in XDP, from Jesper Dangaard Brouer. 25) Add CAP_BPF, from Alexei Starovoitov. 26) Support terse dumps in the packet scheduler, from Vlad Buslov. 27) Support XDP_TX bulking in dpaa2 driver, from Ioana Ciornei. 28) Add devm_register_netdev(), from Bartosz Golaszewski. 29) Minimize qdisc resets, from Cong Wang. 30) Get rid of kernel_getsockopt and kernel_setsockopt in order to eliminate set_fs/get_fs calls. From Christoph Hellwig. * git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (2517 commits) selftests: net: ip_defrag: ignore EPERM net_failover: fixed rollback in net_failover_open() Revert "tipc: Fix potential tipc_aead refcnt leak in tipc_crypto_rcv" Revert "tipc: Fix potential tipc_node refcnt leak in tipc_rcv" vmxnet3: allow rx flow hash ops only when rss is enabled hinic: add set_channels ethtool_ops support selftests/bpf: Add a default $(CXX) value tools/bpf: Don't use $(COMPILE.c) bpf, selftests: Use bpf_probe_read_kernel s390/bpf: Use bcr 0,%0 as tail call nop filler s390/bpf: Maintain 8-byte stack alignment selftests/bpf: Fix verifier test selftests/bpf: Fix sample_cnt shared between two threads bpf, selftests: Adapt cls_redirect to call csum_level helper bpf: Add csum_level helper for fixing up csum levels bpf: Fix up bpf_skb_adjust_room helper's skb csum setting sfc: add missing annotation for efx_ef10_try_update_nic_stats_vf() crypto/chtls: IPv6 support for inline TLS Crypto/chcr: Fixes a coccinile check error Crypto/chcr: Fixes compilations warnings ...
Diffstat (limited to 'net/sched')
-rw-r--r--net/sched/Kconfig12
-rw-r--r--net/sched/Makefile1
-rw-r--r--net/sched/act_api.c43
-rw-r--r--net/sched/act_ct.c2
-rw-r--r--net/sched/act_gate.c639
-rw-r--r--net/sched/cls_api.c260
-rw-r--r--net/sched/cls_flower.c350
-rw-r--r--net/sched/em_ipt.c2
-rw-r--r--net/sched/sch_api.c3
-rw-r--r--net/sched/sch_cake.c65
-rw-r--r--net/sched/sch_choke.c6
-rw-r--r--net/sched/sch_fq.c143
-rw-r--r--net/sched/sch_generic.c109
-rw-r--r--net/sched/sch_red.c9
14 files changed, 1361 insertions, 283 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index bfbefb7bff9d..2f20073f4f84 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -981,6 +981,18 @@ config NET_ACT_CT
To compile this code as a module, choose M here: the
module will be called act_ct.
+config NET_ACT_GATE
+ tristate "Frame gate entry list control tc action"
+ depends on NET_CLS_ACT
+ help
+ Say Y here to let a gate entry list control the ingress flow, so
+ that frames are passed during specific time slots and dropped
+ during the other time slots.
+
+ If unsure, say N.
+ To compile this code as a module, choose M here: the
+ module will be called act_gate.
+
config NET_IFE_SKBMARK
tristate "Support to encoding decoding skb mark on IFE action"
depends on NET_ACT_IFE
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 31c367a6cd09..66bbf9a98f9e 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -30,6 +30,7 @@ obj-$(CONFIG_NET_IFE_SKBPRIO) += act_meta_skbprio.o
obj-$(CONFIG_NET_IFE_SKBTCINDEX) += act_meta_skbtcindex.o
obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o
obj-$(CONFIG_NET_ACT_CT) += act_ct.o
+obj-$(CONFIG_NET_ACT_GATE) += act_gate.o
obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o
obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o
obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index df4560909157..8ac7eb0a8309 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -766,12 +766,10 @@ tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
return a->ops->dump(skb, a, bind, ref);
}
-int
-tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+static int
+tcf_action_dump_terse(struct sk_buff *skb, struct tc_action *a)
{
- int err = -EINVAL;
unsigned char *b = skb_tail_pointer(skb);
- struct nlattr *nest;
struct tc_cookie *cookie;
if (nla_put_string(skb, TCA_KIND, a->ops->kind))
@@ -789,6 +787,23 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
}
rcu_read_unlock();
+ return 0;
+
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+int
+tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+{
+ int err = -EINVAL;
+ unsigned char *b = skb_tail_pointer(skb);
+ struct nlattr *nest;
+
+ if (tcf_action_dump_terse(skb, a))
+ goto nla_put_failure;
+
if (a->hw_stats != TCA_ACT_HW_STATS_ANY &&
nla_put_bitfield32(skb, TCA_ACT_HW_STATS,
a->hw_stats, TCA_ACT_HW_STATS_ANY))
@@ -820,7 +835,7 @@ nla_put_failure:
EXPORT_SYMBOL(tcf_action_dump_1);
int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[],
- int bind, int ref)
+ int bind, int ref, bool terse)
{
struct tc_action *a;
int err = -EINVAL, i;
@@ -831,7 +846,8 @@ int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[],
nest = nla_nest_start_noflag(skb, i + 1);
if (nest == NULL)
goto nla_put_failure;
- err = tcf_action_dump_1(skb, a, bind, ref);
+ err = terse ? tcf_action_dump_terse(skb, a) :
+ tcf_action_dump_1(skb, a, bind, ref);
if (err < 0)
goto errout;
nla_nest_end(skb, nest);
@@ -876,19 +892,14 @@ static u8 tcf_action_hw_stats_get(struct nlattr *hw_stats_attr)
return hw_stats_bf.value;
}
-static const u32 tca_act_flags_allowed = TCA_ACT_FLAGS_NO_PERCPU_STATS;
-static const u32 tca_act_hw_stats_allowed = TCA_ACT_HW_STATS_ANY;
-
static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = {
[TCA_ACT_KIND] = { .type = NLA_STRING },
[TCA_ACT_INDEX] = { .type = NLA_U32 },
[TCA_ACT_COOKIE] = { .type = NLA_BINARY,
.len = TC_COOKIE_MAX_SIZE },
[TCA_ACT_OPTIONS] = { .type = NLA_NESTED },
- [TCA_ACT_FLAGS] = { .type = NLA_BITFIELD32,
- .validation_data = &tca_act_flags_allowed },
- [TCA_ACT_HW_STATS] = { .type = NLA_BITFIELD32,
- .validation_data = &tca_act_hw_stats_allowed },
+ [TCA_ACT_FLAGS] = NLA_POLICY_BITFIELD32(TCA_ACT_FLAGS_NO_PERCPU_STATS),
+ [TCA_ACT_HW_STATS] = NLA_POLICY_BITFIELD32(TCA_ACT_HW_STATS_ANY),
};
struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
@@ -1138,7 +1149,7 @@ static int tca_get_fill(struct sk_buff *skb, struct tc_action *actions[],
if (!nest)
goto out_nlmsg_trim;
- if (tcf_action_dump(skb, actions, bind, ref) < 0)
+ if (tcf_action_dump(skb, actions, bind, ref, false) < 0)
goto out_nlmsg_trim;
nla_nest_end(skb, nest);
@@ -1454,10 +1465,8 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
return ret;
}
-static u32 tcaa_root_flags_allowed = TCA_FLAG_LARGE_DUMP_ON;
static const struct nla_policy tcaa_policy[TCA_ROOT_MAX + 1] = {
- [TCA_ROOT_FLAGS] = { .type = NLA_BITFIELD32,
- .validation_data = &tcaa_root_flags_allowed },
+ [TCA_ROOT_FLAGS] = NLA_POLICY_BITFIELD32(TCA_FLAG_LARGE_DUMP_ON),
[TCA_ROOT_TIME_DELTA] = { .type = NLA_U32 },
};
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 20577355235a..e29f0f45d688 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -30,6 +30,7 @@
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#include <uapi/linux/netfilter/nf_nat.h>
@@ -539,6 +540,7 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
flow_offload_refresh(nf_ft, flow);
nf_conntrack_get(&ct->ct_general);
nf_ct_set(skb, ct, ctinfo);
+ nf_ct_acct_update(ct, dir, skb->len);
return true;
}
diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c
new file mode 100644
index 000000000000..9c628591f452
--- /dev/null
+++ b/net/sched/act_gate.c
@@ -0,0 +1,639 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Copyright 2020 NXP */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <net/act_api.h>
+#include <net/netlink.h>
+#include <net/pkt_cls.h>
+#include <net/tc_act/tc_gate.h>
+
+/* Id handed to net_generic() to find this action's per-netns state. */
+static unsigned int gate_net_id;
+/* Forward declaration: the ops table is defined after its callbacks. */
+static struct tc_action_ops act_gate_ops;
+
+/* Return the current time in the clock domain selected for this gate.
+ *
+ * tcf_gate_init() stores TK_OFFS_MAX in tk_offset for CLOCK_MONOTONIC, in
+ * which case the monotonic reading is returned unchanged.  Any other offset
+ * is converted from the monotonic reading with ktime_mono_to_any().
+ */
+static ktime_t gate_get_time(struct tcf_gate *gact)
+{
+	ktime_t mono = ktime_get();
+
+	if (gact->tk_offset == TK_OFFS_MAX)
+		return mono;
+
+	return ktime_mono_to_any(mono, gact->tk_offset);
+}
+
+/* Compute the instant at which the gate schedule should start running.
+ *
+ * If the configured base time is still in the future it is used as is.
+ * Otherwise the start is the next cycle boundary after "now", that is
+ * base + (n + 1) * cycle, where n is the number of whole cycles elapsed
+ * since the base time.
+ *
+ * Returns 0 and stores the result in *start, or -EFAULT when the cycle
+ * time is zero.
+ */
+static int gate_get_start_time(struct tcf_gate *gact, ktime_t *start)
+{
+	struct tcf_gate_params *param = &gact->param;
+	ktime_t cycle = param->tcfg_cycletime;
+	ktime_t now, base;
+	u64 n;
+
+	/* A zero cycle time is never usable.  Reject it before looking at
+	 * the base time: gate_timer_func() divides by the cycle time once
+	 * the schedule is running, even if the base time was in the future
+	 * when the action was configured.
+	 */
+	if (!cycle)
+		return -EFAULT;
+
+	base = ns_to_ktime(param->tcfg_basetime);
+	now = gate_get_time(gact);
+
+	if (ktime_after(base, now)) {
+		*start = base;
+		return 0;
+	}
+
+	n = div64_u64(ktime_sub_ns(now, base), cycle);
+	*start = ktime_add_ns(base, (n + 1) * cycle);
+	return 0;
+}
+
+/* Arm the gate hrtimer in absolute, soft-irq mode.
+ *
+ * The timer is programmed for the earlier of @start and the expiry already
+ * stored in the timer.  A stored expiry of zero is treated as KTIME_MAX so
+ * that @start is the value that gets used.
+ */
+static void gate_start_timer(struct tcf_gate *gact, ktime_t start)
+{
+	ktime_t pending = hrtimer_get_expires(&gact->hitimer);
+	ktime_t when;
+
+	if (!pending)
+		pending = KTIME_MAX;
+
+	when = min_t(ktime_t, start, pending);
+
+	hrtimer_start(&gact->hitimer, when, HRTIMER_MODE_ABS_SOFT);
+}
+
+/* hrtimer callback driving the gate schedule.
+ *
+ * Under tcf_lock it makes gact->next_entry the current entry (gate state,
+ * octet budget, close time), selects the following entry (wrapping to the
+ * first one at the end of the list) and stores the new expiry in the timer.
+ * Always returns HRTIMER_RESTART.
+ */
+static enum hrtimer_restart gate_timer_func(struct hrtimer *timer)
+{
+ struct tcf_gate *gact = container_of(timer, struct tcf_gate,
+ hitimer);
+ struct tcf_gate_params *p = &gact->param;
+ struct tcfg_gate_entry *next;
+ ktime_t close_time, now;
+
+ spin_lock(&gact->tcf_lock);
+
+ next = gact->next_entry;
+
+ /* cycle start, clear pending bit, clear total octets */
+ gact->current_gate_status = next->gate_state ? GATE_ACT_GATE_OPEN : 0;
+ gact->current_entry_octets = 0;
+ gact->current_max_octets = next->maxoctets;
+
+ /* This entry closes one interval after the previous close time. */
+ gact->current_close_time = ktime_add_ns(gact->current_close_time,
+ next->interval);
+
+ close_time = gact->current_close_time;
+
+ /* Advance to the following entry, wrapping around at the list end. */
+ if (list_is_last(&next->list, &p->entries))
+ next = list_first_entry(&p->entries,
+ struct tcfg_gate_entry, list);
+ else
+ next = list_next_entry(next, list);
+
+ now = gate_get_time(gact);
+
+ /* Already past the computed close time: expire at the next cycle
+ * boundary (base + (n + 1) * cycle) instead.
+ */
+ if (ktime_after(now, close_time)) {
+ ktime_t cycle, base;
+ u64 n;
+
+ cycle = p->tcfg_cycletime;
+ base = ns_to_ktime(p->tcfg_basetime);
+ n = div64_u64(ktime_sub_ns(now, base), cycle);
+ close_time = ktime_add_ns(base, (n + 1) * cycle);
+ }
+
+ gact->next_entry = next;
+
+ hrtimer_set_expires(&gact->hitimer, close_time);
+
+ spin_unlock(&gact->tcf_lock);
+
+ return HRTIMER_RESTART;
+}
+
+/* Per-packet handler of the gate action.
+ *
+ * Updates last-use time and byte/packet statistics, then decides under
+ * tcf_lock:
+ *  - GATE_ACT_PENDING set: return the configured control action;
+ *  - gate closed: count a drop and return TC_ACT_SHOT;
+ *  - gate open with a non-negative octet budget: account the packet length
+ *    and, once the budget is exceeded, count an overlimit plus a drop and
+ *    return TC_ACT_SHOT;
+ *  - otherwise return the configured control action.
+ */
+static int tcf_gate_act(struct sk_buff *skb, const struct tc_action *a,
+			struct tcf_result *res)
+{
+	struct tcf_gate *gate = to_gate(a);
+	bool shot = false;
+
+	spin_lock(&gate->tcf_lock);
+
+	tcf_lastuse_update(&gate->tcf_tm);
+	bstats_update(&gate->tcf_bstats, skb);
+
+	/* The pending bit is set by tcf_gate_init() and dropped by the
+	 * first timer run; until then no gate state is enforced.
+	 */
+	if (unlikely(gate->current_gate_status & GATE_ACT_PENDING))
+		goto unlock;
+
+	if (!(gate->current_gate_status & GATE_ACT_GATE_OPEN)) {
+		shot = true;
+	} else if (gate->current_max_octets >= 0) {
+		gate->current_entry_octets += qdisc_pkt_len(skb);
+		if (gate->current_entry_octets > gate->current_max_octets) {
+			gate->tcf_qstats.overlimits++;
+			shot = true;
+		}
+	}
+
+	if (shot)
+		gate->tcf_qstats.drops++;
+
+unlock:
+	spin_unlock(&gate->tcf_lock);
+
+	return shot ? TC_ACT_SHOT : gate->tcf_action;
+}
+
+/* Netlink policy for the attributes nested inside one schedule entry; used
+ * by parse_gate_entry().
+ */
+static const struct nla_policy entry_policy[TCA_GATE_ENTRY_MAX + 1] = {
+ [TCA_GATE_ENTRY_INDEX] = { .type = NLA_U32 },
+ [TCA_GATE_ENTRY_GATE] = { .type = NLA_FLAG },
+ [TCA_GATE_ENTRY_INTERVAL] = { .type = NLA_U32 },
+ [TCA_GATE_ENTRY_IPV] = { .type = NLA_S32 },
+ [TCA_GATE_ENTRY_MAX_OCTETS] = { .type = NLA_S32 },
+};
+
+/* Netlink policy for the top-level gate attributes parsed by
+ * tcf_gate_init().  TCA_GATE_PARMS must be exactly sizeof(struct tc_gate).
+ */
+static const struct nla_policy gate_policy[TCA_GATE_MAX + 1] = {
+ [TCA_GATE_PARMS] = { .len = sizeof(struct tc_gate),
+ .type = NLA_EXACT_LEN },
+ [TCA_GATE_PRIORITY] = { .type = NLA_S32 },
+ [TCA_GATE_ENTRY_LIST] = { .type = NLA_NESTED },
+ [TCA_GATE_BASE_TIME] = { .type = NLA_U64 },
+ [TCA_GATE_CYCLE_TIME] = { .type = NLA_U64 },
+ [TCA_GATE_CYCLE_TIME_EXT] = { .type = NLA_U64 },
+ [TCA_GATE_FLAGS] = { .type = NLA_U32 },
+ [TCA_GATE_CLOCKID] = { .type = NLA_S32 },
+};
+
+/* Populate @entry from the already parsed entry attributes in @tb.
+ *
+ * The interval is mandatory and must be non-zero; a missing or zero
+ * interval sets an extack message and yields -EINVAL.  IPV and max-octets
+ * default to -1 when their attribute is absent.  Returns 0 on success.
+ */
+static int fill_gate_entry(struct nlattr **tb, struct tcfg_gate_entry *entry,
+			   struct netlink_ext_ack *extack)
+{
+	struct nlattr *ival_attr = tb[TCA_GATE_ENTRY_INTERVAL];
+	struct nlattr *ipv_attr = tb[TCA_GATE_ENTRY_IPV];
+	struct nlattr *oct_attr = tb[TCA_GATE_ENTRY_MAX_OCTETS];
+	u32 interval;
+
+	entry->gate_state = nla_get_flag(tb[TCA_GATE_ENTRY_GATE]);
+
+	interval = ival_attr ? nla_get_u32(ival_attr) : 0;
+	if (!interval) {
+		NL_SET_ERR_MSG(extack, "Invalid interval for schedule entry");
+		return -EINVAL;
+	}
+
+	entry->interval = interval;
+	entry->ipv = ipv_attr ? nla_get_s32(ipv_attr) : -1;
+	entry->maxoctets = oct_attr ? nla_get_s32(oct_attr) : -1;
+
+	return 0;
+}
+
+/* Parse one nested schedule entry attribute @n into @entry.
+ *
+ * The nested attributes are validated against entry_policy; any parse
+ * failure is reported as -EINVAL.  @index is recorded as the entry's
+ * position and the remaining fields are filled by fill_gate_entry(), whose
+ * return value is passed through.
+ */
+static int parse_gate_entry(struct nlattr *n, struct tcfg_gate_entry *entry,
+			    int index, struct netlink_ext_ack *extack)
+{
+	struct nlattr *attrs[TCA_GATE_ENTRY_MAX + 1] = { };
+
+	if (nla_parse_nested(attrs, TCA_GATE_ENTRY_MAX, n, entry_policy,
+			     extack) < 0) {
+		NL_SET_ERR_MSG(extack, "Could not parse nested entry");
+		return -EINVAL;
+	}
+
+	entry->index = index;
+
+	return fill_gate_entry(attrs, entry, extack);
+}
+
+/* Unlink and free every schedule entry on @entries, leaving it empty. */
+static void release_entry_list(struct list_head *entries)
+{
+	while (!list_empty(entries)) {
+		struct tcfg_gate_entry *victim;
+
+		victim = list_first_entry(entries, struct tcfg_gate_entry,
+					  list);
+		list_del(&victim->list);
+		kfree(victim);
+	}
+}
+
+/* Parse the nested TCA_GATE_ENTRY_LIST attribute into schedule entries.
+ *
+ * Every nested attribute of type TCA_GATE_ONE_ENTRY is turned into a newly
+ * allocated tcfg_gate_entry; attributes of any other type are skipped after
+ * setting an extack message.  Entries are collected on a private list and
+ * appended to @sched->entries only once the whole attribute has been parsed,
+ * so a failure frees just the entries created by this call and never touches
+ * entries that were already on @sched->entries.
+ *
+ * Returns the number of entries parsed (also stored in
+ * @sched->num_entries), -EINVAL if @list_attr is NULL, -ENOMEM on
+ * allocation failure, or the error returned by parse_gate_entry().
+ */
+static int parse_gate_list(struct nlattr *list_attr,
+			   struct tcf_gate_params *sched,
+			   struct netlink_ext_ack *extack)
+{
+	struct tcfg_gate_entry *entry;
+	LIST_HEAD(parsed);
+	struct nlattr *n;
+	int err, rem;
+	int i = 0;
+
+	if (!list_attr)
+		return -EINVAL;
+
+	nla_for_each_nested(n, list_attr, rem) {
+		if (nla_type(n) != TCA_GATE_ONE_ENTRY) {
+			NL_SET_ERR_MSG(extack, "Attribute isn't type 'entry'");
+			continue;
+		}
+
+		entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+		if (!entry) {
+			NL_SET_ERR_MSG(extack, "Not enough memory for entry");
+			err = -ENOMEM;
+			goto release_list;
+		}
+
+		err = parse_gate_entry(n, entry, i, extack);
+		if (err < 0) {
+			kfree(entry);
+			goto release_list;
+		}
+
+		list_add_tail(&entry->list, &parsed);
+		i++;
+	}
+
+	/* Publish the new entries only after everything parsed cleanly. */
+	list_splice_tail(&parsed, &sched->entries);
+	sched->num_entries = i;
+
+	return i;
+
+release_list:
+	release_entry_list(&parsed);
+
+	return err;
+}
+
+/* Create a gate action or replace the configuration of an existing one.
+ *
+ * Parses the nested gate attributes in @nla, looks up or allocates the
+ * action for the index given in TCA_GATE_PARMS, and then, under tcf_lock,
+ * updates the schedule parameters, (re)initialises the hrtimer and arms it
+ * for the computed start time.
+ *
+ * Returns ACT_P_CREATED when a new action was created, 0 when an existing
+ * action was bound or updated, or a negative errno:
+ *  -EINVAL  missing @nla or TCA_GATE_PARMS, unsupported clock id, or an
+ *           empty entry list;
+ *  -EEXIST  the action exists and @ovr is not set;
+ *  other    errors propagated from the helpers called here.
+ * On every error path taken after the action was looked up or created, the
+ * reference obtained here is dropped again.
+ */
+static int tcf_gate_init(struct net *net, struct nlattr *nla,
+			 struct nlattr *est, struct tc_action **a,
+			 int ovr, int bind, bool rtnl_held,
+			 struct tcf_proto *tp, u32 flags,
+			 struct netlink_ext_ack *extack)
+{
+	struct tc_action_net *tn = net_generic(net, gate_net_id);
+	enum tk_offsets tk_offset = TK_OFFS_TAI;
+	struct nlattr *tb[TCA_GATE_MAX + 1];
+	struct tcf_chain *goto_ch = NULL;
+	struct tcf_gate_params *p;
+	s32 clockid = CLOCK_TAI;
+	struct tcf_gate *gact;
+	struct tc_gate *parm;
+	int ret = 0, err;
+	u64 basetime = 0;
+	u32 gflags = 0;
+	s32 prio = -1;
+	ktime_t start;
+	u32 index;
+
+	if (!nla)
+		return -EINVAL;
+
+	err = nla_parse_nested(tb, TCA_GATE_MAX, nla, gate_policy, extack);
+	if (err < 0)
+		return err;
+
+	if (!tb[TCA_GATE_PARMS])
+		return -EINVAL;
+
+	parm = nla_data(tb[TCA_GATE_PARMS]);
+	index = parm->index;
+
+	err = tcf_idr_check_alloc(tn, &index, a, bind);
+	if (err < 0)
+		return err;
+
+	/* Existing action and we were only asked to bind to it. */
+	if (err && bind)
+		return 0;
+
+	if (!err) {
+		ret = tcf_idr_create(tn, index, est, a,
+				     &act_gate_ops, bind, false, 0);
+		if (ret) {
+			tcf_idr_cleanup(tn, index);
+			return ret;
+		}
+
+		ret = ACT_P_CREATED;
+	} else if (!ovr) {
+		tcf_idr_release(*a, bind);
+		return -EEXIST;
+	}
+	if (ret == ACT_P_CREATED) {
+		/* tcfg_clockid == -1 tells tcf_gate_cleanup() that the timer
+		 * has not been initialised yet.
+		 */
+		to_gate(*a)->param.tcfg_clockid = -1;
+		INIT_LIST_HEAD(&(to_gate(*a)->param.entries));
+	}
+
+	if (tb[TCA_GATE_PRIORITY])
+		prio = nla_get_s32(tb[TCA_GATE_PRIORITY]);
+
+	if (tb[TCA_GATE_BASE_TIME])
+		basetime = nla_get_u64(tb[TCA_GATE_BASE_TIME]);
+
+	if (tb[TCA_GATE_FLAGS])
+		gflags = nla_get_u32(tb[TCA_GATE_FLAGS]);
+
+	if (tb[TCA_GATE_CLOCKID]) {
+		clockid = nla_get_s32(tb[TCA_GATE_CLOCKID]);
+		switch (clockid) {
+		case CLOCK_REALTIME:
+			tk_offset = TK_OFFS_REAL;
+			break;
+		case CLOCK_MONOTONIC:
+			tk_offset = TK_OFFS_MAX;
+			break;
+		case CLOCK_BOOTTIME:
+			tk_offset = TK_OFFS_BOOT;
+			break;
+		case CLOCK_TAI:
+			tk_offset = TK_OFFS_TAI;
+			break;
+		default:
+			NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
+			/* err still holds the non-negative result of
+			 * tcf_idr_check_alloc(); report a real error.
+			 */
+			err = -EINVAL;
+			goto release_idr;
+		}
+	}
+
+	err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+	if (err < 0)
+		goto release_idr;
+
+	gact = to_gate(*a);
+
+	spin_lock_bh(&gact->tcf_lock);
+	p = &gact->param;
+
+	/* An explicit cycle time of zero is not an error here: it falls
+	 * through to the "derive it from the entry intervals" step below.
+	 */
+	if (tb[TCA_GATE_CYCLE_TIME])
+		p->tcfg_cycletime = nla_get_u64(tb[TCA_GATE_CYCLE_TIME]);
+
+	if (tb[TCA_GATE_ENTRY_LIST]) {
+		err = parse_gate_list(tb[TCA_GATE_ENTRY_LIST], p, extack);
+		if (err < 0)
+			goto chain_put;
+	}
+
+	/* next_entry is taken from the head of this list below and
+	 * dereferenced by the timer, so an empty schedule must be rejected.
+	 */
+	if (list_empty(&p->entries)) {
+		NL_SET_ERR_MSG(extack, "Gate entry list is empty");
+		err = -EINVAL;
+		goto chain_put;
+	}
+
+	if (!p->tcfg_cycletime) {
+		struct tcfg_gate_entry *entry;
+		ktime_t cycle = 0;
+
+		/* Default cycle time: sum of all entry intervals. */
+		list_for_each_entry(entry, &p->entries, list)
+			cycle = ktime_add_ns(cycle, entry->interval);
+		p->tcfg_cycletime = cycle;
+	}
+
+	if (tb[TCA_GATE_CYCLE_TIME_EXT])
+		p->tcfg_cycletime_ext =
+			nla_get_u64(tb[TCA_GATE_CYCLE_TIME_EXT]);
+
+	p->tcfg_priority = prio;
+	p->tcfg_basetime = basetime;
+	p->tcfg_clockid = clockid;
+	p->tcfg_flags = gflags;
+
+	gact->tk_offset = tk_offset;
+	/* NOTE(review): on the replace path the timer may still be armed
+	 * when it is re-initialised here -- confirm whether it has to be
+	 * cancelled first.
+	 */
+	hrtimer_init(&gact->hitimer, clockid, HRTIMER_MODE_ABS_SOFT);
+	gact->hitimer.function = gate_timer_func;
+
+	err = gate_get_start_time(gact, &start);
+	if (err < 0) {
+		NL_SET_ERR_MSG(extack,
+			       "Internal error: failed get start time");
+		release_entry_list(&p->entries);
+		goto chain_put;
+	}
+
+	gact->current_close_time = start;
+	gact->current_gate_status = GATE_ACT_GATE_OPEN | GATE_ACT_PENDING;
+
+	gact->next_entry = list_first_entry(&p->entries,
+					    struct tcfg_gate_entry, list);
+
+	goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
+
+	gate_start_timer(gact, start);
+
+	spin_unlock_bh(&gact->tcf_lock);
+
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
+
+	if (ret == ACT_P_CREATED)
+		tcf_idr_insert(tn, *a);
+
+	return ret;
+
+chain_put:
+	spin_unlock_bh(&gact->tcf_lock);
+
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
+release_idr:
+	tcf_idr_release(*a, bind);
+	return err;
+}
+
+/* Action destructor: cancel the hrtimer unless tcfg_clockid still holds the
+ * -1 marker set at creation, then free all schedule entries.
+ */
+static void tcf_gate_cleanup(struct tc_action *a)
+{
+	struct tcf_gate *gate = to_gate(a);
+
+	if (gate->param.tcfg_clockid != -1)
+		hrtimer_cancel(&gate->hitimer);
+
+	release_entry_list(&gate->param.entries);
+}
+
+/* Emit one schedule entry as a nested TCA_GATE_ONE_ENTRY attribute.
+ *
+ * Returns the result of nla_nest_end() on success, -ENOSPC if the nest
+ * could not be started, or -1 (with the partial nest cancelled) if any
+ * attribute did not fit.
+ */
+static int dumping_entry(struct sk_buff *skb,
+			 struct tcfg_gate_entry *entry)
+{
+	struct nlattr *nest = nla_nest_start_noflag(skb, TCA_GATE_ONE_ENTRY);
+
+	if (!nest)
+		return -ENOSPC;
+
+	/* The gate flag is only present for entries with the gate open. */
+	if (nla_put_u32(skb, TCA_GATE_ENTRY_INDEX, entry->index) ||
+	    (entry->gate_state && nla_put_flag(skb, TCA_GATE_ENTRY_GATE)) ||
+	    nla_put_u32(skb, TCA_GATE_ENTRY_INTERVAL, entry->interval) ||
+	    nla_put_s32(skb, TCA_GATE_ENTRY_MAX_OCTETS, entry->maxoctets) ||
+	    nla_put_s32(skb, TCA_GATE_ENTRY_IPV, entry->ipv)) {
+		nla_nest_cancel(skb, nest);
+		return -1;
+	}
+
+	return nla_nest_end(skb, nest);
+}
+
+/* Dump the gate action configuration into @skb.
+ *
+ * Under tcf_lock, emits the tc_gate parameters, base/cycle/extension times,
+ * clock id, flags, priority, the nested entry list and the timestamps.
+ * Returns skb->len on success; on failure the message is trimmed back to
+ * where it started and -1 is returned.
+ */
+static int tcf_gate_dump(struct sk_buff *skb, struct tc_action *a,
+			 int bind, int ref)
+{
+	unsigned char *tail = skb_tail_pointer(skb);
+	struct tcf_gate *gate = to_gate(a);
+	struct tc_gate opt = {
+		.index    = gate->tcf_index,
+		.refcnt   = refcount_read(&gate->tcf_refcnt) - ref,
+		.bindcnt  = atomic_read(&gate->tcf_bindcnt) - bind,
+	};
+	struct tcf_gate_params *p = &gate->param;
+	struct tcfg_gate_entry *entry;
+	struct nlattr *nest;
+	struct tcf_t tm;
+
+	spin_lock_bh(&gate->tcf_lock);
+	opt.action = gate->tcf_action;
+
+	if (nla_put(skb, TCA_GATE_PARMS, sizeof(opt), &opt) ||
+	    nla_put_u64_64bit(skb, TCA_GATE_BASE_TIME,
+			      p->tcfg_basetime, TCA_GATE_PAD) ||
+	    nla_put_u64_64bit(skb, TCA_GATE_CYCLE_TIME,
+			      p->tcfg_cycletime, TCA_GATE_PAD) ||
+	    nla_put_u64_64bit(skb, TCA_GATE_CYCLE_TIME_EXT,
+			      p->tcfg_cycletime_ext, TCA_GATE_PAD) ||
+	    nla_put_s32(skb, TCA_GATE_CLOCKID, p->tcfg_clockid) ||
+	    nla_put_u32(skb, TCA_GATE_FLAGS, p->tcfg_flags) ||
+	    nla_put_s32(skb, TCA_GATE_PRIORITY, p->tcfg_priority))
+		goto nla_put_failure;
+
+	nest = nla_nest_start_noflag(skb, TCA_GATE_ENTRY_LIST);
+	if (!nest)
+		goto nla_put_failure;
+
+	list_for_each_entry(entry, &p->entries, list) {
+		if (dumping_entry(skb, entry) < 0)
+			goto nla_put_failure;
+	}
+
+	nla_nest_end(skb, nest);
+
+	tcf_tm_dump(&tm, &gate->tcf_tm);
+	if (nla_put_64bit(skb, TCA_GATE_TM, sizeof(tm), &tm, TCA_GATE_PAD))
+		goto nla_put_failure;
+	spin_unlock_bh(&gate->tcf_lock);
+
+	return skb->len;
+
+nla_put_failure:
+	spin_unlock_bh(&gate->tcf_lock);
+	nlmsg_trim(skb, tail);
+	return -1;
+}
+
+/* .walk callback: hand over to tcf_generic_walker() with this action's
+ * per-netns state.
+ */
+static int tcf_gate_walker(struct net *net, struct sk_buff *skb,
+			   struct netlink_callback *cb, int type,
+			   const struct tc_action_ops *ops,
+			   struct netlink_ext_ack *extack)
+{
+	struct tc_action_net *gate_net = net_generic(net, gate_net_id);
+
+	return tcf_generic_walker(gate_net, skb, cb, type, ops, extack);
+}
+
+/* .stats_update callback: fold externally reported byte/packet counts into
+ * the action statistics and move lastuse forward if @lastuse is newer.
+ */
+static void tcf_gate_stats_update(struct tc_action *a, u64 bytes, u32 packets,
+				  u64 lastuse, bool hw)
+{
+	struct tcf_t *stamp = &to_gate(a)->tcf_tm;
+
+	tcf_action_update_stats(a, bytes, packets, false, hw);
+	stamp->lastuse = max_t(u64, stamp->lastuse, lastuse);
+}
+
+/* .lookup callback: search this netns' gate actions for @index via
+ * tcf_idr_search().
+ */
+static int tcf_gate_search(struct net *net, struct tc_action **a, u32 index)
+{
+	struct tc_action_net *gate_net = net_generic(net, gate_net_id);
+
+	return tcf_idr_search(gate_net, a, index);
+}
+
+/* .get_fill_size callback: reports the netlink size of one struct tc_gate
+ * attribute.
+ * NOTE(review): tcf_gate_dump() emits further attributes that are not
+ * counted here -- confirm whether that is intended.
+ */
+static size_t tcf_gate_get_fill_size(const struct tc_action *act)
+{
+	size_t parms_len = nla_total_size(sizeof(struct tc_gate));
+
+	return parms_len;
+}
+
+/* Operations table registering the "gate" action and its callbacks. */
+static struct tc_action_ops act_gate_ops = {
+ .kind = "gate",
+ .id = TCA_ID_GATE,
+ .owner = THIS_MODULE,
+ .act = tcf_gate_act,
+ .dump = tcf_gate_dump,
+ .init = tcf_gate_init,
+ .cleanup = tcf_gate_cleanup,
+ .walk = tcf_gate_walker,
+ .stats_update = tcf_gate_stats_update,
+ .get_fill_size = tcf_gate_get_fill_size,
+ .lookup = tcf_gate_search,
+ .size = sizeof(struct tcf_gate),
+};
+
+/* Per-netns init: set up this namespace's gate action state through
+ * tc_action_net_init().
+ */
+static __net_init int gate_init_net(struct net *net)
+{
+	struct tc_action_net *gate_net = net_generic(net, gate_net_id);
+
+	return tc_action_net_init(net, gate_net, &act_gate_ops);
+}
+
+/* Batched per-netns exit: tear down gate action state for every namespace
+ * on @net_list via tc_action_net_exit().
+ */
+static void __net_exit gate_exit_net(struct list_head *net_list)
+{
+ tc_action_net_exit(net_list, gate_net_id);
+}
+
+/* Per-network-namespace hooks; each namespace gets a struct tc_action_net
+ * reachable through gate_net_id.
+ */
+static struct pernet_operations gate_net_ops = {
+ .init = gate_init_net,
+ .exit_batch = gate_exit_net,
+ .id = &gate_net_id,
+ .size = sizeof(struct tc_action_net),
+};
+
+/* Module init: register the gate action together with its pernet ops. */
+static int __init gate_init_module(void)
+{
+ return tcf_register_action(&act_gate_ops, &gate_net_ops);
+}
+
+/* Module exit: unregister the gate action and its pernet ops. */
+static void __exit gate_cleanup_module(void)
+{
+ tcf_unregister_action(&act_gate_ops, &gate_net_ops);
+}
+
+/* Module entry/exit points and license declaration. */
+module_init(gate_init_module);
+module_exit(gate_cleanup_module);
+MODULE_LICENSE("GPL v2");
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 0a7ecc292bd3..a00a203b2ef5 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -39,6 +39,7 @@
#include <net/tc_act/tc_skbedit.h>
#include <net/tc_act/tc_ct.h>
#include <net/tc_act/tc_mpls.h>
+#include <net/tc_act/tc_gate.h>
#include <net/flow_offload.h>
extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
@@ -620,96 +621,42 @@ static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held)
static int tcf_block_setup(struct tcf_block *block,
struct flow_block_offload *bo);
-static void tc_indr_block_cmd(struct net_device *dev, struct tcf_block *block,
- flow_indr_block_bind_cb_t *cb, void *cb_priv,
- enum flow_block_command command, bool ingress)
-{
- struct flow_block_offload bo = {
- .command = command,
- .binder_type = ingress ?
- FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS :
- FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS,
- .net = dev_net(dev),
- .block_shared = tcf_block_non_null_shared(block),
- };
- INIT_LIST_HEAD(&bo.cb_list);
-
- if (!block)
- return;
-
- bo.block = &block->flow_block;
-
- down_write(&block->cb_lock);
- cb(dev, cb_priv, TC_SETUP_BLOCK, &bo);
-
- tcf_block_setup(block, &bo);
- up_write(&block->cb_lock);
-}
-
-static struct tcf_block *tc_dev_block(struct net_device *dev, bool ingress)
+static void tcf_block_offload_init(struct flow_block_offload *bo,
+ struct net_device *dev,
+ enum flow_block_command command,
+ enum flow_block_binder_type binder_type,
+ struct flow_block *flow_block,
+ bool shared, struct netlink_ext_ack *extack)
{
- const struct Qdisc_class_ops *cops;
- const struct Qdisc_ops *ops;
- struct Qdisc *qdisc;
-
- if (!dev_ingress_queue(dev))
- return NULL;
-
- qdisc = dev_ingress_queue(dev)->qdisc_sleeping;
- if (!qdisc)
- return NULL;
-
- ops = qdisc->ops;
- if (!ops)
- return NULL;
-
- if (!ingress && !strcmp("ingress", ops->id))
- return NULL;
-
- cops = ops->cl_ops;
- if (!cops)
- return NULL;
-
- if (!cops->tcf_block)
- return NULL;
-
- return cops->tcf_block(qdisc,
- ingress ? TC_H_MIN_INGRESS : TC_H_MIN_EGRESS,
- NULL);
+ bo->net = dev_net(dev);
+ bo->command = command;
+ bo->binder_type = binder_type;
+ bo->block = flow_block;
+ bo->block_shared = shared;
+ bo->extack = extack;
+ INIT_LIST_HEAD(&bo->cb_list);
}
-static void tc_indr_block_get_and_cmd(struct net_device *dev,
- flow_indr_block_bind_cb_t *cb,
- void *cb_priv,
- enum flow_block_command command)
-{
- struct tcf_block *block;
-
- block = tc_dev_block(dev, true);
- tc_indr_block_cmd(dev, block, cb, cb_priv, command, true);
-
- block = tc_dev_block(dev, false);
- tc_indr_block_cmd(dev, block, cb, cb_priv, command, false);
-}
+static void tcf_block_unbind(struct tcf_block *block,
+ struct flow_block_offload *bo);
-static void tc_indr_block_call(struct tcf_block *block,
- struct net_device *dev,
- struct tcf_block_ext_info *ei,
- enum flow_block_command command,
- struct netlink_ext_ack *extack)
+static void tc_block_indr_cleanup(struct flow_block_cb *block_cb)
{
- struct flow_block_offload bo = {
- .command = command,
- .binder_type = ei->binder_type,
- .net = dev_net(dev),
- .block = &block->flow_block,
- .block_shared = tcf_block_shared(block),
- .extack = extack,
- };
- INIT_LIST_HEAD(&bo.cb_list);
+ struct tcf_block *block = block_cb->indr.data;
+ struct net_device *dev = block_cb->indr.dev;
+ struct netlink_ext_ack extack = {};
+ struct flow_block_offload bo;
- flow_indr_block_call(dev, &bo, command, TC_SETUP_BLOCK);
- tcf_block_setup(block, &bo);
+ tcf_block_offload_init(&bo, dev, FLOW_BLOCK_UNBIND,
+ block_cb->indr.binder_type,
+ &block->flow_block, tcf_block_shared(block),
+ &extack);
+ down_write(&block->cb_lock);
+ list_move(&block_cb->list, &bo.cb_list);
+ up_write(&block->cb_lock);
+ rtnl_lock();
+ tcf_block_unbind(block, &bo);
+ rtnl_unlock();
}
static bool tcf_block_offload_in_use(struct tcf_block *block)
@@ -726,17 +673,21 @@ static int tcf_block_offload_cmd(struct tcf_block *block,
struct flow_block_offload bo = {};
int err;
- bo.net = dev_net(dev);
- bo.command = command;
- bo.binder_type = ei->binder_type;
- bo.block = &block->flow_block;
- bo.block_shared = tcf_block_shared(block);
- bo.extack = extack;
- INIT_LIST_HEAD(&bo.cb_list);
+ tcf_block_offload_init(&bo, dev, command, ei->binder_type,
+ &block->flow_block, tcf_block_shared(block),
+ extack);
- err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
- if (err < 0)
+ if (dev->netdev_ops->ndo_setup_tc)
+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
+ else
+ err = flow_indr_dev_setup_offload(dev, TC_SETUP_BLOCK, block,
+ &bo, tc_block_indr_cleanup);
+
+ if (err < 0) {
+ if (err != -EOPNOTSUPP)
+ NL_SET_ERR_MSG(extack, "Driver ndo_setup_tc failed");
return err;
+ }
return tcf_block_setup(block, &bo);
}
@@ -749,13 +700,13 @@ static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
int err;
down_write(&block->cb_lock);
- if (!dev->netdev_ops->ndo_setup_tc)
- goto no_offload_dev_inc;
/* If tc offload feature is disabled and the block we try to bind
* to already has some offloaded filters, forbid to bind.
*/
- if (!tc_can_offload(dev) && tcf_block_offload_in_use(block)) {
+ if (dev->netdev_ops->ndo_setup_tc &&
+ !tc_can_offload(dev) &&
+ tcf_block_offload_in_use(block)) {
NL_SET_ERR_MSG(extack, "Bind to offloaded block failed as dev has offload disabled");
err = -EOPNOTSUPP;
goto err_unlock;
@@ -767,18 +718,15 @@ static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
if (err)
goto err_unlock;
- tc_indr_block_call(block, dev, ei, FLOW_BLOCK_BIND, extack);
up_write(&block->cb_lock);
return 0;
no_offload_dev_inc:
- if (tcf_block_offload_in_use(block)) {
- err = -EOPNOTSUPP;
+ if (tcf_block_offload_in_use(block))
goto err_unlock;
- }
+
err = 0;
block->nooffloaddevcnt++;
- tc_indr_block_call(block, dev, ei, FLOW_BLOCK_BIND, extack);
err_unlock:
up_write(&block->cb_lock);
return err;
@@ -791,10 +739,6 @@ static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
int err;
down_write(&block->cb_lock);
- tc_indr_block_call(block, dev, ei, FLOW_BLOCK_UNBIND, NULL);
-
- if (!dev->netdev_ops->ndo_setup_tc)
- goto no_offload_dev_dec;
err = tcf_block_offload_cmd(block, dev, ei, FLOW_BLOCK_UNBIND, NULL);
if (err == -EOPNOTSUPP)
goto no_offload_dev_dec;
@@ -1847,7 +1791,7 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb,
struct tcf_proto *tp, struct tcf_block *block,
struct Qdisc *q, u32 parent, void *fh,
u32 portid, u32 seq, u16 flags, int event,
- bool rtnl_held)
+ bool terse_dump, bool rtnl_held)
{
struct tcmsg *tcm;
struct nlmsghdr *nlh;
@@ -1874,6 +1818,14 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb,
goto nla_put_failure;
if (!fh) {
tcm->tcm_handle = 0;
+ } else if (terse_dump) {
+ if (tp->ops->terse_dump) {
+ if (tp->ops->terse_dump(net, tp, fh, skb, tcm,
+ rtnl_held) < 0)
+ goto nla_put_failure;
+ } else {
+ goto cls_op_not_supp;
+ }
} else {
if (tp->ops->dump &&
tp->ops->dump(net, tp, fh, skb, tcm, rtnl_held) < 0)
@@ -1884,6 +1836,7 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb,
out_nlmsg_trim:
nla_put_failure:
+cls_op_not_supp:
nlmsg_trim(skb, b);
return -1;
}
@@ -1904,7 +1857,7 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
n->nlmsg_seq, n->nlmsg_flags, event,
- rtnl_held) <= 0) {
+ false, rtnl_held) <= 0) {
kfree_skb(skb);
return -EINVAL;
}
@@ -1936,7 +1889,7 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER,
- rtnl_held) <= 0) {
+ false, rtnl_held) <= 0) {
NL_SET_ERR_MSG(extack, "Failed to build del event notification");
kfree_skb(skb);
return -EINVAL;
@@ -2497,6 +2450,7 @@ struct tcf_dump_args {
struct tcf_block *block;
struct Qdisc *q;
u32 parent;
+ bool terse_dump;
};
static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
@@ -2507,12 +2461,12 @@ static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
return tcf_fill_node(net, a->skb, tp, a->block, a->q, a->parent,
n, NETLINK_CB(a->cb->skb).portid,
a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
- RTM_NEWTFILTER, true);
+ RTM_NEWTFILTER, a->terse_dump, true);
}
static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
struct sk_buff *skb, struct netlink_callback *cb,
- long index_start, long *p_index)
+ long index_start, long *p_index, bool terse)
{
struct net *net = sock_net(skb->sk);
struct tcf_block *block = chain->block;
@@ -2541,7 +2495,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
if (tcf_fill_node(net, skb, tp, block, q, parent, NULL,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
- RTM_NEWTFILTER, true) <= 0)
+ RTM_NEWTFILTER, false, true) <= 0)
goto errout;
cb->args[1] = 1;
}
@@ -2557,6 +2511,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
arg.w.skip = cb->args[1] - 1;
arg.w.count = 0;
arg.w.cookie = cb->args[2];
+ arg.terse_dump = terse;
tp->ops->walk(tp, &arg.w, true);
cb->args[2] = arg.w.cookie;
cb->args[1] = arg.w.count + 1;
@@ -2570,6 +2525,10 @@ errout:
return false;
}
+/* Attribute policy handed to nlmsg_parse_deprecated() when parsing a filter
+ * dump request. Only TCA_DUMP_FLAGS is constrained here: it is a bitfield32
+ * attribute declared with TCA_DUMP_FLAGS_TERSE as its bit mask.
+ */
+static const struct nla_policy tcf_tfilter_dump_policy[TCA_MAX + 1] = {
+	[TCA_DUMP_FLAGS]	= NLA_POLICY_BITFIELD32(TCA_DUMP_FLAGS_TERSE),
+};
+
/* called with RTNL */
static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
{
@@ -2579,6 +2538,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
struct Qdisc *q = NULL;
struct tcf_block *block;
struct tcmsg *tcm = nlmsg_data(cb->nlh);
+ bool terse_dump = false;
long index_start;
long index;
u32 parent;
@@ -2588,10 +2548,17 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX,
- NULL, cb->extack);
+ tcf_tfilter_dump_policy, cb->extack);
if (err)
return err;
+ if (tca[TCA_DUMP_FLAGS]) {
+ struct nla_bitfield32 flags =
+ nla_get_bitfield32(tca[TCA_DUMP_FLAGS]);
+
+ terse_dump = flags.value & TCA_DUMP_FLAGS_TERSE;
+ }
+
if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
if (!block)
@@ -2649,7 +2616,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
nla_get_u32(tca[TCA_CHAIN]) != chain->index)
continue;
if (!tcf_chain_dump(chain, q, parent, skb, cb,
- index_start, &index)) {
+ index_start, &index, terse_dump)) {
tcf_chain_put(chain);
err = -EMSGSIZE;
break;
@@ -3152,7 +3119,8 @@ int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
if (nest == NULL)
goto nla_put_failure;
- if (tcf_action_dump(skb, exts->actions, 0, 0) < 0)
+ if (tcf_action_dump(skb, exts->actions, 0, 0, false)
+ < 0)
goto nla_put_failure;
nla_nest_end(skb, nest);
} else if (exts->police) {
@@ -3176,6 +3144,31 @@ nla_put_failure:
}
EXPORT_SYMBOL(tcf_exts_dump);
+/* Terse counterpart of tcf_exts_dump(): emit the actions attached to @exts
+ * into @skb inside a nest of type exts->action, calling tcf_action_dump()
+ * with its final argument set to true (tcf_exts_dump() passes false there;
+ * presumably this is the terse selector -- confirm against act_api).
+ *
+ * Returns 0 on success, and also when there is nothing to emit (no action
+ * attribute type set, no actions attached, or CONFIG_NET_CLS_ACT disabled).
+ * Returns -1 if the nest cannot be started or the action dump fails.
+ */
+int tcf_exts_terse_dump(struct sk_buff *skb, struct tcf_exts *exts)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	struct nlattr *nest;
+
+	if (!exts->action || !tcf_exts_has_actions(exts))
+		return 0;
+
+	nest = nla_nest_start_noflag(skb, exts->action);
+	if (!nest)
+		goto nla_put_failure;
+
+	if (tcf_action_dump(skb, exts->actions, 0, 0, true) < 0)
+		goto nla_put_failure;
+	nla_nest_end(skb, nest);
+	return 0;
+
+nla_put_failure:
+	/* Also reached with nest == NULL when the nest could not be started */
+	nla_nest_cancel(skb, nest);
+	return -1;
+#else
+	return 0;
+#endif
+}
+EXPORT_SYMBOL(tcf_exts_terse_dump);
int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
{
@@ -3523,6 +3516,27 @@ static void tcf_sample_get_group(struct flow_action_entry *entry,
#endif
}
+/* flow_action_entry destructor: free the gate entry list passed as @priv */
+static void tcf_gate_entry_destructor(void *priv)
+{
+	kfree(priv);
+}
+
+/* Attach the gate action's entry list to @entry.
+ *
+ * The list returned by tcf_gate_get_list() is stored in entry->gate.entries
+ * and registered as destructor_priv, so tcf_gate_entry_destructor() frees it
+ * when the entry's destructor is invoked.
+ *
+ * Returns 0 on success, -EINVAL if no list was returned.
+ */
+static int tcf_gate_get_entries(struct flow_action_entry *entry,
+				const struct tc_action *act)
+{
+	entry->gate.entries = tcf_gate_get_list(act);
+
+	if (entry->gate.entries) {
+		entry->destructor = tcf_gate_entry_destructor;
+		entry->destructor_priv = entry->gate.entries;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
static enum flow_action_hw_stats tc_act_hw_stats(u8 hw_stats)
{
if (WARN_ON_ONCE(hw_stats > TCA_ACT_HW_STATS_ANY))
@@ -3679,6 +3693,17 @@ int tc_setup_flow_action(struct flow_action *flow_action,
} else if (is_tcf_skbedit_priority(act)) {
entry->id = FLOW_ACTION_PRIORITY;
entry->priority = tcf_skbedit_priority(act);
+ } else if (is_tcf_gate(act)) {
+ entry->id = FLOW_ACTION_GATE;
+ entry->gate.index = tcf_gate_index(act);
+ entry->gate.prio = tcf_gate_prio(act);
+ entry->gate.basetime = tcf_gate_basetime(act);
+ entry->gate.cycletime = tcf_gate_cycletime(act);
+ entry->gate.cycletimeext = tcf_gate_cycletimeext(act);
+ entry->gate.num_entries = tcf_gate_num_entries(act);
+ err = tcf_gate_get_entries(entry, act);
+ if (err)
+ goto err_out;
} else {
err = -EOPNOTSUPP;
goto err_out_locked;
@@ -3739,11 +3764,6 @@ static struct pernet_operations tcf_net_ops = {
.size = sizeof(struct tcf_net),
};
-static struct flow_indr_block_entry block_entry = {
- .cb = tc_indr_block_get_and_cmd,
- .list = LIST_HEAD_INIT(block_entry.list),
-};
-
static int __init tc_filter_init(void)
{
int err;
@@ -3756,8 +3776,6 @@ static int __init tc_filter_init(void)
if (err)
goto err_register_pernet_subsys;
- flow_indr_add_block_cb(&block_entry);
-
rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL,
RTNL_FLAG_DOIT_UNLOCKED);
rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL,
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 74a0febcafb8..b2da37286082 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -272,14 +272,16 @@ static struct cls_fl_filter *fl_lookup_range(struct fl_flow_mask *mask,
return NULL;
}
-static struct cls_fl_filter *fl_lookup(struct fl_flow_mask *mask,
- struct fl_flow_key *mkey,
- struct fl_flow_key *key)
+static noinline_for_stack
+struct cls_fl_filter *fl_mask_lookup(struct fl_flow_mask *mask, struct fl_flow_key *key)
{
+ struct fl_flow_key mkey;
+
+ fl_set_masked_key(&mkey, key, mask);
if ((mask->flags & TCA_FLOWER_MASK_FLAGS_RANGE))
- return fl_lookup_range(mask, mkey, key);
+ return fl_lookup_range(mask, &mkey, key);
- return __fl_lookup(mask, mkey);
+ return __fl_lookup(mask, &mkey);
}
static u16 fl_ct_info_to_flower_map[] = {
@@ -299,7 +301,6 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
struct cls_fl_head *head = rcu_dereference_bh(tp->root);
- struct fl_flow_key skb_mkey;
struct fl_flow_key skb_key;
struct fl_flow_mask *mask;
struct cls_fl_filter *f;
@@ -319,9 +320,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
ARRAY_SIZE(fl_ct_info_to_flower_map));
skb_flow_dissect(skb, &mask->dissector, &skb_key, 0);
- fl_set_masked_key(&skb_mkey, &skb_key, mask);
-
- f = fl_lookup(mask, &skb_mkey, &skb_key);
+ f = fl_mask_lookup(mask, &skb_key);
if (f && !tc_skip_sw(f->flags)) {
*res = f->res;
return tcf_exts_exec(skb, &f->exts, res);
@@ -668,6 +667,7 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
[TCA_FLOWER_KEY_MPLS_BOS] = { .type = NLA_U8 },
[TCA_FLOWER_KEY_MPLS_TC] = { .type = NLA_U8 },
[TCA_FLOWER_KEY_MPLS_LABEL] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_MPLS_OPTS] = { .type = NLA_NESTED },
[TCA_FLOWER_KEY_TCP_FLAGS] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_TCP_FLAGS_MASK] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_IP_TOS] = { .type = NLA_U8 },
@@ -726,6 +726,15 @@ erspan_opt_policy[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX + 1] = {
[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_HWID] = { .type = NLA_U8 },
};
+/* Policy for the attributes nested inside one MPLS Label Stack Entry option.
+ * Depth, TTL, BOS and TC are single bytes; the label is a 32-bit attribute
+ * (range checks on BOS, TC and label are done by fl_set_key_mpls_lse()).
+ */
+static const struct nla_policy
+mpls_stack_entry_policy[TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX + 1] = {
+	[TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH]    = { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_MPLS_OPT_LSE_TTL]      = { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_MPLS_OPT_LSE_BOS]      = { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_MPLS_OPT_LSE_TC]       = { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL]    = { .type = NLA_U32 },
+};
+
static void fl_set_key_val(struct nlattr **tb,
void *val, int val_type,
void *mask, int mask_type, int len)
@@ -776,14 +785,157 @@ static int fl_set_key_port_range(struct nlattr **tb, struct fl_flow_key *key,
return 0;
}
+/* Parse one nested Label Stack Entry option into the MPLS key and mask.
+ *
+ * @nla_lse:  nested attribute describing a single Label Stack Entry
+ * @key_val:  MPLS key to fill in
+ * @key_mask: MPLS mask to fill in
+ * @extack:   extended ack used to report parsing errors
+ *
+ * The "depth" attribute is mandatory and selects which element of ->ls[] is
+ * written (depth N maps to index N - 1). TTL, BOS, TC and label are
+ * optional; each one that is present stores the value in the key and the
+ * field's full mask in the mask.
+ *
+ * Returns 0 on success, the nla_parse_nested() error, or -EINVAL when depth
+ * is missing or out of range, or a value has bits outside its field mask.
+ */
+static int fl_set_key_mpls_lse(const struct nlattr *nla_lse,
+			       struct flow_dissector_key_mpls *key_val,
+			       struct flow_dissector_key_mpls *key_mask,
+			       struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX + 1];
+	struct flow_dissector_mpls_lse *lse_mask;
+	struct flow_dissector_mpls_lse *lse_val;
+	u8 lse_index;
+	u8 depth;
+	int err;
+
+	err = nla_parse_nested(tb, TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX, nla_lse,
+			       mpls_stack_entry_policy, extack);
+	if (err < 0)
+		return err;
+
+	if (!tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH]) {
+		NL_SET_ERR_MSG(extack, "Missing MPLS option \"depth\"");
+		return -EINVAL;
+	}
+
+	depth = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH]);
+
+	/* LSE depth starts at 1, for consistency with terminology used by
+	 * RFC 3031 (section 3.9), where depth 0 refers to unlabeled packets.
+	 */
+	if (depth < 1 || depth > FLOW_DIS_MPLS_MAX) {
+		NL_SET_ERR_MSG_ATTR(extack,
+				    tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH],
+				    "Invalid MPLS depth");
+		return -EINVAL;
+	}
+	lse_index = depth - 1;
+
+	/* Record the entry as used in both key and mask before filling in
+	 * the individual fields.
+	 */
+	dissector_set_mpls_lse(key_val, lse_index);
+	dissector_set_mpls_lse(key_mask, lse_index);
+
+	lse_val = &key_val->ls[lse_index];
+	lse_mask = &key_mask->ls[lse_index];
+
+	if (tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_TTL]) {
+		lse_val->mpls_ttl = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_TTL]);
+		lse_mask->mpls_ttl = MPLS_TTL_MASK;
+	}
+	if (tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_BOS]) {
+		u8 bos = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_BOS]);
+
+		if (bos & ~MPLS_BOS_MASK) {
+			NL_SET_ERR_MSG_ATTR(extack,
+					    tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_BOS],
+					    "Bottom Of Stack (BOS) must be 0 or 1");
+			return -EINVAL;
+		}
+		lse_val->mpls_bos = bos;
+		lse_mask->mpls_bos = MPLS_BOS_MASK;
+	}
+	if (tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_TC]) {
+		u8 tc = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_TC]);
+
+		if (tc & ~MPLS_TC_MASK) {
+			NL_SET_ERR_MSG_ATTR(extack,
+					    tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_TC],
+					    "Traffic Class (TC) must be between 0 and 7");
+			return -EINVAL;
+		}
+		lse_val->mpls_tc = tc;
+		lse_mask->mpls_tc = MPLS_TC_MASK;
+	}
+	if (tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL]) {
+		u32 label = nla_get_u32(tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL]);
+
+		if (label & ~MPLS_LABEL_MASK) {
+			NL_SET_ERR_MSG_ATTR(extack,
+					    tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL],
+					    "Label must be between 0 and 1048575");
+			return -EINVAL;
+		}
+		lse_val->mpls_label = label;
+		lse_mask->mpls_label = MPLS_LABEL_MASK;
+	}
+
+	return 0;
+}
+
+/* Parse the MPLS options nest: a sequence of Label Stack Entry options,
+ * each handed to fl_set_key_mpls_lse().
+ *
+ * @nla_mpls_opts: the nested options attribute; must carry NLA_F_NESTED
+ * @key_val:       MPLS key to fill in
+ * @key_mask:      MPLS mask to fill in
+ * @extack:        extended ack used to report parsing errors
+ *
+ * Returns 0 on success, -EINVAL if the nested flag is missing, a nested
+ * attribute is not an LSE option, or bytes are left over after the last
+ * attribute, or the error returned by fl_set_key_mpls_lse().
+ */
+static int fl_set_key_mpls_opts(const struct nlattr *nla_mpls_opts,
+				struct flow_dissector_key_mpls *key_val,
+				struct flow_dissector_key_mpls *key_mask,
+				struct netlink_ext_ack *extack)
+{
+	struct nlattr *nla_lse;
+	int rem;
+	int err;
+
+	if (!(nla_mpls_opts->nla_type & NLA_F_NESTED)) {
+		NL_SET_ERR_MSG_ATTR(extack, nla_mpls_opts,
+				    "NLA_F_NESTED is missing");
+		return -EINVAL;
+	}
+
+	nla_for_each_nested(nla_lse, nla_mpls_opts, rem) {
+		if (nla_type(nla_lse) != TCA_FLOWER_KEY_MPLS_OPTS_LSE) {
+			NL_SET_ERR_MSG_ATTR(extack, nla_lse,
+					    "Invalid MPLS option type");
+			return -EINVAL;
+		}
+
+		err = fl_set_key_mpls_lse(nla_lse, key_val, key_mask, extack);
+		if (err < 0)
+			return err;
+	}
+	/* A non-zero remainder means trailing bytes that do not form a
+	 * complete attribute.
+	 */
+	if (rem) {
+		NL_SET_ERR_MSG(extack,
+			       "Bytes leftover after parsing MPLS options");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
static int fl_set_key_mpls(struct nlattr **tb,
struct flow_dissector_key_mpls *key_val,
struct flow_dissector_key_mpls *key_mask,
struct netlink_ext_ack *extack)
{
+ struct flow_dissector_mpls_lse *lse_mask;
+ struct flow_dissector_mpls_lse *lse_val;
+
+ if (tb[TCA_FLOWER_KEY_MPLS_OPTS]) {
+ if (tb[TCA_FLOWER_KEY_MPLS_TTL] ||
+ tb[TCA_FLOWER_KEY_MPLS_BOS] ||
+ tb[TCA_FLOWER_KEY_MPLS_TC] ||
+ tb[TCA_FLOWER_KEY_MPLS_LABEL]) {
+ NL_SET_ERR_MSG_ATTR(extack,
+ tb[TCA_FLOWER_KEY_MPLS_OPTS],
+ "MPLS label, Traffic Class, Bottom Of Stack and Time To Live must be encapsulated in the MPLS options attribute");
+ return -EBADMSG;
+ }
+
+ return fl_set_key_mpls_opts(tb[TCA_FLOWER_KEY_MPLS_OPTS],
+ key_val, key_mask, extack);
+ }
+
+ lse_val = &key_val->ls[0];
+ lse_mask = &key_mask->ls[0];
+
if (tb[TCA_FLOWER_KEY_MPLS_TTL]) {
- key_val->mpls_ttl = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_TTL]);
- key_mask->mpls_ttl = MPLS_TTL_MASK;
+ lse_val->mpls_ttl = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_TTL]);
+ lse_mask->mpls_ttl = MPLS_TTL_MASK;
+ dissector_set_mpls_lse(key_val, 0);
+ dissector_set_mpls_lse(key_mask, 0);
}
if (tb[TCA_FLOWER_KEY_MPLS_BOS]) {
u8 bos = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_BOS]);
@@ -794,8 +946,10 @@ static int fl_set_key_mpls(struct nlattr **tb,
"Bottom Of Stack (BOS) must be 0 or 1");
return -EINVAL;
}
- key_val->mpls_bos = bos;
- key_mask->mpls_bos = MPLS_BOS_MASK;
+ lse_val->mpls_bos = bos;
+ lse_mask->mpls_bos = MPLS_BOS_MASK;
+ dissector_set_mpls_lse(key_val, 0);
+ dissector_set_mpls_lse(key_mask, 0);
}
if (tb[TCA_FLOWER_KEY_MPLS_TC]) {
u8 tc = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_TC]);
@@ -806,8 +960,10 @@ static int fl_set_key_mpls(struct nlattr **tb,
"Traffic Class (TC) must be between 0 and 7");
return -EINVAL;
}
- key_val->mpls_tc = tc;
- key_mask->mpls_tc = MPLS_TC_MASK;
+ lse_val->mpls_tc = tc;
+ lse_mask->mpls_tc = MPLS_TC_MASK;
+ dissector_set_mpls_lse(key_val, 0);
+ dissector_set_mpls_lse(key_mask, 0);
}
if (tb[TCA_FLOWER_KEY_MPLS_LABEL]) {
u32 label = nla_get_u32(tb[TCA_FLOWER_KEY_MPLS_LABEL]);
@@ -818,8 +974,10 @@ static int fl_set_key_mpls(struct nlattr **tb,
"Label must be between 0 and 1048575");
return -EINVAL;
}
- key_val->mpls_label = label;
- key_mask->mpls_label = MPLS_LABEL_MASK;
+ lse_val->mpls_label = label;
+ lse_mask->mpls_label = MPLS_LABEL_MASK;
+ dissector_set_mpls_lse(key_val, 0);
+ dissector_set_mpls_lse(key_mask, 0);
}
return 0;
}
@@ -2218,35 +2376,132 @@ static int fl_dump_key_port_range(struct sk_buff *skb, struct fl_flow_key *key,
return 0;
}
+/* Emit the attributes describing one MPLS Label Stack Entry into @skb.
+ *
+ * @skb:       message being built; fl_dump_key_mpls_opts() opens the
+ *             enclosing LSE nest before calling this
+ * @mpls_key:  MPLS key holding the values to report
+ * @mpls_mask: MPLS mask; a field is emitted only when its mask is non-zero
+ * @lse_index: zero-based index into ->ls[], reported as a one-based depth
+ *
+ * Returns 0 on success or the error returned by nla_put_u8()/nla_put_u32().
+ */
+static int fl_dump_key_mpls_opt_lse(struct sk_buff *skb,
+				    struct flow_dissector_key_mpls *mpls_key,
+				    struct flow_dissector_key_mpls *mpls_mask,
+				    u8 lse_index)
+{
+	struct flow_dissector_mpls_lse *lse_mask = &mpls_mask->ls[lse_index];
+	struct flow_dissector_mpls_lse *lse_key = &mpls_key->ls[lse_index];
+	int err;
+
+	err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH,
+			 lse_index + 1);
+	if (err)
+		return err;
+
+	if (lse_mask->mpls_ttl) {
+		err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_OPT_LSE_TTL,
+				 lse_key->mpls_ttl);
+		if (err)
+			return err;
+	}
+	if (lse_mask->mpls_bos) {
+		err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_OPT_LSE_BOS,
+				 lse_key->mpls_bos);
+		if (err)
+			return err;
+	}
+	if (lse_mask->mpls_tc) {
+		err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_OPT_LSE_TC,
+				 lse_key->mpls_tc);
+		if (err)
+			return err;
+	}
+	if (lse_mask->mpls_label) {
+		/* The label is up to 20 bits wide and the attribute is
+		 * declared NLA_U32 in mpls_stack_entry_policy, so it must be
+		 * emitted as a u32; a u8 put would truncate the value.
+		 */
+		err = nla_put_u32(skb, TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL,
+				  lse_key->mpls_label);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/* Dump the MPLS match as an options nest containing one LSE nest for every
+ * Label Stack Entry flagged in mpls_mask->used_lses.
+ *
+ * Returns 0 on success, -EMSGSIZE if a nest cannot be started, or the error
+ * from fl_dump_key_mpls_opt_lse(). On failure every nest opened here is
+ * cancelled before returning.
+ */
+static int fl_dump_key_mpls_opts(struct sk_buff *skb,
+				 struct flow_dissector_key_mpls *mpls_key,
+				 struct flow_dissector_key_mpls *mpls_mask)
+{
+	struct nlattr *opts;
+	struct nlattr *lse;
+	u8 lse_index;
+	int err;
+
+	opts = nla_nest_start(skb, TCA_FLOWER_KEY_MPLS_OPTS);
+	if (!opts)
+		return -EMSGSIZE;
+
+	for (lse_index = 0; lse_index < FLOW_DIS_MPLS_MAX; lse_index++) {
+		/* Skip entries the mask does not mark as used */
+		if (!(mpls_mask->used_lses & 1 << lse_index))
+			continue;
+
+		lse = nla_nest_start(skb, TCA_FLOWER_KEY_MPLS_OPTS_LSE);
+		if (!lse) {
+			err = -EMSGSIZE;
+			goto err_opts;
+		}
+
+		err = fl_dump_key_mpls_opt_lse(skb, mpls_key, mpls_mask,
+					       lse_index);
+		if (err)
+			goto err_opts_lse;
+		nla_nest_end(skb, lse);
+	}
+	nla_nest_end(skb, opts);
+
+	return 0;
+
+	/* Unwind innermost nest first, then the outer options nest */
+err_opts_lse:
+	nla_nest_cancel(skb, lse);
+err_opts:
+	nla_nest_cancel(skb, opts);
+
+	return err;
+}
+
static int fl_dump_key_mpls(struct sk_buff *skb,
struct flow_dissector_key_mpls *mpls_key,
struct flow_dissector_key_mpls *mpls_mask)
{
+ struct flow_dissector_mpls_lse *lse_mask;
+ struct flow_dissector_mpls_lse *lse_key;
int err;
- if (!memchr_inv(mpls_mask, 0, sizeof(*mpls_mask)))
+ if (!mpls_mask->used_lses)
return 0;
- if (mpls_mask->mpls_ttl) {
+
+ lse_mask = &mpls_mask->ls[0];
+ lse_key = &mpls_key->ls[0];
+
+ /* For backward compatibility, don't use the MPLS nested attributes if
+ * the rule can be expressed using the old attributes.
+ */
+ if (mpls_mask->used_lses & ~1 ||
+ (!lse_mask->mpls_ttl && !lse_mask->mpls_bos &&
+ !lse_mask->mpls_tc && !lse_mask->mpls_label))
+ return fl_dump_key_mpls_opts(skb, mpls_key, mpls_mask);
+
+ if (lse_mask->mpls_ttl) {
err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_TTL,
- mpls_key->mpls_ttl);
+ lse_key->mpls_ttl);
if (err)
return err;
}
- if (mpls_mask->mpls_tc) {
+ if (lse_mask->mpls_tc) {
err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_TC,
- mpls_key->mpls_tc);
+ lse_key->mpls_tc);
if (err)
return err;
}
- if (mpls_mask->mpls_label) {
+ if (lse_mask->mpls_label) {
err = nla_put_u32(skb, TCA_FLOWER_KEY_MPLS_LABEL,
- mpls_key->mpls_label);
+ lse_key->mpls_label);
if (err)
return err;
}
- if (mpls_mask->mpls_bos) {
+ if (lse_mask->mpls_bos) {
err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_BOS,
- mpls_key->mpls_bos);
+ lse_key->mpls_bos);
if (err)
return err;
}
@@ -2768,6 +3023,48 @@ nla_put_failure:
return -1;
}
+/* Terse dump of one flower filter: report only the handle, the filter flags
+ * (when non-zero) and the terse form of its actions, inside a TCA_OPTIONS
+ * nest.
+ *
+ * @fh is the filter to dump; a NULL @fh emits nothing and returns skb->len.
+ * Returns skb->len on success, -1 if an attribute did not fit (the nest is
+ * cancelled in that case).
+ */
+static int fl_terse_dump(struct net *net, struct tcf_proto *tp, void *fh,
+			 struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
+{
+	struct cls_fl_filter *f = fh;
+	struct nlattr *nest;
+	bool skip_hw;
+
+	if (!f)
+		return skb->len;
+
+	t->tcm_handle = f->handle;
+
+	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
+	if (!nest)
+		goto nla_put_failure;
+
+	/* tp->lock is held only while f->flags is read and emitted */
+	spin_lock(&tp->lock);
+
+	skip_hw = tc_skip_hw(f->flags);
+
+	if (f->flags && nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags))
+		goto nla_put_failure_locked;
+
+	spin_unlock(&tp->lock);
+
+	/* Called after tp->lock has been dropped, using the value sampled
+	 * under the lock.
+	 */
+	if (!skip_hw)
+		fl_hw_update_stats(tp, f, rtnl_held);
+
+	if (tcf_exts_terse_dump(skb, &f->exts))
+		goto nla_put_failure;
+
+	nla_nest_end(skb, nest);
+
+	return skb->len;
+
+nla_put_failure_locked:
+	spin_unlock(&tp->lock);
+nla_put_failure:
+	nla_nest_cancel(skb, nest);
+	return -1;
+}
+
static int fl_tmplt_dump(struct sk_buff *skb, struct net *net, void *tmplt_priv)
{
struct fl_flow_tmplt *tmplt = tmplt_priv;
@@ -2832,6 +3129,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = {
.hw_add = fl_hw_add,
.hw_del = fl_hw_del,
.dump = fl_dump,
+ .terse_dump = fl_terse_dump,
.bind_class = fl_bind_class,
.tmplt_create = fl_tmplt_create,
.tmplt_destroy = fl_tmplt_destroy,
diff --git a/net/sched/em_ipt.c b/net/sched/em_ipt.c
index eecfe072c508..18755d29fd15 100644
--- a/net/sched/em_ipt.c
+++ b/net/sched/em_ipt.c
@@ -199,7 +199,7 @@ static void em_ipt_destroy(struct tcf_ematch *em)
im->match->destroy(&par);
}
module_put(im->match->me);
- kfree((void *)im);
+ kfree(im);
}
static int em_ipt_match(struct sk_buff *skb, struct tcf_ematch *em,
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 0d99df1e764d..9a3449b56bd6 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -32,6 +32,8 @@
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
+#include <trace/events/qdisc.h>
+
/*
Short review.
@@ -1283,6 +1285,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
}
qdisc_hash_add(sch, false);
+ trace_qdisc_create(ops, dev, parent);
return sch;
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 1496e87cd07b..60f8ae578819 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -584,26 +584,48 @@ static bool cobalt_should_drop(struct cobalt_vars *vars,
return drop;
}
+/* Overwrite the IPv4 addresses (and, when ports were dissected, the ports)
+ * in @keys with the values taken from the conntrack tuple of @skb.
+ *
+ * Returns true if any field of @keys was actually changed. Returns false
+ * otherwise, including for non-IPv4 packets, a failed tuple lookup, or a
+ * kernel built without CONFIG_NF_CONNTRACK.
+ */
-static void cake_update_flowkeys(struct flow_keys *keys,
+static bool cake_update_flowkeys(struct flow_keys *keys,
 				 const struct sk_buff *skb)
 {
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
 	struct nf_conntrack_tuple tuple = {};
-	bool rev = !skb->_nfct;
+	bool rev = !skb->_nfct, upd = false;
+	__be32 ip;
 	if (tc_skb_protocol(skb) != htons(ETH_P_IP))
-		return;
+		return false;
 	if (!nf_ct_get_tuple_skb(&tuple, skb))
-		return;
+		return false;
-	keys->addrs.v4addrs.src = rev ? tuple.dst.u3.ip : tuple.src.u3.ip;
-	keys->addrs.v4addrs.dst = rev ? tuple.src.u3.ip : tuple.dst.u3.ip;
+	ip = rev ? tuple.dst.u3.ip : tuple.src.u3.ip;
+	if (ip != keys->addrs.v4addrs.src) {
+		keys->addrs.v4addrs.src = ip;
+		upd = true;
+	}
+	ip = rev ? tuple.src.u3.ip : tuple.dst.u3.ip;
+	if (ip != keys->addrs.v4addrs.dst) {
+		keys->addrs.v4addrs.dst = ip;
+		upd = true;
+	}
 	if (keys->ports.ports) {
-		keys->ports.src = rev ? tuple.dst.u.all : tuple.src.u.all;
-		keys->ports.dst = rev ? tuple.src.u.all : tuple.dst.u.all;
+		__be16 port;
+
+		port = rev ? tuple.dst.u.all : tuple.src.u.all;
+		if (port != keys->ports.src) {
+			keys->ports.src = port;
+			upd = true;
+		}
+		port = rev ? tuple.src.u.all : tuple.dst.u.all;
+		if (port != keys->ports.dst) {
+			/* Store the conntrack port into the key, as done for
+			 * the source port and both addresses.
+			 */
+			keys->ports.dst = port;
+			upd = true;
+		}
 	}
+	return upd;
+#else
+	return false;
 #endif
 }
@@ -624,23 +646,36 @@ static bool cake_ddst(int flow_mode)
static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
int flow_mode, u16 flow_override, u16 host_override)
{
+ bool hash_flows = (!flow_override && !!(flow_mode & CAKE_FLOW_FLOWS));
+ bool hash_hosts = (!host_override && !!(flow_mode & CAKE_FLOW_HOSTS));
+ bool nat_enabled = !!(flow_mode & CAKE_FLOW_NAT_FLAG);
u32 flow_hash = 0, srchost_hash = 0, dsthost_hash = 0;
u16 reduced_hash, srchost_idx, dsthost_idx;
struct flow_keys keys, host_keys;
+ bool use_skbhash = skb->l4_hash;
if (unlikely(flow_mode == CAKE_FLOW_NONE))
return 0;
- /* If both overrides are set we can skip packet dissection entirely */
- if ((flow_override || !(flow_mode & CAKE_FLOW_FLOWS)) &&
- (host_override || !(flow_mode & CAKE_FLOW_HOSTS)))
+ /* If both overrides are set, or we can use the SKB hash and nat mode is
+ * disabled, we can skip packet dissection entirely. If nat mode is
+ * enabled there's another check below after doing the conntrack lookup.
+ */
+ if ((!hash_flows || (use_skbhash && !nat_enabled)) && !hash_hosts)
goto skip_hash;
skb_flow_dissect_flow_keys(skb, &keys,
FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
- if (flow_mode & CAKE_FLOW_NAT_FLAG)
- cake_update_flowkeys(&keys, skb);
+ /* Don't use the SKB hash if we change the lookup keys from conntrack */
+ if (nat_enabled && cake_update_flowkeys(&keys, skb))
+ use_skbhash = false;
+
+ /* If we can still use the SKB hash and don't need the host hash, we can
+ * skip the rest of the hashing procedure
+ */
+ if (use_skbhash && !hash_hosts)
+ goto skip_hash;
/* flow_hash_from_keys() sorts the addresses by value, so we have
* to preserve their order in a separate data structure to treat
@@ -679,12 +714,14 @@ static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
/* This *must* be after the above switch, since as a
* side-effect it sorts the src and dst addresses.
*/
- if (flow_mode & CAKE_FLOW_FLOWS)
+ if (hash_flows && !use_skbhash)
flow_hash = flow_hash_from_keys(&keys);
skip_hash:
if (flow_override)
flow_hash = flow_override - 1;
+ else if (use_skbhash)
+ flow_hash = skb->hash;
if (host_override) {
dsthost_hash = host_override - 1;
srchost_hash = host_override - 1;
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 1bcf8fbfd40e..bd618b00d319 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -131,7 +131,6 @@ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx,
}
struct choke_skb_cb {
- u16 classid;
u8 keys_valid;
struct flow_keys_digest keys;
};
@@ -142,11 +141,6 @@ static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data;
}
-static inline void choke_set_classid(struct sk_buff *skb, u16 classid)
-{
- choke_skb_cb(skb)->classid = classid;
-}
-
/*
* Compare flow of two packets
* Returns true only if source and destination address and port match.
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 4c060134c736..8f06a808c59a 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -66,22 +66,27 @@ static inline struct fq_skb_cb *fq_skb_cb(struct sk_buff *skb)
* in linear list (head,tail), otherwise are placed in a rbtree (t_root).
*/
struct fq_flow {
+/* First cache line : used in fq_gc(), fq_enqueue(), fq_dequeue() */
struct rb_root t_root;
struct sk_buff *head; /* list of skbs for this flow : first skb */
union {
struct sk_buff *tail; /* last skb in the list */
- unsigned long age; /* jiffies when flow was emptied, for gc */
+ unsigned long age; /* (jiffies | 1UL) when flow was emptied, for gc */
};
struct rb_node fq_node; /* anchor in fq_root[] trees */
struct sock *sk;
+ u32 socket_hash; /* sk_hash */
int qlen; /* number of packets in flow queue */
+
+/* Second cache line, used in fq_dequeue() */
int credit;
- u32 socket_hash; /* sk_hash */
- struct fq_flow *next; /* next pointer in RR lists, or &detached */
+ /* 32bit hole on 64bit arches */
+
+ struct fq_flow *next; /* next pointer in RR lists */
struct rb_node rate_node; /* anchor in q->delayed tree */
u64 time_next_packet;
-};
+} ____cacheline_aligned_in_smp;
struct fq_flow_head {
struct fq_flow *first;
@@ -95,6 +100,7 @@ struct fq_sched_data {
struct rb_root delayed; /* for rate limited flows */
u64 time_next_delayed_flow;
+ u64 ktime_cache; /* copy of last ktime_get_ns() */
unsigned long unthrottle_latency_ns;
struct fq_flow internal; /* for non classified or high prio packets */
@@ -104,12 +110,13 @@ struct fq_sched_data {
u32 flow_plimit; /* max packets per flow */
unsigned long flow_max_rate; /* optional max rate per flow */
u64 ce_threshold;
+ u64 horizon; /* horizon in ns */
u32 orphan_mask; /* mask for orphaned skb */
u32 low_rate_threshold;
struct rb_root *fq_root;
u8 rate_enable;
u8 fq_trees_log;
-
+ u8 horizon_drop;
u32 flows;
u32 inactive_flows;
u32 throttled_flows;
@@ -118,6 +125,8 @@ struct fq_sched_data {
u64 stat_internal_packets;
u64 stat_throttled;
u64 stat_ce_mark;
+ u64 stat_horizon_drops;
+ u64 stat_horizon_caps;
u64 stat_flows_plimit;
u64 stat_pkts_too_long;
u64 stat_allocation_errors;
@@ -126,20 +135,25 @@ struct fq_sched_data {
struct qdisc_watchdog watchdog;
};
-/* special value to mark a detached flow (not on old/new list) */
-static struct fq_flow detached, throttled;
-
+/*
+ * f->tail and f->age share the same location.
+ * We can use the low order bit to differentiate if this location points
+ * to a sk_buff or contains a jiffies value, if we force this value to be odd.
+ * This assumes f->tail low order bit must be 0 since alignof(struct sk_buff) >= 2
+ */
static void fq_flow_set_detached(struct fq_flow *f)
{
- f->next = &detached;
- f->age = jiffies;
+ f->age = jiffies | 1UL;
}
static bool fq_flow_is_detached(const struct fq_flow *f)
{
- return f->next == &detached;
+ return !!(f->age & 1UL);
}
+/* special value to mark a throttled flow (not on old/new list) */
+static struct fq_flow throttled;
+
static bool fq_flow_is_throttled(const struct fq_flow *f)
{
return f->next == &throttled;
@@ -204,9 +218,10 @@ static void fq_gc(struct fq_sched_data *q,
struct rb_root *root,
struct sock *sk)
{
- struct fq_flow *f, *tofree[FQ_GC_MAX];
struct rb_node **p, *parent;
- int fcnt = 0;
+ void *tofree[FQ_GC_MAX];
+ struct fq_flow *f;
+ int i, fcnt = 0;
p = &root->rb_node;
parent = NULL;
@@ -229,15 +244,18 @@ static void fq_gc(struct fq_sched_data *q,
p = &parent->rb_left;
}
+ if (!fcnt)
+ return;
+
+ for (i = fcnt; i > 0; ) {
+ f = tofree[--i];
+ rb_erase(&f->fq_node, root);
+ }
q->flows -= fcnt;
q->inactive_flows -= fcnt;
q->stat_gc_flows += fcnt;
- while (fcnt) {
- struct fq_flow *f = tofree[--fcnt];
- rb_erase(&f->fq_node, root);
- kmem_cache_free(fq_flow_cachep, f);
- }
+ kmem_cache_free_bulk(fq_flow_cachep, fcnt, tofree);
}
static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
@@ -370,19 +388,17 @@ static void fq_erase_head(struct Qdisc *sch, struct fq_flow *flow,
}
}
-/* remove one skb from head of flow queue */
-static struct sk_buff *fq_dequeue_head(struct Qdisc *sch, struct fq_flow *flow)
+/* Remove one skb from flow queue.
+ * This skb must be the return value of prior fq_peek().
+ */
+static void fq_dequeue_skb(struct Qdisc *sch, struct fq_flow *flow,
+ struct sk_buff *skb)
{
- struct sk_buff *skb = fq_peek(flow);
-
- if (skb) {
- fq_erase_head(sch, flow, skb);
- skb_mark_not_on_list(skb);
- flow->qlen--;
- qdisc_qstats_backlog_dec(sch, skb);
- sch->q.qlen--;
- }
- return skb;
+ fq_erase_head(sch, flow, skb);
+ skb_mark_not_on_list(skb);
+ flow->qlen--;
+ qdisc_qstats_backlog_dec(sch, skb);
+ sch->q.qlen--;
}
static void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb)
@@ -390,8 +406,6 @@ static void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb)
struct rb_node **p, *parent;
struct sk_buff *head, *aux;
- fq_skb_cb(skb)->time_to_send = skb->tstamp ?: ktime_get_ns();
-
head = flow->head;
if (!head ||
fq_skb_cb(skb)->time_to_send >= fq_skb_cb(flow->tail)->time_to_send) {
@@ -419,6 +433,12 @@ static void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb)
rb_insert_color(&skb->rbnode, &flow->t_root);
}
+/* True when skb->tstamp is later than q->ktime_cache + q->horizon, with
+ * both sides compared as signed 64-bit values.
+ */
+static bool fq_packet_beyond_horizon(const struct sk_buff *skb,
+				     const struct fq_sched_data *q)
+{
+	s64 limit = (s64)(q->ktime_cache + q->horizon);
+
+	return unlikely((s64)skb->tstamp > limit);
+}
+
static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
@@ -428,6 +448,28 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
if (unlikely(sch->q.qlen >= sch->limit))
return qdisc_drop(skb, sch, to_free);
+ if (!skb->tstamp) {
+ fq_skb_cb(skb)->time_to_send = q->ktime_cache = ktime_get_ns();
+ } else {
+ /* Check if packet timestamp is too far in the future.
+ * Try first if our cached value, to avoid ktime_get_ns()
+ * cost in most cases.
+ */
+ if (fq_packet_beyond_horizon(skb, q)) {
+ /* Refresh our cache and check another time */
+ q->ktime_cache = ktime_get_ns();
+ if (fq_packet_beyond_horizon(skb, q)) {
+ if (q->horizon_drop) {
+ q->stat_horizon_drops++;
+ return qdisc_drop(skb, sch, to_free);
+ }
+ q->stat_horizon_caps++;
+ skb->tstamp = q->ktime_cache + q->horizon;
+ }
+ }
+ fq_skb_cb(skb)->time_to_send = skb->tstamp;
+ }
+
f = fq_classify(skb, q);
if (unlikely(f->qlen >= q->flow_plimit && f != &q->internal)) {
q->stat_flows_plimit++;
@@ -494,11 +536,13 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch)
if (!sch->q.qlen)
return NULL;
- skb = fq_dequeue_head(sch, &q->internal);
- if (skb)
+ skb = fq_peek(&q->internal);
+ if (unlikely(skb)) {
+ fq_dequeue_skb(sch, &q->internal, skb);
goto out;
+ }
- now = ktime_get_ns();
+ q->ktime_cache = now = ktime_get_ns();
fq_check_throttled(q, now);
begin:
head = &q->new_flows;
@@ -532,14 +576,13 @@ begin:
fq_flow_set_throttled(q, f);
goto begin;
}
+ prefetch(&skb->end);
if ((s64)(now - time_next_packet - q->ce_threshold) > 0) {
INET_ECN_set_ce(skb);
q->stat_ce_mark++;
}
- }
-
- skb = fq_dequeue_head(sch, f);
- if (!skb) {
+ fq_dequeue_skb(sch, f, skb);
+ } else {
head->first = f->next;
/* force a pass through old_flows to prevent starvation */
if ((head == &q->new_flows) && q->old_flows.first) {
@@ -550,7 +593,6 @@ begin:
}
goto begin;
}
- prefetch(&skb->end);
plen = qdisc_pkt_len(skb);
f->credit -= plen;
@@ -753,6 +795,8 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
[TCA_FQ_LOW_RATE_THRESHOLD] = { .type = NLA_U32 },
[TCA_FQ_CE_THRESHOLD] = { .type = NLA_U32 },
[TCA_FQ_TIMER_SLACK] = { .type = NLA_U32 },
+ [TCA_FQ_HORIZON] = { .type = NLA_U32 },
+ [TCA_FQ_HORIZON_DROP] = { .type = NLA_U8 },
};
static int fq_change(struct Qdisc *sch, struct nlattr *opt,
@@ -842,7 +886,15 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
if (tb[TCA_FQ_TIMER_SLACK])
q->timer_slack = nla_get_u32(tb[TCA_FQ_TIMER_SLACK]);
+ if (tb[TCA_FQ_HORIZON])
+ q->horizon = (u64)NSEC_PER_USEC *
+ nla_get_u32(tb[TCA_FQ_HORIZON]);
+
+ if (tb[TCA_FQ_HORIZON_DROP])
+ q->horizon_drop = nla_get_u8(tb[TCA_FQ_HORIZON_DROP]);
+
if (!err) {
+
sch_tree_unlock(sch);
err = fq_resize(sch, fq_log);
sch_tree_lock(sch);
@@ -895,6 +947,9 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt,
q->timer_slack = 10 * NSEC_PER_USEC; /* 10 usec of hrtimer slack */
+ q->horizon = 10ULL * NSEC_PER_SEC; /* 10 seconds */
+ q->horizon_drop = 1; /* by default, drop packets beyond horizon */
+
/* Default ce_threshold of 4294 seconds */
q->ce_threshold = (u64)NSEC_PER_USEC * ~0U;
@@ -912,6 +967,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct fq_sched_data *q = qdisc_priv(sch);
u64 ce_threshold = q->ce_threshold;
+ u64 horizon = q->horizon;
struct nlattr *opts;
opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
@@ -921,6 +977,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
/* TCA_FQ_FLOW_DEFAULT_RATE is not used anymore */
do_div(ce_threshold, NSEC_PER_USEC);
+ do_div(horizon, NSEC_PER_USEC);
if (nla_put_u32(skb, TCA_FQ_PLIMIT, sch->limit) ||
nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, q->flow_plimit) ||
@@ -936,7 +993,9 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
q->low_rate_threshold) ||
nla_put_u32(skb, TCA_FQ_CE_THRESHOLD, (u32)ce_threshold) ||
nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log) ||
- nla_put_u32(skb, TCA_FQ_TIMER_SLACK, q->timer_slack))
+ nla_put_u32(skb, TCA_FQ_TIMER_SLACK, q->timer_slack) ||
+ nla_put_u32(skb, TCA_FQ_HORIZON, (u32)horizon) ||
+ nla_put_u8(skb, TCA_FQ_HORIZON_DROP, q->horizon_drop))
goto nla_put_failure;
return nla_nest_end(skb, opts);
@@ -967,6 +1026,8 @@ static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
st.unthrottle_latency_ns = min_t(unsigned long,
q->unthrottle_latency_ns, ~0U);
st.ce_mark = q->stat_ce_mark;
+ st.horizon_drops = q->stat_horizon_drops;
+ st.horizon_caps = q->stat_horizon_caps;
sch_tree_unlock(sch);
return gnet_stats_copy_app(d, &st, sizeof(st));
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 2efd5b61acef..b19a0021a0bd 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -794,6 +794,9 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = {
};
EXPORT_SYMBOL(pfifo_fast_ops);
+static struct lock_class_key qdisc_tx_busylock;
+static struct lock_class_key qdisc_running_key;
+
struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
const struct Qdisc_ops *ops,
struct netlink_ext_ack *extack)
@@ -846,9 +849,17 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
}
spin_lock_init(&sch->busylock);
+ lockdep_set_class(&sch->busylock,
+ dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
+
/* seqlock has the same scope of busylock, for NOLOCK qdisc */
spin_lock_init(&sch->seqlock);
+ lockdep_set_class(&sch->seqlock,
+ dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
+
seqcount_init(&sch->running);
+ lockdep_set_class(&sch->running,
+ dev->qdisc_running_key ?: &qdisc_running_key);
sch->ops = ops;
sch->flags = ops->static_flags;
@@ -859,12 +870,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
dev_hold(dev);
refcount_set(&sch->refcnt, 1);
- if (sch != &noop_qdisc) {
- lockdep_set_class(&sch->busylock, &dev->qdisc_tx_busylock_key);
- lockdep_set_class(&sch->seqlock, &dev->qdisc_tx_busylock_key);
- lockdep_set_class(&sch->running, &dev->qdisc_running_key);
- }
-
return sch;
errout1:
kfree(p);
@@ -891,8 +896,10 @@ struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
}
sch->parent = parentid;
- if (!ops->init || ops->init(sch, NULL, extack) == 0)
+ if (!ops->init || ops->init(sch, NULL, extack) == 0) {
+ trace_qdisc_create(ops, dev_queue->dev, parentid);
return sch;
+ }
qdisc_put(sch);
return NULL;
@@ -906,6 +913,8 @@ void qdisc_reset(struct Qdisc *qdisc)
const struct Qdisc_ops *ops = qdisc->ops;
struct sk_buff *skb, *tmp;
+ trace_qdisc_reset(qdisc);
+
if (ops->reset)
ops->reset(qdisc);
@@ -944,7 +953,6 @@ static void qdisc_free_cb(struct rcu_head *head)
static void qdisc_destroy(struct Qdisc *qdisc)
{
const struct Qdisc_ops *ops = qdisc->ops;
- struct sk_buff *skb, *tmp;
#ifdef CONFIG_NET_SCHED
qdisc_hash_del(qdisc);
@@ -952,23 +960,16 @@ static void qdisc_destroy(struct Qdisc *qdisc)
qdisc_put_stab(rtnl_dereference(qdisc->stab));
#endif
gen_kill_estimator(&qdisc->rate_est);
- if (ops->reset)
- ops->reset(qdisc);
+
+ qdisc_reset(qdisc);
+
if (ops->destroy)
ops->destroy(qdisc);
module_put(ops->owner);
dev_put(qdisc_dev(qdisc));
- skb_queue_walk_safe(&qdisc->gso_skb, skb, tmp) {
- __skb_unlink(skb, &qdisc->gso_skb);
- kfree_skb_list(skb);
- }
-
- skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp) {
- __skb_unlink(skb, &qdisc->skb_bad_txq);
- kfree_skb_list(skb);
- }
+ trace_qdisc_destroy(qdisc);
call_rcu(&qdisc->rcu, qdisc_free_cb);
}
@@ -1037,10 +1038,9 @@ static void attach_one_default_qdisc(struct net_device *dev,
ops = &pfifo_fast_ops;
qdisc = qdisc_create_dflt(dev_queue, ops, TC_H_ROOT, NULL);
- if (!qdisc) {
- netdev_info(dev, "activation failed\n");
+ if (!qdisc)
return;
- }
+
if (!netif_is_multiqueue(dev))
qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
dev_queue->qdisc_sleeping = qdisc;
@@ -1065,6 +1065,18 @@ static void attach_default_qdiscs(struct net_device *dev)
qdisc->ops->attach(qdisc);
}
}
+
+ /* Detect that default qdisc setup/init failed and fall back to "noqueue" */
+ if (dev->qdisc == &noop_qdisc) {
+ netdev_warn(dev, "default qdisc (%s) fail, fallback to %s\n",
+ default_qdisc_ops->id, noqueue_qdisc_ops.id);
+ dev->priv_flags |= IFF_NO_QUEUE;
+ netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
+ dev->qdisc = txq->qdisc_sleeping;
+ qdisc_refcount_inc(dev->qdisc);
+ dev->priv_flags ^= IFF_NO_QUEUE;
+ }
+
#ifdef CONFIG_NET_SCHED
if (dev->qdisc != &noop_qdisc)
qdisc_hash_add(dev->qdisc, false);
@@ -1116,6 +1128,28 @@ void dev_activate(struct net_device *dev)
}
EXPORT_SYMBOL(dev_activate);
+static void qdisc_deactivate(struct Qdisc *qdisc)
+{
+ bool nolock = qdisc->flags & TCQ_F_NOLOCK;
+
+ if (qdisc->flags & TCQ_F_BUILTIN)
+ return;
+ if (test_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state))
+ return;
+
+ if (nolock)
+ spin_lock_bh(&qdisc->seqlock);
+ spin_lock_bh(qdisc_lock(qdisc));
+
+ set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);
+
+ qdisc_reset(qdisc);
+
+ spin_unlock_bh(qdisc_lock(qdisc));
+ if (nolock)
+ spin_unlock_bh(&qdisc->seqlock);
+}
+
static void dev_deactivate_queue(struct net_device *dev,
struct netdev_queue *dev_queue,
void *_qdisc_default)
@@ -1125,21 +1159,8 @@ static void dev_deactivate_queue(struct net_device *dev,
qdisc = rtnl_dereference(dev_queue->qdisc);
if (qdisc) {
- bool nolock = qdisc->flags & TCQ_F_NOLOCK;
-
- if (nolock)
- spin_lock_bh(&qdisc->seqlock);
- spin_lock_bh(qdisc_lock(qdisc));
-
- if (!(qdisc->flags & TCQ_F_BUILTIN))
- set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);
-
+ qdisc_deactivate(qdisc);
rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
- qdisc_reset(qdisc);
-
- spin_unlock_bh(qdisc_lock(qdisc));
- if (nolock)
- spin_unlock_bh(&qdisc->seqlock);
}
}
@@ -1170,16 +1191,6 @@ static bool some_qdisc_is_busy(struct net_device *dev)
return false;
}
-static void dev_qdisc_reset(struct net_device *dev,
- struct netdev_queue *dev_queue,
- void *none)
-{
- struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
-
- if (qdisc)
- qdisc_reset(qdisc);
-}
-
/**
* dev_deactivate_many - deactivate transmissions on several devices
* @head: list of devices to deactivate
@@ -1216,12 +1227,6 @@ void dev_deactivate_many(struct list_head *head)
*/
schedule_timeout_uninterruptible(1);
}
- /* The new qdisc is assigned at this point so we can safely
- * unwind stale skb lists and qdisc statistics
- */
- netdev_for_each_tx_queue(dev, dev_qdisc_reset, NULL);
- if (dev_ingress_queue(dev))
- dev_qdisc_reset(dev, dev_ingress_queue(dev), NULL);
}
}
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index c7de47c942e3..555a1b9e467f 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -48,7 +48,7 @@ struct red_sched_data {
struct Qdisc *qdisc;
};
-static const u32 red_supported_flags = TC_RED_HISTORIC_FLAGS | TC_RED_NODROP;
+#define TC_RED_SUPPORTED_FLAGS (TC_RED_HISTORIC_FLAGS | TC_RED_NODROP)
static inline int red_use_ecn(struct red_sched_data *q)
{
@@ -212,8 +212,7 @@ static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
[TCA_RED_PARMS] = { .len = sizeof(struct tc_red_qopt) },
[TCA_RED_STAB] = { .len = RED_STAB_SIZE },
[TCA_RED_MAX_P] = { .type = NLA_U32 },
- [TCA_RED_FLAGS] = { .type = NLA_BITFIELD32,
- .validation_data = &red_supported_flags },
+ [TCA_RED_FLAGS] = NLA_POLICY_BITFIELD32(TC_RED_SUPPORTED_FLAGS),
};
static int red_change(struct Qdisc *sch, struct nlattr *opt,
@@ -248,7 +247,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt,
return -EINVAL;
err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS,
- tb[TCA_RED_FLAGS], red_supported_flags,
+ tb[TCA_RED_FLAGS], TC_RED_SUPPORTED_FLAGS,
&flags_bf, &userbits, extack);
if (err)
return err;
@@ -372,7 +371,7 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) ||
nla_put_bitfield32(skb, TCA_RED_FLAGS,
- q->flags, red_supported_flags))
+ q->flags, TC_RED_SUPPORTED_FLAGS))
goto nla_put_failure;
return nla_nest_end(skb, opts);