summaryrefslogtreecommitdiff
path: root/net/sched
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-04-29 11:57:23 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-04-29 11:57:23 -0700
commit9d31d2338950293ec19d9b095fbaa9030899dcb4 (patch)
treee688040d0557c24a2eeb9f6c9c223d949f6f7ef9 /net/sched
parent635de956a7f5a6ffcb04f29d70630c64c717b56b (diff)
parent4a52dd8fefb45626dace70a63c0738dbd83b7edb (diff)
Merge tag 'net-next-5.13' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski: "Core: - bpf: - allow bpf programs calling kernel functions (initially to reuse TCP congestion control implementations) - enable task local storage for tracing programs - remove the need to store per-task state in hash maps, and allow tracing programs access to task local storage previously added for BPF_LSM - add bpf_for_each_map_elem() helper, allowing programs to walk all map elements in a more robust and easier to verify fashion - sockmap: support UDP and cross-protocol BPF_SK_SKB_VERDICT redirection - lpm: add support for batched ops in LPM trie - add BTF_KIND_FLOAT support - mostly to allow use of BTF on s390 which has floats in its headers files - improve BPF syscall documentation and extend the use of kdoc parsing scripts we already employ for bpf-helpers - libbpf, bpftool: support static linking of BPF ELF files - improve support for encapsulation of L2 packets - xdp: restructure redirect actions to avoid a runtime lookup, improving performance by 4-8% in microbenchmarks - xsk: build skb by page (aka generic zerocopy xmit) - improve performance of software AF_XDP path by 33% for devices which don't need headers in the linear skb part (e.g. virtio) - nexthop: resilient next-hop groups - improve path stability on next-hops group changes (incl. offload for mlxsw) - ipv6: segment routing: add support for IPv4 decapsulation - icmp: add support for RFC 8335 extended PROBE messages - inet: use bigger hash table for IP ID generation - tcp: deal better with delayed TX completions - make sure we don't give up on fast TCP retransmissions only because driver is slow in reporting that it completed transmitting the original - tcp: reorder tcp_congestion_ops for better cache locality - mptcp: - add sockopt support for common TCP options - add support for common TCP msg flags - include multiple address ids in RM_ADDR - add reset option support for resetting one subflow - udp: GRO L4 improvements - improve 'forward' / 'frag_list' co-existence with UDP tunnel GRO, allowing the first to take place correctly even for encapsulated UDP traffic - micro-optimize dev_gro_receive() and flow dissection, avoid retpoline overhead on VLAN and TEB GRO - use less memory for sysctls, add a new sysctl type, to allow using u8 instead of "int" and "long" and shrink networking sysctls - veth: allow GRO without XDP - this allows aggregating UDP packets before handing them off to routing, bridge, OvS, etc. - allow specifing ifindex when device is moved to another namespace - netfilter: - nft_socket: add support for cgroupsv2 - nftables: add catch-all set element - special element used to define a default action in case normal lookup missed - use net_generic infra in many modules to avoid allocating per-ns memory unnecessarily - xps: improve the xps handling to avoid potential out-of-bound accesses and use-after-free when XPS change race with other re-configuration under traffic - add a config knob to turn off per-cpu netdev refcnt to catch underflows in testing Device APIs: - add WWAN subsystem to organize the WWAN interfaces better and hopefully start driving towards more unified and vendor- independent APIs - ethtool: - add interface for reading IEEE MIB stats (incl. mlx5 and bnxt support) - allow network drivers to dump arbitrary SFP EEPROM data, current offset+length API was a poor fit for modern SFP which define EEPROM in terms of pages (incl. mlx5 support) - act_police, flow_offload: add support for packet-per-second policing (incl. offload for nfp) - psample: add additional metadata attributes like transit delay for packets sampled from switch HW (and corresponding egress and policy-based sampling in the mlxsw driver) - dsa: improve support for sandwiched LAGs with bridge and DSA - netfilter: - flowtable: use direct xmit in topologies with IP forwarding, bridging, vlans etc. - nftables: counter hardware offload support - Bluetooth: - improvements for firmware download w/ Intel devices - add support for reading AOSP vendor capabilities - add support for virtio transport driver - mac80211: - allow concurrent monitor iface and ethernet rx decap - set priority and queue mapping for injected frames - phy: add support for Clause-45 PHY Loopback - pci/iov: add sysfs MSI-X vector assignment interface to distribute MSI-X resources to VFs (incl. mlx5 support) New hardware/drivers: - dsa: mv88e6xxx: add support for Marvell mv88e6393x - 11-port Ethernet switch with 8x 1-Gigabit Ethernet and 3x 10-Gigabit interfaces. - dsa: support for legacy Broadcom tags used on BCM5325, BCM5365 and BCM63xx switches - Microchip KSZ8863 and KSZ8873; 3x 10/100Mbps Ethernet switches - ath11k: support for QCN9074 a 802.11ax device - Bluetooth: Broadcom BCM4330 and BMC4334 - phy: Marvell 88X2222 transceiver support - mdio: add BCM6368 MDIO mux bus controller - r8152: support RTL8153 and RTL8156 (USB Ethernet) chips - mana: driver for Microsoft Azure Network Adapter (MANA) - Actions Semi Owl Ethernet MAC - can: driver for ETAS ES58X CAN/USB interfaces Pure driver changes: - add XDP support to: enetc, igc, stmmac - add AF_XDP support to: stmmac - virtio: - page_to_skb() use build_skb when there's sufficient tailroom (21% improvement for 1000B UDP frames) - support XDP even without dedicated Tx queues - share the Tx queues with the stack when necessary - mlx5: - flow rules: add support for mirroring with conntrack, matching on ICMP, GTP, flex filters and more - support packet sampling with flow offloads - persist uplink representor netdev across eswitch mode changes - allow coexistence of CQE compression and HW time-stamping - add ethtool extended link error state reporting - ice, iavf: support flow filters, UDP Segmentation Offload - dpaa2-switch: - move the driver out of staging - add spanning tree (STP) support - add rx copybreak support - add tc flower hardware offload on ingress traffic - ionic: - implement Rx page reuse - support HW PTP time-stamping - octeon: support TC hardware offloads - flower matching on ingress and egress ratelimitting. - stmmac: - add RX frame steering based on VLAN priority in tc flower - support frame preemption (FPE) - intel: add cross time-stamping freq difference adjustment - ocelot: - support forwarding of MRP frames in HW - support multiple bridges - support PTP Sync one-step timestamping - dsa: mv88e6xxx, dpaa2-switch: offload bridge port flags like learning, flooding etc. - ipa: add IPA v4.5, v4.9 and v4.11 support (Qualcomm SDX55, SM8350, SC7280 SoCs) - mt7601u: enable TDLS support - mt76: - add support for 802.3 rx frames (mt7915/mt7615) - mt7915 flash pre-calibration support - mt7921/mt7663 runtime power management fixes" * tag 'net-next-5.13' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (2451 commits) net: selftest: fix build issue if INET is disabled net: netrom: nr_in: Remove redundant assignment to ns net: tun: Remove redundant assignment to ret net: phy: marvell: add downshift support for M88E1240 net: dsa: ksz: Make reg_mib_cnt a u8 as it never exceeds 255 net/sched: act_ct: Remove redundant ct get and check icmp: standardize naming of RFC 8335 PROBE constants bpf, selftests: Update array map tests for per-cpu batched ops bpf: Add batched ops support for percpu array bpf: Implement formatted output helpers with bstr_printf seq_file: Add a seq_bprintf function sfc: adjust efx->xdp_tx_queue_count with the real number of initialized queues net:nfc:digital: Fix a double free in digital_tg_recv_dep_req net: fix a concurrency bug in l2tp_tunnel_register() net/smc: Remove redundant assignment to rc mpls: Remove redundant assignment to err llc2: Remove redundant assignment to rc net/tls: Remove redundant initialization of record rds: Remove redundant assignment to nr_sig dt-bindings: net: mdio-gpio: add compatible for microchip,mdio-smi0 ...
Diffstat (limited to 'net/sched')
-rw-r--r--net/sched/act_ct.c10
-rw-r--r--net/sched/act_police.c59
-rw-r--r--net/sched/act_sample.c27
-rw-r--r--net/sched/cls_api.c3
-rw-r--r--net/sched/cls_flower.c40
-rw-r--r--net/sched/sch_cbq.c4
-rw-r--r--net/sched/sch_generic.c75
-rw-r--r--net/sched/sch_taprio.c70
8 files changed, 208 insertions, 80 deletions
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 16e888a9601d..ec7a1c438df9 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -732,7 +732,8 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
#endif
}
- *qdisc_skb_cb(skb) = cb;
+ if (err != -EINPROGRESS)
+ *qdisc_skb_cb(skb) = cb;
skb_clear_hash(skb);
skb->ignore_df = 1;
return err;
@@ -967,7 +968,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
err = tcf_ct_handle_fragments(net, skb, family, p->zone, &defrag);
if (err == -EINPROGRESS) {
retval = TC_ACT_STOLEN;
- goto out;
+ goto out_clear;
}
if (err)
goto drop;
@@ -990,9 +991,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
/* Associate skb with specified zone. */
if (tmpl) {
- ct = nf_ct_get(skb, &ctinfo);
- if (skb_nfct(skb))
- nf_conntrack_put(skb_nfct(skb));
+ nf_conntrack_put(skb_nfct(skb));
nf_conntrack_get(&tmpl->ct_general);
nf_ct_set(skb, tmpl, IP_CT_NEW);
}
@@ -1030,7 +1029,6 @@ do_nat:
out_push:
skb_push_rcsum(skb, nh_ofs);
-out:
qdisc_skb_cb(skb)->post_ct = true;
out_clear:
tcf_action_update_bstats(&c->common, skb);
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 8d8452b1cdd4..0fab8de176d2 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -42,6 +42,8 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
[TCA_POLICE_RESULT] = { .type = NLA_U32 },
[TCA_POLICE_RATE64] = { .type = NLA_U64 },
[TCA_POLICE_PEAKRATE64] = { .type = NLA_U64 },
+ [TCA_POLICE_PKTRATE64] = { .type = NLA_U64, .min = 1 },
+ [TCA_POLICE_PKTBURST64] = { .type = NLA_U64, .min = 1 },
};
static int tcf_police_init(struct net *net, struct nlattr *nla,
@@ -61,6 +63,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
bool exists = false;
u32 index;
u64 rate64, prate64;
+ u64 pps, ppsburst;
if (nla == NULL)
return -EINVAL;
@@ -142,6 +145,21 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
}
}
+ if ((tb[TCA_POLICE_PKTRATE64] && !tb[TCA_POLICE_PKTBURST64]) ||
+ (!tb[TCA_POLICE_PKTRATE64] && tb[TCA_POLICE_PKTBURST64])) {
+ NL_SET_ERR_MSG(extack,
+ "Both or neither packet-per-second burst and rate must be provided");
+ err = -EINVAL;
+ goto failure;
+ }
+
+ if (tb[TCA_POLICE_PKTRATE64] && R_tab) {
+ NL_SET_ERR_MSG(extack,
+ "packet-per-second and byte-per-second rate limits not allowed in same action");
+ err = -EINVAL;
+ goto failure;
+ }
+
new = kzalloc(sizeof(*new), GFP_KERNEL);
if (unlikely(!new)) {
err = -ENOMEM;
@@ -183,6 +201,14 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
if (tb[TCA_POLICE_AVRATE])
new->tcfp_ewma_rate = nla_get_u32(tb[TCA_POLICE_AVRATE]);
+ if (tb[TCA_POLICE_PKTRATE64]) {
+ pps = nla_get_u64(tb[TCA_POLICE_PKTRATE64]);
+ ppsburst = nla_get_u64(tb[TCA_POLICE_PKTBURST64]);
+ new->pps_present = true;
+ new->tcfp_pkt_burst = PSCHED_TICKS2NS(ppsburst);
+ psched_ppscfg_precompute(&new->ppsrate, pps);
+ }
+
spin_lock_bh(&police->tcf_lock);
spin_lock_bh(&police->tcfp_lock);
police->tcfp_t_c = ktime_get_ns();
@@ -217,8 +243,8 @@ static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_police *police = to_police(a);
+ s64 now, toks, ppstoks = 0, ptoks = 0;
struct tcf_police_params *p;
- s64 now, toks, ptoks = 0;
int ret;
tcf_lastuse_update(&police->tcf_tm);
@@ -236,7 +262,7 @@ static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a,
}
if (qdisc_pkt_len(skb) <= p->tcfp_mtu) {
- if (!p->rate_present) {
+ if (!p->rate_present && !p->pps_present) {
ret = p->tcfp_result;
goto end;
}
@@ -251,14 +277,23 @@ static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a,
ptoks -= (s64)psched_l2t_ns(&p->peak,
qdisc_pkt_len(skb));
}
- toks += police->tcfp_toks;
- if (toks > p->tcfp_burst)
- toks = p->tcfp_burst;
- toks -= (s64)psched_l2t_ns(&p->rate, qdisc_pkt_len(skb));
- if ((toks|ptoks) >= 0) {
+ if (p->rate_present) {
+ toks += police->tcfp_toks;
+ if (toks > p->tcfp_burst)
+ toks = p->tcfp_burst;
+ toks -= (s64)psched_l2t_ns(&p->rate, qdisc_pkt_len(skb));
+ } else if (p->pps_present) {
+ ppstoks = min_t(s64, now - police->tcfp_t_c, p->tcfp_pkt_burst);
+ ppstoks += police->tcfp_pkttoks;
+ if (ppstoks > p->tcfp_pkt_burst)
+ ppstoks = p->tcfp_pkt_burst;
+ ppstoks -= (s64)psched_pkt2t_ns(&p->ppsrate, 1);
+ }
+ if ((toks | ptoks | ppstoks) >= 0) {
police->tcfp_t_c = now;
police->tcfp_toks = toks;
police->tcfp_ptoks = ptoks;
+ police->tcfp_pkttoks = ppstoks;
spin_unlock_bh(&police->tcfp_lock);
ret = p->tcfp_result;
goto inc_drops;
@@ -331,6 +366,16 @@ static int tcf_police_dump(struct sk_buff *skb, struct tc_action *a,
TCA_POLICE_PAD))
goto nla_put_failure;
}
+ if (p->pps_present) {
+ if (nla_put_u64_64bit(skb, TCA_POLICE_PKTRATE64,
+ police->params->ppsrate.rate_pkts_ps,
+ TCA_POLICE_PAD))
+ goto nla_put_failure;
+ if (nla_put_u64_64bit(skb, TCA_POLICE_PKTBURST64,
+ PSCHED_NS2TICKS(p->tcfp_pkt_burst),
+ TCA_POLICE_PAD))
+ goto nla_put_failure;
+ }
if (nla_put(skb, TCA_POLICE_TBF, sizeof(opt), &opt))
goto nla_put_failure;
if (p->tcfp_result &&
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 3ebf9ede3cf1..6a0c16e4351d 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -158,10 +158,8 @@ static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a,
{
struct tcf_sample *s = to_sample(a);
struct psample_group *psample_group;
+ struct psample_metadata md = {};
int retval;
- int size;
- int iif;
- int oif;
tcf_lastuse_update(&s->tcf_tm);
bstats_cpu_update(this_cpu_ptr(s->common.cpu_bstats), skb);
@@ -172,20 +170,18 @@ static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a,
/* randomly sample packets according to rate */
if (psample_group && (prandom_u32() % s->rate == 0)) {
if (!skb_at_tc_ingress(skb)) {
- iif = skb->skb_iif;
- oif = skb->dev->ifindex;
+ md.in_ifindex = skb->skb_iif;
+ md.out_ifindex = skb->dev->ifindex;
} else {
- iif = skb->dev->ifindex;
- oif = 0;
+ md.in_ifindex = skb->dev->ifindex;
}
/* on ingress, the mac header gets popped, so push it back */
if (skb_at_tc_ingress(skb) && tcf_sample_dev_ok_push(skb->dev))
skb_push(skb, skb->mac_len);
- size = s->truncate ? s->trunc_size : skb->len;
- psample_sample_packet(psample_group, skb, size, iif, oif,
- s->rate);
+ md.trunc_size = s->truncate ? s->trunc_size : skb->len;
+ psample_sample_packet(psample_group, skb, s->rate, &md);
if (skb_at_tc_ingress(skb) && tcf_sample_dev_ok_push(skb->dev))
skb_pull(skb, skb->mac_len);
@@ -194,6 +190,16 @@ static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a,
return retval;
}
+static void tcf_sample_stats_update(struct tc_action *a, u64 bytes, u64 packets,
+ u64 drops, u64 lastuse, bool hw)
+{
+ struct tcf_sample *s = to_sample(a);
+ struct tcf_t *tm = &s->tcf_tm;
+
+ tcf_action_update_stats(a, bytes, packets, drops, hw);
+ tm->lastuse = max_t(u64, tm->lastuse, lastuse);
+}
+
static int tcf_sample_dump(struct sk_buff *skb, struct tc_action *a,
int bind, int ref)
{
@@ -280,6 +286,7 @@ static struct tc_action_ops act_sample_ops = {
.id = TCA_ID_SAMPLE,
.owner = THIS_MODULE,
.act = tcf_sample_act,
+ .stats_update = tcf_sample_stats_update,
.dump = tcf_sample_dump,
.init = tcf_sample_init,
.cleanup = tcf_sample_cleanup,
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 340d5af86e87..40fbea626dfd 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -3662,6 +3662,9 @@ int tc_setup_flow_action(struct flow_action *flow_action,
entry->police.burst = tcf_police_burst(act);
entry->police.rate_bytes_ps =
tcf_police_rate_bytes_ps(act);
+ entry->police.burst_pkt = tcf_police_burst_pkt(act);
+ entry->police.rate_pkt_ps =
+ tcf_police_rate_pkt_ps(act);
entry->police.mtu = tcf_police_tcfp_mtu(act);
entry->police.index = act->tcfa_index;
} else if (is_tcf_ct(act)) {
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index c69a4ba9c33f..d7869a984881 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -209,16 +209,16 @@ static bool fl_range_port_dst_cmp(struct cls_fl_filter *filter,
struct fl_flow_key *key,
struct fl_flow_key *mkey)
{
- __be16 min_mask, max_mask, min_val, max_val;
+ u16 min_mask, max_mask, min_val, max_val;
- min_mask = htons(filter->mask->key.tp_range.tp_min.dst);
- max_mask = htons(filter->mask->key.tp_range.tp_max.dst);
- min_val = htons(filter->key.tp_range.tp_min.dst);
- max_val = htons(filter->key.tp_range.tp_max.dst);
+ min_mask = ntohs(filter->mask->key.tp_range.tp_min.dst);
+ max_mask = ntohs(filter->mask->key.tp_range.tp_max.dst);
+ min_val = ntohs(filter->key.tp_range.tp_min.dst);
+ max_val = ntohs(filter->key.tp_range.tp_max.dst);
if (min_mask && max_mask) {
- if (htons(key->tp_range.tp.dst) < min_val ||
- htons(key->tp_range.tp.dst) > max_val)
+ if (ntohs(key->tp_range.tp.dst) < min_val ||
+ ntohs(key->tp_range.tp.dst) > max_val)
return false;
/* skb does not have min and max values */
@@ -232,16 +232,16 @@ static bool fl_range_port_src_cmp(struct cls_fl_filter *filter,
struct fl_flow_key *key,
struct fl_flow_key *mkey)
{
- __be16 min_mask, max_mask, min_val, max_val;
+ u16 min_mask, max_mask, min_val, max_val;
- min_mask = htons(filter->mask->key.tp_range.tp_min.src);
- max_mask = htons(filter->mask->key.tp_range.tp_max.src);
- min_val = htons(filter->key.tp_range.tp_min.src);
- max_val = htons(filter->key.tp_range.tp_max.src);
+ min_mask = ntohs(filter->mask->key.tp_range.tp_min.src);
+ max_mask = ntohs(filter->mask->key.tp_range.tp_max.src);
+ min_val = ntohs(filter->key.tp_range.tp_min.src);
+ max_val = ntohs(filter->key.tp_range.tp_max.src);
if (min_mask && max_mask) {
- if (htons(key->tp_range.tp.src) < min_val ||
- htons(key->tp_range.tp.src) > max_val)
+ if (ntohs(key->tp_range.tp.src) < min_val ||
+ ntohs(key->tp_range.tp.src) > max_val)
return false;
/* skb does not have min and max values */
@@ -783,16 +783,16 @@ static int fl_set_key_port_range(struct nlattr **tb, struct fl_flow_key *key,
TCA_FLOWER_UNSPEC, sizeof(key->tp_range.tp_max.src));
if (mask->tp_range.tp_min.dst && mask->tp_range.tp_max.dst &&
- htons(key->tp_range.tp_max.dst) <=
- htons(key->tp_range.tp_min.dst)) {
+ ntohs(key->tp_range.tp_max.dst) <=
+ ntohs(key->tp_range.tp_min.dst)) {
NL_SET_ERR_MSG_ATTR(extack,
tb[TCA_FLOWER_KEY_PORT_DST_MIN],
"Invalid destination port range (min must be strictly smaller than max)");
return -EINVAL;
}
if (mask->tp_range.tp_min.src && mask->tp_range.tp_max.src &&
- htons(key->tp_range.tp_max.src) <=
- htons(key->tp_range.tp_min.src)) {
+ ntohs(key->tp_range.tp_max.src) <=
+ ntohs(key->tp_range.tp_min.src)) {
NL_SET_ERR_MSG_ATTR(extack,
tb[TCA_FLOWER_KEY_PORT_SRC_MIN],
"Invalid source port range (min must be strictly smaller than max)");
@@ -1044,8 +1044,8 @@ static int fl_set_key_flags(struct nlattr **tb, u32 *flags_key,
return -EINVAL;
}
- key = be32_to_cpu(nla_get_u32(tb[TCA_FLOWER_KEY_FLAGS]));
- mask = be32_to_cpu(nla_get_u32(tb[TCA_FLOWER_KEY_FLAGS_MASK]));
+ key = be32_to_cpu(nla_get_be32(tb[TCA_FLOWER_KEY_FLAGS]));
+ mask = be32_to_cpu(nla_get_be32(tb[TCA_FLOWER_KEY_FLAGS_MASK]));
*flags_key = 0;
*flags_mask = 0;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 320b3d31fa97..b79a7e27bb31 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -263,7 +263,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
/*
* Step 3+n. If classifier selected a link sharing class,
* apply agency specific classifier.
- * Repeat this procdure until we hit a leaf node.
+ * Repeat this procedure until we hit a leaf node.
*/
head = cl;
}
@@ -859,7 +859,7 @@ cbq_dequeue(struct Qdisc *sch)
return NULL;
}
-/* CBQ class maintanance routines */
+/* CBQ class maintenance routines */
static void cbq_adjust_levels(struct cbq_class *this)
{
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 49eae93d1489..44991ea726fc 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -1325,6 +1325,48 @@ void dev_shutdown(struct net_device *dev)
WARN_ON(timer_pending(&dev->watchdog_timer));
}
+/**
+ * psched_ratecfg_precompute__() - Pre-compute values for reciprocal division
+ * @rate: Rate to compute reciprocal division values of
+ * @mult: Multiplier for reciprocal division
+ * @shift: Shift for reciprocal division
+ *
+ * The multiplier and shift for reciprocal division by rate are stored
+ * in mult and shift.
+ *
+ * The deal here is to replace a divide by a reciprocal one
+ * in fast path (a reciprocal divide is a multiply and a shift)
+ *
+ * Normal formula would be :
+ * time_in_ns = (NSEC_PER_SEC * len) / rate_bps
+ *
+ * We compute mult/shift to use instead :
+ * time_in_ns = (len * mult) >> shift;
+ *
+ * We try to get the highest possible mult value for accuracy,
+ * but have to make sure no overflows will ever happen.
+ *
+ * reciprocal_value() is not used here it doesn't handle 64-bit values.
+ */
+static void psched_ratecfg_precompute__(u64 rate, u32 *mult, u8 *shift)
+{
+ u64 factor = NSEC_PER_SEC;
+
+ *mult = 1;
+ *shift = 0;
+
+ if (rate <= 0)
+ return;
+
+ for (;;) {
+ *mult = div64_u64(factor, rate);
+ if (*mult & (1U << 31) || factor & (1ULL << 63))
+ break;
+ factor <<= 1;
+ (*shift)++;
+ }
+}
+
void psched_ratecfg_precompute(struct psched_ratecfg *r,
const struct tc_ratespec *conf,
u64 rate64)
@@ -1333,34 +1375,17 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r,
r->overhead = conf->overhead;
r->rate_bytes_ps = max_t(u64, conf->rate, rate64);
r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK);
- r->mult = 1;
- /*
- * The deal here is to replace a divide by a reciprocal one
- * in fast path (a reciprocal divide is a multiply and a shift)
- *
- * Normal formula would be :
- * time_in_ns = (NSEC_PER_SEC * len) / rate_bps
- *
- * We compute mult/shift to use instead :
- * time_in_ns = (len * mult) >> shift;
- *
- * We try to get the highest possible mult value for accuracy,
- * but have to make sure no overflows will ever happen.
- */
- if (r->rate_bytes_ps > 0) {
- u64 factor = NSEC_PER_SEC;
-
- for (;;) {
- r->mult = div64_u64(factor, r->rate_bytes_ps);
- if (r->mult & (1U << 31) || factor & (1ULL << 63))
- break;
- factor <<= 1;
- r->shift++;
- }
- }
+ psched_ratecfg_precompute__(r->rate_bytes_ps, &r->mult, &r->shift);
}
EXPORT_SYMBOL(psched_ratecfg_precompute);
+void psched_ppscfg_precompute(struct psched_pktrate *r, u64 pktrate64)
+{
+ r->rate_pkts_ps = pktrate64;
+ psched_ratecfg_precompute__(r->rate_pkts_ps, &r->mult, &r->shift);
+}
+EXPORT_SYMBOL(psched_ppscfg_precompute);
+
static void mini_qdisc_rcu_func(struct rcu_head *head)
{
}
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 8287894541e3..5c91df52b8c2 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -411,18 +411,10 @@ done:
return txtime;
}
-static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
- struct sk_buff **to_free)
+static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch,
+ struct Qdisc *child, struct sk_buff **to_free)
{
struct taprio_sched *q = qdisc_priv(sch);
- struct Qdisc *child;
- int queue;
-
- queue = skb_get_queue_mapping(skb);
-
- child = q->qdiscs[queue];
- if (unlikely(!child))
- return qdisc_drop(skb, sch, to_free);
if (skb->sk && sock_flag(skb->sk, SOCK_TXTIME)) {
if (!is_valid_interval(skb, sch))
@@ -439,6 +431,58 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
return qdisc_enqueue(skb, child, to_free);
}
+static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
+{
+ struct taprio_sched *q = qdisc_priv(sch);
+ struct Qdisc *child;
+ int queue;
+
+ queue = skb_get_queue_mapping(skb);
+
+ child = q->qdiscs[queue];
+ if (unlikely(!child))
+ return qdisc_drop(skb, sch, to_free);
+
+ /* Large packets might not be transmitted when the transmission duration
+ * exceeds any configured interval. Therefore, segment the skb into
+ * smaller chunks. Skip it for the full offload case, as the driver
+ * and/or the hardware is expected to handle this.
+ */
+ if (skb_is_gso(skb) && !FULL_OFFLOAD_IS_ENABLED(q->flags)) {
+ unsigned int slen = 0, numsegs = 0, len = qdisc_pkt_len(skb);
+ netdev_features_t features = netif_skb_features(skb);
+ struct sk_buff *segs, *nskb;
+ int ret;
+
+ segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
+ if (IS_ERR_OR_NULL(segs))
+ return qdisc_drop(skb, sch, to_free);
+
+ skb_list_walk_safe(segs, segs, nskb) {
+ skb_mark_not_on_list(segs);
+ qdisc_skb_cb(segs)->pkt_len = segs->len;
+ slen += segs->len;
+
+ ret = taprio_enqueue_one(segs, sch, child, to_free);
+ if (ret != NET_XMIT_SUCCESS) {
+ if (net_xmit_drop_count(ret))
+ qdisc_qstats_drop(sch);
+ } else {
+ numsegs++;
+ }
+ }
+
+ if (numsegs > 1)
+ qdisc_tree_reduce_backlog(sch, 1 - numsegs, len - slen);
+ consume_skb(skb);
+
+ return numsegs > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
+ }
+
+ return taprio_enqueue_one(skb, sch, child, to_free);
+}
+
static struct sk_buff *taprio_peek_soft(struct Qdisc *sch)
{
struct taprio_sched *q = qdisc_priv(sch);
@@ -901,6 +945,12 @@ static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb,
list_for_each_entry(entry, &new->entries, list)
cycle = ktime_add_ns(cycle, entry->interval);
+
+ if (!cycle) {
+ NL_SET_ERR_MSG(extack, "'cycle_time' can never be 0");
+ return -EINVAL;
+ }
+
new->cycle_time = cycle;
}