diff options
Diffstat (limited to 'net/sched')
-rw-r--r-- | net/sched/cls_api.c | 57 | ||||
-rw-r--r-- | net/sched/cls_bpf.c | 2 | ||||
-rw-r--r-- | net/sched/cls_flow.c | 3 | ||||
-rw-r--r-- | net/sched/cls_flower.c | 2 | ||||
-rw-r--r-- | net/sched/cls_matchall.c | 2 | ||||
-rw-r--r-- | net/sched/cls_u32.c | 4 | ||||
-rw-r--r-- | net/sched/sch_api.c | 10 | ||||
-rw-r--r-- | net/sched/sch_cake.c | 183 | ||||
-rw-r--r-- | net/sched/sch_codel.c | 5 | ||||
-rw-r--r-- | net/sched/sch_ets.c | 2 | ||||
-rw-r--r-- | net/sched/sch_fifo.c | 3 | ||||
-rw-r--r-- | net/sched/sch_fq.c | 14 | ||||
-rw-r--r-- | net/sched/sch_fq_codel.c | 3 | ||||
-rw-r--r-- | net/sched/sch_fq_pie.c | 6 | ||||
-rw-r--r-- | net/sched/sch_generic.c | 59 | ||||
-rw-r--r-- | net/sched/sch_gred.c | 4 | ||||
-rw-r--r-- | net/sched/sch_netem.c | 22 | ||||
-rw-r--r-- | net/sched/sch_pie.c | 5 | ||||
-rw-r--r-- | net/sched/sch_red.c | 4 | ||||
-rw-r--r-- | net/sched/sch_sfb.c | 4 | ||||
-rw-r--r-- | net/sched/sch_sfq.c | 4 |
21 files changed, 223 insertions, 175 deletions
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 7578e27260c9..8e47e5355be6 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -390,6 +390,7 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol, tp->protocol = protocol; tp->prio = prio; tp->chain = chain; + tp->usesw = !tp->ops->reoffload; spin_lock_init(&tp->lock); refcount_set(&tp->refcnt, 1); @@ -410,39 +411,31 @@ static void tcf_proto_get(struct tcf_proto *tp) refcount_inc(&tp->refcnt); } -static void tcf_maintain_bypass(struct tcf_block *block) +static void tcf_proto_count_usesw(struct tcf_proto *tp, bool add) { - int filtercnt = atomic_read(&block->filtercnt); - int skipswcnt = atomic_read(&block->skipswcnt); - bool bypass_wanted = filtercnt > 0 && filtercnt == skipswcnt; - - if (bypass_wanted != block->bypass_wanted) { #ifdef CONFIG_NET_CLS_ACT - if (bypass_wanted) - static_branch_inc(&tcf_bypass_check_needed_key); - else - static_branch_dec(&tcf_bypass_check_needed_key); -#endif - block->bypass_wanted = bypass_wanted; + struct tcf_block *block = tp->chain->block; + bool counted = false; + + if (!add) { + if (tp->usesw && tp->counted) { + if (!atomic_dec_return(&block->useswcnt)) + static_branch_dec(&tcf_sw_enabled_key); + tp->counted = false; + } + return; } -} - -static void tcf_block_filter_cnt_update(struct tcf_block *block, bool *counted, bool add) -{ - lockdep_assert_not_held(&block->cb_lock); - down_write(&block->cb_lock); - if (*counted != add) { - if (add) { - atomic_inc(&block->filtercnt); - *counted = true; - } else { - atomic_dec(&block->filtercnt); - *counted = false; - } + spin_lock(&tp->lock); + if (tp->usesw && !tp->counted) { + counted = true; + tp->counted = true; } - tcf_maintain_bypass(block); - up_write(&block->cb_lock); + spin_unlock(&tp->lock); + + if (counted && atomic_inc_return(&block->useswcnt) == 1) + static_branch_inc(&tcf_sw_enabled_key); +#endif } static void tcf_chain_put(struct tcf_chain *chain); @@ -451,7 +444,7 @@ static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held, bool sig_destroy, struct netlink_ext_ack *extack) { tp->ops->destroy(tp, rtnl_held, extack); - tcf_block_filter_cnt_update(tp->chain->block, &tp->counted, false); + tcf_proto_count_usesw(tp, false); if (sig_destroy) tcf_proto_signal_destroyed(tp->chain, tp); tcf_chain_put(tp->chain); @@ -2409,7 +2402,7 @@ replay: tfilter_notify(net, skb, n, tp, block, q, parent, fh, RTM_NEWTFILTER, false, rtnl_held, extack); tfilter_put(tp, fh); - tcf_block_filter_cnt_update(block, &tp->counted, true); + tcf_proto_count_usesw(tp, true); /* q pointer is NULL for shared blocks */ if (q) q->flags &= ~TCQ_F_CAN_BYPASS; @@ -3532,8 +3525,6 @@ static void tcf_block_offload_inc(struct tcf_block *block, u32 *flags) if (*flags & TCA_CLS_FLAGS_IN_HW) return; *flags |= TCA_CLS_FLAGS_IN_HW; - if (tc_skip_sw(*flags)) - atomic_inc(&block->skipswcnt); atomic_inc(&block->offloadcnt); } @@ -3542,8 +3533,6 @@ static void tcf_block_offload_dec(struct tcf_block *block, u32 *flags) if (!(*flags & TCA_CLS_FLAGS_IN_HW)) return; *flags &= ~TCA_CLS_FLAGS_IN_HW; - if (tc_skip_sw(*flags)) - atomic_dec(&block->skipswcnt); atomic_dec(&block->offloadcnt); } diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 1941ebec23ff..7fbe42f0e5c2 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -509,6 +509,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, if (!tc_in_hw(prog->gen_flags)) prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW; + tcf_proto_update_usesw(tp, prog->gen_flags); + if (oldprog) { idr_replace(&head->handle_idr, prog, handle); list_replace_rcu(&oldprog->link, &prog->link); diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 5502998aace7..5c2580a07530 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -356,7 +356,8 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = { [TCA_FLOW_KEYS] = { .type = NLA_U32 }, [TCA_FLOW_MODE] = { .type = NLA_U32 }, [TCA_FLOW_BASECLASS] = { .type = NLA_U32 }, - [TCA_FLOW_RSHIFT] = { .type = NLA_U32 }, + [TCA_FLOW_RSHIFT] = NLA_POLICY_MAX(NLA_U32, + 31 /* BITS_PER_U32 - 1 */), [TCA_FLOW_ADDEND] = { .type = NLA_U32 }, [TCA_FLOW_MASK] = { .type = NLA_U32 }, [TCA_FLOW_XOR] = { .type = NLA_U32 }, diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 1008ec8a464c..03505673d523 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -2503,6 +2503,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, if (!tc_in_hw(fnew->flags)) fnew->flags |= TCA_CLS_FLAGS_NOT_IN_HW; + tcf_proto_update_usesw(tp, fnew->flags); + spin_lock(&tp->lock); /* tp was deleted concurrently. -EAGAIN will cause caller to lookup diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 9f1e62ca508d..f03bf5da39ee 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -228,6 +228,8 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, if (!tc_in_hw(new->flags)) new->flags |= TCA_CLS_FLAGS_NOT_IN_HW; + tcf_proto_update_usesw(tp, new->flags); + *arg = head; rcu_assign_pointer(tp->root, new); return 0; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index d3a03c57545b..2a1c00048fd6 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -951,6 +951,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, if (!tc_in_hw(new->flags)) new->flags |= TCA_CLS_FLAGS_NOT_IN_HW; + tcf_proto_update_usesw(tp, new->flags); + u32_replace_knode(tp, tp_c, new); tcf_unbind_filter(tp, &n->res); tcf_exts_get_net(&n->exts); @@ -1164,6 +1166,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, if (!tc_in_hw(n->flags)) n->flags |= TCA_CLS_FLAGS_NOT_IN_HW; + tcf_proto_update_usesw(tp, n->flags); + ins = &ht->ht[TC_U32_HASH(handle)]; for (pins = rtnl_dereference(*ins); pins; ins = &pins->next, pins = rtnl_dereference(*ins)) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 300430b8c4d2..e3e91cf867eb 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1560,7 +1560,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, } if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) { - NL_SET_ERR_MSG(extack, "Invalid qdisc name"); + NL_SET_ERR_MSG(extack, "Invalid qdisc name: must match existing qdisc"); return -EINVAL; } @@ -1664,13 +1664,17 @@ replay: q = qdisc_lookup(dev, tcm->tcm_handle); if (!q) goto create_n_graft; + if (q->parent != tcm->tcm_parent) { + NL_SET_ERR_MSG(extack, "Cannot move an existing qdisc to a different parent"); + return -EINVAL; + } if (n->nlmsg_flags & NLM_F_EXCL) { NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override"); return -EEXIST; } if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) { - NL_SET_ERR_MSG(extack, "Invalid qdisc name"); + NL_SET_ERR_MSG(extack, "Invalid qdisc name: must match existing qdisc"); return -EINVAL; } if (q->flags & TCQ_F_INGRESS) { @@ -1746,7 +1750,7 @@ replay: return -EEXIST; } if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) { - NL_SET_ERR_MSG(extack, "Invalid qdisc name"); + NL_SET_ERR_MSG(extack, "Invalid qdisc name: must match existing qdisc"); return -EINVAL; } err = qdisc_change(q, tca, extack); diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 8d8b2db4653c..48dd8c88903f 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -484,13 +484,14 @@ static bool cobalt_queue_empty(struct cobalt_vars *vars, /* Call this with a freshly dequeued packet for possible congestion marking. * Returns true as an instruction to drop the packet, false for delivery. */ -static bool cobalt_should_drop(struct cobalt_vars *vars, - struct cobalt_params *p, - ktime_t now, - struct sk_buff *skb, - u32 bulk_flows) +static enum skb_drop_reason cobalt_should_drop(struct cobalt_vars *vars, + struct cobalt_params *p, + ktime_t now, + struct sk_buff *skb, + u32 bulk_flows) { - bool next_due, over_target, drop = false; + enum skb_drop_reason reason = SKB_NOT_DROPPED_YET; + bool next_due, over_target; ktime_t schedule; u64 sojourn; @@ -533,7 +534,8 @@ static bool cobalt_should_drop(struct cobalt_vars *vars, if (next_due && vars->dropping) { /* Use ECN mark if possible, otherwise drop */ - drop = !(vars->ecn_marked = INET_ECN_set_ce(skb)); + if (!(vars->ecn_marked = INET_ECN_set_ce(skb))) + reason = SKB_DROP_REASON_QDISC_CONGESTED; vars->count++; if (!vars->count) @@ -556,16 +558,17 @@ static bool cobalt_should_drop(struct cobalt_vars *vars, } /* Simple BLUE implementation. Lack of ECN is deliberate. */ - if (vars->p_drop) - drop |= (get_random_u32() < vars->p_drop); + if (vars->p_drop && reason == SKB_NOT_DROPPED_YET && + get_random_u32() < vars->p_drop) + reason = SKB_DROP_REASON_CAKE_FLOOD; /* Overload the drop_next field as an activity timeout */ if (!vars->count) vars->drop_next = ktime_add_ns(now, p->interval); - else if (ktime_to_ns(schedule) > 0 && !drop) + else if (ktime_to_ns(schedule) > 0 && reason == SKB_NOT_DROPPED_YET) vars->drop_next = now; - return drop; + return reason; } static bool cake_update_flowkeys(struct flow_keys *keys, @@ -627,6 +630,63 @@ static bool cake_ddst(int flow_mode) return (flow_mode & CAKE_FLOW_DUAL_DST) == CAKE_FLOW_DUAL_DST; } +static void cake_dec_srchost_bulk_flow_count(struct cake_tin_data *q, + struct cake_flow *flow, + int flow_mode) +{ + if (likely(cake_dsrc(flow_mode) && + q->hosts[flow->srchost].srchost_bulk_flow_count)) + q->hosts[flow->srchost].srchost_bulk_flow_count--; +} + +static void cake_inc_srchost_bulk_flow_count(struct cake_tin_data *q, + struct cake_flow *flow, + int flow_mode) +{ + if (likely(cake_dsrc(flow_mode) && + q->hosts[flow->srchost].srchost_bulk_flow_count < CAKE_QUEUES)) + q->hosts[flow->srchost].srchost_bulk_flow_count++; +} + +static void cake_dec_dsthost_bulk_flow_count(struct cake_tin_data *q, + struct cake_flow *flow, + int flow_mode) +{ + if (likely(cake_ddst(flow_mode) && + q->hosts[flow->dsthost].dsthost_bulk_flow_count)) + q->hosts[flow->dsthost].dsthost_bulk_flow_count--; +} + +static void cake_inc_dsthost_bulk_flow_count(struct cake_tin_data *q, + struct cake_flow *flow, + int flow_mode) +{ + if (likely(cake_ddst(flow_mode) && + q->hosts[flow->dsthost].dsthost_bulk_flow_count < CAKE_QUEUES)) + q->hosts[flow->dsthost].dsthost_bulk_flow_count++; +} + +static u16 cake_get_flow_quantum(struct cake_tin_data *q, + struct cake_flow *flow, + int flow_mode) +{ + u16 host_load = 1; + + if (cake_dsrc(flow_mode)) + host_load = max(host_load, + q->hosts[flow->srchost].srchost_bulk_flow_count); + + if (cake_ddst(flow_mode)) + host_load = max(host_load, + q->hosts[flow->dsthost].dsthost_bulk_flow_count); + + /* The get_random_u16() is a way to apply dithering to avoid + * accumulating roundoff errors + */ + return (q->flow_quantum * quantum_div[host_load] + + get_random_u16()) >> 16; +} + static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb, int flow_mode, u16 flow_override, u16 host_override) { @@ -773,10 +833,8 @@ skip_hash: allocate_dst = cake_ddst(flow_mode); if (q->flows[outer_hash + k].set == CAKE_SET_BULK) { - if (allocate_src) - q->hosts[q->flows[reduced_hash].srchost].srchost_bulk_flow_count--; - if (allocate_dst) - q->hosts[q->flows[reduced_hash].dsthost].dsthost_bulk_flow_count--; + cake_dec_srchost_bulk_flow_count(q, &q->flows[outer_hash + k], flow_mode); + cake_dec_dsthost_bulk_flow_count(q, &q->flows[outer_hash + k], flow_mode); } found: /* reserve queue for future packets in same flow */ @@ -801,9 +859,10 @@ found: q->hosts[outer_hash + k].srchost_tag = srchost_hash; found_src: srchost_idx = outer_hash + k; - if (q->flows[reduced_hash].set == CAKE_SET_BULK) - q->hosts[srchost_idx].srchost_bulk_flow_count++; q->flows[reduced_hash].srchost = srchost_idx; + + if (q->flows[reduced_hash].set == CAKE_SET_BULK) + cake_inc_srchost_bulk_flow_count(q, &q->flows[reduced_hash], flow_mode); } if (allocate_dst) { @@ -824,9 +883,10 @@ found_src: q->hosts[outer_hash + k].dsthost_tag = dsthost_hash; found_dst: dsthost_idx = outer_hash + k; - if (q->flows[reduced_hash].set == CAKE_SET_BULK) - q->hosts[dsthost_idx].dsthost_bulk_flow_count++; q->flows[reduced_hash].dsthost = dsthost_idx; + + if (q->flows[reduced_hash].set == CAKE_SET_BULK) + cake_inc_dsthost_bulk_flow_count(q, &q->flows[reduced_hash], flow_mode); } } @@ -1528,12 +1588,11 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free) flow->dropped++; b->tin_dropped++; - sch->qstats.drops++; if (q->rate_flags & CAKE_FLAG_INGRESS) cake_advance_shaper(q, b, skb, now, true); - __qdisc_drop(skb, to_free); + qdisc_drop_reason(skb, sch, to_free, SKB_DROP_REASON_QDISC_OVERLIMIT); sch->q.qlen--; qdisc_tree_reduce_backlog(sch, 1, len); @@ -1839,10 +1898,6 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, /* flowchain */ if (!flow->set || flow->set == CAKE_SET_DECAYING) { - struct cake_host *srchost = &b->hosts[flow->srchost]; - struct cake_host *dsthost = &b->hosts[flow->dsthost]; - u16 host_load = 1; - if (!flow->set) { list_add_tail(&flow->flowchain, &b->new_flows); } else { @@ -1852,18 +1907,8 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, flow->set = CAKE_SET_SPARSE; b->sparse_flow_count++; - if (cake_dsrc(q->flow_mode)) - host_load = max(host_load, srchost->srchost_bulk_flow_count); - - if (cake_ddst(q->flow_mode)) - host_load = max(host_load, dsthost->dsthost_bulk_flow_count); - - flow->deficit = (b->flow_quantum * - quantum_div[host_load]) >> 16; + flow->deficit = cake_get_flow_quantum(b, flow, q->flow_mode); } else if (flow->set == CAKE_SET_SPARSE_WAIT) { - struct cake_host *srchost = &b->hosts[flow->srchost]; - struct cake_host *dsthost = &b->hosts[flow->dsthost]; - /* this flow was empty, accounted as a sparse flow, but actually * in the bulk rotation. */ @@ -1871,12 +1916,8 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, b->sparse_flow_count--; b->bulk_flow_count++; - if (cake_dsrc(q->flow_mode)) - srchost->srchost_bulk_flow_count++; - - if (cake_ddst(q->flow_mode)) - dsthost->dsthost_bulk_flow_count++; - + cake_inc_srchost_bulk_flow_count(b, flow, q->flow_mode); + cake_inc_dsthost_bulk_flow_count(b, flow, q->flow_mode); } if (q->buffer_used > q->buffer_max_used) @@ -1926,20 +1967,19 @@ static void cake_clear_tin(struct Qdisc *sch, u16 tin) q->cur_tin = tin; for (q->cur_flow = 0; q->cur_flow < CAKE_QUEUES; q->cur_flow++) while (!!(skb = cake_dequeue_one(sch))) - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_QUEUE_PURGE); } static struct sk_buff *cake_dequeue(struct Qdisc *sch) { struct cake_sched_data *q = qdisc_priv(sch); struct cake_tin_data *b = &q->tins[q->cur_tin]; - struct cake_host *srchost, *dsthost; + enum skb_drop_reason reason; ktime_t now = ktime_get(); struct cake_flow *flow; struct list_head *head; bool first_flow = true; struct sk_buff *skb; - u16 host_load; u64 delay; u32 len; @@ -2039,11 +2079,6 @@ retry: q->cur_flow = flow - b->flows; first_flow = false; - /* triple isolation (modified DRR++) */ - srchost = &b->hosts[flow->srchost]; - dsthost = &b->hosts[flow->dsthost]; - host_load = 1; - /* flow isolation (DRR++) */ if (flow->deficit <= 0) { /* Keep all flows with deficits out of the sparse and decaying @@ -2055,11 +2090,8 @@ retry: b->sparse_flow_count--; b->bulk_flow_count++; - if (cake_dsrc(q->flow_mode)) - srchost->srchost_bulk_flow_count++; - - if (cake_ddst(q->flow_mode)) - dsthost->dsthost_bulk_flow_count++; + cake_inc_srchost_bulk_flow_count(b, flow, q->flow_mode); + cake_inc_dsthost_bulk_flow_count(b, flow, q->flow_mode); flow->set = CAKE_SET_BULK; } else { @@ -2071,19 +2103,7 @@ retry: } } - if (cake_dsrc(q->flow_mode)) - host_load = max(host_load, srchost->srchost_bulk_flow_count); - - if (cake_ddst(q->flow_mode)) - host_load = max(host_load, dsthost->dsthost_bulk_flow_count); - - WARN_ON(host_load > CAKE_QUEUES); - - /* The get_random_u16() is a way to apply dithering to avoid - * accumulating roundoff errors - */ - flow->deficit += (b->flow_quantum * quantum_div[host_load] + - get_random_u16()) >> 16; + flow->deficit += cake_get_flow_quantum(b, flow, q->flow_mode); list_move_tail(&flow->flowchain, &b->old_flows); goto retry; @@ -2107,11 +2127,8 @@ retry: if (flow->set == CAKE_SET_BULK) { b->bulk_flow_count--; - if (cake_dsrc(q->flow_mode)) - srchost->srchost_bulk_flow_count--; - - if (cake_ddst(q->flow_mode)) - dsthost->dsthost_bulk_flow_count--; + cake_dec_srchost_bulk_flow_count(b, flow, q->flow_mode); + cake_dec_dsthost_bulk_flow_count(b, flow, q->flow_mode); b->decaying_flow_count++; } else if (flow->set == CAKE_SET_SPARSE || @@ -2129,12 +2146,8 @@ retry: else if (flow->set == CAKE_SET_BULK) { b->bulk_flow_count--; - if (cake_dsrc(q->flow_mode)) - srchost->srchost_bulk_flow_count--; - - if (cake_ddst(q->flow_mode)) - dsthost->dsthost_bulk_flow_count--; - + cake_dec_srchost_bulk_flow_count(b, flow, q->flow_mode); + cake_dec_dsthost_bulk_flow_count(b, flow, q->flow_mode); } else b->decaying_flow_count--; @@ -2143,12 +2156,12 @@ retry: goto begin; } + reason = cobalt_should_drop(&flow->cvars, &b->cparams, now, skb, + (b->bulk_flow_count * + !!(q->rate_flags & + CAKE_FLAG_INGRESS))); /* Last packet in queue may be marked, shouldn't be dropped */ - if (!cobalt_should_drop(&flow->cvars, &b->cparams, now, skb, - (b->bulk_flow_count * - !!(q->rate_flags & - CAKE_FLAG_INGRESS))) || - !flow->head) + if (reason == SKB_NOT_DROPPED_YET || !flow->head) break; /* drop this packet, get another one */ @@ -2162,7 +2175,7 @@ retry: b->tin_dropped++; qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb)); qdisc_qstats_drop(sch); - kfree_skb(skb); + kfree_skb_reason(skb, reason); if (q->rate_flags & CAKE_FLAG_INGRESS) goto retry; } diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index 3e8d4fe4d91e..81189d02fee7 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -52,7 +52,7 @@ static void drop_func(struct sk_buff *skb, void *ctx) { struct Qdisc *sch = ctx; - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_QDISC_CONGESTED); qdisc_qstats_drop(sch); } @@ -89,7 +89,8 @@ static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, } q = qdisc_priv(sch); q->drop_overlimit++; - return qdisc_drop(skb, sch, to_free); + return qdisc_drop_reason(skb, sch, to_free, + SKB_DROP_REASON_QDISC_OVERLIMIT); } static const struct nla_policy codel_policy[TCA_CODEL_MAX + 1] = { diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index f80bc05d4c5a..516038a44163 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -91,6 +91,8 @@ ets_class_from_arg(struct Qdisc *sch, unsigned long arg) { struct ets_sched *q = qdisc_priv(sch); + if (arg == 0 || arg > q->nbands) + return NULL; return &q->classes[arg - 1]; } diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index b50b2c2cc09b..e6bfd39ff339 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -40,6 +40,9 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch, { unsigned int prev_backlog; + if (unlikely(READ_ONCE(sch->limit) == 0)) + return qdisc_drop(skb, sch, to_free); + if (likely(sch->q.qlen < READ_ONCE(sch->limit))) return qdisc_enqueue_tail(skb, sch); diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index a5e87f9ea986..2ca5332cfcc5 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -537,6 +537,8 @@ static bool fq_packet_beyond_horizon(const struct sk_buff *skb, return unlikely((s64)skb->tstamp > (s64)(now + q->horizon)); } +#define FQDR(reason) SKB_DROP_REASON_FQ_##reason + static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { @@ -548,7 +550,8 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, band = fq_prio2band(q->prio2band, skb->priority & TC_PRIO_MAX); if (unlikely(q->band_pkt_count[band] >= sch->limit)) { q->stat_band_drops[band]++; - return qdisc_drop(skb, sch, to_free); + return qdisc_drop_reason(skb, sch, to_free, + FQDR(BAND_LIMIT)); } now = ktime_get_ns(); @@ -558,8 +561,9 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, /* Check if packet timestamp is too far in the future. */ if (fq_packet_beyond_horizon(skb, q, now)) { if (q->horizon_drop) { - q->stat_horizon_drops++; - return qdisc_drop(skb, sch, to_free); + q->stat_horizon_drops++; + return qdisc_drop_reason(skb, sch, to_free, + FQDR(HORIZON_LIMIT)); } q->stat_horizon_caps++; skb->tstamp = now + q->horizon; @@ -572,7 +576,8 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (f != &q->internal) { if (unlikely(f->qlen >= q->flow_plimit)) { q->stat_flows_plimit++; - return qdisc_drop(skb, sch, to_free); + return qdisc_drop_reason(skb, sch, to_free, + FQDR(FLOW_LIMIT)); } if (fq_flow_is_detached(f)) { @@ -597,6 +602,7 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, return NET_XMIT_SUCCESS; } +#undef FQDR static void fq_check_throttled(struct fq_sched_data *q, u64 now) { diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 4f908c11ba95..799f5397ad4c 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -168,6 +168,7 @@ static unsigned int fq_codel_drop(struct Qdisc *sch, unsigned int max_packets, skb = dequeue_head(flow); len += qdisc_pkt_len(skb); mem += get_codel_cb(skb)->mem_usage; + tcf_set_drop_reason(skb, SKB_DROP_REASON_QDISC_OVERLIMIT); __qdisc_drop(skb, to_free); } while (++i < max_packets && len < threshold); @@ -274,7 +275,7 @@ static void drop_func(struct sk_buff *skb, void *ctx) { struct Qdisc *sch = ctx; - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_QDISC_CONGESTED); qdisc_qstats_drop(sch); } diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index c38f33ff80bd..93c36afbf576 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -130,6 +130,7 @@ static inline void flow_queue_add(struct fq_pie_flow *flow, static int fq_pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { + enum skb_drop_reason reason = SKB_DROP_REASON_QDISC_OVERLIMIT; struct fq_pie_sched_data *q = qdisc_priv(sch); struct fq_pie_flow *sel_flow; int ret; @@ -161,6 +162,8 @@ static int fq_pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, q->overmemory++; } + reason = SKB_DROP_REASON_QDISC_CONGESTED; + if (!pie_drop_early(sch, &q->p_params, &sel_flow->vars, sel_flow->backlog, skb->len)) { enqueue = true; @@ -198,8 +201,7 @@ static int fq_pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, out: q->stats.dropped++; sel_flow->vars.accu_prob = 0; - __qdisc_drop(skb, to_free); - qdisc_qstats_drop(sch); + qdisc_drop_reason(skb, sch, to_free, reason); return NET_XMIT_CN; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 38ec18f73de4..14ab2f4c190a 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -551,25 +551,20 @@ static void dev_watchdog(struct timer_list *t) netdev_put(dev, &dev->watchdog_dev_tracker); } -void __netdev_watchdog_up(struct net_device *dev) -{ - if (dev->netdev_ops->ndo_tx_timeout) { - if (dev->watchdog_timeo <= 0) - dev->watchdog_timeo = 5*HZ; - if (!mod_timer(&dev->watchdog_timer, - round_jiffies(jiffies + dev->watchdog_timeo))) - netdev_hold(dev, &dev->watchdog_dev_tracker, - GFP_ATOMIC); - } -} -EXPORT_SYMBOL_GPL(__netdev_watchdog_up); - -static void dev_watchdog_up(struct net_device *dev) +void netdev_watchdog_up(struct net_device *dev) { - __netdev_watchdog_up(dev); + if (!dev->netdev_ops->ndo_tx_timeout) + return; + if (dev->watchdog_timeo <= 0) + dev->watchdog_timeo = 5*HZ; + if (!mod_timer(&dev->watchdog_timer, + round_jiffies(jiffies + dev->watchdog_timeo))) + netdev_hold(dev, &dev->watchdog_dev_tracker, + GFP_ATOMIC); } +EXPORT_SYMBOL_GPL(netdev_watchdog_up); -static void dev_watchdog_down(struct net_device *dev) +static void netdev_watchdog_down(struct net_device *dev) { netif_tx_lock_bh(dev); if (del_timer(&dev->watchdog_timer)) @@ -591,7 +586,7 @@ void netif_carrier_on(struct net_device *dev) atomic_inc(&dev->carrier_up_count); linkwatch_fire_event(dev); if (netif_running(dev)) - __netdev_watchdog_up(dev); + netdev_watchdog_up(dev); } } EXPORT_SYMBOL(netif_carrier_on); @@ -911,8 +906,8 @@ static int pfifo_fast_change_tx_queue_len(struct Qdisc *sch, bands[prio] = q; } - return skb_array_resize_multiple(bands, PFIFO_FAST_BANDS, new_len, - GFP_KERNEL); + return skb_array_resize_multiple_bh(bands, PFIFO_FAST_BANDS, new_len, + GFP_KERNEL); } struct Qdisc_ops pfifo_fast_ops __read_mostly = { @@ -1267,7 +1262,7 @@ void dev_activate(struct net_device *dev) if (need_watchdog) { netif_trans_update(dev); - dev_watchdog_up(dev); + netdev_watchdog_up(dev); } } EXPORT_SYMBOL(dev_activate); @@ -1282,15 +1277,17 @@ static void qdisc_deactivate(struct Qdisc *qdisc) static void dev_deactivate_queue(struct net_device *dev, struct netdev_queue *dev_queue, - void *_qdisc_default) + void *_sync_needed) { - struct Qdisc *qdisc_default = _qdisc_default; + bool *sync_needed = _sync_needed; struct Qdisc *qdisc; qdisc = rtnl_dereference(dev_queue->qdisc); if (qdisc) { + if (qdisc->enqueue) + *sync_needed = true; qdisc_deactivate(qdisc); - rcu_assign_pointer(dev_queue->qdisc, qdisc_default); + rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc); } } @@ -1357,24 +1354,22 @@ static bool some_qdisc_is_busy(struct net_device *dev) */ void dev_deactivate_many(struct list_head *head) { + bool sync_needed = false; struct net_device *dev; list_for_each_entry(dev, head, close_list) { netdev_for_each_tx_queue(dev, dev_deactivate_queue, - &noop_qdisc); + &sync_needed); if (dev_ingress_queue(dev)) dev_deactivate_queue(dev, dev_ingress_queue(dev), - &noop_qdisc); + &sync_needed); - dev_watchdog_down(dev); + netdev_watchdog_down(dev); } - /* Wait for outstanding qdisc-less dev_queue_xmit calls or - * outstanding qdisc enqueuing calls. - * This is avoided if all devices are in dismantle phase : - * Caller will call synchronize_net() for us - */ - synchronize_net(); + /* Wait for outstanding qdisc enqueuing calls. */ + if (sync_needed) + synchronize_net(); list_for_each_entry(dev, head, close_list) { netdev_for_each_tx_queue(dev, dev_reset_queue, NULL); diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 7d2151c62c4a..ab6234b4fcd5 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -251,10 +251,10 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch, q->stats.pdrop++; drop: - return qdisc_drop(skb, sch, to_free); + return qdisc_drop_reason(skb, sch, to_free, SKB_DROP_REASON_QDISC_OVERLIMIT); congestion_drop: - qdisc_drop(skb, sch, to_free); + qdisc_drop_reason(skb, sch, to_free, SKB_DROP_REASON_QDISC_CONGESTED); return NET_XMIT_CN; } diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index fe6fed291a7b..fdd79d3ccd8c 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -79,6 +79,8 @@ struct netem_sched_data { struct sk_buff *t_head; struct sk_buff *t_tail; + u32 t_len; + /* optional qdisc for classful handling (NULL at netem init) */ struct Qdisc *qdisc; @@ -383,6 +385,7 @@ static void tfifo_reset(struct Qdisc *sch) rtnl_kfree_skbs(q->t_head, q->t_tail); q->t_head = NULL; q->t_tail = NULL; + q->t_len = 0; } static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) @@ -412,6 +415,7 @@ static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) rb_link_node(&nskb->rbnode, parent, p); rb_insert_color(&nskb->rbnode, &q->t_root); } + q->t_len++; sch->q.qlen++; } @@ -518,7 +522,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, 1<<get_random_u32_below(8); } - if (unlikely(sch->q.qlen >= sch->limit)) { + if (unlikely(q->t_len >= sch->limit)) { /* re-link segs, so that qdisc_drop_all() frees them all */ skb->next = segs; qdisc_drop_all(skb, sch, to_free); @@ -702,8 +706,8 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) tfifo_dequeue: skb = __qdisc_dequeue_head(&sch->q); if (skb) { - qdisc_qstats_backlog_dec(sch, skb); deliver: + qdisc_qstats_backlog_dec(sch, skb); qdisc_bstats_update(sch, skb); return skb; } @@ -719,8 +723,7 @@ deliver: if (time_to_send <= now && q->slot.slot_next <= now) { netem_erase_head(q, skb); - sch->q.qlen--; - qdisc_qstats_backlog_dec(sch, skb); + q->t_len--; skb->next = NULL; skb->prev = NULL; /* skb->dev shares skb->rbnode area, @@ -746,17 +749,22 @@ deliver: if (err != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(err)) qdisc_qstats_drop(sch); + sch->qstats.backlog -= pkt_len; + sch->q.qlen--; qdisc_tree_reduce_backlog(sch, 1, pkt_len); } goto tfifo_dequeue; } + sch->q.qlen--; goto deliver; } if (q->qdisc) { skb = q->qdisc->ops->dequeue(q->qdisc); - if (skb) + if (skb) { + sch->q.qlen--; goto deliver; + } } qdisc_watchdog_schedule_ns(&q->watchdog, @@ -766,8 +774,10 @@ deliver: if (q->qdisc) { skb = q->qdisc->ops->dequeue(q->qdisc); - if (skb) + if (skb) { + sch->q.qlen--; goto deliver; + } } return NULL; } diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index b3dcb845b327..bb1fa9aa530b 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -85,6 +85,7 @@ EXPORT_SYMBOL_GPL(pie_drop_early); static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { + enum skb_drop_reason reason = SKB_DROP_REASON_QDISC_OVERLIMIT; struct pie_sched_data *q = qdisc_priv(sch); bool enqueue = false; @@ -93,6 +94,8 @@ static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, goto out; } + reason = SKB_DROP_REASON_QDISC_CONGESTED; + if (!pie_drop_early(sch, &q->params, &q->vars, sch->qstats.backlog, skb->len)) { enqueue = true; @@ -121,7 +124,7 @@ static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, out: q->stats.dropped++; q->vars.accu_prob = 0; - return qdisc_drop(skb, sch, to_free); + return qdisc_drop_reason(skb, sch, to_free, reason); } static const struct nla_policy pie_policy[TCA_PIE_MAX + 1] = { diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 6029bc29b51e..ef8a2afed26b 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -70,6 +70,7 @@ static int red_use_nodrop(struct red_sched_data *q) static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { + enum skb_drop_reason reason = SKB_DROP_REASON_QDISC_CONGESTED; struct red_sched_data *q = qdisc_priv(sch); struct Qdisc *child = q->qdisc; unsigned int len; @@ -107,6 +108,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, break; case RED_HARD_MARK: + reason = SKB_DROP_REASON_QDISC_OVERLIMIT; qdisc_qstats_overlimit(sch); if (red_use_harddrop(q) || !red_use_ecn(q)) { q->stats.forced_drop++; @@ -143,7 +145,7 @@ congestion_drop: if (!skb) return NET_XMIT_CN | ret; - qdisc_drop(skb, sch, to_free); + qdisc_drop_reason(skb, sch, to_free, reason); return NET_XMIT_CN; } diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index b717e15a3a17..d2835f1168e1 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -280,6 +280,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { + enum skb_drop_reason reason = SKB_DROP_REASON_QDISC_OVERLIMIT; struct sfb_sched_data *q = qdisc_priv(sch); unsigned int len = qdisc_pkt_len(skb); struct Qdisc *child = q->qdisc; @@ -380,6 +381,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, } r = get_random_u16() & SFB_MAX_PROB; + reason = SKB_DROP_REASON_QDISC_CONGESTED; if (unlikely(r < p_min)) { if (unlikely(p_min > SFB_MAX_PROB / 2)) { @@ -414,7 +416,7 @@ enqueue: return ret; drop: - qdisc_drop(skb, sch, to_free); + qdisc_drop_reason(skb, sch, to_free, reason); return NET_XMIT_CN; other_drop: if (ret & __NET_XMIT_BYPASS) diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index a4b8296a2fa1..65d5b59da583 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -652,6 +652,10 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt, if (!p) return -ENOMEM; } + if (ctl->limit == 1) { + NL_SET_ERR_MSG_MOD(extack, "invalid limit"); + return -EINVAL; + } sch_tree_lock(sch); if (ctl->quantum) q->quantum = ctl->quantum; |