Diffstat (limited to 'net/sched')
-rw-r--r--  net/sched/Kconfig        |   4
-rw-r--r--  net/sched/act_ct.c       |   3
-rw-r--r--  net/sched/cls_flower.c   |  40
-rw-r--r--  net/sched/cls_fw.c       |   1
-rw-r--r--  net/sched/cls_route.c    |   1
-rw-r--r--  net/sched/cls_u32.c      |  57
-rw-r--r--  net/sched/em_meta.c      |   6
-rw-r--r--  net/sched/sch_api.c      |  53
-rw-r--r--  net/sched/sch_drr.c      |  11
-rw-r--r--  net/sched/sch_hfsc.c     |  14
-rw-r--r--  net/sched/sch_htb.c      |  17
-rw-r--r--  net/sched/sch_ingress.c  |  61
-rw-r--r--  net/sched/sch_netem.c    |  49
-rw-r--r--  net/sched/sch_qfq.c      |  12
-rw-r--r--  net/sched/sch_taprio.c   |  83
15 files changed, 310 insertions, 102 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 4b95cb1ac435..470c70deffe2 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -347,8 +347,7 @@ config NET_SCH_FQ_PIE
 config NET_SCH_INGRESS
 	tristate "Ingress/classifier-action Qdisc"
 	depends on NET_CLS_ACT
-	select NET_INGRESS
-	select NET_EGRESS
+	select NET_XGRESS
 	help
 	  Say Y here if you want to use classifiers for incoming and/or outgoing
 	  packets. This qdisc doesn't do anything else besides running classifiers,
@@ -679,6 +678,7 @@ config NET_EMATCH_IPT
 config NET_CLS_ACT
 	bool "Actions"
 	select NET_CLS
+	select NET_XGRESS
 	help
 	  Say Y here if you want to use traffic control actions. Actions
 	  get attached to classifiers and are invoked after a successful
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index abc71a06d634..7c652d14528b 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -1238,7 +1238,8 @@ static int tcf_ct_fill_params(struct net *net,
 		}
 	}
 
-	__set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
+	if (p->ct_action & TCA_CT_ACT_COMMIT)
+		__set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
 	return 0;
 err:
 	nf_ct_put(p->tmpl);
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 8da9d039d964..e5314a31f75a 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -72,6 +72,7 @@ struct fl_flow_key {
 	struct flow_dissector_key_num_of_vlans num_of_vlans;
 	struct flow_dissector_key_pppoe pppoe;
 	struct flow_dissector_key_l2tpv3 l2tpv3;
+	struct flow_dissector_key_ipsec ipsec;
 	struct flow_dissector_key_cfm cfm;
 } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
 
@@ -726,6 +727,8 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
 	[TCA_FLOWER_KEY_PPPOE_SID]	= { .type = NLA_U16 },
 	[TCA_FLOWER_KEY_PPP_PROTO]	= { .type = NLA_U16 },
 	[TCA_FLOWER_KEY_L2TPV3_SID]	= { .type = NLA_U32 },
+	[TCA_FLOWER_KEY_SPI]		= { .type = NLA_U32 },
+	[TCA_FLOWER_KEY_SPI_MASK]	= { .type = NLA_U32 },
 	[TCA_FLOWER_L2_MISS]		= NLA_POLICY_MAX(NLA_U8, 1),
 	[TCA_FLOWER_KEY_CFM]		= { .type = NLA_NESTED },
 };
@@ -776,7 +779,8 @@ mpls_stack_entry_policy[TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX + 1] = {
 	[TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL]    = { .type = NLA_U32 },
 };
 
-static const struct nla_policy cfm_opt_policy[TCA_FLOWER_KEY_CFM_OPT_MAX] = {
+static const struct nla_policy
+cfm_opt_policy[TCA_FLOWER_KEY_CFM_OPT_MAX + 1] = {
 	[TCA_FLOWER_KEY_CFM_MD_LEVEL]	= NLA_POLICY_MAX(NLA_U8,
 						FLOW_DIS_CFM_MDL_MAX),
 	[TCA_FLOWER_KEY_CFM_OPCODE]	= { .type = NLA_U8 },
@@ -795,6 +799,24 @@ static void fl_set_key_val(struct nlattr **tb,
 		nla_memcpy(mask, tb[mask_type], len);
 }
 
+static int fl_set_key_spi(struct nlattr **tb, struct fl_flow_key *key,
+			  struct fl_flow_key *mask,
+			  struct netlink_ext_ack *extack)
+{
+	if (key->basic.ip_proto != IPPROTO_ESP &&
+	    key->basic.ip_proto != IPPROTO_AH) {
+		NL_SET_ERR_MSG(extack,
+			       "Protocol must be either ESP or AH");
+		return -EINVAL;
+	}
+
+	fl_set_key_val(tb, &key->ipsec.spi,
+		       TCA_FLOWER_KEY_SPI,
+		       &mask->ipsec.spi, TCA_FLOWER_KEY_SPI_MASK,
+		       sizeof(key->ipsec.spi));
+	return 0;
+}
+
 static int fl_set_key_port_range(struct nlattr **tb, struct fl_flow_key *key,
 				 struct fl_flow_key *mask,
 				 struct netlink_ext_ack *extack)
@@ -1709,7 +1731,7 @@ static int fl_set_key_cfm(struct nlattr **tb,
 			  struct fl_flow_key *mask,
 			  struct netlink_ext_ack *extack)
 {
-	struct nlattr *nla_cfm_opt[TCA_FLOWER_KEY_CFM_OPT_MAX];
+	struct nlattr *nla_cfm_opt[TCA_FLOWER_KEY_CFM_OPT_MAX + 1];
 	int err;
 
 	if (!tb[TCA_FLOWER_KEY_CFM])
@@ -1894,6 +1916,12 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
 			return ret;
 	}
 
+	if (tb[TCA_FLOWER_KEY_SPI]) {
+		ret = fl_set_key_spi(tb, key, mask, extack);
+		if (ret)
+			return ret;
+	}
+
 	if (tb[TCA_FLOWER_KEY_ENC_IPV4_SRC] ||
 	    tb[TCA_FLOWER_KEY_ENC_IPV4_DST]) {
 		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
@@ -2067,6 +2095,8 @@ static void fl_init_dissector(struct flow_dissector *dissector,
 	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
 			     FLOW_DISSECTOR_KEY_L2TPV3, l2tpv3);
 	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
+			     FLOW_DISSECTOR_KEY_IPSEC, ipsec);
+	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
 			     FLOW_DISSECTOR_KEY_CFM, cfm);
 
 	skb_flow_dissector_init(dissector, keys, cnt);
@@ -3364,6 +3394,12 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net,
 				 sizeof(key->l2tpv3.session_id)))
 		goto nla_put_failure;
 
+	if (key->ipsec.spi &&
+	    fl_dump_key_val(skb, &key->ipsec.spi, TCA_FLOWER_KEY_SPI,
+			    &mask->ipsec.spi, TCA_FLOWER_KEY_SPI_MASK,
+			    sizeof(key->ipsec.spi)))
+		goto nla_put_failure;
+
 	if ((key->basic.ip_proto == IPPROTO_TCP ||
 	     key->basic.ip_proto == IPPROTO_UDP ||
 	     key->basic.ip_proto == IPPROTO_SCTP) &&
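The cfm_opt_policy fix above is the standard nla_policy sizing rule: policy arrays are indexed by attribute type, which runs from 0 through the _MAX constant inclusive, so the array needs _MAX + 1 slots. A stand-alone sketch (the DEMO_* constants are hypothetical, not from the kernel):

#include <stdio.h>

#define DEMO_ATTR_UNSPEC	0
#define DEMO_ATTR_MD_LEVEL	1
#define DEMO_ATTR_OPCODE	2
#define DEMO_ATTR_MAX		DEMO_ATTR_OPCODE

int main(void)
{
	/* sized with MAX: covers types 0..1, leaving the last type unpoliced */
	int policy_short[DEMO_ATTR_MAX];
	/* sized with MAX + 1: covers types 0..2, one slot per type */
	int policy_full[DEMO_ATTR_MAX + 1];

	printf("short: %zu slots, full: %zu slots\n",
	       sizeof(policy_short) / sizeof(int),
	       sizeof(policy_full) / sizeof(int));
	return 0;
}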
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 8641f8059317..c49d6af0e048 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -267,7 +267,6 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
 			return -ENOBUFS;
 
 		fnew->id = f->id;
-		fnew->res = f->res;
 		fnew->ifindex = f->ifindex;
 		fnew->tp = f->tp;
 
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index d0c53724d3e8..1e20bbd687f1 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -513,7 +513,6 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
 	if (fold) {
 		f->id = fold->id;
 		f->iif = fold->iif;
-		f->res = fold->res;
 		f->handle = fold->handle;
 		f->tp = fold->tp;
 
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 5abf31e432ca..da4c179a4d41 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -826,7 +826,6 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp,
 
 	new->ifindex = n->ifindex;
 	new->fshift = n->fshift;
-	new->res = n->res;
 	new->flags = n->flags;
 	RCU_INIT_POINTER(new->ht_down, ht);
 
@@ -1024,18 +1023,62 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
 		return -EINVAL;
 	}
 
+	/* At this point, we need to derive the new handle that will be used to
+	 * uniquely map the identity of this table match entry. The
+	 * identity of the entry that we need to construct is 32 bits made of:
+	 *     htid(12b):bucketid(8b):node/entryid(12b)
+	 *
+	 * At this point _we have the table(ht)_ in which we will insert this
+	 * entry. We carry the table's id in variable "htid".
+	 * Note that earlier code picked the ht selection either by a) the user
+	 * providing the htid specified via the TCA_U32_HASH attribute or b) when
+	 * no such attribute is passed, then the root ht is used by default, at ID
+	 * 0x[800][00][000]. Rule: the root table has a single bucket with ID 0.
+	 * If OTOH the user passed us the htid, they may also pass a bucketid of
+	 * choice. 0 is fine. For example, a user htid of 0x[600][01][000]
+	 * indicates hash bucketid 1. Rule: the entry/node ID _cannot_ be
+	 * passed via the htid, so even if it was non-zero it will be ignored.
+	 *
+	 * We may also have a handle, if the user passed one. The handle also
+	 * carries the same addressing of htid(12b):bucketid(8b):node/entryid(12b).
+	 * Rule: the bucketid on the handle is ignored even if one was passed;
+	 * rather the value on "htid" is always assumed to be the bucketid.
+	 */
 	if (handle) {
+		/* Rule: The htid from handle and tableid from htid must match */
 		if (TC_U32_HTID(handle) && TC_U32_HTID(handle ^ htid)) {
 			NL_SET_ERR_MSG_MOD(extack, "Handle specified hash table address mismatch");
 			return -EINVAL;
 		}
-		handle = htid | TC_U32_NODE(handle);
-		err = idr_alloc_u32(&ht->handle_idr, NULL, &handle, handle,
-				    GFP_KERNEL);
-		if (err)
-			return err;
-	} else
+		/* Ok, so far we have a valid htid(12b):bucketid(8b) but we
+		 * need to finalize the table entry identification with the last
+		 * part - the node/entryid(12b). Rule: Nodeid _cannot be 0_ for
+		 * entries. Rule: nodeid of 0 is reserved only for tables (see
+		 * earlier code which processes the TC_U32_DIVISOR attribute).
+		 * Rule: The nodeid can only be derived from the handle (and not
+		 * htid).
+		 * Rule: if the handle specifies zero for the node id, for example
+		 * 0x60000000, then pick a new nodeid from the pool of IDs
+		 * this hash table has been allocating from.
+		 * If OTOH it is specified (i.e. the user passed a handle such
+		 * as 0x60000123), then we use it to generate our final
+		 * handle which is used to uniquely identify the match entry.
+		 */
+		if (!TC_U32_NODE(handle)) {
+			handle = gen_new_kid(ht, htid);
+		} else {
+			handle = htid | TC_U32_NODE(handle);
+			err = idr_alloc_u32(&ht->handle_idr, NULL, &handle,
+					    handle, GFP_KERNEL);
+			if (err)
+				return err;
+		}
+	} else {
+		/* The user did not give us a handle; let's just generate one
+		 * from the table's pool of nodeids.
+		 */
 		handle = gen_new_kid(ht, htid);
+	}
 
 	if (tb[TCA_U32_SEL] == NULL) {
 		NL_SET_ERR_MSG_MOD(extack, "Selector not specified");
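The htid(12b):bucketid(8b):nodeid(12b) layout described in the comments above can be decomposed with the uapi macros from linux/pkt_cls.h; a stand-alone sketch using the 0x60000123 handle from the comment's example:

#include <stdio.h>
#include <linux/pkt_cls.h>

int main(void)
{
	__u32 handle = 0x60000123;	/* example handle from the comment above */

	printf("htid   0x%03x\n", TC_U32_USERHTID(handle));	/* 0x600 */
	printf("bucket 0x%02x\n", TC_U32_HASH(handle));		/* 0x00  */
	printf("nodeid 0x%03x\n", TC_U32_NODE(handle));		/* 0x123 */
	return 0;
}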
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index af85a73c4c54..da34fd4c9269 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -502,7 +502,7 @@ META_COLLECTOR(int_sk_lingertime)
 		*err = -1;
 		return;
 	}
-	dst->value = sk->sk_lingertime / HZ;
+	dst->value = READ_ONCE(sk->sk_lingertime) / HZ;
 }
 
 META_COLLECTOR(int_sk_err_qlen)
@@ -568,7 +568,7 @@ META_COLLECTOR(int_sk_rcvtimeo)
 		*err = -1;
 		return;
 	}
-	dst->value = sk->sk_rcvtimeo / HZ;
+	dst->value = READ_ONCE(sk->sk_rcvtimeo) / HZ;
 }
 
 META_COLLECTOR(int_sk_sndtimeo)
@@ -579,7 +579,7 @@ META_COLLECTOR(int_sk_sndtimeo)
 		*err = -1;
 		return;
 	}
-	dst->value = sk->sk_sndtimeo / HZ;
+	dst->value = READ_ONCE(sk->sk_sndtimeo) / HZ;
 }
 
 META_COLLECTOR(int_sk_sendmsg_off)
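The em_meta changes annotate lockless readers of socket fields that a writer may change concurrently; READ_ONCE() forces a single, untorn load. A rough user-space analogue of the pattern (READ_ONCE_ISH here is an illustration, not the kernel's implementation):

#include <stdio.h>

#define READ_ONCE_ISH(x) (*(const volatile __typeof__(x) *)&(x))

static unsigned long sk_sndtimeo_shadow;	/* imagine another thread writes this */

int main(void)
{
	/* one marked load: the compiler may not tear or refetch it */
	unsigned long v = READ_ONCE_ISH(sk_sndtimeo_shadow);

	printf("%lu\n", v / 100);	/* mirrors the "/ HZ" scaling above */
	return 0;
}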
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index aa6b1fe65151..e9eaf637220e 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1547,10 +1547,28 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
 	return 0;
 }
 
+static bool req_create_or_replace(struct nlmsghdr *n)
+{
+	return (n->nlmsg_flags & NLM_F_CREATE &&
+		n->nlmsg_flags & NLM_F_REPLACE);
+}
+
+static bool req_create_exclusive(struct nlmsghdr *n)
+{
+	return (n->nlmsg_flags & NLM_F_CREATE &&
+		n->nlmsg_flags & NLM_F_EXCL);
+}
+
+static bool req_change(struct nlmsghdr *n)
+{
+	return (!(n->nlmsg_flags & NLM_F_CREATE) &&
+		!(n->nlmsg_flags & NLM_F_REPLACE) &&
+		!(n->nlmsg_flags & NLM_F_EXCL));
+}
+
 /*
  * Create/change qdisc.
  */
-
 static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
 			   struct netlink_ext_ack *extack)
 {
@@ -1644,27 +1662,35 @@ replay:
 				 *
 				 *   We know, that some child q is already
 				 *   attached to this parent and have choice:
-				 *   either to change it or to create/graft new one.
+				 *   1) change it or 2) create/graft new one.
+				 *   If the requested qdisc kind is different
+				 *   than the existing one, then we choose graft.
+				 *   If they are the same then this is "change"
+				 *   operation - just let it fall through.
 				 *
 				 *   1. We are allowed to create/graft only
-				 *   if CREATE and REPLACE flags are set.
+				 *   if the request is explicitly stating
+				 *   "please create if it doesn't exist".
 				 *
-				 *   2. If EXCL is set, requestor wanted to say,
-				 *   that qdisc tcm_handle is not expected
+				 *   2. If the request is an exclusive create,
+				 *   then the qdisc tcm_handle is not expected
 				 *   to exist, so that we choose create/graft too.
 				 *
 				 *   3. The last case is when no flags are set.
+				 *   This will happen when for example tc
+				 *   utility issues a "change" command.
 				 *   Alas, it is sort of hole in API, we
 				 *   cannot decide what to do unambiguously.
-				 *   For now we select create/graft, if
-				 *   user gave KIND, which does not match existing.
+				 *   For now we select create/graft.
 				 */
-				if ((n->nlmsg_flags & NLM_F_CREATE) &&
-				    (n->nlmsg_flags & NLM_F_REPLACE) &&
-				    ((n->nlmsg_flags & NLM_F_EXCL) ||
-				     (tca[TCA_KIND] &&
-				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
-					goto create_n_graft;
+				if (tca[TCA_KIND] &&
+				    nla_strcmp(tca[TCA_KIND], q->ops->id)) {
+					if (req_create_or_replace(n) ||
+					    req_create_exclusive(n))
+						goto create_n_graft;
+					else if (req_change(n))
+						goto create_n_graft2;
+				}
 			}
 		}
 	} else {
@@ -1698,6 +1724,7 @@ create_n_graft:
 		NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
 		return -ENOENT;
 	}
+create_n_graft2:
 	if (clid == TC_H_INGRESS) {
 		if (dev_ingress_queue(dev)) {
 			q = qdisc_create(dev, dev_ingress_queue(dev),
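The three helpers encode the nlmsg_flags combinations iproute2 sends: "tc qdisc add" uses NLM_F_CREATE|NLM_F_EXCL, "replace" uses NLM_F_CREATE|NLM_F_REPLACE, and "change" sets none of the three. A stand-alone sketch of the same predicates:

#include <stdio.h>
#include <linux/netlink.h>

static const char *classify(unsigned int flags)
{
	if ((flags & NLM_F_CREATE) && (flags & NLM_F_REPLACE))
		return "create or replace";
	if ((flags & NLM_F_CREATE) && (flags & NLM_F_EXCL))
		return "exclusive create";
	if (!(flags & (NLM_F_CREATE | NLM_F_REPLACE | NLM_F_EXCL)))
		return "change";
	return "other";
}

int main(void)
{
	printf("tc qdisc add:     %s\n", classify(NLM_F_CREATE | NLM_F_EXCL));
	printf("tc qdisc replace: %s\n", classify(NLM_F_CREATE | NLM_F_REPLACE));
	printf("tc qdisc change:  %s\n", classify(0));
	return 0;
}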
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index e35a4e90f4e6..19901e77cd3b 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -17,7 +17,6 @@
 
 struct drr_class {
 	struct Qdisc_class_common	common;
-	unsigned int			filter_cnt;
 
 	struct gnet_stats_basic_sync		bstats;
 	struct gnet_stats_queue		qstats;
@@ -150,8 +149,10 @@ static int drr_delete_class(struct Qdisc *sch, unsigned long arg,
 	struct drr_sched *q = qdisc_priv(sch);
 	struct drr_class *cl = (struct drr_class *)arg;
 
-	if (cl->filter_cnt > 0)
+	if (qdisc_class_in_use(&cl->common)) {
+		NL_SET_ERR_MSG(extack, "DRR class is in use");
 		return -EBUSY;
+	}
 
 	sch_tree_lock(sch);
 
@@ -187,8 +188,8 @@ static unsigned long drr_bind_tcf(struct Qdisc *sch, unsigned long parent,
 {
 	struct drr_class *cl = drr_find_class(sch, classid);
 
-	if (cl != NULL)
-		cl->filter_cnt++;
+	if (cl)
+		qdisc_class_get(&cl->common);
 
 	return (unsigned long)cl;
 }
@@ -197,7 +198,7 @@ static void drr_unbind_tcf(struct Qdisc *sch, unsigned long arg)
 {
 	struct drr_class *cl = (struct drr_class *)arg;
 
-	cl->filter_cnt--;
+	qdisc_class_put(&cl->common);
 }
 
 static int drr_graft_class(struct Qdisc *sch, unsigned long arg,
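sch_drr here (and sch_hfsc, sch_htb, sch_qfq below) drop their private filter_cnt in favor of qdisc_class_in_use()/qdisc_class_get()/qdisc_class_put() keyed on struct Qdisc_class_common. A compilable sketch of the counting semantics these helpers centralize (the *_sketch names are illustrative; the real helpers live in include/net/sch_generic.h and may differ in detail):

#include <stdio.h>

struct class_common_sketch {
	unsigned int filter_cnt;	/* filters bound to this class */
};

static void class_get(struct class_common_sketch *cl)
{
	cl->filter_cnt++;		/* bind_tcf: a filter now references us */
}

static void class_put(struct class_common_sketch *cl)
{
	cl->filter_cnt--;		/* unbind_tcf */
}

static int class_in_use(const struct class_common_sketch *cl)
{
	return cl->filter_cnt > 0;	/* delete must refuse with -EBUSY */
}

int main(void)
{
	struct class_common_sketch cl = { 0 };

	class_get(&cl);
	printf("in use: %d\n", class_in_use(&cl));	/* 1: delete would fail */
	class_put(&cl);
	printf("in use: %d\n", class_in_use(&cl));	/* 0: safe to delete */
	return 0;
}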
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 70b0c5873d32..3554085bc2be 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -116,7 +116,6 @@ struct hfsc_class {
 	struct net_rate_estimator __rcu *rate_est;
 	struct tcf_proto __rcu *filter_list; /* filter list */
 	struct tcf_block *block;
-	unsigned int	filter_cnt;	/* filter count */
 	unsigned int	level;		/* class level in hierarchy */
 
 	struct hfsc_sched *sched;	/* scheduler data */
@@ -1012,6 +1011,10 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 		if (parent == NULL)
 			return -ENOENT;
 	}
+	if (!(parent->cl_flags & HFSC_FSC) && parent != &q->root) {
+		NL_SET_ERR_MSG(extack, "Invalid parent - parent class must have FSC");
+		return -EINVAL;
+	}
 
 	if (classid == 0 || TC_H_MAJ(classid ^ sch->handle) != 0)
 		return -EINVAL;
@@ -1094,8 +1097,11 @@ hfsc_delete_class(struct Qdisc *sch, unsigned long arg,
 	struct hfsc_sched *q = qdisc_priv(sch);
 	struct hfsc_class *cl = (struct hfsc_class *)arg;
 
-	if (cl->level > 0 || cl->filter_cnt > 0 || cl == &q->root)
+	if (cl->level > 0 || qdisc_class_in_use(&cl->cl_common) ||
+	    cl == &q->root) {
+		NL_SET_ERR_MSG(extack, "HFSC class in use");
 		return -EBUSY;
+	}
 
 	sch_tree_lock(sch);
 
@@ -1223,7 +1229,7 @@ hfsc_bind_tcf(struct Qdisc *sch, unsigned long parent, u32 classid)
 	if (cl != NULL) {
 		if (p != NULL && p->level <= cl->level)
 			return 0;
-		cl->filter_cnt++;
+		qdisc_class_get(&cl->cl_common);
 	}
 
 	return (unsigned long)cl;
@@ -1234,7 +1240,7 @@ hfsc_unbind_tcf(struct Qdisc *sch, unsigned long arg)
 {
 	struct hfsc_class *cl = (struct hfsc_class *)arg;
 
-	cl->filter_cnt--;
+	qdisc_class_put(&cl->cl_common);
 }
 
 static struct tcf_block *hfsc_tcf_block(struct Qdisc *sch, unsigned long arg,
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 325c29041c7d..0d947414e616 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -102,7 +102,6 @@ struct htb_class {
 	struct tcf_proto __rcu	*filter_list;	/* class attached filters */
 	struct tcf_block	*block;
-	int			filter_cnt;
 	int			level;		/* our level (see above) */
 	unsigned int		children;
@@ -1710,8 +1709,10 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg,
 	 * tc subsys guarantee us that in htb_destroy it holds no class
 	 * refs so that we can remove children safely there ?
 	 */
-	if (cl->children || cl->filter_cnt)
+	if (cl->children || qdisc_class_in_use(&cl->common)) {
+		NL_SET_ERR_MSG(extack, "HTB class in use");
 		return -EBUSY;
+	}
 
 	if (!cl->level && htb_parent_last_child(cl))
 		last_child = 1;
@@ -1810,10 +1811,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 			NL_SET_ERR_MSG(extack, "HTB offload doesn't support the mpu parameter");
 			goto failure;
 		}
-		if (hopt->quantum) {
-			NL_SET_ERR_MSG(extack, "HTB offload doesn't support the quantum parameter");
-			goto failure;
-		}
 	}
 
 	/* Keeping backward compatible with rate_table based iproute2 tc */
@@ -1910,6 +1907,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 				.rate = max_t(u64, hopt->rate.rate, rate64),
 				.ceil = max_t(u64, hopt->ceil.rate, ceil64),
 				.prio = hopt->prio,
+				.quantum = hopt->quantum,
 				.extack = extack,
 			};
 			err = htb_offload(dev, &offload_opt);
@@ -1931,6 +1929,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 				.rate = max_t(u64, hopt->rate.rate, rate64),
 				.ceil = max_t(u64, hopt->ceil.rate, ceil64),
 				.prio = hopt->prio,
+				.quantum = hopt->quantum,
 				.extack = extack,
 			};
 			err = htb_offload(dev, &offload_opt);
@@ -2017,6 +2016,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 				.rate = max_t(u64, hopt->rate.rate, rate64),
 				.ceil = max_t(u64, hopt->ceil.rate, ceil64),
 				.prio = hopt->prio,
+				.quantum = hopt->quantum,
 				.extack = extack,
 			};
 			err = htb_offload(dev, &offload_opt);
@@ -2108,7 +2108,7 @@ static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
 	 * be broken by class during destroy IIUC.
 	 */
 	if (cl)
-		cl->filter_cnt++;
+		qdisc_class_get(&cl->common);
 	return (unsigned long)cl;
 }
 
@@ -2116,8 +2116,7 @@ static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
 {
 	struct htb_class *cl = (struct htb_class *)arg;
 
-	if (cl)
-		cl->filter_cnt--;
+	qdisc_class_put(&cl->common);
 }
 
 static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index e43a45499372..a463a63192c3 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -13,6 +13,7 @@
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
 #include <net/pkt_cls.h>
+#include <net/tcx.h>
 
 struct ingress_sched_data {
 	struct tcf_block *block;
@@ -78,6 +79,8 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt,
 {
 	struct ingress_sched_data *q = qdisc_priv(sch);
 	struct net_device *dev = qdisc_dev(sch);
+	struct bpf_mprog_entry *entry;
+	bool created;
 	int err;
 
 	if (sch->parent != TC_H_INGRESS)
@@ -85,7 +88,13 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt,
 
 	net_inc_ingress_queue();
 
-	mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress);
+	entry = tcx_entry_fetch_or_create(dev, true, &created);
+	if (!entry)
+		return -ENOMEM;
+	tcx_miniq_set_active(entry, true);
+	mini_qdisc_pair_init(&q->miniqp, sch, &tcx_entry(entry)->miniq);
+	if (created)
+		tcx_entry_update(dev, entry, true);
 
 	q->block_info.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
 	q->block_info.chain_head_change = clsact_chain_head_change;
@@ -103,11 +112,22 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt,
 static void ingress_destroy(struct Qdisc *sch)
 {
 	struct ingress_sched_data *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+	struct bpf_mprog_entry *entry = rtnl_dereference(dev->tcx_ingress);
 
 	if (sch->parent != TC_H_INGRESS)
 		return;
 
 	tcf_block_put_ext(q->block, sch, &q->block_info);
+
+	if (entry) {
+		tcx_miniq_set_active(entry, false);
+		if (!tcx_entry_is_active(entry)) {
+			tcx_entry_update(dev, NULL, true);
+			tcx_entry_free(entry);
+		}
+	}
+
 	net_dec_ingress_queue();
 }
 
@@ -223,6 +243,8 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
 {
 	struct clsact_sched_data *q = qdisc_priv(sch);
 	struct net_device *dev = qdisc_dev(sch);
+	struct bpf_mprog_entry *entry;
+	bool created;
 	int err;
 
 	if (sch->parent != TC_H_CLSACT)
@@ -231,7 +253,13 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
 
 	net_inc_ingress_queue();
 	net_inc_egress_queue();
 
-	mini_qdisc_pair_init(&q->miniqp_ingress, sch, &dev->miniq_ingress);
+	entry = tcx_entry_fetch_or_create(dev, true, &created);
+	if (!entry)
+		return -ENOMEM;
+	tcx_miniq_set_active(entry, true);
+	mini_qdisc_pair_init(&q->miniqp_ingress, sch, &tcx_entry(entry)->miniq);
+	if (created)
+		tcx_entry_update(dev, entry, true);
 
 	q->ingress_block_info.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
 	q->ingress_block_info.chain_head_change = clsact_chain_head_change;
@@ -244,7 +272,13 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
 
 	mini_qdisc_pair_block_init(&q->miniqp_ingress, q->ingress_block);
 
-	mini_qdisc_pair_init(&q->miniqp_egress, sch, &dev->miniq_egress);
+	entry = tcx_entry_fetch_or_create(dev, false, &created);
+	if (!entry)
+		return -ENOMEM;
+	tcx_miniq_set_active(entry, true);
+	mini_qdisc_pair_init(&q->miniqp_egress, sch, &tcx_entry(entry)->miniq);
+	if (created)
+		tcx_entry_update(dev, entry, false);
 
 	q->egress_block_info.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS;
 	q->egress_block_info.chain_head_change = clsact_chain_head_change;
@@ -256,12 +290,31 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
 static void clsact_destroy(struct Qdisc *sch)
 {
 	struct clsact_sched_data *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+	struct bpf_mprog_entry *ingress_entry = rtnl_dereference(dev->tcx_ingress);
+	struct bpf_mprog_entry *egress_entry = rtnl_dereference(dev->tcx_egress);
 
 	if (sch->parent != TC_H_CLSACT)
 		return;
 
-	tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info);
 	tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info);
+	tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info);
+
+	if (ingress_entry) {
+		tcx_miniq_set_active(ingress_entry, false);
+		if (!tcx_entry_is_active(ingress_entry)) {
+			tcx_entry_update(dev, NULL, true);
+			tcx_entry_free(ingress_entry);
+		}
+	}
+
+	if (egress_entry) {
+		tcx_miniq_set_active(egress_entry, false);
+		if (!tcx_entry_is_active(egress_entry)) {
+			tcx_entry_update(dev, NULL, false);
+			tcx_entry_free(egress_entry);
+		}
+	}
 
 	net_dec_ingress_queue();
 	net_dec_egress_queue();
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 38d9aa0cd30e..4ad39a4a3cf5 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -105,6 +105,11 @@ struct netem_sched_data {
 		u32 rho;
 	} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;
 
+	struct prng {
+		u64 seed;
+		struct rnd_state prng_state;
+	} prng;
+
 	struct disttable *delay_dist;
 
 	enum {
@@ -179,15 +184,16 @@ static void init_crandom(struct crndstate *state, unsigned long rho)
  * Next number depends on last value.
  * rho is scaled to avoid floating point.
  */
-static u32 get_crandom(struct crndstate *state)
+static u32 get_crandom(struct crndstate *state, struct prng *p)
 {
 	u64 value, rho;
 	unsigned long answer;
+	struct rnd_state *s = &p->prng_state;
 
 	if (!state || state->rho == 0)	/* no correlation */
-		return get_random_u32();
+		return prandom_u32_state(s);
 
-	value = get_random_u32();
+	value = prandom_u32_state(s);
 	rho = (u64)state->rho + 1;
 	answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
 	state->last = answer;
@@ -201,7 +207,7 @@ static u32 get_crandom(struct crndstate *state)
 static bool loss_4state(struct netem_sched_data *q)
 {
 	struct clgstate *clg = &q->clg;
-	u32 rnd = get_random_u32();
+	u32 rnd = prandom_u32_state(&q->prng.prng_state);
 
 	/*
 	 * Makes a comparison between rnd and the transition
@@ -266,18 +272,19 @@ static bool loss_4state(struct netem_sched_data *q)
 static bool loss_gilb_ell(struct netem_sched_data *q)
 {
 	struct clgstate *clg = &q->clg;
+	struct rnd_state *s = &q->prng.prng_state;
 
 	switch (clg->state) {
 	case GOOD_STATE:
-		if (get_random_u32() < clg->a1)
+		if (prandom_u32_state(s) < clg->a1)
 			clg->state = BAD_STATE;
-		if (get_random_u32() < clg->a4)
+		if (prandom_u32_state(s) < clg->a4)
 			return true;
 		break;
 	case BAD_STATE:
-		if (get_random_u32() < clg->a2)
+		if (prandom_u32_state(s) < clg->a2)
 			clg->state = GOOD_STATE;
-		if (get_random_u32() > clg->a3)
+		if (prandom_u32_state(s) > clg->a3)
 			return true;
 	}
 
@@ -289,7 +296,7 @@ static bool loss_event(struct netem_sched_data *q)
 	switch (q->loss_model) {
 	case CLG_RANDOM:
 		/* Random packet drop 0 => none, ~0 => all */
-		return q->loss && q->loss >= get_crandom(&q->loss_cor);
+		return q->loss && q->loss >= get_crandom(&q->loss_cor, &q->prng);
 
 	case CLG_4_STATES:
 		/* 4state loss model algorithm (used also for GI model)
@@ -318,6 +325,7 @@ static bool loss_event(struct netem_sched_data *q)
 */
 static s64 tabledist(s64 mu, s32 sigma,
 		     struct crndstate *state,
+		     struct prng *prng,
 		     const struct disttable *dist)
 {
 	s64 x;
@@ -327,7 +335,7 @@ static s64 tabledist(s64 mu, s32 sigma,
 	if (sigma == 0)
 		return mu;
 
-	rnd = get_crandom(state);
+	rnd = get_crandom(state, prng);
 
 	/* default uniform distribution */
 	if (dist == NULL)
@@ -449,7 +457,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	skb->prev = NULL;
 
 	/* Random duplication */
-	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
+	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor, &q->prng))
 		++count;
 
 	/* Drop packet? */
@@ -492,7 +500,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	 * If packet is going to be hardware checksummed, then
 	 * do it now in software before we mangle it.
 	 */
-	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
+	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor, &q->prng)) {
 		if (skb_is_gso(skb)) {
 			skb = netem_segment(skb, sch, to_free);
 			if (!skb)
@@ -530,12 +538,12 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	cb = netem_skb_cb(skb);
 	if (q->gap == 0 ||		/* not doing reordering */
 	    q->counter < q->gap - 1 ||	/* inside last reordering gap */
-	    q->reorder < get_crandom(&q->reorder_cor)) {
+	    q->reorder < get_crandom(&q->reorder_cor, &q->prng)) {
 		u64 now;
 		s64 delay;
 
 		delay = tabledist(q->latency, q->jitter,
-				  &q->delay_cor, q->delay_dist);
+				  &q->delay_cor, &q->prng, q->delay_dist);
 
 		now = ktime_get_ns();
@@ -639,7 +647,7 @@ static void get_slot_next(struct netem_sched_data *q, u64 now)
 	else
 		next_delay = tabledist(q->slot_config.dist_delay,
 				       (s32)(q->slot_config.dist_jitter),
-				       NULL, q->slot_dist);
+				       NULL, &q->prng, q->slot_dist);
 
 	q->slot.slot_next = now + next_delay;
 	q->slot.packets_left = q->slot_config.max_packets;
@@ -922,6 +930,7 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
 	[TCA_NETEM_LATENCY64]	= { .type = NLA_S64 },
 	[TCA_NETEM_JITTER64]	= { .type = NLA_S64 },
 	[TCA_NETEM_SLOT]	= { .len = sizeof(struct tc_netem_slot) },
+	[TCA_NETEM_PRNG_SEED]	= { .type = NLA_U64 },
 };
 
 static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
@@ -1040,6 +1049,12 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt,
 	/* capping jitter to the range acceptable by tabledist() */
 	q->jitter = min_t(s64, abs(q->jitter), INT_MAX);
 
+	if (tb[TCA_NETEM_PRNG_SEED])
+		q->prng.seed = nla_get_u64(tb[TCA_NETEM_PRNG_SEED]);
+	else
+		q->prng.seed = get_random_u64();
+	prandom_seed_state(&q->prng.prng_state, q->prng.seed);
+
 unlock:
 	sch_tree_unlock(sch);
 
@@ -1203,6 +1218,10 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
 			goto nla_put_failure;
 	}
 
+	if (nla_put_u64_64bit(skb, TCA_NETEM_PRNG_SEED, q->prng.seed,
+			      TCA_NETEM_PAD))
+		goto nla_put_failure;
+
 	return nla_nest_end(skb, nla);
 
 nla_put_failure:
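With TCA_NETEM_PRNG_SEED, two netem instances seeded identically draw the same pseudo-random sequence and therefore make the same loss/duplication/jitter decisions. A stand-alone illustration of that determinism, using rand_r() in place of the kernel's prandom_u32_state():

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	unsigned int run1 = 12345, run2 = 12345;	/* same seed, two "runs" */

	for (int i = 0; i < 4; i++) {
		unsigned int a = rand_r(&run1);
		unsigned int b = rand_r(&run2);

		printf("%u == %u -> %s\n", a, b,
		       a == b ? "same decision" : "differs");
	}
	return 0;
}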
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index befaf74b33ca..1a25752f1a9a 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -130,8 +130,6 @@ struct qfq_aggregate;
 
 struct qfq_class {
 	struct Qdisc_class_common common;
-	unsigned int filter_cnt;
-
 	struct gnet_stats_basic_sync bstats;
 	struct gnet_stats_queue qstats;
 	struct net_rate_estimator __rcu *rate_est;
@@ -545,8 +543,10 @@ static int qfq_delete_class(struct Qdisc *sch, unsigned long arg,
 	struct qfq_sched *q = qdisc_priv(sch);
 	struct qfq_class *cl = (struct qfq_class *)arg;
 
-	if (cl->filter_cnt > 0)
+	if (qdisc_class_in_use(&cl->common)) {
+		NL_SET_ERR_MSG_MOD(extack, "QFQ class in use");
 		return -EBUSY;
+	}
 
 	sch_tree_lock(sch);
 
@@ -580,8 +580,8 @@ static unsigned long qfq_bind_tcf(struct Qdisc *sch, unsigned long parent,
 {
 	struct qfq_class *cl = qfq_find_class(sch, classid);
 
-	if (cl != NULL)
-		cl->filter_cnt++;
+	if (cl)
+		qdisc_class_get(&cl->common);
 
 	return (unsigned long)cl;
 }
@@ -590,7 +590,7 @@ static void qfq_unbind_tcf(struct Qdisc *sch, unsigned long arg)
 {
 	struct qfq_class *cl = (struct qfq_class *)arg;
 
-	cl->filter_cnt--;
+	qdisc_class_put(&cl->common);
 }
 
 static int qfq_graft_class(struct Qdisc *sch, unsigned long arg,
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 717ae51d94a0..1cb5e41c0ec7 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -1015,6 +1015,11 @@ static const struct nla_policy taprio_tc_policy[TCA_TAPRIO_TC_ENTRY_MAX + 1] = {
 							      TC_FP_PREEMPTIBLE),
 };
 
+static struct netlink_range_validation_signed taprio_cycle_time_range = {
+	.min = 0,
+	.max = INT_MAX,
+};
+
 static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
 	[TCA_TAPRIO_ATTR_PRIOMAP]	       = {
 		.len = sizeof(struct tc_mqprio_qopt)
@@ -1023,7 +1028,8 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
 	[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]            = { .type = NLA_S64 },
 	[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]         = { .type = NLA_NESTED },
 	[TCA_TAPRIO_ATTR_SCHED_CLOCKID]              = { .type = NLA_S32 },
-	[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME]           = { .type = NLA_S64 },
+	[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME]           =
+		NLA_POLICY_FULL_RANGE_SIGNED(NLA_S64, &taprio_cycle_time_range),
 	[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 },
 	[TCA_TAPRIO_ATTR_FLAGS]                      = { .type = NLA_U32 },
 	[TCA_TAPRIO_ATTR_TXTIME_DELAY]		     = { .type = NLA_U32 },
@@ -1159,6 +1165,11 @@ static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb,
 			return -EINVAL;
 		}
 
+		if (cycle < 0 || cycle > INT_MAX) {
+			NL_SET_ERR_MSG(extack, "'cycle_time' is too big");
+			return -EINVAL;
+		}
+
 		new->cycle_time = cycle;
 	}
 
@@ -1347,7 +1358,7 @@ static void setup_txtime(struct taprio_sched *q,
 			 struct sched_gate_list *sched, ktime_t base)
 {
 	struct sched_entry *entry;
-	u32 interval = 0;
+	u64 interval = 0;
 
 	list_for_each_entry(entry, &sched->entries, list) {
 		entry->next_txtime = ktime_add_ns(base, interval);
@@ -2088,11 +2099,8 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
 		return -EOPNOTSUPP;
 	}
 
-	/* pre-allocate qdisc, attachment can't fail */
-	q->qdiscs = kcalloc(dev->num_tx_queues,
-			    sizeof(q->qdiscs[0]),
+	q->qdiscs = kcalloc(dev->num_tx_queues, sizeof(q->qdiscs[0]),
 			    GFP_KERNEL);
-
 	if (!q->qdiscs)
 		return -ENOMEM;
 
@@ -2134,25 +2142,32 @@ static void taprio_attach(struct Qdisc *sch)
 
 	/* Attach underlying qdisc */
 	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
-		struct Qdisc *qdisc = q->qdiscs[ntx];
-		struct Qdisc *old;
+		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
+		struct Qdisc *old, *dev_queue_qdisc;
 
 		if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
+			struct Qdisc *qdisc = q->qdiscs[ntx];
+
+			/* In offload mode, the root taprio qdisc is bypassed
+			 * and the netdev TX queues see the children directly
+			 */
 			qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
-			old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
+			dev_queue_qdisc = qdisc;
 		} else {
-			old = dev_graft_qdisc(qdisc->dev_queue, sch);
-			qdisc_refcount_inc(sch);
+			/* In software mode, attach the root taprio qdisc
+			 * to all netdev TX queues, so that dev_qdisc_enqueue()
+			 * goes through taprio_enqueue().
+			 */
+			dev_queue_qdisc = sch;
 		}
+		old = dev_graft_qdisc(dev_queue, dev_queue_qdisc);
+		/* The qdisc's refcount needs to be elevated once
+		 * for each netdev TX queue it is grafted onto
+		 */
+		qdisc_refcount_inc(dev_queue_qdisc);
 		if (old)
 			qdisc_put(old);
 	}
-
-	/* access to the child qdiscs is not needed in offload mode */
-	if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
-		kfree(q->qdiscs);
-		q->qdiscs = NULL;
-	}
 }
 
 static struct netdev_queue *taprio_queue_get(struct Qdisc *sch,
@@ -2181,13 +2196,23 @@ static int taprio_graft(struct Qdisc *sch, unsigned long cl,
 	if (dev->flags & IFF_UP)
 		dev_deactivate(dev);
 
+	/* In offload mode, the child Qdisc is directly attached to the netdev
+	 * TX queue, and thus, we need to keep its refcount elevated in order
+	 * to counteract qdisc_graft()'s call to qdisc_put() once per TX queue.
+	 * However, save the reference to the new qdisc in the private array in
+	 * both software and offload cases, to have an up-to-date reference to
+	 * our children.
+	 */
+	*old = q->qdiscs[cl - 1];
 	if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
-		*old = dev_graft_qdisc(dev_queue, new);
-	} else {
-		*old = q->qdiscs[cl - 1];
-		q->qdiscs[cl - 1] = new;
+		WARN_ON_ONCE(dev_graft_qdisc(dev_queue, new) != *old);
+		if (new)
+			qdisc_refcount_inc(new);
+		if (*old)
+			qdisc_put(*old);
 	}
+	q->qdiscs[cl - 1] = new;
 
 	if (new)
 		new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
@@ -2425,12 +2450,14 @@ start_error:
 
 static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl)
 {
-	struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
+	struct taprio_sched *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+	unsigned int ntx = cl - 1;
 
-	if (!dev_queue)
+	if (ntx >= dev->num_tx_queues)
 		return NULL;
 
-	return rtnl_dereference(dev_queue->qdisc_sleeping);
+	return q->qdiscs[ntx];
 }
 
 static unsigned long taprio_find(struct Qdisc *sch, u32 classid)
@@ -2445,11 +2472,11 @@ static unsigned long taprio_find(struct Qdisc *sch, u32 classid)
 static int taprio_dump_class(struct Qdisc *sch, unsigned long cl,
 			     struct sk_buff *skb, struct tcmsg *tcm)
 {
-	struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
+	struct Qdisc *child = taprio_leaf(sch, cl);
 
 	tcm->tcm_parent = TC_H_ROOT;
 	tcm->tcm_handle |= TC_H_MIN(cl);
-	tcm->tcm_info = rtnl_dereference(dev_queue->qdisc_sleeping)->handle;
+	tcm->tcm_info = child->handle;
 
 	return 0;
 }
@@ -2459,16 +2486,14 @@ static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 	__releases(d->lock)
 	__acquires(d->lock)
 {
-	struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
+	struct Qdisc *child = taprio_leaf(sch, cl);
 	struct tc_taprio_qopt_offload offload = {
 		.cmd = TAPRIO_CMD_QUEUE_STATS,
 		.queue_stats = {
 			.queue = cl - 1,
 		},
 	};
-	struct Qdisc *child;
 
-	child = rtnl_dereference(dev_queue->qdisc_sleeping);
 	if (gnet_stats_copy_basic(d, NULL, &child->bstats, true) < 0 ||
 	    qdisc_qstats_copy(d, child) < 0)
 		return -1;
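Two of the taprio changes guard the same arithmetic: cycle_time is now capped at INT_MAX, and setup_txtime()'s running offset was widened from u32 to u64, since a sum of nanosecond intervals wraps 32 bits after about 4.29 seconds. A stand-alone illustration:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t acc32 = 0;
	uint64_t acc64 = 0;
	uint64_t interval_ns = 3000000000ull;	/* a 3 s gate interval */

	for (int i = 0; i < 2; i++) {
		acc32 += (uint32_t)interval_ns;	/* wraps past 2^32 ns */
		acc64 += interval_ns;		/* keeps the true offset */
	}
	printf("u32 sum: %u (wrapped)\nu64 sum: %llu\n",
	       acc32, (unsigned long long)acc64);
	return 0;
}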
