Diffstat (limited to 'net/core')
-rw-r--r--  net/core/datagram.c    13
-rw-r--r--  net/core/dev.c         21
-rw-r--r--  net/core/dev_ioctl.c   11
-rw-r--r--  net/core/fib_rules.c   80
-rw-r--r--  net/core/filter.c     268
-rw-r--r--  net/core/gen_stats.c   16
-rw-r--r--  net/core/lwt_bpf.c      2
-rw-r--r--  net/core/page_pool.c    2
-rw-r--r--  net/core/rtnetlink.c    9
-rw-r--r--  net/core/skbuff.c      14
-rw-r--r--  net/core/sock.c        13
-rw-r--r--  net/core/xdp.c          3
12 files changed, 327 insertions(+), 125 deletions(-)
diff --git a/net/core/datagram.c b/net/core/datagram.c index f19bf3dc2bd6..9938952c5c78 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -819,8 +819,9 @@ EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg);  /**   * 	datagram_poll - generic datagram poll + *	@file: file struct   *	@sock: socket - *	@events to wait for + *	@wait: poll table   *   *	Datagram poll: Again totally generic. This also handles   *	sequenced packet sockets providing the socket receive queue @@ -830,10 +831,14 @@ EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg);   *	and you use a different write policy from sock_writeable()   *	then please supply your own write_space callback.   */ -__poll_t datagram_poll_mask(struct socket *sock, __poll_t events) +__poll_t datagram_poll(struct file *file, struct socket *sock, +			   poll_table *wait)  {  	struct sock *sk = sock->sk; -	__poll_t mask = 0; +	__poll_t mask; + +	sock_poll_wait(file, sk_sleep(sk), wait); +	mask = 0;  	/* exceptional events? */  	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) @@ -866,4 +871,4 @@ __poll_t datagram_poll_mask(struct socket *sock, __poll_t events)  	return mask;  } -EXPORT_SYMBOL(datagram_poll_mask); +EXPORT_SYMBOL(datagram_poll); diff --git a/net/core/dev.c b/net/core/dev.c index 57b7bab5f70b..559a91271f82 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7149,16 +7149,19 @@ int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len)  		dev->tx_queue_len = new_len;  		res = call_netdevice_notifiers(NETDEV_CHANGE_TX_QUEUE_LEN, dev);  		res = notifier_to_errno(res); -		if (res) { -			netdev_err(dev, -				   "refused to change device tx_queue_len\n"); -			dev->tx_queue_len = orig_len; -			return res; -		} -		return dev_qdisc_change_tx_queue_len(dev); +		if (res) +			goto err_rollback; +		res = dev_qdisc_change_tx_queue_len(dev); +		if (res) +			goto err_rollback;  	}  	return 0; + +err_rollback: +	netdev_err(dev, "refused to change device tx_queue_len\n"); +	dev->tx_queue_len = orig_len; +	return res;  }  /** @@ -8643,7 +8646,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char  		/* We get here if we can't use the current device name */  		if (!pat)  			goto out; -		if (dev_get_valid_name(net, dev, pat) < 0) +		err = dev_get_valid_name(net, dev, pat); +		if (err < 0)  			goto out;  	} @@ -8655,7 +8659,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char  	dev_close(dev);  	/* And unlink it from device chain */ -	err = -ENODEV;  	unlist_netdevice(dev);  	synchronize_net(); diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index a04e1e88bf3a..50537ff961a7 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -285,16 +285,9 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)  		if (ifr->ifr_qlen < 0)  			return -EINVAL;  		if (dev->tx_queue_len ^ ifr->ifr_qlen) { -			unsigned int orig_len = dev->tx_queue_len; - -			dev->tx_queue_len = ifr->ifr_qlen; -			err = call_netdevice_notifiers( -					NETDEV_CHANGE_TX_QUEUE_LEN, dev); -			err = notifier_to_errno(err); -			if (err) { -				dev->tx_queue_len = orig_len; +			err = dev_change_tx_queue_len(dev, ifr->ifr_qlen); +			if (err)  				return err; -			}  		}  		return 0; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 126ffc5bc630..f64aa13811ea 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -416,6 +416,14 @@ static struct fib_rule *rule_find(struct fib_rules_ops *ops,  		if (rule->mark && r->mark != rule->mark)  			continue; +		if 
(rule->suppress_ifgroup != -1 && +		    r->suppress_ifgroup != rule->suppress_ifgroup) +			continue; + +		if (rule->suppress_prefixlen != -1 && +		    r->suppress_prefixlen != rule->suppress_prefixlen) +			continue; +  		if (rule->mark_mask && r->mark_mask != rule->mark_mask)  			continue; @@ -436,6 +444,9 @@ static struct fib_rule *rule_find(struct fib_rules_ops *ops,  		if (rule->ip_proto && r->ip_proto != rule->ip_proto)  			continue; +		if (rule->proto && r->proto != rule->proto) +			continue; +  		if (fib_rule_port_range_set(&rule->sport_range) &&  		    !fib_rule_port_range_compare(&r->sport_range,  						 &rule->sport_range)) @@ -645,6 +656,73 @@ errout:  	return err;  } +static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh, +		       struct nlattr **tb, struct fib_rule *rule) +{ +	struct fib_rule *r; + +	list_for_each_entry(r, &ops->rules_list, list) { +		if (r->action != rule->action) +			continue; + +		if (r->table != rule->table) +			continue; + +		if (r->pref != rule->pref) +			continue; + +		if (memcmp(r->iifname, rule->iifname, IFNAMSIZ)) +			continue; + +		if (memcmp(r->oifname, rule->oifname, IFNAMSIZ)) +			continue; + +		if (r->mark != rule->mark) +			continue; + +		if (r->suppress_ifgroup != rule->suppress_ifgroup) +			continue; + +		if (r->suppress_prefixlen != rule->suppress_prefixlen) +			continue; + +		if (r->mark_mask != rule->mark_mask) +			continue; + +		if (r->tun_id != rule->tun_id) +			continue; + +		if (r->fr_net != rule->fr_net) +			continue; + +		if (r->l3mdev != rule->l3mdev) +			continue; + +		if (!uid_eq(r->uid_range.start, rule->uid_range.start) || +		    !uid_eq(r->uid_range.end, rule->uid_range.end)) +			continue; + +		if (r->ip_proto != rule->ip_proto) +			continue; + +		if (r->proto != rule->proto) +			continue; + +		if (!fib_rule_port_range_compare(&r->sport_range, +						 &rule->sport_range)) +			continue; + +		if (!fib_rule_port_range_compare(&r->dport_range, +						 &rule->dport_range)) +			continue; + +		if (!ops->compare(r, frh, tb)) +			continue; +		return 1; +	} +	return 0; +} +  int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,  		   struct netlink_ext_ack *extack)  { @@ -679,7 +757,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,  		goto errout;  	if ((nlh->nlmsg_flags & NLM_F_EXCL) && -	    rule_find(ops, frh, tb, rule, user_priority)) { +	    rule_exists(ops, frh, tb, rule)) {  		err = -EEXIST;  		goto errout_free;  	} diff --git a/net/core/filter.c b/net/core/filter.c index 3d9ba7e5965a..9dfd145eedcc 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -459,11 +459,21 @@ static bool convert_bpf_ld_abs(struct sock_filter *fp, struct bpf_insn **insnp)  	     (!unaligned_ok && offset >= 0 &&  	      offset + ip_align >= 0 &&  	      offset + ip_align % size == 0))) { +		bool ldx_off_ok = offset <= S16_MAX; +  		*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_H);  		*insn++ = BPF_ALU64_IMM(BPF_SUB, BPF_REG_TMP, offset); -		*insn++ = BPF_JMP_IMM(BPF_JSLT, BPF_REG_TMP, size, 2 + endian); -		*insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A, BPF_REG_D, -				      offset); +		*insn++ = BPF_JMP_IMM(BPF_JSLT, BPF_REG_TMP, +				      size, 2 + endian + (!ldx_off_ok * 2)); +		if (ldx_off_ok) { +			*insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A, +					      BPF_REG_D, offset); +		} else { +			*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_D); +			*insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_TMP, offset); +			*insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A, +					      BPF_REG_TMP, 0); +		}  		
if (endian)  			*insn++ = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, size * 8);  		*insn++ = BPF_JMP_A(8); @@ -1702,24 +1712,26 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = {  BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb,  	   u32, offset, void *, to, u32, len, u32, start_header)  { +	u8 *end = skb_tail_pointer(skb); +	u8 *net = skb_network_header(skb); +	u8 *mac = skb_mac_header(skb);  	u8 *ptr; -	if (unlikely(offset > 0xffff || len > skb_headlen(skb))) +	if (unlikely(offset > 0xffff || len > (end - mac)))  		goto err_clear;  	switch (start_header) {  	case BPF_HDR_START_MAC: -		ptr = skb_mac_header(skb) + offset; +		ptr = mac + offset;  		break;  	case BPF_HDR_START_NET: -		ptr = skb_network_header(skb) + offset; +		ptr = net + offset;  		break;  	default:  		goto err_clear;  	} -	if (likely(ptr >= skb_mac_header(skb) && -		   ptr + len <= skb_tail_pointer(skb))) { +	if (likely(ptr >= mac && ptr + len <= end)) {  		memcpy(to, ptr, len);  		return 0;  	} @@ -1762,6 +1774,37 @@ static const struct bpf_func_proto bpf_skb_pull_data_proto = {  	.arg2_type	= ARG_ANYTHING,  }; +static inline int sk_skb_try_make_writable(struct sk_buff *skb, +					   unsigned int write_len) +{ +	int err = __bpf_try_make_writable(skb, write_len); + +	bpf_compute_data_end_sk_skb(skb); +	return err; +} + +BPF_CALL_2(sk_skb_pull_data, struct sk_buff *, skb, u32, len) +{ +	/* Idea is the following: should the needed direct read/write +	 * test fail during runtime, we can pull in more data and redo +	 * again, since implicitly, we invalidate previous checks here. +	 * +	 * Or, since we know how much we need to make read/writeable, +	 * this can be done once at the program beginning for direct +	 * access case. By this we overcome limitations of only current +	 * headroom being accessible. +	 */ +	return sk_skb_try_make_writable(skb, len ? : skb_headlen(skb)); +} + +static const struct bpf_func_proto sk_skb_pull_data_proto = { +	.func		= sk_skb_pull_data, +	.gpl_only	= false, +	.ret_type	= RET_INTEGER, +	.arg1_type	= ARG_PTR_TO_CTX, +	.arg2_type	= ARG_ANYTHING, +}; +  BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset,  	   u64, from, u64, to, u64, flags)  { @@ -2779,7 +2822,8 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff)  static u32 __bpf_skb_max_len(const struct sk_buff *skb)  { -	return skb->dev->mtu + skb->dev->hard_header_len; +	return skb->dev ? 
skb->dev->mtu + skb->dev->hard_header_len : +			  SKB_MAX_ALLOC;  }  static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff) @@ -2863,8 +2907,8 @@ static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len)  	return __skb_trim_rcsum(skb, new_len);  } -BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len, -	   u64, flags) +static inline int __bpf_skb_change_tail(struct sk_buff *skb, u32 new_len, +					u64 flags)  {  	u32 max_len = __bpf_skb_max_len(skb);  	u32 min_len = __bpf_skb_min_len(skb); @@ -2900,6 +2944,13 @@ BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,  		if (!ret && skb_is_gso(skb))  			skb_gso_reset(skb);  	} +	return ret; +} + +BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len, +	   u64, flags) +{ +	int ret = __bpf_skb_change_tail(skb, new_len, flags);  	bpf_compute_data_pointers(skb);  	return ret; @@ -2914,9 +2965,27 @@ static const struct bpf_func_proto bpf_skb_change_tail_proto = {  	.arg3_type	= ARG_ANYTHING,  }; -BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room, +BPF_CALL_3(sk_skb_change_tail, struct sk_buff *, skb, u32, new_len,  	   u64, flags)  { +	int ret = __bpf_skb_change_tail(skb, new_len, flags); + +	bpf_compute_data_end_sk_skb(skb); +	return ret; +} + +static const struct bpf_func_proto sk_skb_change_tail_proto = { +	.func		= sk_skb_change_tail, +	.gpl_only	= false, +	.ret_type	= RET_INTEGER, +	.arg1_type	= ARG_PTR_TO_CTX, +	.arg2_type	= ARG_ANYTHING, +	.arg3_type	= ARG_ANYTHING, +}; + +static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room, +					u64 flags) +{  	u32 max_len = __bpf_skb_max_len(skb);  	u32 new_len = skb->len + head_room;  	int ret; @@ -2941,8 +3010,16 @@ BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,  		skb_reset_mac_header(skb);  	} +	return ret; +} + +BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room, +	   u64, flags) +{ +	int ret = __bpf_skb_change_head(skb, head_room, flags); +  	bpf_compute_data_pointers(skb); -	return 0; +	return ret;  }  static const struct bpf_func_proto bpf_skb_change_head_proto = { @@ -2954,6 +3031,23 @@ static const struct bpf_func_proto bpf_skb_change_head_proto = {  	.arg3_type	= ARG_ANYTHING,  }; +BPF_CALL_3(sk_skb_change_head, struct sk_buff *, skb, u32, head_room, +	   u64, flags) +{ +	int ret = __bpf_skb_change_head(skb, head_room, flags); + +	bpf_compute_data_end_sk_skb(skb); +	return ret; +} + +static const struct bpf_func_proto sk_skb_change_head_proto = { +	.func		= sk_skb_change_head, +	.gpl_only	= false, +	.ret_type	= RET_INTEGER, +	.arg1_type	= ARG_PTR_TO_CTX, +	.arg2_type	= ARG_ANYTHING, +	.arg3_type	= ARG_ANYTHING, +};  static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)  {  	return xdp_data_meta_unsupported(xdp) ? 
0 : @@ -3046,12 +3140,16 @@ static int __bpf_tx_xdp(struct net_device *dev,  			u32 index)  {  	struct xdp_frame *xdpf; -	int sent; +	int err, sent;  	if (!dev->netdev_ops->ndo_xdp_xmit) {  		return -EOPNOTSUPP;  	} +	err = xdp_ok_fwd_dev(dev, xdp->data_end - xdp->data); +	if (unlikely(err)) +		return err; +  	xdpf = convert_to_xdp_frame(xdp);  	if (unlikely(!xdpf))  		return -EOVERFLOW; @@ -3214,20 +3312,6 @@ err:  }  EXPORT_SYMBOL_GPL(xdp_do_redirect); -static int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, struct net_device *fwd) -{ -	unsigned int len; - -	if (unlikely(!(fwd->flags & IFF_UP))) -		return -ENETDOWN; - -	len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN; -	if (skb->len > len) -		return -EMSGSIZE; - -	return 0; -} -  static int xdp_do_generic_redirect_map(struct net_device *dev,  				       struct sk_buff *skb,  				       struct xdp_buff *xdp, @@ -3256,10 +3340,11 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,  	}  	if (map->map_type == BPF_MAP_TYPE_DEVMAP) { -		if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd)))) +		struct bpf_dtab_netdev *dst = fwd; + +		err = dev_map_generic_redirect(dst, skb, xdp_prog); +		if (unlikely(err))  			goto err; -		skb->dev = fwd; -		generic_xdp_tx(skb, xdp_prog);  	} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {  		struct xdp_sock *xs = fwd; @@ -3298,7 +3383,8 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,  		goto err;  	} -	if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd)))) +	err = xdp_ok_fwd_dev(fwd, skb->len); +	if (unlikely(err))  		goto err;  	skb->dev = fwd; @@ -4086,8 +4172,9 @@ static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params,  	memcpy(params->smac, dev->dev_addr, ETH_ALEN);  	params->h_vlan_TCI = 0;  	params->h_vlan_proto = 0; +	params->ifindex = dev->ifindex; -	return dev->ifindex; +	return 0;  }  #endif @@ -4111,7 +4198,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,  	/* verify forwarding is enabled on this interface */  	in_dev = __in_dev_get_rcu(dev);  	if (unlikely(!in_dev || !IN_DEV_FORWARD(in_dev))) -		return 0; +		return BPF_FIB_LKUP_RET_FWD_DISABLED;  	if (flags & BPF_FIB_LOOKUP_OUTPUT) {  		fl4.flowi4_iif = 1; @@ -4136,7 +4223,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,  		tb = fib_get_table(net, tbid);  		if (unlikely(!tb)) -			return 0; +			return BPF_FIB_LKUP_RET_NOT_FWDED;  		err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);  	} else { @@ -4148,8 +4235,20 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,  		err = fib_lookup(net, &fl4, &res, FIB_LOOKUP_NOREF);  	} -	if (err || res.type != RTN_UNICAST) -		return 0; +	if (err) { +		/* map fib lookup errors to RTN_ type */ +		if (err == -EINVAL) +			return BPF_FIB_LKUP_RET_BLACKHOLE; +		if (err == -EHOSTUNREACH) +			return BPF_FIB_LKUP_RET_UNREACHABLE; +		if (err == -EACCES) +			return BPF_FIB_LKUP_RET_PROHIBIT; + +		return BPF_FIB_LKUP_RET_NOT_FWDED; +	} + +	if (res.type != RTN_UNICAST) +		return BPF_FIB_LKUP_RET_NOT_FWDED;  	if (res.fi->fib_nhs > 1)  		fib_select_path(net, &res, &fl4, NULL); @@ -4157,19 +4256,16 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,  	if (check_mtu) {  		mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst);  		if (params->tot_len > mtu) -			return 0; +			return BPF_FIB_LKUP_RET_FRAG_NEEDED;  	}  	nh = &res.fi->fib_nh[res.nh_sel];  	/* do not handle lwt encaps right now */  	if (nh->nh_lwtstate) -		return 0; +		return 
BPF_FIB_LKUP_RET_UNSUPP_LWT;  	dev = nh->nh_dev; -	if (unlikely(!dev)) -		return 0; -  	if (nh->nh_gw)  		params->ipv4_dst = nh->nh_gw; @@ -4179,10 +4275,10 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,  	 * rcu_read_lock_bh is not needed here  	 */  	neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)params->ipv4_dst); -	if (neigh) -		return bpf_fib_set_fwd_params(params, neigh, dev); +	if (!neigh) +		return BPF_FIB_LKUP_RET_NO_NEIGH; -	return 0; +	return bpf_fib_set_fwd_params(params, neigh, dev);  }  #endif @@ -4203,7 +4299,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,  	/* link local addresses are never forwarded */  	if (rt6_need_strict(dst) || rt6_need_strict(src)) -		return 0; +		return BPF_FIB_LKUP_RET_NOT_FWDED;  	dev = dev_get_by_index_rcu(net, params->ifindex);  	if (unlikely(!dev)) @@ -4211,7 +4307,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,  	idev = __in6_dev_get_safely(dev);  	if (unlikely(!idev || !net->ipv6.devconf_all->forwarding)) -		return 0; +		return BPF_FIB_LKUP_RET_FWD_DISABLED;  	if (flags & BPF_FIB_LOOKUP_OUTPUT) {  		fl6.flowi6_iif = 1; @@ -4238,7 +4334,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,  		tb = ipv6_stub->fib6_get_table(net, tbid);  		if (unlikely(!tb)) -			return 0; +			return BPF_FIB_LKUP_RET_NOT_FWDED;  		f6i = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, strict);  	} else { @@ -4251,11 +4347,23 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,  	}  	if (unlikely(IS_ERR_OR_NULL(f6i) || f6i == net->ipv6.fib6_null_entry)) -		return 0; +		return BPF_FIB_LKUP_RET_NOT_FWDED; -	if (unlikely(f6i->fib6_flags & RTF_REJECT || -	    f6i->fib6_type != RTN_UNICAST)) -		return 0; +	if (unlikely(f6i->fib6_flags & RTF_REJECT)) { +		switch (f6i->fib6_type) { +		case RTN_BLACKHOLE: +			return BPF_FIB_LKUP_RET_BLACKHOLE; +		case RTN_UNREACHABLE: +			return BPF_FIB_LKUP_RET_UNREACHABLE; +		case RTN_PROHIBIT: +			return BPF_FIB_LKUP_RET_PROHIBIT; +		default: +			return BPF_FIB_LKUP_RET_NOT_FWDED; +		} +	} + +	if (f6i->fib6_type != RTN_UNICAST) +		return BPF_FIB_LKUP_RET_NOT_FWDED;  	if (f6i->fib6_nsiblings && fl6.flowi6_oif == 0)  		f6i = ipv6_stub->fib6_multipath_select(net, f6i, &fl6, @@ -4265,11 +4373,11 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,  	if (check_mtu) {  		mtu = ipv6_stub->ip6_mtu_from_fib6(f6i, dst, src);  		if (params->tot_len > mtu) -			return 0; +			return BPF_FIB_LKUP_RET_FRAG_NEEDED;  	}  	if (f6i->fib6_nh.nh_lwtstate) -		return 0; +		return BPF_FIB_LKUP_RET_UNSUPP_LWT;  	if (f6i->fib6_flags & RTF_GATEWAY)  		*dst = f6i->fib6_nh.nh_gw; @@ -4283,10 +4391,10 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,  	 */  	neigh = ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128,  				      ndisc_hashfn, dst, dev); -	if (neigh) -		return bpf_fib_set_fwd_params(params, neigh, dev); +	if (!neigh) +		return BPF_FIB_LKUP_RET_NO_NEIGH; -	return 0; +	return bpf_fib_set_fwd_params(params, neigh, dev);  }  #endif @@ -4328,7 +4436,7 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,  	   struct bpf_fib_lookup *, params, int, plen, u32, flags)  {  	struct net *net = dev_net(skb->dev); -	int index = -EAFNOSUPPORT; +	int rc = -EAFNOSUPPORT;  	if (plen < sizeof(*params))  		return -EINVAL; @@ -4339,25 +4447,25 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,  	switch (params->family) {  #if 
IS_ENABLED(CONFIG_INET)  	case AF_INET: -		index = bpf_ipv4_fib_lookup(net, params, flags, false); +		rc = bpf_ipv4_fib_lookup(net, params, flags, false);  		break;  #endif  #if IS_ENABLED(CONFIG_IPV6)  	case AF_INET6: -		index = bpf_ipv6_fib_lookup(net, params, flags, false); +		rc = bpf_ipv6_fib_lookup(net, params, flags, false);  		break;  #endif  	} -	if (index > 0) { +	if (!rc) {  		struct net_device *dev; -		dev = dev_get_by_index_rcu(net, index); +		dev = dev_get_by_index_rcu(net, params->ifindex);  		if (!is_skb_forwardable(dev, skb)) -			index = 0; +			rc = BPF_FIB_LKUP_RET_FRAG_NEEDED;  	} -	return index; +	return rc;  }  static const struct bpf_func_proto bpf_skb_fib_lookup_proto = { @@ -4430,10 +4538,10 @@ static const struct bpf_func_proto bpf_lwt_push_encap_proto = {  	.arg4_type	= ARG_CONST_SIZE  }; +#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)  BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset,  	   const void *, from, u32, len)  { -#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)  	struct seg6_bpf_srh_state *srh_state =  		this_cpu_ptr(&seg6_bpf_srh_states);  	void *srh_tlvs, *srh_end, *ptr; @@ -4459,9 +4567,6 @@ BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset,  	memcpy(skb->data + offset, from, len);  	return 0; -#else /* CONFIG_IPV6_SEG6_BPF */ -	return -EOPNOTSUPP; -#endif  }  static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = { @@ -4477,7 +4582,6 @@ static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = {  BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb,  	   u32, action, void *, param, u32, param_len)  { -#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)  	struct seg6_bpf_srh_state *srh_state =  		this_cpu_ptr(&seg6_bpf_srh_states);  	struct ipv6_sr_hdr *srh; @@ -4525,9 +4629,6 @@ BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb,  	default:  		return -EINVAL;  	} -#else /* CONFIG_IPV6_SEG6_BPF */ -	return -EOPNOTSUPP; -#endif  }  static const struct bpf_func_proto bpf_lwt_seg6_action_proto = { @@ -4543,7 +4644,6 @@ static const struct bpf_func_proto bpf_lwt_seg6_action_proto = {  BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset,  	   s32, len)  { -#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)  	struct seg6_bpf_srh_state *srh_state =  		this_cpu_ptr(&seg6_bpf_srh_states);  	void *srh_end, *srh_tlvs, *ptr; @@ -4587,9 +4687,6 @@ BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset,  	srh_state->hdrlen += len;  	srh_state->valid = 0;  	return 0; -#else /* CONFIG_IPV6_SEG6_BPF */ -	return -EOPNOTSUPP; -#endif  }  static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = { @@ -4600,6 +4697,7 @@ static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {  	.arg2_type	= ARG_ANYTHING,  	.arg3_type	= ARG_ANYTHING,  }; +#endif /* CONFIG_IPV6_SEG6_BPF */  bool bpf_helper_changes_pkt_data(void *func)  { @@ -4608,9 +4706,12 @@ bool bpf_helper_changes_pkt_data(void *func)  	    func == bpf_skb_store_bytes ||  	    func == bpf_skb_change_proto ||  	    func == bpf_skb_change_head || +	    func == sk_skb_change_head ||  	    func == bpf_skb_change_tail || +	    func == sk_skb_change_tail ||  	    func == bpf_skb_adjust_room ||  	    func == bpf_skb_pull_data || +	    func == sk_skb_pull_data ||  	    func == bpf_clone_redirect ||  	    func == bpf_l3_csum_replace ||  	    func == bpf_l4_csum_replace || @@ -4618,11 +4719,12 @@ bool bpf_helper_changes_pkt_data(void *func)  	    func == bpf_xdp_adjust_meta ||  	    func == bpf_msg_pull_data ||  	    func == bpf_xdp_adjust_tail || -	    
func == bpf_lwt_push_encap || +#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)  	    func == bpf_lwt_seg6_store_bytes ||  	    func == bpf_lwt_seg6_adjust_srh || -	    func == bpf_lwt_seg6_action -	    ) +	    func == bpf_lwt_seg6_action || +#endif +	    func == bpf_lwt_push_encap)  		return true;  	return false; @@ -4862,11 +4964,11 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)  	case BPF_FUNC_skb_load_bytes:  		return &bpf_skb_load_bytes_proto;  	case BPF_FUNC_skb_pull_data: -		return &bpf_skb_pull_data_proto; +		return &sk_skb_pull_data_proto;  	case BPF_FUNC_skb_change_tail: -		return &bpf_skb_change_tail_proto; +		return &sk_skb_change_tail_proto;  	case BPF_FUNC_skb_change_head: -		return &bpf_skb_change_head_proto; +		return &sk_skb_change_head_proto;  	case BPF_FUNC_get_socket_cookie:  		return &bpf_get_socket_cookie_proto;  	case BPF_FUNC_get_socket_uid: @@ -4957,12 +5059,14 @@ static const struct bpf_func_proto *  lwt_seg6local_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)  {  	switch (func_id) { +#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)  	case BPF_FUNC_lwt_seg6_store_bytes:  		return &bpf_lwt_seg6_store_bytes_proto;  	case BPF_FUNC_lwt_seg6_action:  		return &bpf_lwt_seg6_action_proto;  	case BPF_FUNC_lwt_seg6_adjust_srh:  		return &bpf_lwt_seg6_adjust_srh_proto; +#endif  	default:  		return lwt_out_func_proto(func_id, prog);  	} diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index b2b2323bdc84..188d693cb251 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -77,8 +77,20 @@ gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type,  		d->lock = lock;  		spin_lock_bh(lock);  	} -	if (d->tail) -		return gnet_stats_copy(d, type, NULL, 0, padattr); +	if (d->tail) { +		int ret = gnet_stats_copy(d, type, NULL, 0, padattr); + +		/* The initial attribute added in gnet_stats_copy() may be +		 * preceded by a padding attribute, in which case d->tail will +		 * end up pointing at the padding instead of the real attribute. +		 * Fix this so gnet_stats_finish_copy() adjusts the length of +		 * the right attribute. 
+		 */ +		if (ret == 0 && d->tail->nla_type == padattr) +			d->tail = (struct nlattr *)((char *)d->tail + +						    NLA_ALIGN(d->tail->nla_len)); +		return ret; +	}  	return 0;  } diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index e7e626fb87bb..e45098593dc0 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -217,7 +217,7 @@ static int bpf_parse_prog(struct nlattr *attr, struct bpf_lwt_prog *prog,  	if (!tb[LWT_BPF_PROG_FD] || !tb[LWT_BPF_PROG_NAME])  		return -EINVAL; -	prog->name = nla_memdup(tb[LWT_BPF_PROG_NAME], GFP_KERNEL); +	prog->name = nla_memdup(tb[LWT_BPF_PROG_NAME], GFP_ATOMIC);  	if (!prog->name)  		return -ENOMEM; diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 68bf07206744..43a932cb609b 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -269,7 +269,7 @@ static void __page_pool_empty_ring(struct page_pool *pool)  	struct page *page;  	/* Empty recycle ring */ -	while ((page = ptr_ring_consume(&pool->ring))) { +	while ((page = ptr_ring_consume_bh(&pool->ring))) {  		/* Verify the refcnt invariant of cached pages */  		if (!(page_ref_count(page) == 1))  			pr_crit("%s() page_pool refcnt %d violation\n", diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 5ef61222fdef..e3f743c141b3 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2759,9 +2759,12 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)  			return err;  	} -	dev->rtnl_link_state = RTNL_LINK_INITIALIZED; - -	__dev_notify_flags(dev, old_flags, ~0U); +	if (dev->rtnl_link_state == RTNL_LINK_INITIALIZED) { +		__dev_notify_flags(dev, old_flags, 0U); +	} else { +		dev->rtnl_link_state = RTNL_LINK_INITIALIZED; +		__dev_notify_flags(dev, old_flags, ~0U); +	}  	return 0;  }  EXPORT_SYMBOL(rtnl_configure_link); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c642304f178c..fb35b62af272 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -858,6 +858,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)  	n->cloned = 1;  	n->nohdr = 0;  	n->peeked = 0; +	C(pfmemalloc);  	n->destructor = NULL;  	C(tail);  	C(end); @@ -3719,6 +3720,7 @@ normal:  				net_warn_ratelimited(  					"skb_segment: too many frags: %u %u\n",  					pos, mss); +				err = -EINVAL;  				goto err;  			} @@ -3752,11 +3754,10 @@ skip_fraglist:  perform_csum_check:  		if (!csum) { -			if (skb_has_shared_frag(nskb)) { -				err = __skb_linearize(nskb); -				if (err) -					goto err; -			} +			if (skb_has_shared_frag(nskb) && +			    __skb_linearize(nskb)) +				goto err; +  			if (!nskb->remcsum_offload)  				nskb->ip_summed = CHECKSUM_NONE;  			SKB_GSO_CB(nskb)->csum = @@ -5276,8 +5277,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,  			if (npages >= 1 << order) {  				page = alloc_pages((gfp_mask & ~__GFP_DIRECT_RECLAIM) |  						   __GFP_COMP | -						   __GFP_NOWARN | -						   __GFP_NORETRY, +						   __GFP_NOWARN,  						   order);  				if (page)  					goto fill_page; diff --git a/net/core/sock.c b/net/core/sock.c index bcc41829a16d..bc2d7a37297f 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2277,9 +2277,9 @@ int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg,  		pfrag->offset += use;  		sge = sg + sg_curr - 1; -		if (sg_curr > first_coalesce && sg_page(sg) == pfrag->page && -		    sg->offset + sg->length == orig_offset) { -			sg->length += use; +		if (sg_curr > first_coalesce && sg_page(sge) == pfrag->page && +		    sge->offset + sge->length == orig_offset) { +			sge->length += use;  	
	} else {  			sge = sg + sg_curr;  			sg_unmark_end(sge); @@ -3243,7 +3243,8 @@ static int req_prot_init(const struct proto *prot)  	rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name,  					   rsk_prot->obj_size, 0, -					   prot->slab_flags, NULL); +					   SLAB_ACCOUNT | prot->slab_flags, +					   NULL);  	if (!rsk_prot->slab) {  		pr_crit("%s: Can't create request sock SLAB cache!\n", @@ -3258,7 +3259,8 @@ int proto_register(struct proto *prot, int alloc_slab)  	if (alloc_slab) {  		prot->slab = kmem_cache_create_usercopy(prot->name,  					prot->obj_size, 0, -					SLAB_HWCACHE_ALIGN | prot->slab_flags, +					SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT | +					prot->slab_flags,  					prot->useroffset, prot->usersize,  					NULL); @@ -3281,6 +3283,7 @@ int proto_register(struct proto *prot, int alloc_slab)  				kmem_cache_create(prot->twsk_prot->twsk_slab_name,  						  prot->twsk_prot->twsk_obj_size,  						  0, +						  SLAB_ACCOUNT |  						  prot->slab_flags,  						  NULL);  			if (prot->twsk_prot->twsk_slab == NULL) diff --git a/net/core/xdp.c b/net/core/xdp.c index 9d1f22072d5d..6771f1855b96 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -345,7 +345,8 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,  		rcu_read_lock();  		/* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */  		xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); -		xa->zc_alloc->free(xa->zc_alloc, handle); +		if (!WARN_ON_ONCE(!xa)) +			xa->zc_alloc->free(xa->zc_alloc, handle);  		rcu_read_unlock();  	default:  		/* Not possible, checked in xdp_rxq_info_reg_mem_model() */  | 
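The bpf_ipv4_fib_lookup()/bpf_ipv6_fib_lookup() hunks above switch the helper from returning an ifindex (or 0) to returning explicit BPF_FIB_LKUP_RET_* status codes, with the egress ifindex now written into params->ifindex by bpf_fib_set_fwd_params(). A minimal standalone sketch of the errno-to-status mapping done for IPv4; the enum values below are illustrative placeholders, not the authoritative definitions from include/uapi/linux/bpf.h:

```c
#include <errno.h>
#include <stdio.h>

/* Local mirror of a few BPF_FIB_LKUP_RET_* names used in the diff above;
 * numeric values here are illustrative only. */
enum {
	BPF_FIB_LKUP_RET_SUCCESS,	/* lookup succeeded, params filled in */
	BPF_FIB_LKUP_RET_BLACKHOLE,	/* destination is blackholed */
	BPF_FIB_LKUP_RET_UNREACHABLE,	/* destination is unreachable */
	BPF_FIB_LKUP_RET_PROHIBIT,	/* destination is administratively prohibited */
	BPF_FIB_LKUP_RET_NOT_FWDED,	/* packet is not forwarded */
};

/* Sketch of the fib_lookup() error mapping added to bpf_ipv4_fib_lookup() */
static int fib_err_to_lkup_ret(int err)
{
	switch (err) {
	case 0:
		return BPF_FIB_LKUP_RET_SUCCESS;
	case -EINVAL:
		return BPF_FIB_LKUP_RET_BLACKHOLE;
	case -EHOSTUNREACH:
		return BPF_FIB_LKUP_RET_UNREACHABLE;
	case -EACCES:
		return BPF_FIB_LKUP_RET_PROHIBIT;
	default:
		return BPF_FIB_LKUP_RET_NOT_FWDED;
	}
}

int main(void)
{
	printf("-EHOSTUNREACH maps to %d\n", fib_err_to_lkup_ret(-EHOSTUNREACH));
	return 0;
}
```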
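The gen_stats.c hunk above deals with d->tail ending up on a padding attribute instead of the stats attribute created by gnet_stats_copy(), so the later length fixup would patch the wrong attribute. A minimal sketch of that pointer adjustment, assuming simplified stand-ins for struct nlattr and NLA_ALIGN() (the real definitions live in include/uapi/linux/netlink.h):

```c
#include <stdint.h>

/* Simplified stand-in for the kernel's struct nlattr. */
struct nlattr {
	uint16_t nla_len;	/* attribute length, including this header */
	uint16_t nla_type;	/* attribute type */
};

#define NLA_ALIGNTO	4
#define NLA_ALIGN(len)	(((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))

/* Sketch of the adjustment added to gnet_stats_start_copy_compat(): if the
 * attribute that tail points at is the padding attribute, step over it so a
 * later length fixup (gnet_stats_finish_copy() in the kernel) lands on the
 * real stats attribute instead. */
struct nlattr *skip_pad_attr(struct nlattr *tail, int padattr)
{
	if (tail->nla_type == padattr)
		tail = (struct nlattr *)((char *)tail +
					 NLA_ALIGN(tail->nla_len));
	return tail;
}
```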
