author | Jens Axboe <axboe@kernel.dk> | 2022-07-24 18:41:03 -0600
committer | Jens Axboe <axboe@kernel.dk> | 2022-07-24 18:41:03 -0600
commit | 4effe18fc0da27ae5d51a702841e87fa13b8a32d (patch)
tree | 468f353a3713c93b27e7b2c262efd747e66ff199 /net
parent | 32e09298c8b3ff29177c825ab711a4a692d4caad (diff)
parent | f6b543fd03d347e8bf245cee4f2d54eb6ffd8fcb (diff)
Merge branch 'for-5.20/io_uring' into for-5.20/io_uring-zerocopy-send
* for-5.20/io_uring: (716 commits)
io_uring: ensure REQ_F_ISREG is set async offload
net: fix compat pointer in get_compat_msghdr()
io_uring: Don't require reinitable percpu_ref
io_uring: fix types in io_recvmsg_multishot_overflow
io_uring: Use atomic_long_try_cmpxchg in __io_account_mem
io_uring: support multishot in recvmsg
net: copy from user before calling __get_compat_msghdr
net: copy from user before calling __copy_msghdr
io_uring: support 0 length iov in buffer select in compat
io_uring: fix multishot ending when not polled
io_uring: add netmsg cache
io_uring: impose max limit on apoll cache
io_uring: add abstraction around apoll cache
io_uring: move apoll cache to poll.c
io_uring: consolidate hash_locked io-wq handling
io_uring: clear REQ_F_HASH_LOCKED on hash removal
io_uring: don't race double poll setting REQ_F_ASYNC_DATA
io_uring: don't miss setting REQ_F_DOUBLE_POLL
io_uring: disable multishot recvmsg
io_uring: only trace one of complete or overflow
...
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'net')
67 files changed, 445 insertions, 388 deletions
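Most of the net/ changes pulled in below convert bare sysctl reads (tcp_syncookies, tcp_timestamps, igmp_qrv, ip_default_ttl, and friends) into READ_ONCE()/WRITE_ONCE() accesses, so the tunables can be rewritten at runtime without data races against lockless readers. The following is a minimal userspace sketch of that pattern, not kernel code: the READ_ONCE/WRITE_ONCE macros are the simplified volatile-cast form rather than the kernel's full implementation, and sysctl_tcp_syn_retries is a stand-in global, not the real per-netns field.

```c
#include <stdio.h>

/* Simplified stand-in for the kernel's READ_ONCE/WRITE_ONCE: a volatile
 * access forces the compiler to read or write the location exactly once,
 * without caching, re-loading, or fusing accesses. */
#define READ_ONCE(x)     (*(volatile __typeof__(x) *)&(x))
#define WRITE_ONCE(x, v) (*(volatile __typeof__(x) *)&(x) = (v))

static int sysctl_tcp_syn_retries = 6;   /* hypothetical tunable */

static int max_syn_retries(int per_socket_override)
{
	/* Take one snapshot of the shared tunable and reuse it, so the
	 * test and the returned value stay consistent even if another
	 * thread rewrites the sysctl concurrently. */
	int retries = READ_ONCE(sysctl_tcp_syn_retries);

	return per_socket_override ? per_socket_override : retries;
}

int main(void)
{
	WRITE_ONCE(sysctl_tcp_syn_retries, 5);
	printf("effective retries: %d\n", max_syn_retries(0));
	return 0;
}
```

Snapshotting once and reusing the local copy is also why several hunks below introduce locals such as syncookies, max_syn_backlog, or bkt_size before testing them, instead of re-reading the sysctl at each use.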
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 53b1955b027f..214532173536 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -182,10 +182,14 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev, else if (dev->mtu > max_mtu) return -EINVAL; + /* Note: If this initial vlan_changelink() fails, we need + * to call vlan_dev_free_egress_priority() to free memory. + */ err = vlan_changelink(dev, tb, data, extack); - if (err) - return err; - err = register_vlan_dev(dev, extack); + + if (!err) + err = register_vlan_dev(dev, extack); + if (err) vlan_dev_free_egress_priority(dev); return err; diff --git a/net/compat.c b/net/compat.c index 6cd2e7683dd0..fe9be3c56ef7 100644 --- a/net/compat.c +++ b/net/compat.c @@ -34,20 +34,15 @@ #include <net/compat.h> int __get_compat_msghdr(struct msghdr *kmsg, - struct compat_msghdr __user *umsg, - struct sockaddr __user **save_addr, - compat_uptr_t *ptr, compat_size_t *len) + struct compat_msghdr *msg, + struct sockaddr __user **save_addr) { - struct compat_msghdr msg; ssize_t err; - if (copy_from_user(&msg, umsg, sizeof(*umsg))) - return -EFAULT; - - kmsg->msg_flags = msg.msg_flags; - kmsg->msg_namelen = msg.msg_namelen; + kmsg->msg_flags = msg->msg_flags; + kmsg->msg_namelen = msg->msg_namelen; - if (!msg.msg_name) + if (!msg->msg_name) kmsg->msg_namelen = 0; if (kmsg->msg_namelen < 0) @@ -57,15 +52,15 @@ int __get_compat_msghdr(struct msghdr *kmsg, kmsg->msg_namelen = sizeof(struct sockaddr_storage); kmsg->msg_control_is_user = true; - kmsg->msg_control_user = compat_ptr(msg.msg_control); - kmsg->msg_controllen = msg.msg_controllen; + kmsg->msg_control_user = compat_ptr(msg->msg_control); + kmsg->msg_controllen = msg->msg_controllen; if (save_addr) - *save_addr = compat_ptr(msg.msg_name); + *save_addr = compat_ptr(msg->msg_name); - if (msg.msg_name && kmsg->msg_namelen) { + if (msg->msg_name && kmsg->msg_namelen) { if (!save_addr) { - err = move_addr_to_kernel(compat_ptr(msg.msg_name), + err = move_addr_to_kernel(compat_ptr(msg->msg_name), kmsg->msg_namelen, kmsg->msg_name); if (err < 0) @@ -76,13 +71,11 @@ int __get_compat_msghdr(struct msghdr *kmsg, kmsg->msg_namelen = 0; } - if (msg.msg_iovlen > UIO_MAXIOV) + if (msg->msg_iovlen > UIO_MAXIOV) return -EMSGSIZE; kmsg->msg_iocb = NULL; kmsg->msg_ubuf = NULL; - *ptr = msg.msg_iov; - *len = msg.msg_iovlen; return 0; } @@ -91,15 +84,17 @@ int get_compat_msghdr(struct msghdr *kmsg, struct sockaddr __user **save_addr, struct iovec **iov) { - compat_uptr_t ptr; - compat_size_t len; + struct compat_msghdr msg; ssize_t err; - err = __get_compat_msghdr(kmsg, umsg, save_addr, &ptr, &len); + if (copy_from_user(&msg, umsg, sizeof(*umsg))) + return -EFAULT; + + err = __get_compat_msghdr(kmsg, &msg, save_addr); if (err) return err; - err = import_iovec(save_addr ? READ : WRITE, compat_ptr(ptr), len, + err = import_iovec(save_addr ? READ : WRITE, compat_ptr(msg.msg_iov), msg.msg_iovlen, UIO_FASTIOV, iov, &kmsg->msg_iter); return err < 0 ? err : 0; } diff --git a/net/core/dev.c b/net/core/dev.c index 8e6f22961206..30a1603a7225 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4863,7 +4863,10 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, } /* When doing generic XDP we have to bypass the qdisc layer and the - * network taps in order to match in-driver-XDP behavior. + * network taps in order to match in-driver-XDP behavior. 
This also means + * that XDP packets are able to starve other packets going through a qdisc, + * and DDOS attacks will be more effective. In-driver-XDP use dedicated TX + * queues, so they do not have this starvation issue. */ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) { @@ -4875,7 +4878,7 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) txq = netdev_core_pick_tx(dev, skb, NULL); cpu = smp_processor_id(); HARD_TX_LOCK(dev, txq, cpu); - if (!netif_xmit_stopped(txq)) { + if (!netif_xmit_frozen_or_drv_stopped(txq)) { rc = netdev_start_xmit(skb, dev, txq, 0); if (dev_xmit_complete(rc)) free_skb = false; @@ -4883,6 +4886,7 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) HARD_TX_UNLOCK(dev, txq); if (free_skb) { trace_xdp_exception(dev, xdp_prog, XDP_TX); + dev_core_stats_tx_dropped_inc(dev); kfree_skb(skb); } } diff --git a/net/core/filter.c b/net/core/filter.c index 5d16d66727fc..7950f7520765 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6158,7 +6158,6 @@ static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len if (err) return err; - ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); return seg6_lookup_nexthop(skb, NULL, 0); @@ -7042,7 +7041,7 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN) return -EINVAL; - if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies)) return -EINVAL; if (!th->ack || th->rst || th->syn) @@ -7117,7 +7116,7 @@ BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len, if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN) return -EINVAL; - if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies)) return -ENOENT; if (!th->syn || th->ack || th->fin || th->rst) diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 5f85e01d4093..b0ff6153be62 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -64,7 +64,7 @@ u32 secure_tcpv6_ts_off(const struct net *net, .daddr = *(struct in6_addr *)daddr, }; - if (net->ipv4.sysctl_tcp_timestamps != 1) + if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1) return 0; ts_secret_init(); @@ -120,7 +120,7 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral); #ifdef CONFIG_INET u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr) { - if (net->ipv4.sysctl_tcp_timestamps != 1) + if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1) return 0; ts_secret_init(); diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index 3f00a28fe762..5daa1fa54249 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -387,7 +387,7 @@ void reuseport_stop_listen_sock(struct sock *sk) prog = rcu_dereference_protected(reuse->prog, lockdep_is_held(&reuseport_lock)); - if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req || + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_migrate_req) || (prog && prog->expected_attach_type == BPF_SK_REUSEPORT_SELECT_OR_MIGRATE)) { /* Migration capable, move sk from the listening section * to the closed section. 
@@ -545,7 +545,7 @@ struct sock *reuseport_migrate_sock(struct sock *sk, hash = migrating_sk->sk_hash; prog = rcu_dereference(reuse->prog); if (!prog || prog->expected_attach_type != BPF_SK_REUSEPORT_SELECT_OR_MIGRATE) { - if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req) + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_migrate_req)) goto select_by_hash; goto failure; } diff --git a/net/dsa/port.c b/net/dsa/port.c index 3738f2d40a0b..2dd76eb1621c 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -248,6 +248,7 @@ static void dsa_port_reset_vlan_filtering(struct dsa_port *dp, struct netlink_ext_ack extack = {0}; bool change_vlan_filtering = false; struct dsa_switch *ds = dp->ds; + struct dsa_port *other_dp; bool vlan_filtering; int err; @@ -270,8 +271,8 @@ static void dsa_port_reset_vlan_filtering(struct dsa_port *dp, * VLAN-aware bridge. */ if (change_vlan_filtering && ds->vlan_filtering_is_global) { - dsa_switch_for_each_port(dp, ds) { - struct net_device *br = dsa_port_bridge_dev_get(dp); + dsa_switch_for_each_port(other_dp, ds) { + struct net_device *br = dsa_port_bridge_dev_get(other_dp); if (br && br_vlan_enabled(br)) { change_vlan_filtering = false; @@ -799,7 +800,7 @@ int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, ds->vlan_filtering = vlan_filtering; dsa_switch_for_each_user_port(other_dp, ds) { - struct net_device *slave = dp->slave; + struct net_device *slave = other_dp->slave; /* We might be called in the unbind path, so not * all slave devices might still be registered. diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 93da9f783bec..252c8bceaba4 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -217,7 +217,7 @@ int inet_listen(struct socket *sock, int backlog) * because the socket was in TCP_LISTEN state previously but * was shutdown() rather than close(). */ - tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen; + tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen); if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) && (tcp_fastopen & TFO_SERVER_ENABLE) && !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) { @@ -335,7 +335,7 @@ lookup_protocol: inet->hdrincl = 1; } - if (net->ipv4.sysctl_ip_no_pmtu_disc) + if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc)) inet->pmtudisc = IP_PMTUDISC_DONT; else inet->pmtudisc = IP_PMTUDISC_WANT; @@ -1246,7 +1246,7 @@ static int inet_sk_reselect_saddr(struct sock *sk) if (new_saddr == old_saddr) return 0; - if (sock_net(sk)->ipv4.sysctl_ip_dynaddr > 1) { + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) > 1) { pr_info("%s(): shifting inet->saddr from %pI4 to %pI4\n", __func__, &old_saddr, &new_saddr); } @@ -1301,7 +1301,7 @@ int inet_sk_rebuild_header(struct sock *sk) * Other protocols have to map its equivalent state to TCP_SYN_SENT. * DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme */ - if (!sock_net(sk)->ipv4.sysctl_ip_dynaddr || + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) || sk->sk_state != TCP_SYN_SENT || (sk->sk_userlocks & SOCK_BINDADDR_LOCK) || (err = inet_sk_reselect_saddr(sk)) != 0) @@ -1710,24 +1710,14 @@ static const struct net_protocol igmp_protocol = { }; #endif -/* thinking of making this const? Don't. - * early_demux can change based on sysctl. - */ -static struct net_protocol tcp_protocol = { - .early_demux = tcp_v4_early_demux, - .early_demux_handler = tcp_v4_early_demux, +static const struct net_protocol tcp_protocol = { .handler = tcp_v4_rcv, .err_handler = tcp_v4_err, .no_policy = 1, .icmp_strict_tag_validation = 1, }; -/* thinking of making this const? 
Don't. - * early_demux can change based on sysctl. - */ -static struct net_protocol udp_protocol = { - .early_demux = udp_v4_early_demux, - .early_demux_handler = udp_v4_early_demux, +static const struct net_protocol udp_protocol = { .handler = udp_rcv, .err_handler = udp_err, .no_policy = 1, diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 6eea1e9e998d..f8ad04470d3a 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -507,7 +507,7 @@ static int ah_init_state(struct xfrm_state *x) if (aalg_desc->uinfo.auth.icv_fullbits/8 != crypto_ahash_digestsize(ahash)) { - pr_info("%s: %s digestsize %u != %hu\n", + pr_info("%s: %s digestsize %u != %u\n", __func__, x->aalg->alg_name, crypto_ahash_digestsize(ahash), aalg_desc->uinfo.auth.icv_fullbits / 8); diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 62d5f99760aa..6cd3b6c559f0 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -239,7 +239,7 @@ static int cipso_v4_cache_check(const unsigned char *key, struct cipso_v4_map_cache_entry *prev_entry = NULL; u32 hash; - if (!cipso_v4_cache_enabled) + if (!READ_ONCE(cipso_v4_cache_enabled)) return -ENOENT; hash = cipso_v4_map_cache_hash(key, key_len); @@ -296,13 +296,14 @@ static int cipso_v4_cache_check(const unsigned char *key, int cipso_v4_cache_add(const unsigned char *cipso_ptr, const struct netlbl_lsm_secattr *secattr) { + int bkt_size = READ_ONCE(cipso_v4_cache_bucketsize); int ret_val = -EPERM; u32 bkt; struct cipso_v4_map_cache_entry *entry = NULL; struct cipso_v4_map_cache_entry *old_entry = NULL; u32 cipso_ptr_len; - if (!cipso_v4_cache_enabled || cipso_v4_cache_bucketsize <= 0) + if (!READ_ONCE(cipso_v4_cache_enabled) || bkt_size <= 0) return 0; cipso_ptr_len = cipso_ptr[1]; @@ -322,7 +323,7 @@ int cipso_v4_cache_add(const unsigned char *cipso_ptr, bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETS - 1); spin_lock_bh(&cipso_v4_cache[bkt].lock); - if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) { + if (cipso_v4_cache[bkt].size < bkt_size) { list_add(&entry->list, &cipso_v4_cache[bkt].list); cipso_v4_cache[bkt].size += 1; } else { @@ -1199,7 +1200,8 @@ static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def, /* This will send packets using the "optimized" format when * possible as specified in section 3.4.2.6 of the * CIPSO draft. */ - if (cipso_v4_rbm_optfmt && ret_val > 0 && ret_val <= 10) + if (READ_ONCE(cipso_v4_rbm_optfmt) && ret_val > 0 && + ret_val <= 10) tag_len = 14; else tag_len = 4 + ret_val; @@ -1603,7 +1605,7 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option) * all the CIPSO validations here but it doesn't * really specify _exactly_ what we need to validate * ... so, just make it a sysctl tunable. 
*/ - if (cipso_v4_rbm_strictvalid) { + if (READ_ONCE(cipso_v4_rbm_strictvalid)) { if (cipso_v4_map_lvl_valid(doi_def, tag[3]) < 0) { err_offset = opt_iter + 3; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index b21238df3301..b694f352ce7a 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -1108,7 +1108,7 @@ static int esp_init_authenc(struct xfrm_state *x) err = -EINVAL; if (aalg_desc->uinfo.auth.icv_fullbits / 8 != crypto_aead_authsize(aead)) { - pr_info("ESP: %s digestsize %u != %hu\n", + pr_info("ESP: %s digestsize %u != %u\n", x->aalg->alg_name, crypto_aead_authsize(aead), aalg_desc->uinfo.auth.icv_fullbits / 8); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index a57ba23571c9..db7b2503f068 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1230,7 +1230,7 @@ static int fib_check_nh_nongw(struct net *net, struct fib_nh *nh, nh->fib_nh_dev = in_dev->dev; dev_hold_track(nh->fib_nh_dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC); - nh->fib_nh_scope = RT_SCOPE_HOST; + nh->fib_nh_scope = RT_SCOPE_LINK; if (!netif_carrier_ok(nh->fib_nh_dev)) nh->fib_nh_flags |= RTNH_F_LINKDOWN; err = 0; @@ -1811,7 +1811,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, goto nla_put_failure; if (nexthop_is_blackhole(fi->nh)) rtm->rtm_type = RTN_BLACKHOLE; - if (!fi->fib_net->ipv4.sysctl_nexthop_compat_mode) + if (!READ_ONCE(fi->fib_net->ipv4.sysctl_nexthop_compat_mode)) goto offload; } @@ -2216,7 +2216,7 @@ void fib_select_multipath(struct fib_result *res, int hash) } change_nexthops(fi) { - if (net->ipv4.sysctl_fib_multipath_use_neigh) { + if (READ_ONCE(net->ipv4.sysctl_fib_multipath_use_neigh)) { if (!fib_good_nh(nexthop_nh)) continue; if (!first) { diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 2734c3af7e24..46e8a5125853 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -498,7 +498,7 @@ static void tnode_free(struct key_vector *tn) tn = container_of(head, struct tnode, rcu)->kv; } - if (tnode_free_size >= sysctl_fib_sync_mem) { + if (tnode_free_size >= READ_ONCE(sysctl_fib_sync_mem)) { tnode_free_size = 0; synchronize_rcu(); } diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index efea0e796f06..d5d745c3e345 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -253,11 +253,12 @@ bool icmp_global_allow(void) spin_lock(&icmp_global.lock); delta = min_t(u32, now - icmp_global.stamp, HZ); if (delta >= HZ / 50) { - incr = sysctl_icmp_msgs_per_sec * delta / HZ ; + incr = READ_ONCE(sysctl_icmp_msgs_per_sec) * delta / HZ; if (incr) WRITE_ONCE(icmp_global.stamp, now); } - credit = min_t(u32, icmp_global.credit + incr, sysctl_icmp_msgs_burst); + credit = min_t(u32, icmp_global.credit + incr, + READ_ONCE(sysctl_icmp_msgs_burst)); if (credit) { /* We want to use a credit of one in average, but need to randomize * it for security reasons. @@ -281,7 +282,7 @@ static bool icmpv4_mask_allow(struct net *net, int type, int code) return true; /* Limit if icmp type is enabled in ratemask. 
*/ - if (!((1 << type) & net->ipv4.sysctl_icmp_ratemask)) + if (!((1 << type) & READ_ONCE(net->ipv4.sysctl_icmp_ratemask))) return true; return false; @@ -319,7 +320,8 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, vif = l3mdev_master_ifindex(dst->dev); peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif, 1); - rc = inet_peer_xrlim_allow(peer, net->ipv4.sysctl_icmp_ratelimit); + rc = inet_peer_xrlim_allow(peer, + READ_ONCE(net->ipv4.sysctl_icmp_ratelimit)); if (peer) inet_putpeer(peer); out: @@ -692,7 +694,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, rcu_read_lock(); if (rt_is_input_route(rt) && - net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr) + READ_ONCE(net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr)) dev = dev_get_by_index_rcu(net, inet_iif(skb_in)); if (dev) @@ -879,7 +881,7 @@ static enum skb_drop_reason icmp_unreach(struct sk_buff *skb) * values please see * Documentation/networking/ip-sysctl.rst */ - switch (net->ipv4.sysctl_ip_no_pmtu_disc) { + switch (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc)) { default: net_dbg_ratelimited("%pI4: fragmentation needed and DF set\n", &iph->daddr); @@ -932,7 +934,7 @@ static enum skb_drop_reason icmp_unreach(struct sk_buff *skb) * get the other vendor to fix their kit. */ - if (!net->ipv4.sysctl_icmp_ignore_bogus_error_responses && + if (!READ_ONCE(net->ipv4.sysctl_icmp_ignore_bogus_error_responses) && inet_addr_type_dev_table(net, skb->dev, iph->daddr) == RTN_BROADCAST) { net_warn_ratelimited("%pI4 sent an invalid ICMP type %u, code %u error to a broadcast: %pI4 on %s\n", &ip_hdr(skb)->saddr, @@ -992,7 +994,7 @@ static enum skb_drop_reason icmp_echo(struct sk_buff *skb) net = dev_net(skb_dst(skb)->dev); /* should there be an ICMP stat for ignored echos? 
*/ - if (net->ipv4.sysctl_icmp_echo_ignore_all) + if (READ_ONCE(net->ipv4.sysctl_icmp_echo_ignore_all)) return SKB_NOT_DROPPED_YET; icmp_param.data.icmph = *icmp_hdr(skb); @@ -1027,7 +1029,7 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr) u16 ident_len; u8 status; - if (!net->ipv4.sysctl_icmp_echo_enable_probe) + if (!READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe)) return false; /* We currently only support probing interfaces on the proxy node @@ -1248,7 +1250,7 @@ int icmp_rcv(struct sk_buff *skb) */ if ((icmph->type == ICMP_ECHO || icmph->type == ICMP_TIMESTAMP) && - net->ipv4.sysctl_icmp_echo_ignore_broadcasts) { + READ_ONCE(net->ipv4.sysctl_icmp_echo_ignore_broadcasts)) { reason = SKB_DROP_REASON_INVALID_PROTO; goto error; } diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index b65d074d9620..e3ab0cb61624 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -467,7 +467,8 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, if (pmc->multiaddr == IGMP_ALL_HOSTS) return skb; - if (ipv4_is_local_multicast(pmc->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports) + if (ipv4_is_local_multicast(pmc->multiaddr) && + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) return skb; mtu = READ_ONCE(dev->mtu); @@ -593,7 +594,7 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc) if (pmc->multiaddr == IGMP_ALL_HOSTS) continue; if (ipv4_is_local_multicast(pmc->multiaddr) && - !net->ipv4.sysctl_igmp_llm_reports) + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) continue; spin_lock_bh(&pmc->lock); if (pmc->sfcount[MCAST_EXCLUDE]) @@ -736,7 +737,8 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, if (type == IGMPV3_HOST_MEMBERSHIP_REPORT) return igmpv3_send_report(in_dev, pmc); - if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports) + if (ipv4_is_local_multicast(group) && + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) return 0; if (type == IGMP_HOST_LEAVE_MESSAGE) @@ -825,7 +827,7 @@ static void igmp_ifc_event(struct in_device *in_dev) struct net *net = dev_net(in_dev->dev); if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) return; - WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv); + WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv)); igmp_ifc_start_timer(in_dev, 1); } @@ -920,7 +922,8 @@ static bool igmp_heard_report(struct in_device *in_dev, __be32 group) if (group == IGMP_ALL_HOSTS) return false; - if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports) + if (ipv4_is_local_multicast(group) && + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) return false; rcu_read_lock(); @@ -1006,7 +1009,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, * received value was zero, use the default or statically * configured value. */ - in_dev->mr_qrv = ih3->qrv ?: net->ipv4.sysctl_igmp_qrv; + in_dev->mr_qrv = ih3->qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); in_dev->mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL; /* RFC3376, 8.3. 
Query Response Interval: @@ -1045,7 +1048,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, if (im->multiaddr == IGMP_ALL_HOSTS) continue; if (ipv4_is_local_multicast(im->multiaddr) && - !net->ipv4.sysctl_igmp_llm_reports) + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) continue; spin_lock_bh(&im->lock); if (im->tm_running) @@ -1186,7 +1189,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im, pmc->interface = im->interface; in_dev_hold(in_dev); pmc->multiaddr = im->multiaddr; - pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); pmc->sfmode = im->sfmode; if (pmc->sfmode == MCAST_INCLUDE) { struct ip_sf_list *psf; @@ -1237,9 +1240,11 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im) swap(im->tomb, pmc->tomb); swap(im->sources, pmc->sources); for (psf = im->sources; psf; psf = psf->sf_next) - psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + psf->sf_crcount = in_dev->mr_qrv ?: + READ_ONCE(net->ipv4.sysctl_igmp_qrv); } else { - im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + im->crcount = in_dev->mr_qrv ?: + READ_ONCE(net->ipv4.sysctl_igmp_qrv); } in_dev_put(pmc->interface); kfree_pmc(pmc); @@ -1296,7 +1301,8 @@ static void __igmp_group_dropped(struct ip_mc_list *im, gfp_t gfp) #ifdef CONFIG_IP_MULTICAST if (im->multiaddr == IGMP_ALL_HOSTS) return; - if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports) + if (ipv4_is_local_multicast(im->multiaddr) && + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) return; reporter = im->reporter; @@ -1338,13 +1344,14 @@ static void igmp_group_added(struct ip_mc_list *im) #ifdef CONFIG_IP_MULTICAST if (im->multiaddr == IGMP_ALL_HOSTS) return; - if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports) + if (ipv4_is_local_multicast(im->multiaddr) && + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) return; if (in_dev->dead) return; - im->unsolicit_count = net->ipv4.sysctl_igmp_qrv; + im->unsolicit_count = READ_ONCE(net->ipv4.sysctl_igmp_qrv); if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) { spin_lock_bh(&im->lock); igmp_start_timer(im, IGMP_INITIAL_REPORT_DELAY); @@ -1358,7 +1365,7 @@ static void igmp_group_added(struct ip_mc_list *im) * IN() to IN(A). 
*/ if (im->sfmode == MCAST_EXCLUDE) - im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + im->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); igmp_ifc_event(in_dev); #endif @@ -1642,7 +1649,7 @@ static void ip_mc_rejoin_groups(struct in_device *in_dev) if (im->multiaddr == IGMP_ALL_HOSTS) continue; if (ipv4_is_local_multicast(im->multiaddr) && - !net->ipv4.sysctl_igmp_llm_reports) + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) continue; /* a failover is happening and switches @@ -1749,7 +1756,7 @@ static void ip_mc_reset(struct in_device *in_dev) in_dev->mr_qi = IGMP_QUERY_INTERVAL; in_dev->mr_qri = IGMP_QUERY_RESPONSE_INTERVAL; - in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv; + in_dev->mr_qrv = READ_ONCE(net->ipv4.sysctl_igmp_qrv); } #else static void ip_mc_reset(struct in_device *in_dev) @@ -1883,7 +1890,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, #ifdef CONFIG_IP_MULTICAST if (psf->sf_oldin && !IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) { - psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + psf->sf_crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); psf->sf_next = pmc->tomb; pmc->tomb = psf; rv = 1; @@ -1947,7 +1954,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode, /* filter mode change */ pmc->sfmode = MCAST_INCLUDE; #ifdef CONFIG_IP_MULTICAST - pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount); for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; @@ -2126,7 +2133,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, #ifdef CONFIG_IP_MULTICAST /* else no filters; keep old mode for reports */ - pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount); for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; @@ -2192,7 +2199,7 @@ static int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr, count++; } err = -ENOBUFS; - if (count >= net->ipv4.sysctl_igmp_max_memberships) + if (count >= READ_ONCE(net->ipv4.sysctl_igmp_max_memberships)) goto done; iml = sock_kmalloc(sk, sizeof(*iml), GFP_KERNEL); if (!iml) @@ -2379,7 +2386,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct } /* else, add a new source to the filter */ - if (psl && psl->sl_count >= net->ipv4.sysctl_igmp_max_msf) { + if (psl && psl->sl_count >= READ_ONCE(net->ipv4.sysctl_igmp_max_msf)) { err = -ENOBUFS; goto done; } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 53f5f956d948..eb31c7158b39 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -263,7 +263,7 @@ next_port: goto other_half_scan; } - if (net->ipv4.sysctl_ip_autobind_reuse && !relax) { + if (READ_ONCE(net->ipv4.sysctl_ip_autobind_reuse) && !relax) { /* We still have a chance to connect to different destinations */ relax = true; goto ports_exhausted; @@ -833,7 +833,8 @@ static void reqsk_timer_handler(struct timer_list *t) icsk = inet_csk(sk_listener); net = sock_net(sk_listener); - max_syn_ack_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries; + max_syn_ack_retries = icsk->icsk_syn_retries ? : + READ_ONCE(net->ipv4.sysctl_tcp_synack_retries); /* Normally all the openreqs are young and become mature * (i.e. 
converted to established socket) for first timeout. * If synack was not acknowledged for 1 second, it means diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 0ec501845cb3..47ccc343c9fb 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -156,7 +156,8 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, { struct inet_timewait_sock *tw; - if (refcount_read(&dr->tw_refcount) - 1 >= dr->sysctl_max_tw_buckets) + if (refcount_read(&dr->tw_refcount) - 1 >= + READ_ONCE(dr->sysctl_max_tw_buckets)) return NULL; tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab, diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index da21dfce24d7..e9fed83e9b3c 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -141,16 +141,20 @@ static void inet_peer_gc(struct inet_peer_base *base, struct inet_peer *gc_stack[], unsigned int gc_cnt) { + int peer_threshold, peer_maxttl, peer_minttl; struct inet_peer *p; __u32 delta, ttl; int i; - if (base->total >= inet_peer_threshold) + peer_threshold = READ_ONCE(inet_peer_threshold); + peer_maxttl = READ_ONCE(inet_peer_maxttl); + peer_minttl = READ_ONCE(inet_peer_minttl); + + if (base->total >= peer_threshold) ttl = 0; /* be aggressive */ else - ttl = inet_peer_maxttl - - (inet_peer_maxttl - inet_peer_minttl) / HZ * - base->total / inet_peer_threshold * HZ; + ttl = peer_maxttl - (peer_maxttl - peer_minttl) / HZ * + base->total / peer_threshold * HZ; for (i = 0; i < gc_cnt; i++) { p = gc_stack[i]; diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index e3aa436a1bdf..e18931a6d153 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -157,7 +157,7 @@ int ip_forward(struct sk_buff *skb) !skb_sec_path(skb)) ip_rt_send_redirect(skb); - if (net->ipv4.sysctl_ip_fwd_update_priority) + if (READ_ONCE(net->ipv4.sysctl_ip_fwd_update_priority)) skb->priority = rt_tos2priority(iph->tos); return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index b1165f717cd1..1b512390b3cf 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -312,14 +312,13 @@ static bool ip_can_use_hint(const struct sk_buff *skb, const struct iphdr *iph, ip_hdr(hint)->tos == iph->tos; } -INDIRECT_CALLABLE_DECLARE(int udp_v4_early_demux(struct sk_buff *)); -INDIRECT_CALLABLE_DECLARE(int tcp_v4_early_demux(struct sk_buff *)); +int tcp_v4_early_demux(struct sk_buff *skb); +int udp_v4_early_demux(struct sk_buff *skb); static int ip_rcv_finish_core(struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *dev, const struct sk_buff *hint) { const struct iphdr *iph = ip_hdr(skb); - int (*edemux)(struct sk_buff *skb); int err, drop_reason; struct rtable *rt; @@ -332,21 +331,29 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk, goto drop_error; } - if (net->ipv4.sysctl_ip_early_demux && + if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) && !skb_dst(skb) && !skb->sk && !ip_is_fragment(iph)) { - const struct net_protocol *ipprot; - int protocol = iph->protocol; - - ipprot = rcu_dereference(inet_protos[protocol]); - if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) { - err = INDIRECT_CALL_2(edemux, tcp_v4_early_demux, - udp_v4_early_demux, skb); - if (unlikely(err)) - goto drop_error; - /* must reload iph, skb->head might have changed */ - iph = ip_hdr(skb); + switch (iph->protocol) { + case IPPROTO_TCP: + if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux)) { + tcp_v4_early_demux(skb); + + /* must reload iph, skb->head might 
have changed */ + iph = ip_hdr(skb); + } + break; + case IPPROTO_UDP: + if (READ_ONCE(net->ipv4.sysctl_udp_early_demux)) { + err = udp_v4_early_demux(skb); + if (unlikely(err)) + goto drop_error; + + /* must reload iph, skb->head might have changed */ + iph = ip_hdr(skb); + } + break; } } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 445a9ecaefa1..a8a323ecbb54 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -782,7 +782,7 @@ static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen) /* numsrc >= (4G-140)/128 overflow in 32 bits */ err = -ENOBUFS; if (gsf->gf_numsrc >= 0x1ffffff || - gsf->gf_numsrc > sock_net(sk)->ipv4.sysctl_igmp_max_msf) + gsf->gf_numsrc > READ_ONCE(sock_net(sk)->ipv4.sysctl_igmp_max_msf)) goto out_free_gsf; err = -EINVAL; @@ -832,7 +832,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, /* numsrc >= (4G-140)/128 overflow in 32 bits */ err = -ENOBUFS; - if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf) + if (n > READ_ONCE(sock_net(sk)->ipv4.sysctl_igmp_max_msf)) goto out_free_gsf; err = set_mcast_msfilter(sk, gf32->gf_interface, n, gf32->gf_fmode, &gf32->gf_group, gf32->gf_slist_flex); @@ -1244,7 +1244,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, int optname, } /* numsrc >= (1G-4) overflow in 32 bits */ if (msf->imsf_numsrc >= 0x3ffffffcU || - msf->imsf_numsrc > net->ipv4.sysctl_igmp_max_msf) { + msf->imsf_numsrc > READ_ONCE(net->ipv4.sysctl_igmp_max_msf)) { kfree(msf); err = -ENOBUFS; break; @@ -1606,7 +1606,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, { struct net *net = sock_net(sk); val = (inet->uc_ttl == -1 ? - net->ipv4.sysctl_ip_default_ttl : + READ_ONCE(net->ipv4.sysctl_ip_default_ttl) : inet->uc_ttl); break; } diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index 918c61fda0f3..d640adcaf1b1 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -62,7 +62,7 @@ struct sk_buff *nf_reject_skb_v4_tcp_reset(struct net *net, skb_reserve(nskb, LL_MAX_HEADER); niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP, - net->ipv4.sysctl_ip_default_ttl); + READ_ONCE(net->ipv4.sysctl_ip_default_ttl)); nf_reject_ip_tcphdr_put(nskb, oldskb, oth); niph->tot_len = htons(nskb->len); ip_send_check(niph); @@ -117,7 +117,7 @@ struct sk_buff *nf_reject_skb_v4_unreach(struct net *net, skb_reserve(nskb, LL_MAX_HEADER); niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_ICMP, - net->ipv4.sysctl_ip_default_ttl); + READ_ONCE(net->ipv4.sysctl_ip_default_ttl)); skb_reset_transport_header(nskb); icmph = skb_put_zero(nskb, sizeof(struct icmphdr)); diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index e459a391e607..853a75a8fbaf 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1858,7 +1858,7 @@ static void __remove_nexthop_fib(struct net *net, struct nexthop *nh) /* __ip6_del_rt does a release, so do a hold here */ fib6_info_hold(f6i); ipv6_stub->ip6_del_rt(net, f6i, - !net->ipv4.sysctl_nexthop_compat_mode); + !READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode)); } } @@ -2361,7 +2361,8 @@ out: if (!rc) { nh_base_seq_inc(net); nexthop_notify(RTM_NEWNEXTHOP, new_nh, &cfg->nlinfo); - if (replace_notify && net->ipv4.sysctl_nexthop_compat_mode) + if (replace_notify && + READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode)) nexthop_replace_notify(net, new_nh, &cfg->nlinfo); } diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 28836071f0a6..0088a4c64d77 100644 --- a/net/ipv4/proc.c +++ 
b/net/ipv4/proc.c @@ -387,7 +387,7 @@ static int snmp_seq_show_ipstats(struct seq_file *seq, void *v) seq_printf(seq, "\nIp: %d %d", IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2, - net->ipv4.sysctl_ip_default_ttl); + READ_ONCE(net->ipv4.sysctl_ip_default_ttl)); BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0); snmp_get_cpu_field64_batch(buff64, snmp4_ipstats_list, diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 356f535f3443..4702c61207a8 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1398,7 +1398,7 @@ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr) struct fib_info *fi = res->fi; u32 mtu = 0; - if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu || + if (READ_ONCE(dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu) || fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU)) mtu = fi->fib_mtu; @@ -1929,7 +1929,7 @@ static u32 fib_multipath_custom_hash_outer(const struct net *net, const struct sk_buff *skb, bool *p_has_inner) { - u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields; + u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields); struct flow_keys keys, hash_keys; if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) @@ -1958,7 +1958,7 @@ static u32 fib_multipath_custom_hash_inner(const struct net *net, const struct sk_buff *skb, bool has_inner) { - u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields; + u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields); struct flow_keys keys, hash_keys; /* We assume the packet carries an encapsulation, but if none was @@ -2018,7 +2018,7 @@ static u32 fib_multipath_custom_hash_skb(const struct net *net, static u32 fib_multipath_custom_hash_fl4(const struct net *net, const struct flowi4 *fl4) { - u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields; + u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields); struct flow_keys hash_keys; if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) @@ -2048,7 +2048,7 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, struct flow_keys hash_keys; u32 mhash = 0; - switch (net->ipv4.sysctl_fib_multipath_hash_policy) { + switch (READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_policy)) { case 0: memset(&hash_keys, 0, sizeof(hash_keys)); hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index f33c31dd7366..942d2dfa1115 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -247,12 +247,12 @@ bool cookie_timestamp_decode(const struct net *net, return true; } - if (!net->ipv4.sysctl_tcp_timestamps) + if (!READ_ONCE(net->ipv4.sysctl_tcp_timestamps)) return false; tcp_opt->sack_ok = (options & TS_OPT_SACK) ? 
TCP_SACK_SEEN : 0; - if (tcp_opt->sack_ok && !net->ipv4.sysctl_tcp_sack) + if (tcp_opt->sack_ok && !READ_ONCE(net->ipv4.sysctl_tcp_sack)) return false; if ((options & TS_OPT_WSCALE_MASK) == TS_OPT_WSCALE_MASK) @@ -261,7 +261,7 @@ bool cookie_timestamp_decode(const struct net *net, tcp_opt->wscale_ok = 1; tcp_opt->snd_wscale = options & TS_OPT_WSCALE_MASK; - return net->ipv4.sysctl_tcp_window_scaling != 0; + return READ_ONCE(net->ipv4.sysctl_tcp_window_scaling) != 0; } EXPORT_SYMBOL(cookie_timestamp_decode); @@ -273,7 +273,7 @@ bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt, if (!ecn_ok) return false; - if (net->ipv4.sysctl_tcp_ecn) + if (READ_ONCE(net->ipv4.sysctl_tcp_ecn)) return true; return dst_feature(dst, RTAX_FEATURE_ECN); @@ -340,7 +340,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) struct flowi4 fl4; u32 tsoff = 0; - if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) || + !th->ack || th->rst) goto out; if (tcp_synq_no_recent_overflow(sk)) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index cd448cdd3b38..5490c285668b 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -84,7 +84,7 @@ static int ipv4_local_port_range(struct ctl_table *table, int write, * port limit. */ if ((range[1] < range[0]) || - (range[0] < net->ipv4.sysctl_ip_prot_sock)) + (range[0] < READ_ONCE(net->ipv4.sysctl_ip_prot_sock))) ret = -EINVAL; else set_local_port_range(net, range); @@ -110,7 +110,7 @@ static int ipv4_privileged_ports(struct ctl_table *table, int write, .extra2 = &ip_privileged_port_max, }; - pports = net->ipv4.sysctl_ip_prot_sock; + pports = READ_ONCE(net->ipv4.sysctl_ip_prot_sock); ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); @@ -122,7 +122,7 @@ static int ipv4_privileged_ports(struct ctl_table *table, int write, if (range[0] < pports) ret = -EINVAL; else - net->ipv4.sysctl_ip_prot_sock = pports; + WRITE_ONCE(net->ipv4.sysctl_ip_prot_sock, pports); } return ret; @@ -350,61 +350,6 @@ bad_key: return ret; } -static void proc_configure_early_demux(int enabled, int protocol) -{ - struct net_protocol *ipprot; -#if IS_ENABLED(CONFIG_IPV6) - struct inet6_protocol *ip6prot; -#endif - - rcu_read_lock(); - - ipprot = rcu_dereference(inet_protos[protocol]); - if (ipprot) - ipprot->early_demux = enabled ? ipprot->early_demux_handler : - NULL; - -#if IS_ENABLED(CONFIG_IPV6) - ip6prot = rcu_dereference(inet6_protos[protocol]); - if (ip6prot) - ip6prot->early_demux = enabled ? 
ip6prot->early_demux_handler : - NULL; -#endif - rcu_read_unlock(); -} - -static int proc_tcp_early_demux(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) -{ - int ret = 0; - - ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos); - - if (write && !ret) { - int enabled = init_net.ipv4.sysctl_tcp_early_demux; - - proc_configure_early_demux(enabled, IPPROTO_TCP); - } - - return ret; -} - -static int proc_udp_early_demux(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) -{ - int ret = 0; - - ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos); - - if (write && !ret) { - int enabled = init_net.ipv4.sysctl_udp_early_demux; - - proc_configure_early_demux(enabled, IPPROTO_UDP); - } - - return ret; -} - static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) @@ -599,6 +544,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE }, { .procname = "icmp_echo_enable_probe", @@ -615,6 +562,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE }, { .procname = "icmp_ignore_bogus_error_responses", @@ -622,6 +571,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE }, { .procname = "icmp_errors_use_inbound_ifaddr", @@ -629,6 +580,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE }, { .procname = "icmp_ratelimit", @@ -668,6 +621,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_TWO, }, { .procname = "tcp_ecn_fallback", @@ -675,6 +630,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, { .procname = "ip_dynaddr", @@ -695,14 +652,14 @@ static struct ctl_table ipv4_net_table[] = { .data = &init_net.ipv4.sysctl_udp_early_demux, .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_udp_early_demux + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_early_demux", .data = &init_net.ipv4.sysctl_tcp_early_demux, .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_tcp_early_demux + .proc_handler = proc_dou8vec_minmax, }, { .procname = "nexthop_compat_mode", diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index fdb80b9608fe..8f117f92e193 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -441,7 +441,7 @@ void tcp_init_sock(struct sock *sk) tp->snd_cwnd_clamp = ~0; tp->mss_cache = TCP_MSS_DEFAULT; - tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering; + tp->reordering = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering); tcp_assign_congestion_control(sk); tp->tsoffset = 0; @@ -1150,7 +1150,8 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, struct sockaddr *uaddr = msg->msg_name; int err, flags; - if (!(sock_net(sk)->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) || + if (!(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) & + TFO_CLIENT_ENABLE) || (uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) && uaddr->sa_family == 
AF_UNSPEC)) return -EOPNOTSUPP; @@ -2726,7 +2727,8 @@ static void tcp_orphan_update(struct timer_list *unused) static bool tcp_too_many_orphans(int shift) { - return READ_ONCE(tcp_orphan_cache) << shift > sysctl_tcp_max_orphans; + return READ_ONCE(tcp_orphan_cache) << shift > + READ_ONCE(sysctl_tcp_max_orphans); } bool tcp_check_oom(struct sock *sk, int shift) @@ -3627,7 +3629,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname, case TCP_FASTOPEN_CONNECT: if (val > 1 || val < 0) { err = -EINVAL; - } else if (net->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) { + } else if (READ_ONCE(net->ipv4.sysctl_tcp_fastopen) & + TFO_CLIENT_ENABLE) { if (sk->sk_state == TCP_CLOSE) tp->fastopen_connect = val; else @@ -3977,12 +3980,13 @@ static int do_tcp_getsockopt(struct sock *sk, int level, val = keepalive_probes(tp); break; case TCP_SYNCNT: - val = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; + val = icsk->icsk_syn_retries ? : + READ_ONCE(net->ipv4.sysctl_tcp_syn_retries); break; case TCP_LINGER2: val = tp->linger2; if (val >= 0) - val = (val ? : net->ipv4.sysctl_tcp_fin_timeout) / HZ; + val = (val ? : READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ; break; case TCP_DEFER_ACCEPT: val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept, diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index fdbcf2a6d08e..825b216d11f5 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -332,7 +332,7 @@ static bool tcp_fastopen_no_cookie(const struct sock *sk, const struct dst_entry *dst, int flag) { - return (sock_net(sk)->ipv4.sysctl_tcp_fastopen & flag) || + return (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) & flag) || tcp_sk(sk)->fastopen_no_cookie || (dst && dst_metric(dst, RTAX_FASTOPEN_NO_COOKIE)); } @@ -347,7 +347,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, const struct dst_entry *dst) { bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1; - int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen; + int tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen); struct tcp_fastopen_cookie valid_foc = { .len = -1 }; struct sock *child; int ret = 0; @@ -489,7 +489,7 @@ void tcp_fastopen_active_disable(struct sock *sk) { struct net *net = sock_net(sk); - if (!sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout)) return; /* Paired with READ_ONCE() in tcp_fastopen_active_should_disable() */ @@ -510,7 +510,8 @@ void tcp_fastopen_active_disable(struct sock *sk) */ bool tcp_fastopen_active_should_disable(struct sock *sk) { - unsigned int tfo_bh_timeout = sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout; + unsigned int tfo_bh_timeout = + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout); unsigned long timeout; int tfo_da_times; int multiplier; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2e2a9ece9af2..07dbcbae7782 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1051,7 +1051,7 @@ static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq, tp->undo_marker ? tp->undo_retrans : 0); #endif tp->reordering = min_t(u32, (metric + mss - 1) / mss, - sock_net(sk)->ipv4.sysctl_tcp_max_reordering); + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering)); } /* This exciting event is worth to be remembered. 
8) */ @@ -2030,7 +2030,7 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend) return; tp->reordering = min_t(u32, tp->packets_out + addend, - sock_net(sk)->ipv4.sysctl_tcp_max_reordering); + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering)); tp->reord_seen++; NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER); } @@ -2095,7 +2095,8 @@ static inline void tcp_init_undo(struct tcp_sock *tp) static bool tcp_is_rack(const struct sock *sk) { - return sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION; + return READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) & + TCP_RACK_LOSS_DETECTION; } /* If we detect SACK reneging, forget all SACK information @@ -2139,6 +2140,7 @@ void tcp_enter_loss(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery; + u8 reordering; tcp_timeout_mark_lost(sk); @@ -2159,10 +2161,12 @@ void tcp_enter_loss(struct sock *sk) /* Timeout in disordered state after receiving substantial DUPACKs * suggests that the degree of reordering is over-estimated. */ + reordering = READ_ONCE(net->ipv4.sysctl_tcp_reordering); if (icsk->icsk_ca_state <= TCP_CA_Disorder && - tp->sacked_out >= net->ipv4.sysctl_tcp_reordering) + tp->sacked_out >= reordering) tp->reordering = min_t(unsigned int, tp->reordering, - net->ipv4.sysctl_tcp_reordering); + reordering); + tcp_set_ca_state(sk, TCP_CA_Loss); tp->high_seq = tp->snd_nxt; tcp_ecn_queue_cwr(tp); @@ -3464,7 +3468,8 @@ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag) * new SACK or ECE mark may first advance cwnd here and later reduce * cwnd in tcp_fastretrans_alert() based on more states. */ - if (tcp_sk(sk)->reordering > sock_net(sk)->ipv4.sysctl_tcp_reordering) + if (tcp_sk(sk)->reordering > + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering)) return flag & FLAG_FORWARD_PROGRESS; return flag & FLAG_DATA_ACKED; @@ -4056,7 +4061,7 @@ void tcp_parse_options(const struct net *net, break; case TCPOPT_WINDOW: if (opsize == TCPOLEN_WINDOW && th->syn && - !estab && net->ipv4.sysctl_tcp_window_scaling) { + !estab && READ_ONCE(net->ipv4.sysctl_tcp_window_scaling)) { __u8 snd_wscale = *(__u8 *)ptr; opt_rx->wscale_ok = 1; if (snd_wscale > TCP_MAX_WSCALE) { @@ -4072,7 +4077,7 @@ void tcp_parse_options(const struct net *net, case TCPOPT_TIMESTAMP: if ((opsize == TCPOLEN_TIMESTAMP) && ((estab && opt_rx->tstamp_ok) || - (!estab && net->ipv4.sysctl_tcp_timestamps))) { + (!estab && READ_ONCE(net->ipv4.sysctl_tcp_timestamps)))) { opt_rx->saw_tstamp = 1; opt_rx->rcv_tsval = get_unaligned_be32(ptr); opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4); @@ -4080,7 +4085,7 @@ void tcp_parse_options(const struct net *net, break; case TCPOPT_SACK_PERM: if (opsize == TCPOLEN_SACK_PERM && th->syn && - !estab && net->ipv4.sysctl_tcp_sack) { + !estab && READ_ONCE(net->ipv4.sysctl_tcp_sack)) { opt_rx->sack_ok = TCP_SACK_SEEN; tcp_sack_reset(opt_rx); } @@ -5567,7 +5572,7 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th) struct tcp_sock *tp = tcp_sk(sk); u32 ptr = ntohs(th->urg_ptr); - if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg) + if (ptr && !READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_stdurg)) ptr--; ptr += ntohl(th->seq); @@ -6729,7 +6734,7 @@ static void tcp_ecn_create_request(struct request_sock *req, ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield); ecn_ok_dst = dst_feature(dst, DST_FEATURE_ECN_MASK); - ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst; + ecn_ok = 
READ_ONCE(net->ipv4.sysctl_tcp_ecn) || ecn_ok_dst; if (((!ect || th->res1) && ecn_ok) || tcp_ca_needs_ecn(listen_sk) || (ecn_ok_dst & DST_FEATURE_ECN_CA) || @@ -6797,11 +6802,14 @@ static bool tcp_syn_flood_action(const struct sock *sk, const char *proto) { struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; const char *msg = "Dropping request"; - bool want_cookie = false; struct net *net = sock_net(sk); + bool want_cookie = false; + u8 syncookies; + + syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies); #ifdef CONFIG_SYN_COOKIES - if (net->ipv4.sysctl_tcp_syncookies) { + if (syncookies) { msg = "Sending cookies"; want_cookie = true; __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES); @@ -6809,8 +6817,7 @@ static bool tcp_syn_flood_action(const struct sock *sk, const char *proto) #endif __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); - if (!queue->synflood_warned && - net->ipv4.sysctl_tcp_syncookies != 2 && + if (!queue->synflood_warned && syncookies != 2 && xchg(&queue->synflood_warned, 1) == 0) net_info_ratelimited("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n", proto, sk->sk_num, msg); @@ -6859,7 +6866,7 @@ u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops, struct tcp_sock *tp = tcp_sk(sk); u16 mss; - if (sock_net(sk)->ipv4.sysctl_tcp_syncookies != 2 && + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) != 2 && !inet_csk_reqsk_queue_is_full(sk)) return 0; @@ -6893,13 +6900,15 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, bool want_cookie = false; struct dst_entry *dst; struct flowi fl; + u8 syncookies; + + syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies); /* TW buckets are converted to open requests without * limitations, they conserve resources and peer is * evidently real one. */ - if ((net->ipv4.sysctl_tcp_syncookies == 2 || - inet_csk_reqsk_queue_is_full(sk)) && !isn) { + if ((syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) && !isn) { want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name); if (!want_cookie) goto drop; @@ -6948,10 +6957,12 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb); if (!want_cookie && !isn) { + int max_syn_backlog = READ_ONCE(net->ipv4.sysctl_max_syn_backlog); + /* Kill the following clause, if you dislike this way. 
*/ - if (!net->ipv4.sysctl_tcp_syncookies && - (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < - (net->ipv4.sysctl_max_syn_backlog >> 2)) && + if (!syncookies && + (max_syn_backlog - inet_csk_reqsk_queue_len(sk) < + (max_syn_backlog >> 2)) && !tcp_peer_is_proven(req, dst)) { /* Without syncookies last quarter of * backlog is filled with destinations, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index da5a3c44c4fb..d16e6e40f47b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -108,10 +108,10 @@ static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb) int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) { + int reuse = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tw_reuse); const struct inet_timewait_sock *tw = inet_twsk(sktw); const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw); struct tcp_sock *tp = tcp_sk(sk); - int reuse = sock_net(sk)->ipv4.sysctl_tcp_tw_reuse; if (reuse == 2) { /* Still does not detect *everything* that goes through diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 7029b0e98edb..a501150deaa3 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -428,7 +428,8 @@ void tcp_update_metrics(struct sock *sk) if (!tcp_metric_locked(tm, TCP_METRIC_REORDERING)) { val = tcp_metric_get(tm, TCP_METRIC_REORDERING); if (val < tp->reordering && - tp->reordering != net->ipv4.sysctl_tcp_reordering) + tp->reordering != + READ_ONCE(net->ipv4.sysctl_tcp_reordering)) tcp_metric_set(tm, TCP_METRIC_REORDERING, tp->reordering); } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 6854bb1fb32b..cb95d88497ae 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -173,7 +173,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, * Oh well... nobody has a sufficient solution to this * protocol bug yet. */ - if (twsk_net(tw)->ipv4.sysctl_tcp_rfc1337 == 0) { + if (!READ_ONCE(twsk_net(tw)->ipv4.sysctl_tcp_rfc1337)) { kill: inet_twsk_deschedule_put(tw); return TCP_TW_SUCCESS; @@ -781,7 +781,7 @@ listen_overflow: if (sk != req->rsk_listener) __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE); - if (!sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow) { + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow)) { inet_rsk(req)->acked = 1; return NULL; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 1c054431e358..c38e07b50639 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -324,7 +324,7 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk); - bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 || + bool use_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn) == 1 || tcp_ca_needs_ecn(sk) || bpf_needs_ecn; if (!use_ecn) { @@ -346,7 +346,7 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) static void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb) { - if (sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback) + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback)) /* tp->ecn_flags are cleared at a later point in time when * SYN ACK is ultimatively being received. 
*/ @@ -791,18 +791,18 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, opts->mss = tcp_advertise_mss(sk); remaining -= TCPOLEN_MSS_ALIGNED; - if (likely(sock_net(sk)->ipv4.sysctl_tcp_timestamps && !*md5)) { + if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps) && !*md5)) { opts->options |= OPTION_TS; opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset; opts->tsecr = tp->rx_opt.ts_recent; remaining -= TCPOLEN_TSTAMP_ALIGNED; } - if (likely(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) { + if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling))) { opts->ws = tp->rx_opt.rcv_wscale; opts->options |= OPTION_WSCALE; remaining -= TCPOLEN_WSCALE_ALIGNED; } - if (likely(sock_net(sk)->ipv4.sysctl_tcp_sack)) { + if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_sack))) { opts->options |= OPTION_SACK_ADVERTISE; if (unlikely(!(OPTION_TS & opts->options))) remaining -= TCPOLEN_SACKPERM_ALIGNED; @@ -1719,7 +1719,8 @@ static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu) mss_now -= icsk->icsk_ext_hdr_len; /* Then reserve room for full set of TCP options and 8 bytes of data */ - mss_now = max(mss_now, sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss); + mss_now = max(mss_now, + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss)); return mss_now; } @@ -1762,10 +1763,10 @@ void tcp_mtup_init(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); struct net *net = sock_net(sk); - icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1; + icsk->icsk_mtup.enabled = READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing) > 1; icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) + icsk->icsk_af_ops->net_header_len; - icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss); + icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, READ_ONCE(net->ipv4.sysctl_tcp_base_mss)); icsk->icsk_mtup.probe_size = 0; if (icsk->icsk_mtup.enabled) icsk->icsk_mtup.probe_timestamp = tcp_jiffies32; @@ -1897,7 +1898,7 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) if (tp->packets_out > tp->snd_cwnd_used) tp->snd_cwnd_used = tp->packets_out; - if (sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle && + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) && (s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto && !ca_ops->cong_control) tcp_cwnd_application_limited(sk); @@ -2282,7 +2283,7 @@ static inline void tcp_mtu_check_reprobe(struct sock *sk) u32 interval; s32 delta; - interval = net->ipv4.sysctl_tcp_probe_interval; + interval = READ_ONCE(net->ipv4.sysctl_tcp_probe_interval); delta = tcp_jiffies32 - icsk->icsk_mtup.probe_timestamp; if (unlikely(delta >= interval * HZ)) { int mss = tcp_current_mss(sk); @@ -2366,7 +2367,7 @@ static int tcp_mtu_probe(struct sock *sk) * probing process by not resetting search range to its orignal. */ if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) || - interval < net->ipv4.sysctl_tcp_probe_threshold) { + interval < READ_ONCE(net->ipv4.sysctl_tcp_probe_threshold)) { /* Check whether enough time has elaplased for * another round of probing. 
*/ @@ -2740,7 +2741,7 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto) if (rcu_access_pointer(tp->fastopen_rsk)) return false; - early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans; + early_retrans = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_early_retrans); /* Schedule a loss probe in 2*RTT for SACK capable connections * not in loss recovery, that are either limited by cwnd or application. */ @@ -3104,7 +3105,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, struct sk_buff *skb = to, *tmp; bool first = true; - if (!sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse)) return; if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) return; @@ -3646,7 +3647,7 @@ static void tcp_connect_init(struct sock *sk) * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT. */ tp->tcp_header_len = sizeof(struct tcphdr); - if (sock_net(sk)->ipv4.sysctl_tcp_timestamps) + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps)) tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED; #ifdef CONFIG_TCP_MD5SIG @@ -3682,7 +3683,7 @@ static void tcp_connect_init(struct sock *sk) tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), &tp->rcv_wnd, &tp->window_clamp, - sock_net(sk)->ipv4.sysctl_tcp_window_scaling, + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling), &rcv_wscale, rcv_wnd); @@ -4089,7 +4090,7 @@ void tcp_send_probe0(struct sock *sk) icsk->icsk_probes_out++; if (err <= 0) { - if (icsk->icsk_backoff < net->ipv4.sysctl_tcp_retries2) + if (icsk->icsk_backoff < READ_ONCE(net->ipv4.sysctl_tcp_retries2)) icsk->icsk_backoff++; timeout = tcp_probe0_when(sk, TCP_RTO_MAX); } else { diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index 48f30e7209f2..50abaa941387 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -14,7 +14,8 @@ static u32 tcp_rack_reo_wnd(const struct sock *sk) return 0; if (tp->sacked_out >= tp->reordering && - !(sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_NO_DUPTHRESH)) + !(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) & + TCP_RACK_NO_DUPTHRESH)) return 0; } @@ -187,7 +188,8 @@ void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs) { struct tcp_sock *tp = tcp_sk(sk); - if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_STATIC_REO_WND || + if ((READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) & + TCP_RACK_STATIC_REO_WND) || !rs->prior_delivered) return; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 20cf4a98c69d..50bba370486e 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -143,7 +143,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset) */ static int tcp_orphan_retries(struct sock *sk, bool alive) { - int retries = sock_net(sk)->ipv4.sysctl_tcp_orphan_retries; /* May be zero. */ + int retries = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_orphan_retries); /* May be zero. */ /* We know from an ICMP that something is wrong. 
*/ if (sk->sk_err_soft && !alive) @@ -163,7 +163,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) int mss; /* Black hole detection */ - if (!net->ipv4.sysctl_tcp_mtu_probing) + if (!READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing)) return; if (!icsk->icsk_mtup.enabled) { @@ -171,9 +171,9 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) icsk->icsk_mtup.probe_timestamp = tcp_jiffies32; } else { mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1; - mss = min(net->ipv4.sysctl_tcp_base_mss, mss); - mss = max(mss, net->ipv4.sysctl_tcp_mtu_probe_floor); - mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss); + mss = min(READ_ONCE(net->ipv4.sysctl_tcp_base_mss), mss); + mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_mtu_probe_floor)); + mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_min_snd_mss)); icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss); } tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); @@ -239,17 +239,18 @@ static int tcp_write_timeout(struct sock *sk) if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { if (icsk->icsk_retransmits) __dst_negative_advice(sk); - retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; + retry_until = icsk->icsk_syn_retries ? : + READ_ONCE(net->ipv4.sysctl_tcp_syn_retries); expired = icsk->icsk_retransmits >= retry_until; } else { - if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0)) { + if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1), 0)) { /* Black hole detection */ tcp_mtu_probing(icsk, sk); __dst_negative_advice(sk); } - retry_until = net->ipv4.sysctl_tcp_retries2; + retry_until = READ_ONCE(net->ipv4.sysctl_tcp_retries2); if (sock_flag(sk, SOCK_DEAD)) { const bool alive = icsk->icsk_rto < TCP_RTO_MAX; @@ -380,7 +381,7 @@ static void tcp_probe_timer(struct sock *sk) msecs_to_jiffies(icsk->icsk_user_timeout)) goto abort; - max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2; + max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2); if (sock_flag(sk, SOCK_DEAD)) { const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX; @@ -406,12 +407,15 @@ abort: tcp_write_err(sk); static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req) { struct inet_connection_sock *icsk = inet_csk(sk); - int max_retries = icsk->icsk_syn_retries ? : - sock_net(sk)->ipv4.sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */ struct tcp_sock *tp = tcp_sk(sk); + int max_retries; req->rsk_ops->syn_ack_timeout(req); + /* add one more retry for fastopen */ + max_retries = icsk->icsk_syn_retries ? 
: + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_synack_retries) + 1; + if (req->num_timeout >= max_retries) { tcp_write_err(sk); return; @@ -574,7 +578,7 @@ out_reset_timer: * linear-timeout retransmissions into a black hole */ if (sk->sk_state == TCP_ESTABLISHED && - (tp->thin_lto || net->ipv4.sysctl_tcp_thin_linear_timeouts) && + (tp->thin_lto || READ_ONCE(net->ipv4.sysctl_tcp_thin_linear_timeouts)) && tcp_stream_is_thin(tp) && icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) { icsk->icsk_backoff = 0; @@ -585,7 +589,7 @@ out_reset_timer: } inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, tcp_clamp_rto_to_user_timeout(sk), TCP_RTO_MAX); - if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1 + 1, 0)) + if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1) + 1, 0)) __sk_dst_reset(sk); out:; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 70564ddccc46..6f354f8be2c5 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -226,7 +226,7 @@ lookup_protocol: RCU_INIT_POINTER(inet->mc_list, NULL); inet->rcv_tos = 0; - if (net->ipv4.sysctl_ip_no_pmtu_disc) + if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc)) inet->pmtudisc = IP_PMTUDISC_DONT; else inet->pmtudisc = IP_PMTUDISC_WANT; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 61770220774e..9d92d51c4757 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -925,7 +925,7 @@ static int icmpv6_rcv(struct sk_buff *skb) break; case ICMPV6_EXT_ECHO_REQUEST: if (!net->ipv6.sysctl.icmpv6_echo_ignore_all && - net->ipv4.sysctl_icmp_echo_enable_probe) + READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe)) icmpv6_echo_reply(skb); break; diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 0322cc86b84e..e1ebf5e42ebe 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -45,20 +45,23 @@ #include <net/inet_ecn.h> #include <net/dst_metadata.h> -INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *)); static void ip6_rcv_finish_core(struct net *net, struct sock *sk, struct sk_buff *skb) { - void (*edemux)(struct sk_buff *skb); - - if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { - const struct inet6_protocol *ipprot; - - ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]); - if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) - INDIRECT_CALL_2(edemux, tcp_v6_early_demux, - udp_v6_early_demux, skb); + if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) && + !skb_dst(skb) && !skb->sk) { + switch (ipv6_hdr(skb)->nexthdr) { + case IPPROTO_TCP: + if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux)) + tcp_v6_early_demux(skb); + break; + case IPPROTO_UDP: + if (READ_ONCE(net->ipv4.sysctl_udp_early_demux)) + udp_v6_early_demux(skb); + break; + } } + if (!skb_valid_dst(skb)) ip6_route_input(skb); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 828355710c57..916417944ec8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5741,7 +5741,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, if (nexthop_is_blackhole(rt->nh)) rtm->rtm_type = RTN_BLACKHOLE; - if (net->ipv4.sysctl_nexthop_compat_mode && + if (READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode) && rt6_fill_node_nexthop(skb, rt->nh, &nh_flags) < 0) goto nla_put_failure; diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index d64855010948..e756ba705fd9 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -189,6 +189,8 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) } #endif + hdr->payload_len = htons(skb->len - 
sizeof(struct ipv6hdr)); + skb_postpush_rcsum(skb, hdr, tot_len); return 0; @@ -241,6 +243,8 @@ int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) } #endif + hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); + skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen); return 0; @@ -302,7 +306,6 @@ static int seg6_do_srh(struct sk_buff *skb) break; } - ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); nf_reset_ct(skb); diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index 98a34287439c..2cd4a8d3b30a 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -826,7 +826,6 @@ static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt) if (err) goto drop; - ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); seg6_lookup_nexthop(skb, NULL, 0); @@ -858,7 +857,6 @@ static int input_action_end_b6_encap(struct sk_buff *skb, if (err) goto drop; - ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); seg6_lookup_nexthop(skb, NULL, 0); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 9cc123f000fb..5014aa663452 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -141,7 +141,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) __u8 rcv_wscale; u32 tsoff = 0; - if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) || + !th->ack || th->rst) goto out; if (tcp_synq_no_recent_overflow(sk)) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f37dd4aa91c6..9d3ede293258 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1822,7 +1822,7 @@ do_time_wait: goto discard_it; } -INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb) +void tcp_v6_early_demux(struct sk_buff *skb) { const struct ipv6hdr *hdr; const struct tcphdr *th; @@ -2176,12 +2176,7 @@ struct proto tcpv6_prot = { }; EXPORT_SYMBOL_GPL(tcpv6_prot); -/* thinking of making this const? Don't. - * early_demux can change based on sysctl. - */ -static struct inet6_protocol tcpv6_protocol = { - .early_demux = tcp_v6_early_demux, - .early_demux_handler = tcp_v6_early_demux, +static const struct inet6_protocol tcpv6_protocol = { .handler = tcp_v6_rcv, .err_handler = tcp_v6_err, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 55afd7f39c04..e2f2e087a753 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1052,7 +1052,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net, return NULL; } -INDIRECT_CALLABLE_SCOPE void udp_v6_early_demux(struct sk_buff *skb) +void udp_v6_early_demux(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); const struct udphdr *uh; @@ -1660,12 +1660,7 @@ int udpv6_getsockopt(struct sock *sk, int level, int optname, return ipv6_getsockopt(sk, level, optname, optval, optlen); } -/* thinking of making this const? Don't. - * early_demux can change based on sysctl. 
- */ -static struct inet6_protocol udpv6_protocol = { - .early_demux = udp_v6_early_demux, - .early_demux_handler = udp_v6_early_demux, +static const struct inet6_protocol udpv6_protocol = { .handler = udpv6_rcv, .err_handler = udpv6_err, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index f7896f257e1b..4ddf297f40f2 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -4468,14 +4468,14 @@ EXPORT_SYMBOL_GPL(ieee80211_color_change_finish); void ieeee80211_obss_color_collision_notify(struct ieee80211_vif *vif, - u64 color_bitmap) + u64 color_bitmap, gfp_t gfp) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); if (sdata->vif.color_change_active || sdata->vif.csa_active) return; - cfg80211_obss_color_collision_notify(sdata->dev, color_bitmap); + cfg80211_obss_color_collision_notify(sdata->dev, color_bitmap, gfp); } EXPORT_SYMBOL_GPL(ieeee80211_obss_color_collision_notify); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 41531478437c..15a73b7fdd75 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -377,7 +377,9 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do bool cancel_scan; struct cfg80211_nan_func *func; + spin_lock_bh(&local->fq.lock); clear_bit(SDATA_STATE_RUNNING, &sdata->state); + spin_unlock_bh(&local->fq.lock); cancel_scan = rcu_access_pointer(local->scan_sdata) == sdata; if (cancel_scan) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 3c08ae04ddbc..1675f8cb87f1 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3217,7 +3217,8 @@ ieee80211_rx_check_bss_color_collision(struct ieee80211_rx_data *rx) IEEE80211_HE_OPERATION_BSS_COLOR_MASK); if (color == bss_conf->he_bss_color.color) ieeee80211_obss_color_collision_notify(&rx->sdata->vif, - BIT_ULL(color)); + BIT_ULL(color), + GFP_ATOMIC); } } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 0e4efc08c762..c425f4fb7c2e 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2818,19 +2818,10 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, /* * If the skb is shared we need to obtain our own copy. 
*/ - if (skb_shared(skb)) { - struct sk_buff *tmp_skb = skb; - - /* can't happen -- skb is a clone if info_id != 0 */ - WARN_ON(info_id); - - skb = skb_clone(skb, GFP_ATOMIC); - kfree_skb(tmp_skb); - - if (!skb) { - ret = -ENOMEM; - goto free; - } + skb = skb_share_check(skb, GFP_ATOMIC); + if (unlikely(!skb)) { + ret = -ENOMEM; + goto free; } hdr.frame_control = fc; @@ -3539,15 +3530,9 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, /* after this point (skb is modified) we cannot return false */ - if (skb_shared(skb)) { - struct sk_buff *tmp_skb = skb; - - skb = skb_clone(skb, GFP_ATOMIC); - kfree_skb(tmp_skb); - - if (!skb) - return true; - } + skb = skb_share_check(skb, GFP_ATOMIC); + if (unlikely(!skb)) + return true; if ((hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) && ieee80211_amsdu_aggregate(sdata, sta, fast_tx, skb)) @@ -4437,7 +4422,7 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata, struct net_device *dev, struct sta_info *sta, struct ieee80211_key *key, struct sk_buff *skb) { - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_tx_info *info; struct ieee80211_local *local = sdata->local; struct tid_ampdu_tx *tid_tx; u8 tid; @@ -4452,6 +4437,11 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata, test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state)) goto out_free; + skb = skb_share_check(skb, GFP_ATOMIC); + if (unlikely(!skb)) + return; + + info = IEEE80211_SKB_CB(skb); memset(info, 0, sizeof(*info)); ieee80211_aggr_check(sdata, sta, skb); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 1e26b5235add..dad42d42aa84 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -301,6 +301,9 @@ static void __ieee80211_wake_txqs(struct ieee80211_sub_if_data *sdata, int ac) local_bh_disable(); spin_lock(&fq->lock); + if (!test_bit(SDATA_STATE_RUNNING, &sdata->state)) + goto out; + if (sdata->vif.type == NL80211_IFTYPE_AP) ps = &sdata->bss->ps; diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 62c6733e0792..d50480b31750 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -147,8 +147,8 @@ u16 __ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, bool qos; /* all mesh/ocb stations are required to support WME */ - if (sdata->vif.type == NL80211_IFTYPE_MESH_POINT || - sdata->vif.type == NL80211_IFTYPE_OCB) + if (sta && (sdata->vif.type == NL80211_IFTYPE_MESH_POINT || + sdata->vif.type == NL80211_IFTYPE_OCB)) qos = true; else if (sta) qos = sta->sta.wme; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index cc21fafd9726..21a3ed64226e 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2919,12 +2919,12 @@ static void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk) static int mptcp_disconnect(struct sock *sk, int flags) { - struct mptcp_subflow_context *subflow; + struct mptcp_subflow_context *subflow, *tmp; struct mptcp_sock *msk = mptcp_sk(sk); inet_sk_state_store(sk, TCP_CLOSE); - mptcp_for_each_subflow(msk, subflow) { + list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); __mptcp_close_ssk(sk, ssk, subflow, MPTCP_CF_FASTCLOSE); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 082a2fd8d85b..369aeabb94fe 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -729,6 +729,9 @@ static void nf_ct_gc_expired(struct nf_conn *ct) if (!refcount_inc_not_zero(&ct->ct_general.use)) return; + /* 
load ->status after refcount increase */ + smp_acquire__after_ctrl_dep(); + if (nf_ct_should_gc(ct)) nf_ct_kill(ct); @@ -795,6 +798,9 @@ __nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone, */ ct = nf_ct_tuplehash_to_ctrack(h); if (likely(refcount_inc_not_zero(&ct->ct_general.use))) { + /* re-check key after refcount */ + smp_acquire__after_ctrl_dep(); + if (likely(nf_ct_key_equal(h, tuple, zone, net))) goto found; @@ -1387,6 +1393,9 @@ static unsigned int early_drop_list(struct net *net, if (!refcount_inc_not_zero(&tmp->ct_general.use)) continue; + /* load ->ct_net and ->status after refcount increase */ + smp_acquire__after_ctrl_dep(); + /* kill only if still in same netns -- might have moved due to * SLAB_TYPESAFE_BY_RCU rules. * @@ -1536,6 +1545,9 @@ static void gc_worker(struct work_struct *work) if (!refcount_inc_not_zero(&tmp->ct_general.use)) continue; + /* load ->status after refcount increase */ + smp_acquire__after_ctrl_dep(); + if (gc_worker_skip_ct(tmp)) { nf_ct_put(tmp); continue; @@ -1775,6 +1787,16 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, if (!exp) __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC); + /* Other CPU might have obtained a pointer to this object before it was + * released. Because refcount is 0, refcount_inc_not_zero() will fail. + * + * After refcount_set(1) it will succeed; ensure that zeroing of + * ct->status and the correct ct->net pointer are visible; else other + * core might observe CONFIRMED bit which means the entry is valid and + * in the hash table, but its not (anymore). + */ + smp_wmb(); + /* Now it is going to be associated with an sk_buff, set refcount to 1. */ refcount_set(&ct->ct_general.use, 1); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 722af5e309ba..f5905b5201a7 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1203,6 +1203,7 @@ restart: hnnode) { ct = nf_ct_tuplehash_to_ctrack(h); if (nf_ct_is_expired(ct)) { + /* need to defer nf_ct_kill() until lock is released */ if (i < ARRAY_SIZE(nf_ct_evict) && refcount_inc_not_zero(&ct->ct_general.use)) nf_ct_evict[i++] = ct; diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 6ad7bbc90d38..05895878610c 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -306,6 +306,9 @@ static int ct_seq_show(struct seq_file *s, void *v) if (unlikely(!refcount_inc_not_zero(&ct->ct_general.use))) return 0; + /* load ->status after refcount increase */ + smp_acquire__after_ctrl_dep(); + if (nf_ct_should_gc(ct)) { nf_ct_kill(ct); goto release; diff --git a/net/netfilter/nf_log_syslog.c b/net/netfilter/nf_log_syslog.c index 77bcb10fc586..cb894f0d63e9 100644 --- a/net/netfilter/nf_log_syslog.c +++ b/net/netfilter/nf_log_syslog.c @@ -67,7 +67,7 @@ dump_arp_packet(struct nf_log_buf *m, unsigned int logflags; struct arphdr _arph; - ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); + ah = skb_header_pointer(skb, nhoff, sizeof(_arph), &_arph); if (!ah) { nf_log_buf_add(m, "TRUNCATED"); return; @@ -96,7 +96,7 @@ dump_arp_packet(struct nf_log_buf *m, ah->ar_pln != sizeof(__be32)) return; - ap = skb_header_pointer(skb, sizeof(_arph), sizeof(_arpp), &_arpp); + ap = skb_header_pointer(skb, nhoff + sizeof(_arph), sizeof(_arpp), &_arpp); if (!ap) { nf_log_buf_add(m, " INCOMPLETE [%zu bytes]", skb->len - sizeof(_arph)); @@ -149,7 +149,7 @@ static void nf_log_arp_packet(struct net *net, u_int8_t 
pf, nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo, prefix); - dump_arp_packet(m, loginfo, skb, 0); + dump_arp_packet(m, loginfo, skb, skb_network_offset(skb)); nf_log_buf_close(m); } @@ -850,7 +850,7 @@ static void nf_log_ip_packet(struct net *net, u_int8_t pf, if (in) dump_mac_header(m, loginfo, skb); - dump_ipv4_packet(net, m, loginfo, skb, 0); + dump_ipv4_packet(net, m, loginfo, skb, skb_network_offset(skb)); nf_log_buf_close(m); } diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index e479dd0561c5..16915f8eef2b 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -405,7 +405,7 @@ synproxy_build_ip(struct net *net, struct sk_buff *skb, __be32 saddr, iph->tos = 0; iph->id = 0; iph->frag_off = htons(IP_DF); - iph->ttl = net->ipv4.sysctl_ip_default_ttl; + iph->ttl = READ_ONCE(net->ipv4.sysctl_ip_default_ttl); iph->protocol = IPPROTO_TCP; iph->check = 0; iph->saddr = saddr; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d6b59beab3a9..646d5fd53604 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5833,8 +5833,11 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (!nla[NFTA_SET_ELEM_KEY] && !(flags & NFT_SET_ELEM_CATCHALL)) return -EINVAL; - if (flags != 0) - nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS); + if (flags != 0) { + err = nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS); + if (err < 0) + return err; + } if (set->flags & NFT_SET_MAP) { if (nla[NFTA_SET_ELEM_DATA] == NULL && @@ -5943,7 +5946,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (err < 0) goto err_set_elem_expr; - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); + err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); + if (err < 0) + goto err_parse_key; } if (nla[NFTA_SET_ELEM_KEY_END]) { @@ -5952,22 +5957,31 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (err < 0) goto err_parse_key; - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen); + err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen); + if (err < 0) + goto err_parse_key_end; } if (timeout > 0) { - nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION); - if (timeout != set->timeout) - nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT); + err = nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION); + if (err < 0) + goto err_parse_key_end; + + if (timeout != set->timeout) { + err = nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT); + if (err < 0) + goto err_parse_key_end; + } } if (num_exprs) { for (i = 0; i < num_exprs; i++) size += expr_array[i]->ops->size; - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_EXPRESSIONS, - sizeof(struct nft_set_elem_expr) + - size); + err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_EXPRESSIONS, + sizeof(struct nft_set_elem_expr) + size); + if (err < 0) + goto err_parse_key_end; } if (nla[NFTA_SET_ELEM_OBJREF] != NULL) { @@ -5982,7 +5996,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, err = PTR_ERR(obj); goto err_parse_key_end; } - nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF); + err = nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF); + if (err < 0) + goto err_parse_key_end; } if (nla[NFTA_SET_ELEM_DATA] != NULL) { @@ -6016,7 +6032,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, NFT_VALIDATE_NEED); } - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_DATA, desc.len); + err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_DATA, desc.len); + if (err < 0) + goto 
err_parse_data; } /* The full maximum length of userdata can exceed the maximum @@ -6026,9 +6044,12 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, ulen = 0; if (nla[NFTA_SET_ELEM_USERDATA] != NULL) { ulen = nla_len(nla[NFTA_SET_ELEM_USERDATA]); - if (ulen > 0) - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_USERDATA, - ulen); + if (ulen > 0) { + err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_USERDATA, + ulen); + if (err < 0) + goto err_parse_data; + } } err = -ENOMEM; @@ -6256,8 +6277,11 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, nft_set_ext_prepare(&tmpl); - if (flags != 0) - nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS); + if (flags != 0) { + err = nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS); + if (err < 0) + return err; + } if (nla[NFTA_SET_ELEM_KEY]) { err = nft_setelem_parse_key(ctx, set, &elem.key.val, @@ -6265,16 +6289,20 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, if (err < 0) return err; - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); + err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); + if (err < 0) + goto fail_elem; } if (nla[NFTA_SET_ELEM_KEY_END]) { err = nft_setelem_parse_key(ctx, set, &elem.key_end.val, nla[NFTA_SET_ELEM_KEY_END]); if (err < 0) - return err; + goto fail_elem; - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen); + err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen); + if (err < 0) + goto fail_elem_key_end; } err = -ENOMEM; @@ -6282,7 +6310,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, elem.key_end.val.data, NULL, 0, 0, GFP_KERNEL_ACCOUNT); if (elem.priv == NULL) - goto fail_elem; + goto fail_elem_key_end; ext = nft_set_elem_ext(set, elem.priv); if (flags) @@ -6306,6 +6334,8 @@ fail_ops: kfree(trans); fail_trans: kfree(elem.priv); +fail_elem_key_end: + nft_data_release(&elem.key_end.val, NFT_DATA_VALUE); fail_elem: nft_data_release(&elem.key.val, NFT_DATA_VALUE); return err; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 9bb4d3dcc994..ac366c99086f 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3533,7 +3533,7 @@ int tc_setup_action(struct flow_action *flow_action, struct tc_action *actions[], struct netlink_ext_ack *extack) { - int i, j, index, err = 0; + int i, j, k, index, err = 0; struct tc_action *act; BUILD_BUG_ON(TCA_ACT_HW_STATS_ANY != FLOW_ACTION_HW_STATS_ANY); @@ -3553,14 +3553,18 @@ int tc_setup_action(struct flow_action *flow_action, if (err) goto err_out_locked; - entry->hw_stats = tc_act_hw_stats(act->hw_stats); - entry->hw_index = act->tcfa_index; index = 0; err = tc_setup_offload_act(act, entry, &index, extack); - if (!err) - j += index; - else + if (err) goto err_out_locked; + + for (k = 0; k < index ; k++) { + entry[k].hw_stats = tc_act_hw_stats(act->hw_stats); + entry[k].hw_index = act->tcfa_index; + } + + j += index; + spin_unlock_bh(&act->tcfa_lock); } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 35928fefae33..1a094b087d88 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -358,7 +358,7 @@ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp) if (addr->v4.sin_addr.s_addr != htonl(INADDR_ANY) && ret != RTN_LOCAL && !sp->inet.freebind && - !net->ipv4.sysctl_ip_nonlocal_bind) + !READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind)) return 0; if (ipv6_only_sock(sctp_opt2sk(sp))) diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index c4d057b2941d..0bde36b56472 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ 
-2122,7 +2122,7 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc) init_waitqueue_head(&lgr->llc_flow_waiter); init_waitqueue_head(&lgr->llc_msg_waiter); mutex_init(&lgr->llc_conf_mutex); - lgr->llc_testlink_time = net->ipv4.sysctl_tcp_keepalive_time; + lgr->llc_testlink_time = READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time); } /* called after lgr was removed from lgr_list */ diff --git a/net/socket.c b/net/socket.c index 82af3882b876..ec9ef13a702e 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2359,25 +2359,20 @@ struct used_address { unsigned int name_len; }; -int __copy_msghdr_from_user(struct msghdr *kmsg, - struct user_msghdr __user *umsg, - struct sockaddr __user **save_addr, - struct iovec __user **uiov, size_t *nsegs) +int __copy_msghdr(struct msghdr *kmsg, + struct user_msghdr *msg, + struct sockaddr __user **save_addr) { - struct user_msghdr msg; ssize_t err; - if (copy_from_user(&msg, umsg, sizeof(*umsg))) - return -EFAULT; - kmsg->msg_control_is_user = true; kmsg->msg_get_inq = 0; - kmsg->msg_control_user = msg.msg_control; - kmsg->msg_controllen = msg.msg_controllen; - kmsg->msg_flags = msg.msg_flags; + kmsg->msg_control_user = msg->msg_control; + kmsg->msg_controllen = msg->msg_controllen; + kmsg->msg_flags = msg->msg_flags; - kmsg->msg_namelen = msg.msg_namelen; - if (!msg.msg_name) + kmsg->msg_namelen = msg->msg_namelen; + if (!msg->msg_name) kmsg->msg_namelen = 0; if (kmsg->msg_namelen < 0) @@ -2387,11 +2382,11 @@ int __copy_msghdr_from_user(struct msghdr *kmsg, kmsg->msg_namelen = sizeof(struct sockaddr_storage); if (save_addr) - *save_addr = msg.msg_name; + *save_addr = msg->msg_name; - if (msg.msg_name && kmsg->msg_namelen) { + if (msg->msg_name && kmsg->msg_namelen) { if (!save_addr) { - err = move_addr_to_kernel(msg.msg_name, + err = move_addr_to_kernel(msg->msg_name, kmsg->msg_namelen, kmsg->msg_name); if (err < 0) @@ -2402,13 +2397,11 @@ int __copy_msghdr_from_user(struct msghdr *kmsg, kmsg->msg_namelen = 0; } - if (msg.msg_iovlen > UIO_MAXIOV) + if (msg->msg_iovlen > UIO_MAXIOV) return -EMSGSIZE; kmsg->msg_iocb = NULL; kmsg->msg_ubuf = NULL; - *uiov = msg.msg_iov; - *nsegs = msg.msg_iovlen; return 0; } @@ -2420,8 +2413,10 @@ static int copy_msghdr_from_user(struct msghdr *kmsg, struct user_msghdr msg; ssize_t err; - err = __copy_msghdr_from_user(kmsg, umsg, save_addr, &msg.msg_iov, - &msg.msg_iovlen); + if (copy_from_user(&msg, umsg, sizeof(*umsg))) + return -EFAULT; + + err = __copy_msghdr(kmsg, &msg, save_addr); if (err) return err; diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index ec6f4b699a2b..879b9024678e 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -97,13 +97,16 @@ static void tls_device_queue_ctx_destruction(struct tls_context *ctx) unsigned long flags; spin_lock_irqsave(&tls_device_lock, flags); + if (unlikely(!refcount_dec_and_test(&ctx->refcount))) + goto unlock; + list_move_tail(&ctx->list, &tls_device_gc_list); /* schedule_work inside the spinlock * to make sure tls_device_down waits for that work. 
*/ schedule_work(&tls_device_gc_work); - +unlock: spin_unlock_irqrestore(&tls_device_lock, flags); } @@ -194,8 +197,7 @@ void tls_device_sk_destruct(struct sock *sk) clean_acked_data_disable(inet_csk(sk)); } - if (refcount_dec_and_test(&tls_ctx->refcount)) - tls_device_queue_ctx_destruction(tls_ctx); + tls_device_queue_ctx_destruction(tls_ctx); } EXPORT_SYMBOL_GPL(tls_device_sk_destruct); @@ -1419,9 +1421,9 @@ static struct notifier_block tls_dev_notifier = { .notifier_call = tls_dev_event, }; -void __init tls_device_init(void) +int __init tls_device_init(void) { - register_netdevice_notifier(&tls_dev_notifier); + return register_netdevice_notifier(&tls_dev_notifier); } void __exit tls_device_cleanup(void) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 2ffede463e4a..d80ab3d1764e 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -1048,7 +1048,12 @@ static int __init tls_register(void) if (err) return err; - tls_device_init(); + err = tls_device_init(); + if (err) { + unregister_pernet_subsys(&tls_proc_ops); + return err; + } + tcp_register_ulp(&tcp_tls_ulp_ops); return 0; diff --git a/net/wireless/sme.c b/net/wireless/sme.c index ff4d48fcbfb2..607a68911047 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -1031,7 +1031,8 @@ void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *bssid) { ASSERT_WDEV_LOCK(wdev); - if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION && + wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) return; if (WARN_ON(!wdev->current_bss) || diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index f1876ea61fdc..f1a0bab920a5 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2678,8 +2678,10 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family, *num_xfrms = 0; return 0; } - if (IS_ERR(pols[0])) + if (IS_ERR(pols[0])) { + *num_pols = 0; return PTR_ERR(pols[0]); + } *num_xfrms = pols[0]->xfrm_nr; @@ -2694,6 +2696,7 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family, if (pols[1]) { if (IS_ERR(pols[1])) { xfrm_pols_put(pols, *num_pols); + *num_pols = 0; return PTR_ERR(pols[1]); } (*num_pols)++; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 08564e0eef20..ccfb172eb5b8 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -2620,7 +2620,7 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload) int err; if (family == AF_INET && - xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc) + READ_ONCE(xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc)) x->props.flags |= XFRM_STATE_NOPMTUDISC; err = -EPROTONOSUPPORT; |
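The sketches below are minimal, self-contained user-space analogues of the patterns applied in the hunks above; every helper, constant, and name that does not appear in the diff itself is illustrative only, not the kernel's actual API. First, the tcp_input/tcp_ipv4/tcp_minisocks/tcp_output hunks that wrap sysctl reads in READ_ONCE(): the value is loaded exactly once into a local, so a concurrent sysctl write can neither tear the load nor make two comparisons in the same function disagree. A rough model, assuming READ_ONCE() behaves essentially like a volatile access:

        #include <stdio.h>

        /* Simplified stand-in for the kernel's READ_ONCE(): force a single,
         * untorn load through a volatile-qualified pointer. */
        #define READ_ONCE(x) (*(const volatile __typeof__(x) *)&(x))

        /* Values a sysctl writer may update at any time from another thread. */
        static int sysctl_tcp_syncookies = 1;
        static int sysctl_max_syn_backlog = 1024;

        static int queue_is_dangerously_full(int queue_len)
        {
                /* Snapshot each sysctl exactly once, then reuse the locals so
                 * every comparison below sees one consistent value. */
                int syncookies = READ_ONCE(sysctl_tcp_syncookies);
                int max_syn_backlog = READ_ONCE(sysctl_max_syn_backlog);

                return !syncookies &&
                       (max_syn_backlog - queue_len < (max_syn_backlog >> 2));
        }

        int main(void)
        {
                printf("full: %d\n", queue_is_dangerously_full(900));
                return 0;
        }

The same annotation recurs throughout the series for tcp_ecn, tcp_timestamps, tcp_window_scaling, tcp_sack, ip_default_ttl and the other ipv4 sysctls touched above.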
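The __tcp_mtu_to_mss() and tcp_mtu_probing() hunks clamp the computed MSS against sysctl floors that are now read once. A hypothetical simplification of that computation; the header sizes and the 48-byte floor mirror the usual kernel defaults, but treat them as assumptions here:

        #include <stdio.h>

        #define READ_ONCE(x) (*(const volatile __typeof__(x) *)&(x))

        static int sysctl_tcp_min_snd_mss = 48;        /* assumed default floor */

        /* Simplified mtu-to-mss: subtract fixed IPv4 + TCP header space from the
         * path MTU, then apply the sysctl floor via a single snapshotted read. */
        static int mtu_to_mss(int pmtu, int ext_hdr_len)
        {
                int mss = pmtu - 20 /* IPv4 */ - 20 /* TCP */ - ext_hdr_len;
                int floor = READ_ONCE(sysctl_tcp_min_snd_mss);

                return mss > floor ? mss : floor;
        }

        int main(void)
        {
                printf("%d\n", mtu_to_mss(1500, 0));    /* 1460 */
                printf("%d\n", mtu_to_mss(100, 60));    /* clamped to the floor */
                return 0;
        }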
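The tcp_recovery.c hunks read sysctl_tcp_recovery a single time and test individual TCP_RACK_* bits against that snapshot. A sketch of the bit-test pattern; the flag values are copied from include/net/tcp.h to the best of my knowledge and should be treated as assumptions:

        #include <stdio.h>

        #define READ_ONCE(x) (*(const volatile __typeof__(x) *)&(x))

        #define TCP_RACK_LOSS_DETECTION  0x1
        #define TCP_RACK_STATIC_REO_WND  0x2
        #define TCP_RACK_NO_DUPTHRESH    0x4

        static int sysctl_tcp_recovery = TCP_RACK_LOSS_DETECTION;

        static int use_dupthresh(void)
        {
                /* One annotated load of the flags word, then a plain bit test;
                 * a concurrent sysctl write cannot tear the value being tested. */
                return !(READ_ONCE(sysctl_tcp_recovery) & TCP_RACK_NO_DUPTHRESH);
        }

        int main(void)
        {
                printf("dupthresh in use: %d\n", use_dupthresh());
                return 0;
        }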
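tcp_write_timeout() and tcp_fastopen_synack_timer() keep using GCC's `a ?: b` extension: take the per-socket override when it is nonzero, otherwise fall back to the sysctl, which is now read with READ_ONCE(). A small sketch; the default of 6 matches the usual net.ipv4.tcp_syn_retries value, assumed here:

        #include <stdio.h>

        #define READ_ONCE(x) (*(const volatile __typeof__(x) *)&(x))

        static int sysctl_tcp_syn_retries = 6;

        /* Per-socket override (e.g. set with the TCP_SYNCNT socket option) wins;
         * otherwise the sysctl default is used.  `a ?: b` is GCC shorthand for
         * `a ? a : b` with `a` evaluated once. */
        static int effective_syn_retries(int per_socket_override)
        {
                return per_socket_override ?: READ_ONCE(sysctl_tcp_syn_retries);
        }

        int main(void)
        {
                printf("%d\n", effective_syn_retries(0));   /* sysctl default */
                printf("%d\n", effective_syn_retries(3));   /* socket override */
                return 0;
        }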
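The ip6_rcv_finish_core() hunk drops the early_demux function pointer fetched from inet6_protos[] and switches directly on the next-header value, gating each protocol on its own sysctl; removing that mutable pointer is also what lets the tcpv6/udpv6 inet6_protocol structs become const later in the diff. A sketch of the resulting control flow, with stand-in handler names:

        #include <stdio.h>

        #define READ_ONCE(x) (*(const volatile __typeof__(x) *)&(x))

        enum { IPPROTO_TCP = 6, IPPROTO_UDP = 17 };

        static int sysctl_ip_early_demux  = 1;
        static int sysctl_tcp_early_demux = 1;
        static int sysctl_udp_early_demux = 1;

        /* Hypothetical stand-ins for tcp_v6_early_demux()/udp_v6_early_demux(). */
        static void tcp_early_demux(void) { puts("tcp early demux"); }
        static void udp_early_demux(void) { puts("udp early demux"); }

        /* Direct switch on the next header instead of an indirect call through a
         * per-protocol table, with a per-protocol sysctl gate on each branch. */
        static void early_demux(int nexthdr)
        {
                if (!READ_ONCE(sysctl_ip_early_demux))
                        return;

                switch (nexthdr) {
                case IPPROTO_TCP:
                        if (READ_ONCE(sysctl_tcp_early_demux))
                                tcp_early_demux();
                        break;
                case IPPROTO_UDP:
                        if (READ_ONCE(sysctl_udp_early_demux))
                                udp_early_demux();
                        break;
                }
        }

        int main(void)
        {
                early_demux(IPPROTO_TCP);
                return 0;
        }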
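The seg6_iptunnel.c hunks set hdr->payload_len before skb_postpush_rcsum() folds the pushed headers into the packet checksum; the assignments removed from seg6_do_srh() and seg6_local.c ran after that fold, so the checksum covered a stale length. A toy demonstration of why the order matters; the 16-bit sum below is only a stand-in for the real checksum helpers:

        #include <stdio.h>
        #include <stdint.h>
        #include <string.h>
        #include <arpa/inet.h>  /* htons() */

        /* Toy 16-bit sum standing in for skb_postpush_rcsum(). */
        static uint32_t sum16(const void *buf, size_t len)
        {
                const uint8_t *p = buf;
                uint32_t sum = 0;

                while (len >= 2) {
                        sum += (uint32_t)(p[0] << 8 | p[1]);
                        p += 2;
                        len -= 2;
                }
                return sum;
        }

        struct toy_ipv6hdr {
                uint16_t payload_len;   /* the field the fix reorders */
                uint8_t  rest[6];       /* placeholder for the remaining bytes */
        };

        int main(void)
        {
                struct toy_ipv6hdr hdr;

                memset(&hdr, 0, sizeof(hdr));

                /* Wrong order (what the removed assignments amounted to): the
                 * checksum is computed first and the length filled in later, so
                 * the checksum still covers a zero payload_len. */
                uint32_t stale = sum16(&hdr, sizeof(hdr));
                hdr.payload_len = htons(1280 - 40);

                /* Fixed order from the hunks above: length first, then checksum. */
                uint32_t good = sum16(&hdr, sizeof(hdr));

                printf("stale=0x%04x good=0x%04x\n", stale, good);
                return 0;
        }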
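The mac80211 iface.c hunk clears SDATA_STATE_RUNNING while holding local->fq.lock, and the util.c hunk makes __ieee80211_wake_txqs() bail out if the bit is clear under that same lock, so a waker cannot observe a half-torn-down interface. A pthread sketch of the same reader/writer discipline (build with -pthread):

        #include <pthread.h>
        #include <stdio.h>

        static pthread_mutex_t fq_lock = PTHREAD_MUTEX_INITIALIZER;
        static int sdata_running = 1;

        static void do_stop(void)
        {
                pthread_mutex_lock(&fq_lock);
                sdata_running = 0;              /* cleared under the readers' lock */
                pthread_mutex_unlock(&fq_lock);
        }

        static void wake_txqs(void)
        {
                pthread_mutex_lock(&fq_lock);
                if (!sdata_running) {           /* interface is going down, bail */
                        pthread_mutex_unlock(&fq_lock);
                        return;
                }
                puts("waking queues");
                pthread_mutex_unlock(&fq_lock);
        }

        int main(void)
        {
                wake_txqs();
                do_stop();
                wake_txqs();
                return 0;
        }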
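Three mac80211 tx.c hunks replace open-coded skb_shared()/skb_clone()/kfree_skb() sequences with skb_share_check(), and ieee80211_8023_xmit() now only touches the skb control block after that check. A reduced model of what the helper does, assuming a plain user count in place of skb->users:

        #include <stdio.h>
        #include <stdlib.h>
        #include <string.h>

        /* Very reduced model of an skb: a user count plus some payload. */
        struct buf {
                int users;
                char data[32];
        };

        static struct buf *buf_clone(const struct buf *b)
        {
                struct buf *c = malloc(sizeof(*c));

                if (c) {
                        memcpy(c, b, sizeof(*c));
                        c->users = 1;           /* the clone starts private */
                }
                return c;
        }

        static void buf_put(struct buf *b)
        {
                if (--b->users == 0)
                        free(b);
        }

        /* Rough equivalent of skb_share_check(): if the buffer is shared, take a
         * private clone and drop our reference on the original; otherwise return
         * it unchanged.  NULL means allocation failure, which the callers in the
         * hunks above turn into -ENOMEM. */
        static struct buf *buf_share_check(struct buf *b)
        {
                if (b->users > 1) {
                        struct buf *clone = buf_clone(b);

                        buf_put(b);
                        return clone;
                }
                return b;
        }

        int main(void)
        {
                struct buf *orig = malloc(sizeof(*orig));
                struct buf *mine;

                if (!orig)
                        return 1;
                orig->users = 2;        /* pretend another path holds a reference */
                strcpy(orig->data, "hello");

                mine = buf_share_check(orig);
                if (!mine)
                        return 1;
                printf("private copy: \"%s\" (users=%d)\n", mine->data, mine->users);

                buf_put(mine);          /* our clone */
                buf_put(orig);          /* the other holder's reference */
                return 0;
        }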
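The nf_conntrack hunks add smp_acquire__after_ctrl_dep() after each successful refcount_inc_not_zero() and an smp_wmb() before refcount_set(..., 1), because conntrack objects live in a SLAB_TYPESAFE_BY_RCU cache and can be recycled under a reader. A C11-atomics sketch of that producer/consumer pairing; field names are illustrative:

        #include <stdatomic.h>
        #include <stdio.h>

        struct entry {
                int status;             /* payload fields */
                atomic_int refcnt;      /* 0 means free or mid-recycle */
        };

        /* Consumer: conditional increment, then an acquire fence so the payload
         * reads that follow cannot be satisfied with values from the object's
         * previous life (the role smp_acquire__after_ctrl_dep() plays above). */
        static int get_if_live(struct entry *e)
        {
                int old = atomic_load_explicit(&e->refcnt, memory_order_relaxed);

                do {
                        if (old == 0)
                                return 0;       /* freed or being recycled, skip */
                } while (!atomic_compare_exchange_weak_explicit(&e->refcnt, &old,
                                                                old + 1,
                                                                memory_order_relaxed,
                                                                memory_order_relaxed));
                atomic_thread_fence(memory_order_acquire);
                return 1;
        }

        /* Producer: publish the payload before the refcount goes nonzero,
         * mirroring the smp_wmb() added before refcount_set(..., 1). */
        static void init_and_publish(struct entry *e)
        {
                e->status = 42;
                atomic_thread_fence(memory_order_release);
                atomic_store_explicit(&e->refcnt, 1, memory_order_relaxed);
        }

        int main(void)
        {
                struct entry e;

                e.status = 0;
                atomic_init(&e.refcnt, 0);

                init_and_publish(&e);
                if (get_if_live(&e))
                        printf("status=%d refcnt=%d\n", e.status,
                               (int)atomic_load(&e.refcnt));
                return 0;
        }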
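In nf_tables_api.c, nft_set_ext_add() and nft_set_ext_add_length() can now fail, so every call site checks the return value and unwinds through backwards goto labels, including the new fail_elem_key_end label that releases the key-end value. A sketch of that calling convention with hypothetical helpers:

        #include <stdio.h>

        /* Stand-in for a template helper that used to be void and can now fail
         * (returning a negative errno, e.g. when the extension area overflows). */
        static int ext_add(const char *what)
        {
                printf("adding %s\n", what);
                return 0;
        }

        static int add_elem(int have_key, int have_key_end)
        {
                int err;

                if (have_key) {
                        err = ext_add("key");
                        if (err < 0)
                                return err;
                }
                if (have_key_end) {
                        err = ext_add("key end");
                        if (err < 0)
                                goto err_release_key;
                }
                err = ext_add("expiration");
                if (err < 0)
                        goto err_release_key_end;

                return 0;

        /* Labels unwind in reverse acquisition order and fall through, the same
         * shape as the fail_elem_key_end/fail_elem chain in the hunks above. */
        err_release_key_end:
                printf("release key end\n");
        err_release_key:
                printf("release key\n");
                return err;
        }

        int main(void)
        {
                return add_elem(1, 1) ? 1 : 0;
        }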
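The cls_api.c hunk moves the hw_stats/hw_index assignments into a loop: tc_setup_offload_act() may expand one tc action into several flow entries, and the shared attributes have to be copied into every entry produced, not just the first. A sketch with a made-up expansion helper:

        #include <stdio.h>

        struct flow_entry {
                int hw_stats;
                int hw_index;
        };

        /* Hypothetical stand-in for tc_setup_offload_act(): one action may expand
         * into several flow entries and reports how many it filled in. */
        static int expand_action(struct flow_entry *entries, int *count)
        {
                entries[0] = entries[1] = (struct flow_entry){0};
                *count = 2;     /* e.g. one action split into two hardware edits */
                return 0;
        }

        int main(void)
        {
                struct flow_entry entries[8];
                int index = 0, k;

                if (expand_action(entries, &index))
                        return 1;

                /* Shared attributes are copied into every expanded entry. */
                for (k = 0; k < index; k++) {
                        entries[k].hw_stats = 3;    /* tc_act_hw_stats(...) */
                        entries[k].hw_index = 7;    /* act->tcfa_index */
                }

                for (k = 0; k < index; k++)
                        printf("entry %d: hw_stats=%d hw_index=%d\n",
                               k, entries[k].hw_stats, entries[k].hw_index);
                return 0;
        }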
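The net/socket.c and net/compat.c hunks split the msghdr import in two: the caller performs the single copy_from_user() of the user header, and __copy_msghdr()/__get_compat_msghdr() validate a structure that is already in kernel memory, which is what lets io_uring reuse them. A user-space analogue of that split; memcpy stands in for copy_from_user, and the errno values and the 1024 iovec limit are assumptions:

        #include <stdio.h>
        #include <string.h>

        struct user_msghdr {
                void  *msg_name;
                int    msg_namelen;
                size_t msg_iovlen;
        };

        struct msghdr {
                int    msg_namelen;
                size_t msg_iovlen;
        };

        /* Analogue of the reworked __copy_msghdr(): operates on a header that is
         * already in kernel memory, so callers can do the copy themselves. */
        static int copy_msghdr(struct msghdr *kmsg, const struct user_msghdr *msg)
        {
                kmsg->msg_namelen = msg->msg_name ? msg->msg_namelen : 0;
                if (kmsg->msg_namelen < 0)
                        return -22;             /* -EINVAL on Linux */
                if (msg->msg_iovlen > 1024)
                        return -90;             /* -EMSGSIZE, UIO_MAXIOV limit */
                kmsg->msg_iovlen = msg->msg_iovlen;
                return 0;
        }

        int main(void)
        {
                struct user_msghdr umsg = { .msg_name = NULL, .msg_namelen = 16,
                                            .msg_iovlen = 1 };
                struct user_msghdr copy;
                struct msghdr kmsg;

                /* The caller does the single "copy_from_user" up front... */
                memcpy(&copy, &umsg, sizeof(copy));
                /* ...and the helper only ever sees the kernel-side copy. */
                if (copy_msghdr(&kmsg, &copy) == 0)
                        printf("namelen=%d iovlen=%zu\n", kmsg.msg_namelen,
                               (size_t)kmsg.msg_iovlen);
                return 0;
        }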
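Finally, tls_device_init() now returns the result of register_netdevice_notifier(), and tls_register() unregisters the pernet subsystem it had already set up when that fails instead of leaving the module half-initialised. A sketch of the error-propagation shape with stub registration functions (the failure is simulated to show the unwind path):

        #include <stdio.h>

        /* Hypothetical stand-ins for the real registration calls; either may
         * fail with a negative errno. */
        static int register_notifier(void)          { return -1; /* simulated */ }
        static int register_pernet_subsys(void)     { return 0; }
        static void unregister_pernet_subsys(void)  { puts("pernet unregistered"); }

        /* tls_device_init() now propagates the notifier registration result... */
        static int tls_device_init(void)
        {
                return register_notifier();
        }

        /* ...so tls_register() can unwind what it already set up on failure. */
        static int tls_register(void)
        {
                int err = register_pernet_subsys();

                if (err)
                        return err;

                err = tls_device_init();
                if (err) {
                        unregister_pernet_subsys();
                        return err;
                }
                return 0;
        }

        int main(void)
        {
                printf("tls_register() -> %d\n", tls_register());
                return 0;
        }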