Merge branch 'net-three-additions-to-net_hotdata'

Eric Dumazet says: ==================== net: three additions to net_hotdata This series moves three fast path sysctls to net_hotdata. To avoid <net/hotdata.h> inclusion from <net/sock.h>, create <net/proto_memory.h> to hold proto memory definitions. ==================== Link: https://lore.kernel.org/r/20240429134025.1233626-1-edumazet@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
author: Jakub Kicinski <kuba@kernel.org> 2024-04-30 18:46:54 -0700
committer: Jakub Kicinski <kuba@kernel.org> 2024-04-30 18:46:55 -0700
commit: e7b1b0786f5c1c74e60b1d66d2ffb325f8ba9ed6 (patch)
tree: 6cc12ade57310fb99b3a64b72516bee33488c1b3
parent: 05d6d492097c55f2d153fc3fd33cbe78e1e28e0a (diff)
parent: c204fef97ee62ca3310f43d12bbda4eb10266c7c (diff)
17 files changed, 117 insertions, 105 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index adf75d69770c..36b133f04d30 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -353,8 +353,6 @@ struct sk_buff;
 
 #define MAX_SKB_FRAGS CONFIG_MAX_SKB_FRAGS
 
-extern int sysctl_max_skb_frags;
-
 /* Set skb_shinfo(skb)->gso_size to this in case you want skb_segment to
  * segment using its current segmentation instead.
  */
diff --git a/include/net/hotdata.h b/include/net/hotdata.h
index 003667a1efd6..30e9570beb2a 100644
--- a/include/net/hotdata.h
+++ b/include/net/hotdata.h
@@ -38,6 +38,9 @@ struct net_hotdata {
 	int			max_backlog;
 	int			dev_tx_weight;
 	int			dev_rx_weight;
+	int			sysctl_max_skb_frags;
+	int			sysctl_skb_defer_max;
+	int			sysctl_mem_pcpu_rsv;
 };
 
 #define inet_ehash_secret	net_hotdata.tcp_protocol.secret
diff --git a/include/net/proto_memory.h b/include/net/proto_memory.h
new file mode 100644
index 000000000000..a6ab2f4f5e28
--- /dev/null
+++ b/include/net/proto_memory.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _PROTO_MEMORY_H
+#define _PROTO_MEMORY_H
+
+#include <net/sock.h>
+#include <net/hotdata.h>
+
+/* 1 MB per cpu, in page units */
+#define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT))
+
+static inline bool sk_has_memory_pressure(const struct sock *sk)
+{
+	return sk->sk_prot->memory_pressure != NULL;
+}
+
+static inline bool
+proto_memory_pressure(const struct proto *prot)
+{
+	if (!prot->memory_pressure)
+		return false;
+	return !!READ_ONCE(*prot->memory_pressure);
+}
+
+static inline bool sk_under_global_memory_pressure(const struct sock *sk)
+{
+	return proto_memory_pressure(sk->sk_prot);
+}
+
+static inline bool sk_under_memory_pressure(const struct sock *sk)
+{
+	if (!sk->sk_prot->memory_pressure)
+		return false;
+
+	if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
+	    mem_cgroup_under_socket_pressure(sk->sk_memcg))
+		return true;
+
+	return !!READ_ONCE(*sk->sk_prot->memory_pressure);
+}
+
+static inline long
+proto_memory_allocated(const struct proto *prot)
+{
+	return max(0L, atomic_long_read(prot->memory_allocated));
+}
+
+static inline long
+sk_memory_allocated(const struct sock *sk)
+{
+	return proto_memory_allocated(sk->sk_prot);
+}
+
+static inline void proto_memory_pcpu_drain(struct proto *proto)
+{
+	int val = this_cpu_xchg(*proto->per_cpu_fw_alloc, 0);
+
+	if (val)
+		atomic_long_add(val, proto->memory_allocated);
+}
+
+static inline void
+sk_memory_allocated_add(const struct sock *sk, int val)
+{
+	struct proto *proto = sk->sk_prot;
+
+	val = this_cpu_add_return(*proto->per_cpu_fw_alloc, val);
+
+	if (unlikely(val >= READ_ONCE(net_hotdata.sysctl_mem_pcpu_rsv)))
+		proto_memory_pcpu_drain(proto);
+}
+
+static inline void
+sk_memory_allocated_sub(const struct sock *sk, int val)
+{
+	struct proto *proto = sk->sk_prot;
+
+	val = this_cpu_sub_return(*proto->per_cpu_fw_alloc, val);
+
+	if (unlikely(val <= -READ_ONCE(net_hotdata.sysctl_mem_pcpu_rsv)))
+		proto_memory_pcpu_drain(proto);
+}
+
+#endif /* _PROTO_MEMORY_H */
diff --git a/include/net/sock.h b/include/net/sock.h
index 48bcc845202f..0450494a1766 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1371,75 +1371,6 @@ static inline int sk_under_cgroup_hierarchy(struct sock *sk,
 #endif
 }
 
-static inline bool sk_has_memory_pressure(const struct sock *sk)
-{
-	return sk->sk_prot->memory_pressure != NULL;
-}
-
-static inline bool sk_under_global_memory_pressure(const struct sock *sk)
-{
-	return sk->sk_prot->memory_pressure &&
-		!!READ_ONCE(*sk->sk_prot->memory_pressure);
-}
-
-static inline bool sk_under_memory_pressure(const struct sock *sk)
-{
-	if (!sk->sk_prot->memory_pressure)
-		return false;
-
-	if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
-	    mem_cgroup_under_socket_pressure(sk->sk_memcg))
-		return true;
-
-	return !!READ_ONCE(*sk->sk_prot->memory_pressure);
-}
-
-static inline long
-proto_memory_allocated(const struct proto *prot)
-{
-	return max(0L, atomic_long_read(prot->memory_allocated));
-}
-
-static inline long
-sk_memory_allocated(const struct sock *sk)
-{
-	return proto_memory_allocated(sk->sk_prot);
-}
-
-/* 1 MB per cpu, in page units */
-#define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT))
-extern int sysctl_mem_pcpu_rsv;
-
-static inline void proto_memory_pcpu_drain(struct proto *proto)
-{
-	int val = this_cpu_xchg(*proto->per_cpu_fw_alloc, 0);
-
-	if (val)
-		atomic_long_add(val, proto->memory_allocated);
-}
-
-static inline void
-sk_memory_allocated_add(const struct sock *sk, int val)
-{
-	struct proto *proto = sk->sk_prot;
-
-	val = this_cpu_add_return(*proto->per_cpu_fw_alloc, val);
-
-	if (unlikely(val >= READ_ONCE(sysctl_mem_pcpu_rsv)))
-		proto_memory_pcpu_drain(proto);
-}
-
-static inline void
-sk_memory_allocated_sub(const struct sock *sk, int val)
-{
-	struct proto *proto = sk->sk_prot;
-
-	val = this_cpu_sub_return(*proto->per_cpu_fw_alloc, val);
-
-	if (unlikely(val <= -READ_ONCE(sysctl_mem_pcpu_rsv)))
-		proto_memory_pcpu_drain(proto);
-}
-
 #define SK_ALLOC_PERCPU_COUNTER_BATCH 16
 
 static inline void sk_sockets_allocated_dec(struct sock *sk)
@@ -1466,15 +1397,6 @@ proto_sockets_allocated_sum_positive(struct proto *prot)
 	return percpu_counter_sum_positive(prot->sockets_allocated);
 }
 
-static inline bool
-proto_memory_pressure(struct proto *prot)
-{
-	if (!prot->memory_pressure)
-		return false;
-	return !!READ_ONCE(*prot->memory_pressure);
-}
-
-
 #ifdef CONFIG_PROC_FS
 #define PROTO_INUSE_NR	64	/* should be enough for the first time */
 struct prot_inuse {
diff --git a/include/net/tcp.h b/include/net/tcp.h
index fe98fb01879b..0a51e6a45bce 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -296,14 +296,6 @@ static inline bool between(__u32 seq1, __u32 seq2, __u32 seq3)
 	return seq3 - seq2 >= seq1 - seq2;
 }
 
-static inline bool tcp_out_of_memory(struct sock *sk)
-{
-	if (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
-	    sk_memory_allocated(sk) > sk_prot_mem_limits(sk, 2))
-		return true;
-	return false;
-}
-
 static inline void tcp_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
 {
 	sk_wmem_queued_add(sk, -skb->truesize);
@@ -316,7 +308,7 @@ static inline void tcp_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
 
 void sk_forced_mem_schedule(struct sock *sk, int size);
 
-bool tcp_check_oom(struct sock *sk, int shift);
+bool tcp_check_oom(const struct sock *sk, int shift);
 
 
 extern struct proto tcp_prot;
diff --git a/net/core/dev.c b/net/core/dev.c
index c9e59eff8ec8..cd7ba50eac15 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4450,7 +4450,6 @@ EXPORT_SYMBOL(__dev_direct_xmit);
  *************************************************************************/
 static DEFINE_PER_CPU(struct task_struct *, backlog_napi);
 
-unsigned int sysctl_skb_defer_max __read_mostly = 64;
 int weight_p __read_mostly = 64;           /* old backlog weight */
 int dev_weight_rx_bias __read_mostly = 1;  /* bias for backlog weight */
 int dev_weight_tx_bias __read_mostly = 1;  /* bias for output_queue quota */
diff --git a/net/core/dev.h b/net/core/dev.h
index 8572d2c8dc4a..b7b518bc2be5 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -36,7 +36,6 @@ int dev_addr_init(struct net_device *dev);
 void dev_addr_check(struct net_device *dev);
 
 /* sysctls not referred to from outside net/core/ */
-extern unsigned int	sysctl_skb_defer_max;
 extern int		netdev_unregister_timeout_secs;
 extern int		weight_p;
 extern int		dev_weight_rx_bias;
diff --git a/net/core/hotdata.c b/net/core/hotdata.c
index c8a7a451c18a..d0aaaaa556f2 100644
--- a/net/core/hotdata.c
+++ b/net/core/hotdata.c
@@ -1,9 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
-#include <net/hotdata.h>
 #include <linux/cache.h>
 #include <linux/jiffies.h>
 #include <linux/list.h>
-
+#include <net/hotdata.h>
+#include <net/proto_memory.h>
 
 struct net_hotdata net_hotdata __cacheline_aligned = {
 	.offload_base = LIST_HEAD_INIT(net_hotdata.offload_base),
@@ -18,5 +18,8 @@ struct net_hotdata net_hotdata __cacheline_aligned = {
 	.max_backlog = 1000,
 	.dev_tx_weight = 64,
 	.dev_rx_weight = 64,
+	.sysctl_max_skb_frags = MAX_SKB_FRAGS,
+	.sysctl_skb_defer_max = 64,
+	.sysctl_mem_pcpu_rsv = SK_MEMORY_PCPU_RESERVE
 };
 EXPORT_SYMBOL(net_hotdata);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 0c8b82750000..5f382e94b4d1 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -109,9 +109,6 @@ static struct kmem_cache *skbuff_ext_cache __ro_after_init;
 #define SKB_SMALL_HEAD_HEADROOM						\
 	SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE)
 
-int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
-EXPORT_SYMBOL(sysctl_max_skb_frags);
-
 /* kcm_write_msgs() relies on casting paged frags to bio_vec to use
  * iov_iter_bvec(). These static asserts ensure the cast is valid is long as the
  * netmem is a page.
@@ -6988,7 +6985,7 @@ nodefer:	kfree_skb_napi_cache(skb);
 	DEBUG_NET_WARN_ON_ONCE(skb->destructor);
 
 	sd = &per_cpu(softnet_data, cpu);
-	defer_max = READ_ONCE(sysctl_skb_defer_max);
+	defer_max = READ_ONCE(net_hotdata.sysctl_skb_defer_max);
 	if (READ_ONCE(sd->defer_count) >= defer_max)
 		goto nodefer;
 
@@ -7040,7 +7037,7 @@ static void skb_splice_csum_page(struct sk_buff *skb, struct page *page,
 ssize_t skb_splice_from_iter(struct sk_buff *skb, struct iov_iter *iter,
 			     ssize_t maxsize, gfp_t gfp)
 {
-	size_t frag_limit = READ_ONCE(sysctl_max_skb_frags);
+	size_t frag_limit = READ_ONCE(net_hotdata.sysctl_max_skb_frags);
 	struct page *pages[8], **ppages = pages;
 	ssize_t spliced = 0, ret = 0;
 	unsigned int i;
diff --git a/net/core/sock.c b/net/core/sock.c
index fe9195186c13..8d6e638b5426 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -127,6 +127,7 @@
 #include <net/net_namespace.h>
 #include <net/request_sock.h>
 #include <net/sock.h>
+#include <net/proto_memory.h>
 #include <linux/net_tstamp.h>
 #include <net/xfrm.h>
 #include <linux/ipsec.h>
@@ -283,7 +284,6 @@ __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
 EXPORT_SYMBOL(sysctl_rmem_max);
 __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
 __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
-int sysctl_mem_pcpu_rsv __read_mostly = SK_MEMORY_PCPU_RESERVE;
 
 int sysctl_tstamp_allow_data __read_mostly = 1;
 
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 903ab4a51c17..6da5995ac86a 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -24,6 +24,7 @@
 #include <net/busy_poll.h>
 #include <net/pkt_sched.h>
 #include <net/hotdata.h>
+#include <net/proto_memory.h>
 #include <net/rps.h>
 
 #include "dev.h"
@@ -415,7 +416,7 @@ static struct ctl_table net_core_table[] = {
 	},
 	{
 		.procname	= "mem_pcpu_rsv",
-		.data		= &sysctl_mem_pcpu_rsv,
+		.data		= &net_hotdata.sysctl_mem_pcpu_rsv,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
@@ -595,7 +596,7 @@ static struct ctl_table net_core_table[] = {
 	},
 	{
 		.procname	= "max_skb_frags",
-		.data		= &sysctl_max_skb_frags,
+		.data		= &net_hotdata.sysctl_max_skb_frags,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
@@ -654,7 +655,7 @@ static struct ctl_table net_core_table[] = {
 	},
 	{
 		.procname	= "skb_defer_max",
-		.data		= &sysctl_skb_defer_max,
+		.data		= &net_hotdata.sysctl_skb_defer_max,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 914bc9c35cc7..6c4664c681ca 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -33,6 +33,7 @@
 #include <net/protocol.h>
 #include <net/tcp.h>
 #include <net/mptcp.h>
+#include <net/proto_memory.h>
 #include <net/udp.h>
 #include <net/udplite.h>
 #include <linux/bottom_half.h>
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4ec0f4feee00..e1f0efbb29d6 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -272,6 +272,7 @@
 #include <net/inet_common.h>
 #include <net/tcp.h>
 #include <net/mptcp.h>
+#include <net/proto_memory.h>
 #include <net/xfrm.h>
 #include <net/ip.h>
 #include <net/sock.h>
@@ -280,6 +281,7 @@
 #include <linux/uaccess.h>
 #include <asm/ioctls.h>
 #include <net/busy_poll.h>
+#include <net/hotdata.h>
 #include <net/rps.h>
 
 /* Track pending CMSGs. */
@@ -1188,7 +1190,7 @@ new_segment:
 
 			if (!skb_can_coalesce(skb, i, pfrag->page,
 					      pfrag->offset)) {
-				if (i >= READ_ONCE(sysctl_max_skb_frags)) {
+				if (i >= READ_ONCE(net_hotdata.sysctl_max_skb_frags)) {
 					tcp_mark_push(tp, skb);
 					goto new_segment;
 				}
@@ -2751,7 +2753,15 @@ static bool tcp_too_many_orphans(int shift)
 		READ_ONCE(sysctl_tcp_max_orphans);
 }
 
-bool tcp_check_oom(struct sock *sk, int shift)
+static bool tcp_out_of_memory(const struct sock *sk)
+{
+	if (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
+	    sk_memory_allocated(sk) > sk_prot_mem_limits(sk, 2))
+		return true;
+	return false;
+}
+
+bool tcp_check_oom(const struct sock *sk, int shift)
 {
 	bool too_many_orphans, out_of_socket_memory;
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 53e1150f706f..ad8fa129fcfe 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -72,6 +72,7 @@
 #include <linux/prefetch.h>
 #include <net/dst.h>
 #include <net/tcp.h>
+#include <net/proto_memory.h>
 #include <net/inet_common.h>
 #include <linux/ipsec.h>
 #include <asm/unaligned.h>
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ea7ad7d99245..57edf66ff91b 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -39,6 +39,7 @@
 
 #include <net/tcp.h>
 #include <net/mptcp.h>
+#include <net/proto_memory.h>
 
 #include <linux/compiler.h>
 #include <linux/gfp.h>
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 4b13ca362efa..aff17597e6a7 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -20,6 +20,7 @@
 #include <net/transp_v6.h>
 #endif
 #include <net/mptcp.h>
+#include <net/hotdata.h>
 #include <net/xfrm.h>
 #include <asm/ioctls.h>
 #include "protocol.h"
@@ -1272,7 +1273,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
 
 		i = skb_shinfo(skb)->nr_frags;
 		can_coalesce = skb_can_coalesce(skb, i, dfrag->page, offset);
-		if (!can_coalesce && i >= READ_ONCE(sysctl_max_skb_frags)) {
+		if (!can_coalesce && i >= READ_ONCE(net_hotdata.sysctl_max_skb_frags)) {
 			tcp_mark_push(tcp_sk(ssk), skb);
 			goto alloc_skb;
 		}
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 08fdf1251f46..5adf0c0a6c1a 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -38,6 +38,7 @@
 #include <linux/inet.h>
 #include <linux/slab.h>
 #include <net/sock.h>
+#include <net/proto_memory.h>
 #include <net/inet_ecn.h>
 #include <linux/skbuff.h>
 #include <net/sctp/sctp.h>
author	Jakub Kicinski <kuba@kernel.org>	2024-04-30 18:46:54 -0700
committer	Jakub Kicinski <kuba@kernel.org>	2024-04-30 18:46:55 -0700
commit	e7b1b0786f5c1c74e60b1d66d2ffb325f8ba9ed6 (patch)
tree	6cc12ade57310fb99b3a64b72516bee33488c1b3
parent	05d6d492097c55f2d153fc3fd33cbe78e1e28e0a (diff)
parent	c204fef97ee62ca3310f43d12bbda4eb10266c7c (diff)